]>
Commit | Line | Data |
---|---|---|
1 | <?php | |
2 | ||
3 | namespace Tests\Wallabag\CoreBundle\Helper; | |
4 | ||
5 | use Graby\Graby; | |
6 | use Monolog\Handler\TestHandler; | |
7 | use Monolog\Logger; | |
8 | use PHPUnit\Framework\TestCase; | |
9 | use Psr\Log\NullLogger; | |
10 | use Symfony\Component\Validator\ConstraintViolation; | |
11 | use Symfony\Component\Validator\ConstraintViolationList; | |
12 | use Symfony\Component\Validator\Validator\RecursiveValidator; | |
13 | use Wallabag\CoreBundle\Entity\Entry; | |
14 | use Wallabag\CoreBundle\Helper\ContentProxy; | |
15 | use Wallabag\CoreBundle\Helper\RuleBasedTagger; | |
16 | use Wallabag\UserBundle\Entity\User; | |
17 | ||
18 | class ContentProxyTest extends TestCase | |
19 | { | |
20 | private $fetchingErrorMessage = 'wallabag can\'t retrieve contents for this article. Please <a href="http://doc.wallabag.org/en/user/errors_during_fetching.html#how-can-i-help-to-fix-that">troubleshoot this issue</a>.'; | |
21 | ||
/**
 * With Graby stubbed to return no HTML and empty metadata, updating an entry
 * from a malformed URL keeps that URL, stores the fetching error message as
 * content and leaves the domain name null.
 */
public function testWithBadUrl()
{
    $badUrl = 'http://user@:80';
    $emptyFetch = [
        'html' => false,
        'title' => '',
        'url' => '',
        'content_type' => '',
        'language' => '',
    ];

    // The tagger must be invoked exactly once, even when fetching fails.
    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())->method('tag');

    $grabyStub = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyStub->expects($this->any())
        ->method('fetchContent')
        ->willReturn($emptyFetch);

    $contentProxy = new ContentProxy(
        $grabyStub,
        $ruleTagger,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, $badUrl);

    self::assertSame($badUrl, $entry->getUrl());
    self::assertEmpty($entry->getTitle());
    self::assertSame($this->fetchingErrorMessage, $entry->getContent());
    self::assertEmpty($entry->getPreviewPicture());
    self::assertEmpty($entry->getMimetype());
    self::assertEmpty($entry->getLanguage());
    self::assertSame(0.0, $entry->getReadingTime());
    self::assertNull($entry->getDomainName());
}
56 | ||
/**
 * With Graby stubbed to return no HTML and empty metadata, the entry keeps the
 * requested URL, gets the fetching error message as content and still has a
 * domain name matching the requested URL's host.
 */
public function testWithEmptyContent()
{
    $requestedUrl = 'http://0.0.0.0';
    $emptyFetch = [
        'html' => false,
        'title' => '',
        'url' => '',
        'content_type' => '',
        'language' => '',
    ];

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())->method('tag');

    $grabyStub = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyStub->expects($this->any())
        ->method('fetchContent')
        ->willReturn($emptyFetch);

    $contentProxy = new ContentProxy(
        $grabyStub,
        $ruleTagger,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, $requestedUrl);

    self::assertSame($requestedUrl, $entry->getUrl());
    self::assertEmpty($entry->getTitle());
    self::assertSame($this->fetchingErrorMessage, $entry->getContent());
    self::assertEmpty($entry->getPreviewPicture());
    self::assertEmpty($entry->getMimetype());
    self::assertEmpty($entry->getLanguage());
    self::assertSame(0.0, $entry->getReadingTime());
    self::assertSame('0.0.0.0', $entry->getDomainName());
}
91 | ||
/**
 * When no HTML is fetched but Open Graph data is present, the entry title is
 * taken from the OG title and the OG description is appended to the fetching
 * error message.
 */
public function testWithEmptyContentButOG()
{
    $requestedUrl = 'http://domain.io';
    $fetchedWithOpenGraph = [
        'html' => false,
        'title' => '',
        'url' => '',
        'content_type' => '',
        'language' => '',
        'status' => '',
        'open_graph' => [
            'og_title' => 'my title',
            'og_description' => 'desc',
        ],
    ];

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())->method('tag');

    $grabyStub = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyStub->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchedWithOpenGraph);

    $contentProxy = new ContentProxy(
        $grabyStub,
        $ruleTagger,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, $requestedUrl);

    $expectedContent = $this->fetchingErrorMessage . '<p><i>But we found a short description: </i></p>desc';

    self::assertSame($requestedUrl, $entry->getUrl());
    self::assertSame('my title', $entry->getTitle());
    self::assertSame($expectedContent, $entry->getContent());
    self::assertEmpty($entry->getPreviewPicture());
    self::assertEmpty($entry->getLanguage());
    self::assertEmpty($entry->getHttpStatus());
    self::assertEmpty($entry->getMimetype());
    self::assertSame(0.0, $entry->getReadingTime());
    self::assertSame('domain.io', $entry->getDomainName());
}
132 | ||
/**
 * A fully populated fetch result (HTML, title, URL, mimetype, language,
 * status and an OG image) is copied onto the entry; the URL and domain come
 * from the fetched result, not from the URL passed to updateEntry().
 */
public function testWithContent()
{
    $fetched = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'status' => '200',
        'open_graph' => [
            'og_title' => 'my OG title',
            'og_description' => 'OG desc',
            'og_image' => 'http://3.3.3.3/cover.jpg',
        ],
    ];

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())->method('tag');

    $grabyStub = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyStub->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyStub,
        $ruleTagger,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    self::assertSame('http://1.1.1.1', $entry->getUrl());
    self::assertSame('this is my title', $entry->getTitle());
    self::assertContains('content', $entry->getContent());
    self::assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
    self::assertSame('text/html', $entry->getMimetype());
    self::assertSame('fr', $entry->getLanguage());
    self::assertSame('200', $entry->getHttpStatus());
    self::assertSame(4.0, $entry->getReadingTime());
    self::assertSame('1.1.1.1', $entry->getDomainName());
}
174 | ||
/**
 * Same as the regular content case, but the OG image is null and the HTML
 * holds no image: the entry must end up without a preview picture.
 */
public function testWithContentAndNoOgImage()
{
    $fetched = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'status' => '200',
        'open_graph' => [
            'og_title' => 'my OG title',
            'og_description' => 'OG desc',
            'og_image' => null,
        ],
    ];

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())->method('tag');

    $grabyStub = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyStub->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyStub,
        $ruleTagger,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    self::assertSame('http://1.1.1.1', $entry->getUrl());
    self::assertSame('this is my title', $entry->getTitle());
    self::assertContains('content', $entry->getContent());
    self::assertNull($entry->getPreviewPicture());
    self::assertSame('text/html', $entry->getMimetype());
    self::assertSame('fr', $entry->getLanguage());
    self::assertSame('200', $entry->getHttpStatus());
    self::assertSame(4.0, $entry->getReadingTime());
    self::assertSame('1.1.1.1', $entry->getDomainName());
}
216 | ||
/**
 * With no OG image but an <img> inside the fetched HTML, the image found in
 * the content is expected as the entry's preview picture.
 */
public function testWithContentAndContentImage()
{
    $html = "<h1>Test</h1><p><img src='http://3.3.3.3/cover.jpg'/></p>";
    $fetched = [
        'html' => $html,
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'status' => '200',
        'open_graph' => [
            'og_title' => 'my OG title',
            'og_description' => 'OG desc',
            'og_image' => null,
        ],
    ];

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())->method('tag');

    $grabyStub = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyStub->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyStub,
        $ruleTagger,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    self::assertSame('http://1.1.1.1', $entry->getUrl());
    self::assertSame('this is my title', $entry->getTitle());
    self::assertSame($html, $entry->getContent());
    self::assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
    self::assertSame('text/html', $entry->getMimetype());
    self::assertSame('fr', $entry->getLanguage());
    self::assertSame('200', $entry->getHttpStatus());
    self::assertSame(0.0, $entry->getReadingTime());
    self::assertSame('1.1.1.1', $entry->getDomainName());
}
258 | ||
/**
 * When both an OG image and an <img> in the HTML are available, the OG image
 * is the one expected as preview picture.
 */
public function testWithContentImageAndOgImage()
{
    $html = "<h1>Test</h1><p><img src='http://3.3.3.3/nevermind.jpg'/></p>";
    $ogImage = 'http://3.3.3.3/cover.jpg';
    $fetched = [
        'html' => $html,
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'status' => '200',
        'open_graph' => [
            'og_title' => 'my OG title',
            'og_description' => 'OG desc',
            'og_image' => $ogImage,
        ],
    ];

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())->method('tag');

    $grabyStub = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyStub->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyStub,
        $ruleTagger,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    self::assertSame('http://1.1.1.1', $entry->getUrl());
    self::assertSame('this is my title', $entry->getTitle());
    self::assertSame($html, $entry->getContent());
    self::assertSame($ogImage, $entry->getPreviewPicture());
    self::assertSame('text/html', $entry->getMimetype());
    self::assertSame('fr', $entry->getLanguage());
    self::assertSame('200', $entry->getHttpStatus());
    self::assertSame(0.0, $entry->getReadingTime());
    self::assertSame('1.1.1.1', $entry->getDomainName());
}
300 | ||
/**
 * When the validator reports a violation for the fetched language, the entry
 * language stays null while the other fetched fields are still applied.
 */
public function testWithContentAndBadLanguage()
{
    $languageViolation = new ConstraintViolationList([
        new ConstraintViolation('oops', 'oops', [], 'oops', 'language', 'dontexist'),
    ]);
    $fetched = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'dontexist',
        'status' => '200',
    ];

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())->method('tag');

    // The validator is expected to be consulted exactly once in this scenario.
    $failingValidator = $this->getValidator(false);
    $failingValidator->expects($this->once())
        ->method('validate')
        ->willReturn($languageViolation);

    $grabyStub = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyStub->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyStub,
        $ruleTagger,
        $failingValidator,
        $this->getLogger(),
        $this->fetchingErrorMessage
    );
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    self::assertSame('http://1.1.1.1', $entry->getUrl());
    self::assertSame('this is my title', $entry->getTitle());
    self::assertContains('content', $entry->getContent());
    self::assertSame('text/html', $entry->getMimetype());
    self::assertNull($entry->getLanguage());
    self::assertSame('200', $entry->getHttpStatus());
    self::assertSame(4.0, $entry->getReadingTime());
    self::assertSame('1.1.1.1', $entry->getDomainName());
}
341 | ||
/**
 * The validator is called twice: the first call passes, the second reports a
 * URL violation. The entry must then keep its language but have no preview
 * picture.
 */
public function testWithContentAndBadOgImage()
{
    $noViolation = new ConstraintViolationList();
    $urlViolation = new ConstraintViolationList([
        new ConstraintViolation('oops', 'oops', [], 'oops', 'url', 'https://'),
    ]);
    $fetched = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'status' => '200',
        'open_graph' => [
            'og_title' => 'my OG title',
            'og_description' => 'OG desc',
            'og_image' => 'https://',
        ],
    ];

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())->method('tag');

    $sequencedValidator = $this->getValidator(false);
    $sequencedValidator->expects($this->exactly(2))
        ->method('validate')
        ->will($this->onConsecutiveCalls($noViolation, $urlViolation));

    $grabyStub = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyStub->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyStub,
        $ruleTagger,
        $sequencedValidator,
        $this->getLogger(),
        $this->fetchingErrorMessage
    );
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    self::assertSame('http://1.1.1.1', $entry->getUrl());
    self::assertSame('this is my title', $entry->getTitle());
    self::assertContains('content', $entry->getContent());
    self::assertNull($entry->getPreviewPicture());
    self::assertSame('text/html', $entry->getMimetype());
    self::assertSame('fr', $entry->getLanguage());
    self::assertSame('200', $entry->getHttpStatus());
    self::assertSame(4.0, $entry->getReadingTime());
    self::assertSame('1.1.1.1', $entry->getDomainName());
}
391 | ||
/**
 * Content handed directly to updateEntry() (using a real Graby instance) is
 * applied to the entry, including the publication date, the authors and the
 * headers.
 */
public function testWithForcedContent()
{
    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'date' => '1395635872',
        'authors' => ['Jeremy', 'Nico', 'Thomas'],
        'all_headers' => [
            'Cache-Control' => 'no-cache',
        ],
    ];

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())->method('tag');

    $contentProxy = new ContentProxy(
        new Graby(),
        $ruleTagger,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage,
        true
    );
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0', $forcedContent);

    self::assertSame('http://1.1.1.1', $entry->getUrl());
    self::assertSame('this is my title', $entry->getTitle());
    self::assertContains('content', $entry->getContent());
    self::assertSame('text/html', $entry->getMimetype());
    self::assertSame('fr', $entry->getLanguage());
    self::assertSame(4.0, $entry->getReadingTime());
    self::assertSame('1.1.1.1', $entry->getDomainName());
    self::assertSame('24/03/2014', $entry->getPublishedAt()->format('d/m/Y'));
    self::assertContains('Jeremy', $entry->getPublishedBy());
    self::assertContains('Nico', $entry->getPublishedBy());
    self::assertContains('Thomas', $entry->getPublishedBy());
    self::assertNotNull($entry->getHeaders(), 'Headers are stored, so value is not null');
    self::assertContains('no-cache', $entry->getHeaders());
}
431 | ||
/**
 * Forced content whose date is a full datetime string with a timezone offset
 * results in the matching publication date on the entry.
 */
public function testWithForcedContentAndDatetime()
{
    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'date' => '2016-09-08T11:55:58+0200',
    ];

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())->method('tag');

    // A real Monolog logger backed by an in-memory handler.
    $recordingHandler = new TestHandler();
    $monolog = new Logger('test', [$recordingHandler]);

    $contentProxy = new ContentProxy(
        new Graby(),
        $ruleTagger,
        $this->getValidator(),
        $monolog,
        $this->fetchingErrorMessage
    );
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://1.1.1.1', $forcedContent);

    self::assertSame('http://1.1.1.1', $entry->getUrl());
    self::assertSame('this is my title', $entry->getTitle());
    self::assertContains('content', $entry->getContent());
    self::assertSame('text/html', $entry->getMimetype());
    self::assertSame('fr', $entry->getLanguage());
    self::assertSame(4.0, $entry->getReadingTime());
    self::assertSame('1.1.1.1', $entry->getDomainName());
    self::assertSame('08/09/2016', $entry->getPublishedAt()->format('d/m/Y'));
}
465 | ||
/**
 * Forced content with an unparsable date leaves the publication date null and
 * produces a log record mentioning the date error.
 */
public function testWithForcedContentAndBadDate()
{
    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'date' => '01 02 2012',
    ];

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())->method('tag');

    // Capture log records in memory so they can be inspected below.
    $recordingHandler = new TestHandler();
    $monolog = new Logger('foo');
    $monolog->pushHandler($recordingHandler);

    $contentProxy = new ContentProxy(
        new Graby(),
        $ruleTagger,
        $this->getValidator(),
        $monolog,
        $this->fetchingErrorMessage
    );
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://1.1.1.1', $forcedContent);

    self::assertSame('http://1.1.1.1', $entry->getUrl());
    self::assertSame('this is my title', $entry->getTitle());
    self::assertContains('content', $entry->getContent());
    self::assertSame('text/html', $entry->getMimetype());
    self::assertSame('fr', $entry->getLanguage());
    self::assertSame(4.0, $entry->getReadingTime());
    self::assertSame('1.1.1.1', $entry->getDomainName());
    self::assertNull($entry->getPublishedAt());

    $logged = $recordingHandler->getRecords();

    self::assertCount(3, $logged);
    self::assertContains('Error while defining date', $logged[0]['message']);
}
505 | ||
/**
 * An exception thrown by the tagger must not escape updateEntry(); the entry
 * simply ends up with no tags.
 */
public function testTaggerThrowException()
{
    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
    ];

    $taggerFailure = new \Exception();
    $explodingTagger = $this->getTaggerMock();
    $explodingTagger->expects($this->once())
        ->method('tag')
        ->will($this->throwException($taggerFailure));

    $contentProxy = new ContentProxy(
        new Graby(),
        $explodingTagger,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://1.1.1.1', $forcedContent);

    self::assertCount(0, $entry->getTags());
}
529 | ||
/**
 * Data provider for testWithCrazyHtmlContent.
 *
 * Each dataset holds the raw HTML to store, followed by a string that must
 * not appear in the stored entry content.
 */
public function dataForCrazyHtml()
{
    $scriptInConditionalComment = [
        '<strong>Script inside:</strong> <!--[if gte IE 4]><script>alert(\'lol\');</script><![endif]--><br />',
        'lol',
    ];
    $plainScript = [
        '<strong>Script inside:</strong><script>alert(\'lol\');</script>',
        'script',
    ];

    return [
        'script and comment' => $scriptInConditionalComment,
        'script' => $plainScript,
    ];
}
543 | ||
/**
 * Forced HTML containing script tags is stored without the given forbidden
 * string, while the remaining metadata is applied unchanged.
 *
 * @dataProvider dataForCrazyHtml
 */
public function testWithCrazyHtmlContent($html, $escapedString)
{
    $forcedContent = [
        'html' => $html,
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'status' => '200',
        'open_graph' => [
            'og_title' => 'my OG title',
            'og_description' => 'OG desc',
            'og_image' => 'http://3.3.3.3/cover.jpg',
        ],
    ];

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())->method('tag');

    $contentProxy = new ContentProxy(
        new Graby(),
        $ruleTagger,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://1.1.1.1', $forcedContent);

    self::assertSame('http://1.1.1.1', $entry->getUrl());
    self::assertSame('this is my title', $entry->getTitle());
    self::assertNotContains($escapedString, $entry->getContent());
    self::assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
    self::assertSame('text/html', $entry->getMimetype());
    self::assertSame('fr', $entry->getLanguage());
    self::assertSame('200', $entry->getHttpStatus());
    self::assertSame('1.1.1.1', $entry->getDomainName());
}
582 | ||
/**
 * When the fetched resource is itself an image (image/jpeg), the image URL is
 * expected both inside the content and as the preview picture.
 */
public function testWithImageAsContent()
{
    $imageUrl = 'http://1.1.1.1/image.jpg';
    $fetched = [
        'html' => '<p><img src="http://1.1.1.1/image.jpg" /></p>',
        'title' => 'this is my title',
        'url' => $imageUrl,
        'content_type' => 'image/jpeg',
        'status' => '200',
        'open_graph' => [],
    ];

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())->method('tag');

    $grabyStub = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyStub->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyStub,
        $ruleTagger,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    self::assertSame($imageUrl, $entry->getUrl());
    self::assertSame('this is my title', $entry->getTitle());
    self::assertContains($imageUrl, $entry->getContent());
    self::assertSame($imageUrl, $entry->getPreviewPicture());
    self::assertSame('image/jpeg', $entry->getMimetype());
    self::assertSame('200', $entry->getHttpStatus());
    self::assertSame('1.1.1.1', $entry->getDomainName());
}
617 | ||
/**
 * A text/html title that is already valid UTF-8 must be stored byte-for-byte.
 */
public function testWebsiteWithValidUTF8Title_doNothing()
{
    // Hex <=> UTF-8 text converter: https://www.online-toolz.com/tools/text-hex-convertor.php
    // Character reference: http://graphemica.com
    // UTF-8 bytes of '😻ℤz': F09F98BB (U+1F63B), E284A4 (U+2124), 7A (U+007A).
    $titleHex = 'F09F98BB' . 'E284A4' . '7A';
    $fetched = [
        'html' => false,
        'title' => $this->hexToStr($titleHex),
        'url' => '',
        'content_type' => 'text/html',
        'language' => '',
    ];

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())->method('tag');

    $grabyStub = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyStub->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyStub,
        $ruleTagger,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    // The stored title is expected to have exactly the same bytes as the input.
    self::assertSame($titleHex, $this->strToHex($entry->getTitle()));
}
652 | ||
/**
 * A text/html title containing a byte that is not valid UTF-8 is stored with
 * that byte removed.
 */
public function testWebsiteWithInvalidUTF8Title_removeInvalidCharacter()
{
    // Character reference: http://graphemica.com
    // Bytes 61 80 62 read as 'a€b' in WINDOWS-1252, but a lone 80 is not valid
    // UTF-8 (the UTF-8 encoding of '€', U+20AC, is E282AC).
    $fetched = [
        'html' => false,
        'title' => $this->hexToStr('61' . '80' . '62'),
        'url' => '',
        'content_type' => 'text/html',
        'language' => '',
    ];

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())->method('tag');

    $grabyStub = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyStub->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyStub,
        $ruleTagger,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    // Only 'ab' (61 62) should remain once the invalid byte is dropped.
    $expectedHex = '61' . '62';
    self::assertSame($expectedHex, $this->strToHex($entry->getTitle()));
}
687 | ||
/**
 * An application/pdf title encoded as UTF-16BE is expected to be stored
 * converted to UTF-8.
 */
public function testPdfWithUTF16BETitle_convertToUTF8()
{
    // Character reference: http://graphemica.com
    // '😻' (U+1F63B) is D83DDE3B in UTF-16BE.
    $fetched = [
        'html' => false,
        'title' => $this->hexToStr('D83DDE3B'),
        'url' => '',
        'content_type' => 'application/pdf',
        'language' => '',
    ];

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())->method('tag');

    $grabyStub = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyStub->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyStub,
        $ruleTagger,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    // The same character in UTF-8 is F09F98BB.
    $expectedHex = 'F09F98BB';
    self::assertSame($expectedHex, $this->strToHex($entry->getTitle()));
}
721 | ||
/**
 * An application/pdf title that is already UTF-8 must be stored unchanged.
 */
public function testPdfWithUTF8Title_doNothing()
{
    // Character reference: http://graphemica.com
    // '😻' (U+1F63B) is F09F98BB in UTF-8.
    $titleHex = 'F09F98BB';
    $fetched = [
        'html' => false,
        'title' => $this->hexToStr($titleHex),
        'url' => '',
        'content_type' => 'application/pdf',
        'language' => '',
    ];

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())->method('tag');

    $grabyStub = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyStub->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyStub,
        $ruleTagger,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    // Output bytes are expected to equal the input bytes.
    self::assertSame($titleHex, $this->strToHex($entry->getTitle()));
}
755 | ||
/**
 * An application/pdf title encoded as WINDOWS-1252 is expected to be stored
 * converted to UTF-8.
 */
public function testPdfWithWINDOWS1252Title_convertToUTF8()
{
    // Character reference: http://graphemica.com
    // '€' is the single byte 80 in WINDOWS-1252.
    $fetched = [
        'html' => false,
        'title' => $this->hexToStr('80'),
        'url' => '',
        'content_type' => 'application/pdf',
        'language' => '',
    ];

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())->method('tag');

    $grabyStub = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyStub->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyStub,
        $ruleTagger,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    // '€' (U+20AC) is E282AC in UTF-8.
    $expectedHex = 'E282AC';
    self::assertSame($expectedHex, $this->strToHex($entry->getTitle()));
}
789 | ||
/**
 * An application/pdf title that is UTF-8 apart from one invalid byte is
 * expected to be stored with that byte removed.
 */
public function testPdfWithInvalidCharacterInTitle_removeInvalidCharacter()
{
    // Character reference: http://graphemica.com
    // UTF-8 bytes of '😻ℤ' (F09F98BB, E284A4), then the stray byte 81, then 'z' (7A).
    // 0x81 is not a valid character in UTF-16, UTF-8 or WINDOWS-1252.
    $fetched = [
        'html' => false,
        'title' => $this->hexToStr('F09F98BB' . 'E284A4' . '81' . '7A'),
        'url' => '',
        'content_type' => 'application/pdf',
        'language' => '',
    ];

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())->method('tag');

    $grabyStub = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyStub->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyStub,
        $ruleTagger,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    // '😻ℤz' in UTF-8: the same bytes as the input minus the invalid 0x81.
    $expectedHex = 'F09F98BB' . 'E284A4' . '7A';
    self::assertSame($expectedHex, $this->strToHex($entry->getTitle()));
}
825 | ||
/**
 * Data provider for testWithChangedUrl.
 *
 * Every named data set is a list holding, in this order:
 *   $entry_url
 *   $origin_url
 *   $content_url
 *   $expected_entry_url
 *   $expected_origin_url
 *   $expected_domain
 *
 * @return array
 */
public function dataForChangedUrl()
{
    $dataSets = [];

    $dataSets['normal'] = [
        'http://0.0.0.0',
        null,
        'http://1.1.1.1',
        'http://1.1.1.1',
        'http://0.0.0.0',
        '1.1.1.1',
    ];

    $dataSets['origin already set'] = [
        'http://0.0.0.0',
        'http://hello',
        'http://1.1.1.1',
        'http://1.1.1.1',
        'http://hello',
        '1.1.1.1',
    ];

    $dataSets['trailing slash'] = [
        'https://example.com/hello-world',
        null,
        'https://example.com/hello-world/',
        'https://example.com/hello-world/',
        null,
        'example.com',
    ];

    $dataSets['query string in fetched content'] = [
        'https://example.org/hello',
        null,
        'https://example.org/hello?world=1',
        'https://example.org/hello?world=1',
        'https://example.org/hello',
        'example.org',
    ];

    $dataSets['fragment in fetched content'] = [
        'https://example.org/hello',
        null,
        'https://example.org/hello#world',
        'https://example.org/hello',
        null,
        'example.org',
    ];

    $dataSets['fragment and query string in fetched content'] = [
        'https://example.org/hello',
        null,
        'https://example.org/hello?foo#world',
        'https://example.org/hello?foo#world',
        'https://example.org/hello',
        'example.org',
    ];

    $dataSets['different path and query string in fetch content'] = [
        'https://example.org/hello',
        null,
        'https://example.org/world?foo',
        'https://example.org/world?foo',
        'https://example.org/hello',
        'example.org',
    ];

    $dataSets['feedproxy ignore list test'] = [
        'http://feedproxy.google.com/~r/Wallabag/~3/helloworld',
        null,
        'https://example.org/hello-wallabag',
        'https://example.org/hello-wallabag',
        null,
        'example.org',
    ];

    $dataSets['feedproxy ignore list test with origin url already set'] = [
        'http://feedproxy.google.com/~r/Wallabag/~3/helloworld',
        'https://example.org/this-is-source',
        'https://example.org/hello-wallabag',
        'https://example.org/hello-wallabag',
        'https://example.org/this-is-source',
        'example.org',
    ];

    $dataSets['lemonde ignore pattern test'] = [
        'http://www.lemonde.fr/tiny/url',
        null,
        'http://example.com/hello-world',
        'http://example.com/hello-world',
        null,
        'example.com',
    ];

    return $dataSets;
}
922 | ||
/**
 * Updates an entry from an already-available content array whose 'url'
 * is $content_url, then checks the entry's url, domain name and origin
 * url against the expected values of the data set.
 *
 * @dataProvider dataForChangedUrl
 */
public function testWithChangedUrl($entry_url, $origin_url, $content_url, $expected_entry_url, $expected_origin_url, $expected_domain)
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    // Content handed straight to updateEntry() as its third argument
    $providedContent = [
        'html' => false,
        'title' => '',
        'url' => $content_url,
        'content_type' => '',
        'language' => '',
    ];

    $contentProxy = new ContentProxy(
        new Graby(),
        $taggerMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage,
        true
    );

    $entry = new Entry(new User());
    $entry->setOriginUrl($origin_url);
    $contentProxy->updateEntry($entry, $entry_url, $providedContent, true);

    $this->assertSame($expected_entry_url, $entry->getUrl());
    $this->assertSame($expected_domain, $entry->getDomainName());
    $this->assertSame($expected_origin_url, $entry->getOriginUrl());
}
952 | ||
/**
 * Returns the uppercase hexadecimal representation of every byte of the
 * given string, two hex digits per byte (e.g. "\x0A" gives "0A").
 *
 * Uses the built-in bin2hex() rather than a hand-written
 * ord()/dechex() loop; the output is the same.
 *
 * @param string $string raw bytes to encode
 *
 * @return string uppercase hex digits, twice as long as the input
 */
private function strToHex($string)
{
    return strtoupper(bin2hex($string));
}
971 | ||
/**
 * Builds a byte string from a string of hexadecimal digits, reading the
 * digits two at a time (e.g. "417A" gives "Az").
 *
 * Adapted from https://stackoverflow.com/a/18506801.
 *
 * @param string $hex hexadecimal digits; a trailing unpaired digit is ignored
 *
 * @return string one byte per pair of hex digits
 */
private function hexToStr($hex)
{
    $bytes = '';

    // Only complete pairs are decoded, so an odd trailing digit is skipped
    $pairCount = intdiv(\strlen($hex), 2);
    for ($pair = 0; $pair < $pairCount; ++$pair) {
        $bytes .= \chr(hexdec(substr($hex, 2 * $pair, 2)));
    }

    return $bytes;
}
988 | ||
/**
 * Creates a RuleBasedTagger mock with its constructor disabled and only
 * the 'tag' method replaced, so callers can set expectations on it.
 */
private function getTaggerMock()
{
    $builder = $this->getMockBuilder(RuleBasedTagger::class);
    $builder->setMethods(['tag']);
    $builder->disableOriginalConstructor();

    return $builder->getMock();
}
996 | ||
/**
 * Provides a fresh NullLogger instance to pass to ContentProxy.
 */
private function getLogger()
{
    $logger = new NullLogger();

    return $logger;
}
1001 | ||
/**
 * Creates a RecursiveValidator mock with its constructor disabled and
 * only the 'validate' method replaced.
 *
 * @param bool $withDefaultMock when truthy, 'validate' is stubbed to return
 *                              an empty ConstraintViolationList for any
 *                              number of calls; otherwise the mock is
 *                              returned without any expectation set
 */
private function getValidator($withDefaultMock = true)
{
    $validatorMock = $this->getMockBuilder(RecursiveValidator::class)
        ->setMethods(['validate'])
        ->disableOriginalConstructor()
        ->getMock();

    if (!$withDefaultMock) {
        return $validatorMock;
    }

    $validatorMock->expects($this->any())
        ->method('validate')
        ->willReturn(new ConstraintViolationList());

    return $validatorMock;
}
1017 | } |