]>
Commit | Line | Data |
---|---|---|
558d9aab JB |
1 | <?php |
2 | ||
a2c1b94e | 3 | namespace Tests\Wallabag\CoreBundle\Helper; |
558d9aab | 4 | |
f808b016 | 5 | use Graby\Graby; |
d5c2cc54 | 6 | use Monolog\Handler\TestHandler; |
f808b016 | 7 | use Monolog\Logger; |
bd91bd5c | 8 | use PHPUnit\Framework\TestCase; |
f808b016 JB |
9 | use Psr\Log\NullLogger; |
10 | use Symfony\Component\Validator\ConstraintViolation; | |
11 | use Symfony\Component\Validator\ConstraintViolationList; | |
12 | use Symfony\Component\Validator\Validator\RecursiveValidator; | |
c2656f96 | 13 | use Wallabag\CoreBundle\Entity\Entry; |
f808b016 | 14 | use Wallabag\CoreBundle\Helper\ContentProxy; |
6bc6fb1f | 15 | use Wallabag\CoreBundle\Helper\RuleBasedTagger; |
f808b016 | 16 | use Wallabag\UserBundle\Entity\User; |
558d9aab | 17 | |
bd91bd5c | 18 | class ContentProxyTest extends TestCase |
558d9aab | 19 | { |
ac1509a6 | 20 | private $fetchingErrorMessage = 'wallabag can\'t retrieve contents for this article. Please <a href="http://doc.wallabag.org/en/user/errors_during_fetching.html#how-can-i-help-to-fix-that">troubleshoot this issue</a>.'; |
a2c1b94e | 21 | |
/**
 * Bad URL combined with a failed extraction: the entry keeps the submitted
 * URL, receives the fetching-error message as content and ends up without
 * a domain name.
 */
public function testWithBadUrl()
{
    $submittedUrl = 'http://user@:80';

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    // Graby double reporting that nothing could be extracted.
    $failedExtraction = [
        'html' => false,
        'title' => '',
        'url' => '',
        'content_type' => '',
        'language' => '',
    ];
    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($failedExtraction);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, $submittedUrl);

    $this->assertSame($submittedUrl, $entry->getUrl());
    $this->assertEmpty($entry->getTitle());
    $this->assertSame($this->fetchingErrorMessage, $entry->getContent());
    $this->assertEmpty($entry->getPreviewPicture());
    $this->assertEmpty($entry->getMimetype());
    $this->assertEmpty($entry->getLanguage());
    $this->assertSame(0.0, $entry->getReadingTime());
    $this->assertNull($entry->getDomainName());
}
56 | ||
/**
 * Failed extraction for a well-formed URL: the entry keeps the submitted
 * URL and its host as domain name, and carries the fetching-error message
 * as content.
 */
public function testWithEmptyContent()
{
    $submittedUrl = 'http://0.0.0.0';

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    // Graby double reporting that nothing could be extracted.
    $failedExtraction = [
        'html' => false,
        'title' => '',
        'url' => '',
        'content_type' => '',
        'language' => '',
    ];
    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($failedExtraction);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, $submittedUrl);

    $this->assertSame($submittedUrl, $entry->getUrl());
    $this->assertEmpty($entry->getTitle());
    $this->assertSame($this->fetchingErrorMessage, $entry->getContent());
    $this->assertEmpty($entry->getPreviewPicture());
    $this->assertEmpty($entry->getMimetype());
    $this->assertEmpty($entry->getLanguage());
    $this->assertSame(0.0, $entry->getReadingTime());
    $this->assertSame('0.0.0.0', $entry->getDomainName());
}
91 | ||
/**
 * Failed extraction that still carries Open Graph data: the OG title is
 * expected as entry title and the OG description is expected to be
 * appended to the fetching-error message.
 */
public function testWithEmptyContentButOG()
{
    $submittedUrl = 'http://domain.io';

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    // No HTML at all, only Open Graph metadata.
    $openGraphOnly = [
        'html' => false,
        'title' => '',
        'url' => '',
        'content_type' => '',
        'language' => '',
        'status' => '',
        'open_graph' => [
            'og_title' => 'my title',
            'og_description' => 'desc',
        ],
    ];
    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($openGraphOnly);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, $submittedUrl);

    $expectedContent = $this->fetchingErrorMessage . '<p><i>But we found a short description: </i></p>desc';

    $this->assertSame($submittedUrl, $entry->getUrl());
    $this->assertSame('my title', $entry->getTitle());
    $this->assertSame($expectedContent, $entry->getContent());
    $this->assertEmpty($entry->getPreviewPicture());
    $this->assertEmpty($entry->getLanguage());
    $this->assertEmpty($entry->getHttpStatus());
    $this->assertEmpty($entry->getMimetype());
    $this->assertSame(0.0, $entry->getReadingTime());
    $this->assertSame('domain.io', $entry->getDomainName());
}
132 | ||
/**
 * Successful extraction: every field returned by Graby (URL, title,
 * content, OG image, mimetype, language, status) is expected on the entry,
 * with a reading time of 4.0 for the repeated sample text.
 */
public function testWithContent()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $fetched = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'status' => '200',
        'open_graph' => [
            'og_title' => 'my OG title',
            'og_description' => 'OG desc',
            'og_image' => 'http://3.3.3.3/cover.jpg',
        ],
    ];
    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );
    $entry = new Entry(new User());
    // The submitted URL differs from the one Graby reports.
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertContains('this is my content', $entry->getContent());
    $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame(4.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
f530f7f5 | 174 | |
/**
 * Successful extraction whose Open Graph image is null: the entry is
 * expected to have no preview picture while all other fields are set.
 */
public function testWithContentAndNoOgImage()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $fetched = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'status' => '200',
        'open_graph' => [
            'og_title' => 'my OG title',
            'og_description' => 'OG desc',
            'og_image' => null,
        ],
    ];
    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertContains('this is my content', $entry->getContent());
    $this->assertNull($entry->getPreviewPicture());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame(4.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
216 | ||
/**
 * Successful extraction with a language the validator rejects: the
 * validator double reports one violation and the entry language is
 * expected to stay null.
 */
public function testWithContentAndBadLanguage()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    // Validator double answering its single call with a violation on "language".
    $languageViolation = new ConstraintViolation('oops', 'oops', [], 'oops', 'language', 'dontexist');
    $validatorMock = $this->getValidator(false);
    $validatorMock->expects($this->once())
        ->method('validate')
        ->willReturn(new ConstraintViolationList([$languageViolation]));

    $fetched = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'dontexist',
        'status' => '200',
    ];
    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $validatorMock,
        $this->getLogger(),
        $this->fetchingErrorMessage
    );
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertContains('this is my content', $entry->getContent());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertNull($entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame(4.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
257 | ||
/**
 * Successful extraction with an Open Graph image the validator rejects:
 * the validator double accepts its first call and reports a violation on
 * the second, and the entry is expected to have no preview picture.
 */
public function testWithContentAndBadOgImage()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    // First validation passes, second one fails on "url".
    $noViolation = new ConstraintViolationList();
    $urlViolation = new ConstraintViolationList([
        new ConstraintViolation('oops', 'oops', [], 'oops', 'url', 'https://'),
    ]);
    $validatorMock = $this->getValidator(false);
    $validatorMock->expects($this->exactly(2))
        ->method('validate')
        ->will($this->onConsecutiveCalls($noViolation, $urlViolation));

    $fetched = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'status' => '200',
        'open_graph' => [
            'og_title' => 'my OG title',
            'og_description' => 'OG desc',
            'og_image' => 'https://',
        ],
    ];
    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $validatorMock,
        $this->getLogger(),
        $this->fetchingErrorMessage
    );
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertContains('this is my content', $entry->getContent());
    $this->assertNull($entry->getPreviewPicture());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame(4.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
307 | ||
/**
 * Content handed directly to updateEntry() (a real Graby instance is
 * passed but no stubbed fetch result is involved): the entry is expected
 * to expose the given fields, a publication date parsed from the numeric
 * timestamp, the authors and the headers.
 */
public function testWithForcedContent()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $graby = new Graby();
    // NOTE(review): the trailing `true` presumably enables header storage,
    // as the assertion message below suggests - confirm against ContentProxy.
    $contentProxy = new ContentProxy(
        $graby,
        $taggerMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage,
        true
    );

    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'date' => '1395635872',
        'authors' => ['Jeremy', 'Nico', 'Thomas'],
        'all_headers' => [
            'Cache-Control' => 'no-cache',
        ],
    ];
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0', $forcedContent);

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertContains('this is my content', $entry->getContent());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame(4.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
    $this->assertSame('24/03/2014', $entry->getPublishedAt()->format('d/m/Y'));
    $this->assertContains('Jeremy', $entry->getPublishedBy());
    $this->assertContains('Nico', $entry->getPublishedBy());
    $this->assertContains('Thomas', $entry->getPublishedBy());
    $this->assertNotNull($entry->getHeaders(), 'Headers are stored, so value is not null');
    $this->assertContains('no-cache', $entry->getHeaders());
}
347 | ||
/**
 * Forced content whose date is an ISO-8601 style string: the publication
 * date is expected to be parsed to 08/09/2016.
 */
public function testWithForcedContentAndDatetime()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    // Real Monolog logger backed by an in-memory handler.
    $testHandler = new TestHandler();
    $monolog = new Logger('test', [$testHandler]);

    $graby = new Graby();
    $contentProxy = new ContentProxy(
        $graby,
        $taggerMock,
        $this->getValidator(),
        $monolog,
        $this->fetchingErrorMessage
    );

    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'date' => '2016-09-08T11:55:58+0200',
    ];
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://1.1.1.1', $forcedContent);

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertContains('this is my content', $entry->getContent());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame(4.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
    $this->assertSame('08/09/2016', $entry->getPublishedAt()->format('d/m/Y'));
}
381 | ||
/**
 * Forced content whose date cannot be parsed: the publication date is
 * expected to stay null and exactly one log record mentioning the date
 * error is expected.
 */
public function testWithForcedContentAndBadDate()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    // In-memory handler so the emitted log records can be inspected.
    $monolog = new Logger('foo');
    $testHandler = new TestHandler();
    $monolog->pushHandler($testHandler);

    $graby = new Graby();
    $contentProxy = new ContentProxy(
        $graby,
        $taggerMock,
        $this->getValidator(),
        $monolog,
        $this->fetchingErrorMessage
    );

    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'date' => '01 02 2012',
    ];
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://1.1.1.1', $forcedContent);

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertContains('this is my content', $entry->getContent());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame(4.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
    $this->assertNull($entry->getPublishedAt());

    $logRecords = $testHandler->getRecords();

    $this->assertCount(1, $logRecords);
    $this->assertContains('Error while defining date', $logRecords[0]['message']);
}
421 | ||
/**
 * A tagger that throws must not make updateEntry() fail: the call is
 * expected to complete and the entry to end up with no tags.
 */
public function testTaggerThrowException()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag')
        ->willThrowException(new \Exception());

    $graby = new Graby();
    $contentProxy = new ContentProxy(
        $graby,
        $taggerMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
    ];
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://1.1.1.1', $forcedContent);

    $this->assertCount(0, $entry->getTags());
}
445 | ||
/**
 * Data provider for testWithCrazyHtmlContent.
 *
 * Each dataset holds:
 *  - the raw HTML handed to the proxy
 *  - a string that must not appear in the resulting entry content
 */
public function dataForCrazyHtml()
{
    $datasets = [];

    // Script hidden inside an IE conditional comment.
    $datasets['script and comment'] = [
        '<strong>Script inside:</strong> <!--[if gte IE 4]><script>alert(\'lol\');</script><![endif]--><br />',
        'lol',
    ];

    // Plain inline script tag.
    $datasets['script'] = [
        '<strong>Script inside:</strong><script>alert(\'lol\');</script>',
        'script',
    ];

    return $datasets;
}
459 | ||
/**
 * Forced HTML containing scripts: the given marker string must be absent
 * from the stored content while the other fields are kept.
 *
 * @dataProvider dataForCrazyHtml
 */
public function testWithCrazyHtmlContent($html, $escapedString)
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $graby = new Graby();
    $contentProxy = new ContentProxy(
        $graby,
        $taggerMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $forcedContent = [
        'html' => $html,
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'status' => '200',
        'open_graph' => [
            'og_title' => 'my OG title',
            'og_description' => 'OG desc',
            'og_image' => 'http://3.3.3.3/cover.jpg',
        ],
    ];
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://1.1.1.1', $forcedContent);

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertNotContains($escapedString, $entry->getContent());
    $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
498 | ||
/**
 * Fetched resource is an image (content type image/jpeg, empty Open Graph
 * data): the image URL is expected both in the content and as preview
 * picture.
 */
public function testWithImageAsContent()
{
    $imageUrl = 'http://1.1.1.1/image.jpg';

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $fetched = [
        'html' => '<p><img src="http://1.1.1.1/image.jpg" /></p>',
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1/image.jpg',
        'content_type' => 'image/jpeg',
        'status' => '200',
        'open_graph' => [],
    ];
    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame($imageUrl, $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertContains($imageUrl, $entry->getContent());
    $this->assertSame($imageUrl, $entry->getPreviewPicture());
    $this->assertSame('image/jpeg', $entry->getMimetype());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
533 | ||
/**
 * HTML page whose title is already valid UTF-8: the stored title is
 * expected to be byte-for-byte the fetched one.
 */
public function testWebsiteWithValidUTF8Title_doNothing()
{
    // You can use https://www.online-toolz.com/tools/text-hex-convertor.php to convert UTF-8 text <=> hex
    // See http://graphemica.com for more info about the characters
    // '😻ℤz' as UTF-8 hex: U+1F63B => F09F98BB, U+2124 => E284A4, U+007A => 7A
    $fetchedTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '7A');

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $fetched = [
        'html' => false,
        'title' => $fetchedTitle,
        'url' => '',
        'content_type' => 'text/html',
        'language' => '',
    ];
    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    // Same three characters, unchanged.
    $expectedHex = 'F09F98BB' . 'E284A4' . '7A';
    $this->assertSame($expectedHex, $this->strToHex($entry->getTitle()));
}
568 | ||
/**
 * HTML page whose title contains a byte that is invalid in UTF-8: the
 * stored title is expected to have that byte removed.
 */
public function testWebsiteWithInvalidUTF8Title_removeInvalidCharacter()
{
    // See http://graphemica.com for more info about the characters
    // 'a€b' encoded as WINDOWS-1252 is 61 80 62, but a lone 80 is not valid UTF-8.
    // (The proper UTF-8 encoding of € / U+20AC would be E282AC.)
    $fetchedTitle = $this->hexToStr('61' . '80' . '62');

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $fetched = [
        'html' => false,
        'title' => $fetchedTitle,
        'url' => '',
        'content_type' => 'text/html',
        'language' => '',
    ];
    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    // Only 'ab' (61 62) is expected to remain once the invalid 80 byte is dropped.
    $expectedHex = '61' . '62';
    $this->assertSame($expectedHex, $this->strToHex($entry->getTitle()));
}
603 | ||
/**
 * PDF whose title is encoded as UTF-16BE: the stored title is expected to
 * be the same character encoded as UTF-8.
 */
public function testPdfWithUTF16BETitle_convertToUTF8()
{
    // See http://graphemica.com for more info about the characters
    // '😻' (U+1F63B) encoded as UTF-16BE is D83DDE3B
    $fetchedTitle = $this->hexToStr('D83DDE3B');

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $fetched = [
        'html' => false,
        'title' => $fetchedTitle,
        'url' => '',
        'content_type' => 'application/pdf',
        'language' => '',
    ];
    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    // '😻' (U+1F63B) encoded as UTF-8 is F09F98BB
    $expectedHex = 'F09F98BB';
    $this->assertSame($expectedHex, $this->strToHex($entry->getTitle()));
}
637 | ||
/**
 * PDF whose title is already UTF-8: the stored title is expected to be
 * byte-for-byte the fetched one.
 */
public function testPdfWithUTF8Title_doNothing()
{
    // See http://graphemica.com for more info about the characters
    // '😻' (U+1F63B) encoded as UTF-8 is F09F98BB
    $fetchedTitle = $this->hexToStr('F09F98BB');

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $fetched = [
        'html' => false,
        'title' => $fetchedTitle,
        'url' => '',
        'content_type' => 'application/pdf',
        'language' => '',
    ];
    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    // Unchanged: still F09F98BB
    $expectedHex = 'F09F98BB';
    $this->assertSame($expectedHex, $this->strToHex($entry->getTitle()));
}
671 | ||
/**
 * PDF whose title is encoded as WINDOWS-1252: the stored title is expected
 * to be the same character encoded as UTF-8.
 */
public function testPdfWithWINDOWS1252Title_convertToUTF8()
{
    // See http://graphemica.com for more info about the characters
    // '€' encoded as WINDOWS-1252 is the single byte 80
    $fetchedTitle = $this->hexToStr('80');

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $fetched = [
        'html' => false,
        'title' => $fetchedTitle,
        'url' => '',
        'content_type' => 'application/pdf',
        'language' => '',
    ];
    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    // '€' (U+20AC) encoded as UTF-8 is E282AC
    $expectedHex = 'E282AC';
    $this->assertSame($expectedHex, $this->strToHex($entry->getTitle()));
}
705 | ||
/**
 * PDF whose otherwise valid UTF-8 title contains one invalid byte: the
 * stored title is expected to have that byte removed.
 */
public function testPdfWithInvalidCharacterInTitle_removeInvalidCharacter()
{
    // See http://graphemica.com for more info about the characters
    // '😻ℤ' + invalid byte 81 + 'z' as hex: F09F98BB (U+1F63B), E284A4 (U+2124), 81, 7A (U+007A)
    // 0x81 is not a valid character for UTF16, UTF8 and WINDOWS-1252
    $fetchedTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '81' . '7A');

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $fetched = [
        'html' => false,
        'title' => $fetchedTitle,
        'url' => '',
        'content_type' => 'application/pdf',
        'language' => '',
    ];
    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    // '😻ℤz' is expected to remain: the 0x81 byte is invalid in every
    // encoding considered here and is removed.
    $expectedHex = 'F09F98BB' . 'E284A4' . '7A';
    $this->assertSame($expectedHex, $this->strToHex($entry->getTitle()));
}
741 | ||
e07fadea KD |
/**
 * Data provider for testWithChangedUrl.
 *
 * Declared static because it does not use $this; PHPUnit 10+ only accepts
 * static data providers, and older versions accept static ones as well.
 *
 * Each named data set is a list of six values, in the order of the
 * parameters of testWithChangedUrl:
 *   0. $entry_url            URL passed to updateEntry()
 *   1. $origin_url           origin URL set on the entry beforehand (or null)
 *   2. $content_url          'url' value of the content array
 *   3. $expected_entry_url   expected result of Entry::getUrl()
 *   4. $expected_origin_url  expected result of Entry::getOriginUrl()
 *   5. $expected_domain      expected result of Entry::getDomainName()
 *
 * @return array<string, array<int, string|null>>
 */
public static function dataForChangedUrl()
{
    return [
        // Content URL differs: entry takes it, the old URL becomes the origin.
        'normal' => [
            'http://0.0.0.0',
            null,
            'http://1.1.1.1',
            'http://1.1.1.1',
            'http://0.0.0.0',
            '1.1.1.1',
        ],
        // A pre-existing origin URL is expected to be kept as is.
        'origin already set' => [
            'http://0.0.0.0',
            'http://hello',
            'http://1.1.1.1',
            'http://1.1.1.1',
            'http://hello',
            '1.1.1.1',
        ],
        // Only a trailing slash differs: no origin URL is expected.
        'trailing slash' => [
            'https://example.com/hello-world',
            null,
            'https://example.com/hello-world/',
            'https://example.com/hello-world/',
            null,
            'example.com',
        ],
        'query string in fetched content' => [
            'https://example.org/hello',
            null,
            'https://example.org/hello?world=1',
            'https://example.org/hello?world=1',
            'https://example.org/hello',
            'example.org',
        ],
        // Only a fragment differs: entry URL and origin are expected unchanged.
        'fragment in fetched content' => [
            'https://example.org/hello',
            null,
            'https://example.org/hello#world',
            'https://example.org/hello',
            null,
            'example.org',
        ],
        'fragment and query string in fetched content' => [
            'https://example.org/hello',
            null,
            'https://example.org/hello?foo#world',
            'https://example.org/hello?foo#world',
            'https://example.org/hello',
            'example.org',
        ],
        'different path and query string in fetch content' => [
            'https://example.org/hello',
            null,
            'https://example.org/world?foo',
            'https://example.org/world?foo',
            'https://example.org/hello',
            'example.org',
        ],
        // feedproxy entry URL: expected not to be recorded as origin.
        'feedproxy ignore list test' => [
            'http://feedproxy.google.com/~r/Wallabag/~3/helloworld',
            null,
            'https://example.org/hello-wallabag',
            'https://example.org/hello-wallabag',
            null,
            'example.org',
        ],
        'feedproxy ignore list test with origin url already set' => [
            'http://feedproxy.google.com/~r/Wallabag/~3/helloworld',
            'https://example.org/this-is-source',
            'https://example.org/hello-wallabag',
            'https://example.org/hello-wallabag',
            'https://example.org/this-is-source',
            'example.org',
        ],
        // lemonde.fr tiny URL: expected not to be recorded as origin.
        'lemonde ignore pattern test' => [
            'http://www.lemonde.fr/tiny/url',
            null,
            'http://example.com/hello-world',
            'http://example.com/hello-world',
            null,
            'example.com',
        ],
    ];
}
838 | ||
/**
 * Updates an entry with a content array whose 'url' may differ from the
 * entry URL, then checks the resulting URL, domain name and origin URL.
 *
 * @dataProvider dataForChangedUrl
 */
public function testWithChangedUrl($entry_url, $origin_url, $content_url, $expected_entry_url, $expected_origin_url, $expected_domain)
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    // Content array handed directly to updateEntry() as its third argument.
    $content = [
        'html' => false,
        'title' => '',
        'url' => $content_url,
        'content_type' => '',
        'language' => '',
    ];

    $contentProxy = new ContentProxy(new Graby(), $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage, true);
    $entry = new Entry(new User());
    $entry->setOriginUrl($origin_url);

    $contentProxy->updateEntry($entry, $entry_url, $content, true);

    $this->assertSame($expected_entry_url, $entry->getUrl());
    $this->assertSame($expected_domain, $entry->getDomainName());
    $this->assertSame($expected_origin_url, $entry->getOriginUrl());
}
868 | ||
/**
 * Returns the bytes of a string as upper-case hexadecimal, two digits per byte.
 *
 * Uses the built-in bin2hex() instead of a manual ord()/dechex() loop; the
 * output is the same (e.g. the UTF-8 bytes of '€' become 'E282AC').
 *
 * @param string $string
 *
 * @return string
 */
private function strToHex($string)
{
    // bin2hex() emits lower-case digits; the tests compare against upper-case.
    return strtoupper(bin2hex($string));
}
887 | ||
/**
 * Builds a byte string from a hexadecimal string, two hex digits per byte.
 *
 * A trailing unpaired digit (odd-length input) is ignored.
 *
 * https://stackoverflow.com/a/18506801.
 *
 * @param $hex
 *
 * @return string
 */
private function hexToStr($hex)
{
    $bytes = '';
    foreach (str_split($hex, 2) as $pair) {
        // Skip an incomplete final chunk (also covers the empty-input chunk).
        if (2 !== \strlen($pair)) {
            continue;
        }

        $bytes .= \chr(hexdec($pair));
    }

    return $bytes;
}
904 | ||
f530f7f5 KG |
/**
 * Creates a RuleBasedTagger mock whose constructor is skipped and whose
 * only mocked method is tag().
 */
private function getTaggerMock()
{
    $builder = $this->getMockBuilder(RuleBasedTagger::class);
    $builder->setMethods(['tag']);
    $builder->disableOriginalConstructor();

    return $builder->getMock();
}
1c9cd2a7 | 912 | |
/**
 * Provides the logger handed to ContentProxy in these tests: a fresh NullLogger.
 */
private function getLogger()
{
    $logger = new NullLogger();

    return $logger;
}
0d349ea6 | 917 | |
/**
 * Creates a RecursiveValidator mock with only validate() mocked.
 *
 * @param bool $withDefaultMock when truthy, validate() is stubbed to return
 *                              an empty ConstraintViolationList; otherwise the
 *                              mock is returned without any expectation
 */
private function getValidator($withDefaultMock = true)
{
    $validator = $this->getMockBuilder(RecursiveValidator::class)
        ->setMethods(['validate'])
        ->disableOriginalConstructor()
        ->getMock();

    // Caller wants to configure validate() itself.
    if (!$withDefaultMock) {
        return $validator;
    }

    $validator->expects($this->any())
        ->method('validate')
        ->willReturn(new ConstraintViolationList());

    return $validator;
}
558d9aab | 933 | } |