]>
Commit | Line | Data |
---|---|---|
558d9aab JB |
1 | <?php |
2 | ||
a2c1b94e | 3 | namespace Tests\Wallabag\CoreBundle\Helper; |
558d9aab | 4 | |
f808b016 | 5 | use Graby\Graby; |
d5c2cc54 | 6 | use Monolog\Handler\TestHandler; |
f808b016 | 7 | use Monolog\Logger; |
bd91bd5c | 8 | use PHPUnit\Framework\TestCase; |
f808b016 JB |
9 | use Psr\Log\NullLogger; |
10 | use Symfony\Component\Validator\ConstraintViolation; | |
11 | use Symfony\Component\Validator\ConstraintViolationList; | |
12 | use Symfony\Component\Validator\Validator\RecursiveValidator; | |
c2656f96 | 13 | use Wallabag\CoreBundle\Entity\Entry; |
f808b016 | 14 | use Wallabag\CoreBundle\Helper\ContentProxy; |
b22eb276 | 15 | use Wallabag\CoreBundle\Helper\RuleBasedIgnoreOriginProcessor; |
6bc6fb1f | 16 | use Wallabag\CoreBundle\Helper\RuleBasedTagger; |
f808b016 | 17 | use Wallabag\UserBundle\Entity\User; |
558d9aab | 18 | |
bd91bd5c | 19 | class ContentProxyTest extends TestCase |
558d9aab | 20 | { |
ac1509a6 | 21 | private $fetchingErrorMessage = 'wallabag can\'t retrieve contents for this article. Please <a href="http://doc.wallabag.org/en/user/errors_during_fetching.html#how-can-i-help-to-fix-that">troubleshoot this issue</a>.'; |
a2c1b94e | 22 | |
4d0ec0e7 JB |
/**
 * A malformed URL combined with an empty fetch result must leave the entry
 * holding the requested URL, the fetching error message as content, and no
 * domain name.
 */
public function testWithBadUrl()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $ignoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();

    // Nothing usable comes back from the fetcher.
    $fetchResult = [
        'html' => false,
        'title' => '',
        'url' => '',
        'headers' => [
            'content-type' => '',
        ],
        'language' => '',
    ];

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchResult);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $ignoreOriginProcessor,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://user@:80');

    $this->assertSame('http://user@:80', $article->getUrl());
    $this->assertEmpty($article->getTitle());
    $this->assertSame($this->fetchingErrorMessage, $article->getContent());
    $this->assertEmpty($article->getPreviewPicture());
    $this->assertEmpty($article->getMimetype());
    $this->assertEmpty($article->getLanguage());
    $this->assertSame(0.0, $article->getReadingTime());
    $this->assertNull($article->getDomainName());
}
61 | ||
558d9aab JB |
/**
 * When the fetcher returns no HTML and no metadata, the entry keeps the
 * requested URL, gets the fetching error message as content, and its domain
 * name is derived from the requested URL.
 */
public function testWithEmptyContent()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $ignoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();

    // Completely empty fetch result.
    $fetchResult = [
        'html' => false,
        'title' => '',
        'url' => '',
        'headers' => [
            'content-type' => '',
        ],
        'language' => '',
    ];

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchResult);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $ignoreOriginProcessor,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://0.0.0.0');

    $this->assertSame('http://0.0.0.0', $article->getUrl());
    $this->assertEmpty($article->getTitle());
    $this->assertSame($this->fetchingErrorMessage, $article->getContent());
    $this->assertEmpty($article->getPreviewPicture());
    $this->assertEmpty($article->getMimetype());
    $this->assertEmpty($article->getLanguage());
    $this->assertSame(0.0, $article->getReadingTime());
    $this->assertSame('0.0.0.0', $article->getDomainName());
}
100 | ||
/**
 * With no HTML but a title and a description available, the entry takes the
 * fetched title and its content is the fetching error message followed by
 * the short description.
 */
public function testWithEmptyContentButOG()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $ignoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();

    // No HTML, but a title and a description are present.
    $fetchResult = [
        'html' => false,
        'title' => 'my title',
        'url' => '',
        'headers' => [
            'content-type' => '',
        ],
        'language' => '',
        'status' => '',
        'description' => 'desc',
    ];

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchResult);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $ignoreOriginProcessor,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://domain.io');

    $expectedContent = $this->fetchingErrorMessage . '<p><i>But we found a short description: </i></p>desc';

    $this->assertSame('http://domain.io', $article->getUrl());
    $this->assertSame('my title', $article->getTitle());
    $this->assertSame($expectedContent, $article->getContent());
    $this->assertEmpty($article->getPreviewPicture());
    $this->assertEmpty($article->getLanguage());
    $this->assertEmpty($article->getHttpStatus());
    $this->assertEmpty($article->getMimetype());
    $this->assertSame(0.0, $article->getReadingTime());
    $this->assertSame('domain.io', $article->getDomainName());
}
142 | ||
/**
 * A complete fetch result (HTML, title, language, status, image, content
 * type) is copied onto the entry; URL and domain follow the fetched URL
 * rather than the requested one.
 */
public function testWithContent()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $ignoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
    $ignoreOriginProcessor->expects($this->once())
        ->method('process');

    $fetchResult = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'status' => '200',
        'description' => 'OG desc',
        'image' => 'http://3.3.3.3/cover.jpg',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchResult);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $ignoreOriginProcessor,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $article->getUrl());
    $this->assertSame('this is my title', $article->getTitle());
    $this->assertContains('content', $article->getContent());
    $this->assertSame('http://3.3.3.3/cover.jpg', $article->getPreviewPicture());
    $this->assertSame('text/html', $article->getMimetype());
    $this->assertSame('fr', $article->getLanguage());
    $this->assertSame('200', $article->getHttpStatus());
    $this->assertSame(4.0, $article->getReadingTime());
    $this->assertSame('1.1.1.1', $article->getDomainName());
}
f530f7f5 | 187 | |
3d71d403 JB |
/**
 * Same as a complete fetch, but with a null image and plain-text HTML: the
 * entry must end up without a preview picture.
 */
public function testWithContentAndNoOgImage()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $ignoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
    $ignoreOriginProcessor->expects($this->once())
        ->method('process');

    $fetchResult = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'status' => '200',
        'description' => 'OG desc',
        'image' => null,
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchResult);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $ignoreOriginProcessor,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $article->getUrl());
    $this->assertSame('this is my title', $article->getTitle());
    $this->assertContains('content', $article->getContent());
    $this->assertNull($article->getPreviewPicture());
    $this->assertSame('text/html', $article->getMimetype());
    $this->assertSame('fr', $article->getLanguage());
    $this->assertSame('200', $article->getHttpStatus());
    $this->assertSame(4.0, $article->getReadingTime());
    $this->assertSame('1.1.1.1', $article->getDomainName());
}
232 | ||
/**
 * With a null image in the fetch result but an <img> inside the HTML, the
 * preview picture is taken from the image found in the content.
 */
public function testWithContentAndContentImage()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $ignoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
    $ignoreOriginProcessor->expects($this->once())
        ->method('process');

    // The same markup is expected back, untouched, as the entry content.
    $markup = "<h1>Test</h1><p><img src='http://3.3.3.3/cover.jpg'/></p>";

    $fetchResult = [
        'html' => $markup,
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'headers' => [
            'content-type' => 'text/html',
        ],
        'language' => 'fr',
        'status' => '200',
        'image' => null,
    ];

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchResult);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $ignoreOriginProcessor,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $article->getUrl());
    $this->assertSame('this is my title', $article->getTitle());
    $this->assertSame($markup, $article->getContent());
    $this->assertSame('http://3.3.3.3/cover.jpg', $article->getPreviewPicture());
    $this->assertSame('text/html', $article->getMimetype());
    $this->assertSame('fr', $article->getLanguage());
    $this->assertSame('200', $article->getHttpStatus());
    $this->assertSame(0.0, $article->getReadingTime());
    $this->assertSame('1.1.1.1', $article->getDomainName());
}
276 | ||
/**
 * When both an image in the fetch result and an <img> in the HTML exist,
 * the image from the fetch result wins as preview picture.
 */
public function testWithContentImageAndOgImage()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $ignoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
    $ignoreOriginProcessor->expects($this->once())
        ->method('process');

    // The inline image must NOT be picked as preview picture.
    $markup = "<h1>Test</h1><p><img src='http://3.3.3.3/nevermind.jpg'/></p>";

    $fetchResult = [
        'html' => $markup,
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'headers' => [
            'content-type' => 'text/html',
        ],
        'language' => 'fr',
        'status' => '200',
        'image' => 'http://3.3.3.3/cover.jpg',
    ];

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchResult);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $ignoreOriginProcessor,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $article->getUrl());
    $this->assertSame('this is my title', $article->getTitle());
    $this->assertSame($markup, $article->getContent());
    $this->assertSame('http://3.3.3.3/cover.jpg', $article->getPreviewPicture());
    $this->assertSame('text/html', $article->getMimetype());
    $this->assertSame('fr', $article->getLanguage());
    $this->assertSame('200', $article->getHttpStatus());
    $this->assertSame(0.0, $article->getReadingTime());
    $this->assertSame('1.1.1.1', $article->getDomainName());
}
320 | ||
0d349ea6 JB |
/**
 * When the validator reports a violation for the fetched language, the
 * entry's language stays null while the rest of the content is stored.
 */
public function testWithContentAndBadLanguage()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $ignoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
    $ignoreOriginProcessor->expects($this->once())
        ->method('process');

    // The single validation call rejects the language value.
    $languageViolations = new ConstraintViolationList([
        new ConstraintViolation('oops', 'oops', [], 'oops', 'language', 'dontexist'),
    ]);

    $validatorMock = $this->getValidator(false);
    $validatorMock->expects($this->once())
        ->method('validate')
        ->willReturn($languageViolations);

    $fetchResult = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'dontexist',
        'status' => '200',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchResult);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $ignoreOriginProcessor,
        $validatorMock,
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $article->getUrl());
    $this->assertSame('this is my title', $article->getTitle());
    $this->assertContains('content', $article->getContent());
    $this->assertSame('text/html', $article->getMimetype());
    $this->assertNull($article->getLanguage());
    $this->assertSame('200', $article->getHttpStatus());
    $this->assertSame(4.0, $article->getReadingTime());
    $this->assertSame('1.1.1.1', $article->getDomainName());
}
367 | ||
/**
 * When the second validation call reports a violation for the image URL,
 * the entry ends up without a preview picture while the language from the
 * (valid) first call is kept.
 */
public function testWithContentAndBadOgImage()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $ignoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
    $ignoreOriginProcessor->expects($this->once())
        ->method('process');

    // First validation call: no violation. Second call: the url is rejected.
    $noViolations = new ConstraintViolationList();
    $urlViolations = new ConstraintViolationList([
        new ConstraintViolation('oops', 'oops', [], 'oops', 'url', 'https://'),
    ]);

    $validatorMock = $this->getValidator(false);
    $validatorMock->expects($this->exactly(2))
        ->method('validate')
        ->willReturnOnConsecutiveCalls($noViolations, $urlViolations);

    $fetchResult = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'headers' => [
            'content-type' => 'text/html',
        ],
        'language' => 'fr',
        'status' => '200',
        'description' => 'OG desc',
        'image' => 'https://',
    ];

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchResult);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $ignoreOriginProcessor,
        $validatorMock,
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $article->getUrl());
    $this->assertSame('this is my title', $article->getTitle());
    $this->assertContains('content', $article->getContent());
    $this->assertNull($article->getPreviewPicture());
    $this->assertSame('text/html', $article->getMimetype());
    $this->assertSame('fr', $article->getLanguage());
    $this->assertSame('200', $article->getHttpStatus());
    $this->assertSame(4.0, $article->getReadingTime());
    $this->assertSame('1.1.1.1', $article->getDomainName());
}
420 | ||
4d0ec0e7 JB |
/**
 * Content handed directly to updateEntry() is applied to the entry,
 * including a timestamp date, the author list and the response headers.
 */
public function testWithForcedContent()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $ignoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
    $ignoreOriginProcessor->expects($this->once())
        ->method('process');

    // NOTE(review): the trailing `true` presumably enables header storage
    // (headers are asserted below) - confirm against ContentProxy's constructor.
    $contentProxy = new ContentProxy(
        new Graby(),
        $taggerMock,
        $ignoreOriginProcessor,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage,
        true
    );

    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'date' => '1395635872',
        'authors' => ['Jeremy', 'Nico', 'Thomas'],
        'headers' => [
            'cache-control' => 'no-cache',
            'content-type' => 'text/html',
        ],
    ];

    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://0.0.0.0', $forcedContent);

    $this->assertSame('http://1.1.1.1', $article->getUrl());
    $this->assertSame('this is my title', $article->getTitle());
    $this->assertContains('content', $article->getContent());
    $this->assertSame('text/html', $article->getMimetype());
    $this->assertSame('fr', $article->getLanguage());
    $this->assertSame(4.0, $article->getReadingTime());
    $this->assertSame('1.1.1.1', $article->getDomainName());
    $this->assertSame('24/03/2014', $article->getPublishedAt()->format('d/m/Y'));
    $this->assertContains('Jeremy', $article->getPublishedBy());
    $this->assertContains('Nico', $article->getPublishedBy());
    $this->assertContains('Thomas', $article->getPublishedBy());
    $this->assertNotNull($article->getHeaders(), 'Headers are stored, so value is not null');
    $this->assertContains('no-cache', $article->getHeaders());
}
464 | ||
/**
 * A forced content whose date is an ISO-like datetime string
 * ('2016-09-08T11:55:58+0200') yields the matching publication date.
 */
public function testWithForcedContentAndDatetime()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $ignoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();

    // A real logger backed by an in-memory handler replaces the usual test logger.
    $recordingHandler = new TestHandler();
    $recordingLogger = new Logger('test', [$recordingHandler]);

    $contentProxy = new ContentProxy(
        new Graby(),
        $taggerMock,
        $ignoreOriginProcessor,
        $this->getValidator(),
        $recordingLogger,
        $this->fetchingErrorMessage
    );

    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'date' => '2016-09-08T11:55:58+0200',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://1.1.1.1', $forcedContent);

    $this->assertSame('http://1.1.1.1', $article->getUrl());
    $this->assertSame('this is my title', $article->getTitle());
    $this->assertContains('content', $article->getContent());
    $this->assertSame('text/html', $article->getMimetype());
    $this->assertSame('fr', $article->getLanguage());
    $this->assertSame(4.0, $article->getReadingTime());
    $this->assertSame('1.1.1.1', $article->getDomainName());
    $this->assertSame('08/09/2016', $article->getPublishedAt()->format('d/m/Y'));
}
502 | ||
/**
 * A forced content with an unparsable date ('01 02 2012') leaves the
 * publication date null, and the first of the three log records mentions
 * the date error.
 */
public function testWithForcedContentAndBadDate()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $ignoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();

    // Capture log records in memory so they can be inspected afterwards.
    $recordingHandler = new TestHandler();
    $recordingLogger = new Logger('foo');
    $recordingLogger->pushHandler($recordingHandler);

    $contentProxy = new ContentProxy(
        new Graby(),
        $taggerMock,
        $ignoreOriginProcessor,
        $this->getValidator(),
        $recordingLogger,
        $this->fetchingErrorMessage
    );

    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'date' => '01 02 2012',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://1.1.1.1', $forcedContent);

    $this->assertSame('http://1.1.1.1', $article->getUrl());
    $this->assertSame('this is my title', $article->getTitle());
    $this->assertContains('content', $article->getContent());
    $this->assertSame('text/html', $article->getMimetype());
    $this->assertSame('fr', $article->getLanguage());
    $this->assertSame(4.0, $article->getReadingTime());
    $this->assertSame('1.1.1.1', $article->getDomainName());
    $this->assertNull($article->getPublishedAt());

    $logRecords = $recordingHandler->getRecords();

    $this->assertCount(3, $logRecords);
    $this->assertContains('Error while defining date', $logRecords[0]['message']);
}
546 | ||
/**
 * An exception thrown by the tagger must not escape updateEntry(); the
 * entry simply ends up with no tags.
 */
public function testTaggerThrowException()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag')
        ->willThrowException(new \Exception());

    $ignoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();

    $contentProxy = new ContentProxy(
        new Graby(),
        $taggerMock,
        $ignoreOriginProcessor,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://1.1.1.1', $forcedContent);

    $this->assertCount(0, $article->getTags());
}
574 | ||
74a75f7d JB |
/**
 * Data provider for testWithCrazyHtmlContent().
 *
 * Each dataset is an HTML snippet embedding a script, paired with a string
 * that the test asserts is absent from the stored entry content.
 *
 * Declared static so PHPUnit can invoke it without instantiating the test
 * case; non-static data providers are deprecated as of PHPUnit 10, and
 * static ones are supported by earlier versions as well.
 *
 * @return array dataset name => [html, string expected to be absent]
 */
public static function dataForCrazyHtml()
{
    return [
        'script and comment' => [
            '<strong>Script inside:</strong> <!--[if gte IE 4]><script>alert(\'lol\');</script><![endif]--><br />',
            'lol',
        ],
        'script' => [
            '<strong>Script inside:</strong><script>alert(\'lol\');</script>',
            'script',
        ],
    ];
}
588 | ||
/**
 * Script-bearing HTML handed to updateEntry() must not leave the given
 * string in the stored content, while the other fields are applied as-is.
 *
 * @dataProvider dataForCrazyHtml
 *
 * @param string $html          HTML passed as forced content
 * @param string $escapedString String that must not appear in the entry content
 */
public function testWithCrazyHtmlContent($html, $escapedString)
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $ignoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();

    $contentProxy = new ContentProxy(
        new Graby(),
        $taggerMock,
        $ignoreOriginProcessor,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $forcedContent = [
        'html' => $html,
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'status' => '200',
        'description' => 'OG desc',
        'image' => 'http://3.3.3.3/cover.jpg',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://1.1.1.1', $forcedContent);

    $this->assertSame('http://1.1.1.1', $article->getUrl());
    $this->assertSame('this is my title', $article->getTitle());
    $this->assertNotContains($escapedString, $article->getContent());
    $this->assertSame('http://3.3.3.3/cover.jpg', $article->getPreviewPicture());
    $this->assertSame('text/html', $article->getMimetype());
    $this->assertSame('fr', $article->getLanguage());
    $this->assertSame('200', $article->getHttpStatus());
    $this->assertSame('1.1.1.1', $article->getDomainName());
}
629 | ||
d0ec2ddd JB |
/**
 * When the fetched resource is itself an image (content type image/jpeg),
 * the entry's preview picture is the image URL and the content references it.
 */
public function testWithImageAsContent()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $ignoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();

    $fetchResult = [
        'html' => '<p><img src="http://1.1.1.1/image.jpg" /></p>',
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1/image.jpg',
        'status' => '200',
        'headers' => [
            'content-type' => 'image/jpeg',
        ],
    ];

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchResult);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $ignoreOriginProcessor,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1/image.jpg', $article->getUrl());
    $this->assertSame('this is my title', $article->getTitle());
    $this->assertContains('http://1.1.1.1/image.jpg', $article->getContent());
    $this->assertSame('http://1.1.1.1/image.jpg', $article->getPreviewPicture());
    $this->assertSame('image/jpeg', $article->getMimetype());
    $this->assertSame('200', $article->getHttpStatus());
    $this->assertSame('1.1.1.1', $article->getDomainName());
}
667 | ||
c01d9532 T |
/**
 * A title made only of valid UTF-8 sequences must be stored unchanged.
 *
 * Handy tools: https://www.online-toolz.com/tools/text-hex-convertor.php
 * converts UTF-8 text <=> hex, and http://graphemica.com describes the
 * individual characters.
 */
public function testWebsiteWithValidUTF8Title_doNothing()
{
    // '😻ℤz' as UTF-8 bytes, written in hexadecimal:
    //   U+1F63B => F09F98BB, U+2124 => E284A4, U+007A => 7A
    $titleAsHex = 'F09F98BB' . 'E284A4' . '7A';
    $fetchedTitle = $this->hexToStr($titleAsHex);

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $ignoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();

    $fetchResult = [
        'html' => false,
        'title' => $fetchedTitle,
        'url' => '',
        'headers' => [
            'content-type' => 'text/html',
        ],
        'language' => '',
    ];

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchResult);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $ignoreOriginProcessor,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://0.0.0.0');

    // The stored title must be byte-for-byte the title that was fetched.
    $this->assertSame($titleAsHex, $this->strToHex($article->getTitle()));
}
706 | ||
707 | public function testWebsiteWithInvalidUTF8Title_removeInvalidCharacter() | |
708 | { | |
709 | // See http://graphemica.com for more info about the characters | |
710 | // 'a€b' (61;80;62) in hexadecimal and WINDOWS-1252 - but 80 is a invalid UTF-8 character. | |
711 | // The correct UTF-8 € character (U+20AC) is E282AC | |
712 | $actualTitle = $this->hexToStr('61' . '80' . '62'); | |
713 | ||
714 | $tagger = $this->getTaggerMock(); | |
715 | $tagger->expects($this->once()) | |
716 | ->method('tag'); | |
717 | ||
b22eb276 KD |
718 | $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock(); |
719 | ||
c01d9532 T |
720 | $graby = $this->getMockBuilder('Graby\Graby') |
721 | ->setMethods(['fetchContent']) | |
722 | ->disableOriginalConstructor() | |
723 | ->getMock(); | |
724 | ||
725 | $graby->expects($this->any()) | |
726 | ->method('fetchContent') | |
727 | ->willReturn([ | |
728 | 'html' => false, | |
729 | 'title' => $actualTitle, | |
730 | 'url' => '', | |
36b0d52e JB |
731 | 'headers' => [ |
732 | 'content-type' => 'text/html', | |
733 | ], | |
c01d9532 T |
734 | 'language' => '', |
735 | ]); | |
736 | ||
b22eb276 | 737 | $proxy = new ContentProxy($graby, $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage); |
c01d9532 T |
738 | $entry = new Entry(new User()); |
739 | $proxy->updateEntry($entry, 'http://0.0.0.0'); | |
740 | ||
741 | // 'ab' (61;62) because all invalid UTF-8 character (like 80) are removed | |
742 | $expectedTitle = '61' . '62'; | |
743 | $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle())); | |
744 | } | |
745 | ||
746 | public function testPdfWithUTF16BETitle_convertToUTF8() | |
747 | { | |
748 | // See http://graphemica.com for more info about the characters | |
749 | // '😻' (U+1F63B;D83DDE3B) in hexadecimal and as UTF16BE | |
750 | $actualTitle = $this->hexToStr('D83DDE3B'); | |
751 | ||
752 | $tagger = $this->getTaggerMock(); | |
753 | $tagger->expects($this->once()) | |
754 | ->method('tag'); | |
755 | ||
b22eb276 KD |
756 | $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock(); |
757 | ||
c01d9532 T |
758 | $graby = $this->getMockBuilder('Graby\Graby') |
759 | ->setMethods(['fetchContent']) | |
760 | ->disableOriginalConstructor() | |
761 | ->getMock(); | |
762 | ||
763 | $graby->expects($this->any()) | |
764 | ->method('fetchContent') | |
765 | ->willReturn([ | |
766 | 'html' => false, | |
767 | 'title' => $actualTitle, | |
768 | 'url' => '', | |
36b0d52e JB |
769 | 'headers' => [ |
770 | 'content-type' => 'application/pdf', | |
771 | ], | |
c01d9532 T |
772 | 'language' => '', |
773 | ]); | |
774 | ||
b22eb276 | 775 | $proxy = new ContentProxy($graby, $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage); |
c01d9532 T |
776 | $entry = new Entry(new User()); |
777 | $proxy->updateEntry($entry, 'http://0.0.0.0'); | |
778 | ||
779 | // '😻' (U+1F63B or F09F98BB) in hexadecimal and UTF-8 | |
780 | $expectedTitle = 'F09F98BB'; | |
781 | $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle())); | |
782 | } | |
783 | ||
784 | public function testPdfWithUTF8Title_doNothing() | |
785 | { | |
786 | // See http://graphemica.com for more info about the characters | |
787 | // '😻' (U+1F63B;D83DDE3B) in hexadecimal and as UTF8 | |
788 | $actualTitle = $this->hexToStr('F09F98BB'); | |
789 | ||
790 | $tagger = $this->getTaggerMock(); | |
791 | $tagger->expects($this->once()) | |
792 | ->method('tag'); | |
793 | ||
b22eb276 KD |
794 | $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock(); |
795 | ||
c01d9532 T |
796 | $graby = $this->getMockBuilder('Graby\Graby') |
797 | ->setMethods(['fetchContent']) | |
798 | ->disableOriginalConstructor() | |
799 | ->getMock(); | |
800 | ||
801 | $graby->expects($this->any()) | |
802 | ->method('fetchContent') | |
803 | ->willReturn([ | |
804 | 'html' => false, | |
805 | 'title' => $actualTitle, | |
806 | 'url' => '', | |
36b0d52e JB |
807 | 'headers' => [ |
808 | 'content-type' => 'application/pdf', | |
809 | ], | |
c01d9532 T |
810 | 'language' => '', |
811 | ]); | |
812 | ||
b22eb276 | 813 | $proxy = new ContentProxy($graby, $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage); |
c01d9532 T |
814 | $entry = new Entry(new User()); |
815 | $proxy->updateEntry($entry, 'http://0.0.0.0'); | |
816 | ||
817 | // '😻' (U+1F63B or F09F98BB) in hexadecimal and UTF-8 | |
818 | $expectedTitle = 'F09F98BB'; | |
819 | $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle())); | |
820 | } | |
821 | ||
822 | public function testPdfWithWINDOWS1252Title_convertToUTF8() | |
823 | { | |
824 | // See http://graphemica.com for more info about the characters | |
825 | // '€' (80) in hexadecimal and WINDOWS-1252 | |
826 | $actualTitle = $this->hexToStr('80'); | |
827 | ||
828 | $tagger = $this->getTaggerMock(); | |
829 | $tagger->expects($this->once()) | |
830 | ->method('tag'); | |
831 | ||
b22eb276 KD |
832 | $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock(); |
833 | ||
c01d9532 T |
834 | $graby = $this->getMockBuilder('Graby\Graby') |
835 | ->setMethods(['fetchContent']) | |
836 | ->disableOriginalConstructor() | |
837 | ->getMock(); | |
838 | ||
839 | $graby->expects($this->any()) | |
840 | ->method('fetchContent') | |
841 | ->willReturn([ | |
842 | 'html' => false, | |
843 | 'title' => $actualTitle, | |
844 | 'url' => '', | |
36b0d52e JB |
845 | 'headers' => [ |
846 | 'content-type' => 'application/pdf', | |
847 | ], | |
c01d9532 T |
848 | 'language' => '', |
849 | ]); | |
850 | ||
b22eb276 | 851 | $proxy = new ContentProxy($graby, $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage); |
c01d9532 T |
852 | $entry = new Entry(new User()); |
853 | $proxy->updateEntry($entry, 'http://0.0.0.0'); | |
854 | ||
855 | // '€' (U+20AC or E282AC) in hexadecimal and UTF-8 | |
856 | $expectedTitle = 'E282AC'; | |
857 | $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle())); | |
858 | } | |
859 | ||
860 | public function testPdfWithInvalidCharacterInTitle_removeInvalidCharacter() | |
861 | { | |
862 | // See http://graphemica.com for more info about the characters | |
863 | // '😻ℤ�z' (U+1F63B or F09F98BB; U+2124 or E284A4; invalid character 81; U+007A or 7A) in hexadecimal and UTF-8 | |
864 | // 0x81 is not a valid character for UTF16, UTF8 and WINDOWS-1252 | |
865 | $actualTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '81' . '7A'); | |
866 | ||
867 | $tagger = $this->getTaggerMock(); | |
868 | $tagger->expects($this->once()) | |
869 | ->method('tag'); | |
870 | ||
b22eb276 KD |
871 | $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock(); |
872 | ||
c01d9532 T |
873 | $graby = $this->getMockBuilder('Graby\Graby') |
874 | ->setMethods(['fetchContent']) | |
875 | ->disableOriginalConstructor() | |
876 | ->getMock(); | |
877 | ||
878 | $graby->expects($this->any()) | |
879 | ->method('fetchContent') | |
880 | ->willReturn([ | |
881 | 'html' => false, | |
882 | 'title' => $actualTitle, | |
883 | 'url' => '', | |
36b0d52e JB |
884 | 'headers' => [ |
885 | 'content-type' => 'application/pdf', | |
886 | ], | |
c01d9532 T |
887 | 'language' => '', |
888 | ]); | |
889 | ||
b22eb276 | 890 | $proxy = new ContentProxy($graby, $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage); |
c01d9532 T |
891 | $entry = new Entry(new User()); |
892 | $proxy->updateEntry($entry, 'http://0.0.0.0'); | |
893 | ||
894 | // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8 | |
895 | // the 0x81 (represented by �) is invalid for UTF16, UTF8 and WINDOWS-1252 and is removed | |
896 | $expectedTitle = 'F09F98BB' . 'E284A4' . '7A'; | |
897 | $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle())); | |
898 | } | |
899 | ||
e07fadea KD |
900 | /** |
901 | * Data provider for testWithChangedUrl. | |
902 | * | |
903 | * Arrays contain the following values: | |
904 | * $entry_url | |
905 | * $origin_url | |
906 | * $content_url | |
907 | * $expected_entry_url | |
908 | * $expected_origin_url | |
909 | * $expected_domain | |
b22eb276 | 910 | * $processor_result |
e07fadea KD |
911 | */ |
912 | public function dataForChangedUrl() | |
913 | { | |
914 | return [ | |
915 | 'normal' => [ | |
916 | 'http://0.0.0.0', | |
917 | null, | |
918 | 'http://1.1.1.1', | |
919 | 'http://1.1.1.1', | |
920 | 'http://0.0.0.0', | |
921 | '1.1.1.1', | |
b22eb276 | 922 | false, |
e07fadea KD |
923 | ], |
924 | 'origin already set' => [ | |
925 | 'http://0.0.0.0', | |
926 | 'http://hello', | |
927 | 'http://1.1.1.1', | |
928 | 'http://1.1.1.1', | |
929 | 'http://hello', | |
930 | '1.1.1.1', | |
b22eb276 | 931 | false, |
e07fadea KD |
932 | ], |
933 | 'trailing slash' => [ | |
934 | 'https://example.com/hello-world', | |
935 | null, | |
936 | 'https://example.com/hello-world/', | |
937 | 'https://example.com/hello-world/', | |
938 | null, | |
939 | 'example.com', | |
b22eb276 | 940 | false, |
e07fadea | 941 | ], |
e07fadea KD |
942 | 'query string in fetched content' => [ |
943 | 'https://example.org/hello', | |
944 | null, | |
945 | 'https://example.org/hello?world=1', | |
60599679 | 946 | 'https://example.org/hello?world=1', |
e07fadea | 947 | 'https://example.org/hello', |
e07fadea | 948 | 'example.org', |
b22eb276 | 949 | false, |
e07fadea KD |
950 | ], |
951 | 'fragment in fetched content' => [ | |
952 | 'https://example.org/hello', | |
953 | null, | |
954 | 'https://example.org/hello#world', | |
955 | 'https://example.org/hello', | |
956 | null, | |
957 | 'example.org', | |
b22eb276 | 958 | false, |
e07fadea | 959 | ], |
fc040c74 KD |
960 | 'fragment and query string in fetched content' => [ |
961 | 'https://example.org/hello', | |
962 | null, | |
963 | 'https://example.org/hello?foo#world', | |
60599679 | 964 | 'https://example.org/hello?foo#world', |
fc040c74 | 965 | 'https://example.org/hello', |
fc040c74 | 966 | 'example.org', |
b22eb276 | 967 | false, |
b49c87ac KD |
968 | ], |
969 | 'different path and query string in fetch content' => [ | |
970 | 'https://example.org/hello', | |
971 | null, | |
972 | 'https://example.org/world?foo', | |
973 | 'https://example.org/world?foo', | |
974 | 'https://example.org/hello', | |
975 | 'example.org', | |
b22eb276 | 976 | false, |
b49c87ac KD |
977 | ], |
978 | 'feedproxy ignore list test' => [ | |
979 | 'http://feedproxy.google.com/~r/Wallabag/~3/helloworld', | |
980 | null, | |
981 | 'https://example.org/hello-wallabag', | |
982 | 'https://example.org/hello-wallabag', | |
983 | null, | |
984 | 'example.org', | |
b22eb276 | 985 | true, |
b49c87ac KD |
986 | ], |
987 | 'feedproxy ignore list test with origin url already set' => [ | |
988 | 'http://feedproxy.google.com/~r/Wallabag/~3/helloworld', | |
989 | 'https://example.org/this-is-source', | |
990 | 'https://example.org/hello-wallabag', | |
991 | 'https://example.org/hello-wallabag', | |
992 | 'https://example.org/this-is-source', | |
993 | 'example.org', | |
b22eb276 | 994 | true, |
b49c87ac KD |
995 | ], |
996 | 'lemonde ignore pattern test' => [ | |
997 | 'http://www.lemonde.fr/tiny/url', | |
998 | null, | |
999 | 'http://example.com/hello-world', | |
1000 | 'http://example.com/hello-world', | |
1001 | null, | |
1002 | 'example.com', | |
b22eb276 | 1003 | true, |
b49c87ac | 1004 | ], |
e07fadea KD |
1005 | ]; |
1006 | } | |
1007 | ||
1008 | /** | |
1009 | * @dataProvider dataForChangedUrl | |
1010 | */ | |
b22eb276 | 1011 | public function testWithChangedUrl($entry_url, $origin_url, $content_url, $expected_entry_url, $expected_origin_url, $expected_domain, $processor_result) |
e07fadea KD |
1012 | { |
1013 | $tagger = $this->getTaggerMock(); | |
1014 | $tagger->expects($this->once()) | |
1015 | ->method('tag'); | |
1016 | ||
b22eb276 KD |
1017 | $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock(); |
1018 | $ruleBasedIgnoreOriginProcessor->expects($this->once()) | |
1019 | ->method('process') | |
1020 | ->willReturn($processor_result); | |
1021 | ||
1022 | $proxy = new ContentProxy((new Graby()), $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage, true); | |
e07fadea KD |
1023 | $entry = new Entry(new User()); |
1024 | $entry->setOriginUrl($origin_url); | |
1025 | $proxy->updateEntry( | |
1026 | $entry, | |
1027 | $entry_url, | |
1028 | [ | |
1029 | 'html' => false, | |
1030 | 'title' => '', | |
1031 | 'url' => $content_url, | |
36b0d52e JB |
1032 | 'headers' => [ |
1033 | 'content-type' => '', | |
1034 | ], | |
e07fadea KD |
1035 | 'language' => '', |
1036 | ], | |
1037 | true | |
1038 | ); | |
1039 | ||
1040 | $this->assertSame($expected_entry_url, $entry->getUrl()); | |
1041 | $this->assertSame($expected_domain, $entry->getDomainName()); | |
1042 | $this->assertSame($expected_origin_url, $entry->getOriginUrl()); | |
1043 | } | |
1044 | ||
c01d9532 | 1045 | /** |
28cc645b T |
1046 | * https://stackoverflow.com/a/18506801. |
1047 | * | |
c01d9532 | 1048 | * @param $string |
28cc645b | 1049 | * |
c01d9532 T |
1050 | * @return string |
1051 | */ | |
28cc645b T |
1052 | private function strToHex($string) |
1053 | { | |
c01d9532 | 1054 | $hex = ''; |
28cc645b T |
1055 | for ($i = 0; $i < \strlen($string); ++$i) { |
1056 | $ord = \ord($string[$i]); | |
c01d9532 | 1057 | $hexCode = dechex($ord); |
28cc645b | 1058 | $hex .= substr('0' . $hexCode, -2); |
c01d9532 | 1059 | } |
28cc645b T |
1060 | |
1061 | return strtoupper($hex); | |
c01d9532 T |
1062 | } |
1063 | ||
1064 | /** | |
36b0d52e JB |
1065 | * Convert hex to string. |
1066 | * | |
1067 | * @see https://stackoverflow.com/a/18506801 | |
28cc645b | 1068 | * |
c01d9532 | 1069 | * @param $hex |
28cc645b | 1070 | * |
c01d9532 T |
1071 | * @return string |
1072 | */ | |
28cc645b T |
1073 | private function hexToStr($hex) |
1074 | { | |
1075 | $string = ''; | |
1076 | for ($i = 0; $i < \strlen($hex) - 1; $i += 2) { | |
1077 | $string .= \chr(hexdec($hex[$i] . $hex[$i + 1])); | |
c01d9532 | 1078 | } |
28cc645b | 1079 | |
c01d9532 T |
1080 | return $string; |
1081 | } | |
1082 | ||
f530f7f5 KG |
1083 | private function getTaggerMock() |
1084 | { | |
6bc6fb1f | 1085 | return $this->getMockBuilder(RuleBasedTagger::class) |
4094ea47 | 1086 | ->setMethods(['tag']) |
f530f7f5 KG |
1087 | ->disableOriginalConstructor() |
1088 | ->getMock(); | |
1089 | } | |
1c9cd2a7 | 1090 | |
b22eb276 KD |
1091 | private function getRuleBasedIgnoreOriginProcessorMock() |
1092 | { | |
1093 | return $this->getMockBuilder(RuleBasedIgnoreOriginProcessor::class) | |
1094 | ->setMethods(['process']) | |
1095 | ->disableOriginalConstructor() | |
1096 | ->getMock(); | |
1097 | } | |
1098 | ||
0c5bcd82 | 1099 | private function getLogger() |
1c9cd2a7 | 1100 | { |
0c5bcd82 | 1101 | return new NullLogger(); |
1c9cd2a7 | 1102 | } |
0d349ea6 | 1103 | |
5661e8d4 | 1104 | private function getValidator($withDefaultMock = true) |
0d349ea6 | 1105 | { |
5661e8d4 | 1106 | $mock = $this->getMockBuilder(RecursiveValidator::class) |
0d349ea6 JB |
1107 | ->setMethods(['validate']) |
1108 | ->disableOriginalConstructor() | |
1109 | ->getMock(); | |
5661e8d4 JB |
1110 | |
1111 | if ($withDefaultMock) { | |
1112 | $mock->expects($this->any()) | |
1113 | ->method('validate') | |
1114 | ->willReturn(new ConstraintViolationList()); | |
1115 | } | |
1116 | ||
1117 | return $mock; | |
0d349ea6 | 1118 | } |
558d9aab | 1119 | } |