git.immae.eu Git - github/wallabag/wallabag.git/blob - tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
Use graby ContentExtractor to clean html
[github/wallabag/wallabag.git] / tests / Wallabag / CoreBundle / Helper / ContentProxyTest.php
1 <?php
2
3 namespace Tests\Wallabag\CoreBundle\Helper;
4
5 use Psr\Log\NullLogger;
6 use Wallabag\CoreBundle\Helper\ContentProxy;
7 use Wallabag\CoreBundle\Entity\Entry;
8 use Wallabag\CoreBundle\Entity\Tag;
9 use Wallabag\UserBundle\Entity\User;
10 use Wallabag\CoreBundle\Helper\RuleBasedTagger;
11 use Graby\Graby;
12
/**
 * Tests for ContentProxy::updateEntry().
 *
 * Each test wires a ContentProxy from a Graby instance (stubbed, fully mocked
 * or real), a mocked RuleBasedTagger and a NullLogger, then inspects the Entry
 * returned by updateEntry().
 */
class ContentProxyTest extends \PHPUnit_Framework_TestCase
{
    /** Error message handed to ContentProxy; expected as entry content when nothing could be fetched. */
    private $fetchingErrorMessage = 'wallabag can\'t retrieve contents for this article. Please <a href="http://doc.wallabag.org/en/user/errors_during_fetching.html#how-can-i-help-to-fix-that">troubleshoot this issue</a>.';

    /**
     * A malformed URL with an empty fetch result keeps the given URL, stores
     * the fetching error message as content and leaves the other fields empty.
     */
    public function testWithBadUrl()
    {
        $tagger = $this->getTaggerMock();
        $tagger->expects($this->once())
            ->method('tag');

        $graby = $this->getGrabyMock([
            'html' => false,
            'title' => '',
            'url' => '',
            'content_type' => '',
            'language' => '',
        ]);

        $proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage);
        $entry = $proxy->updateEntry(new Entry(new User()), 'http://user@:80');

        $this->assertEquals('http://user@:80', $entry->getUrl());
        $this->assertEmpty($entry->getTitle());
        $this->assertEquals($this->fetchingErrorMessage, $entry->getContent());
        $this->assertEmpty($entry->getPreviewPicture());
        $this->assertEmpty($entry->getMimetype());
        $this->assertEmpty($entry->getLanguage());
        $this->assertEquals(0.0, $entry->getReadingTime());
        $this->assertEquals(false, $entry->getDomainName());
    }

    /**
     * A valid URL with an empty fetch result stores the fetching error message
     * as content and still exposes the domain name of the given URL.
     */
    public function testWithEmptyContent()
    {
        $tagger = $this->getTaggerMock();
        $tagger->expects($this->once())
            ->method('tag');

        $graby = $this->getGrabyMock([
            'html' => false,
            'title' => '',
            'url' => '',
            'content_type' => '',
            'language' => '',
        ]);

        $proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage);
        $entry = $proxy->updateEntry(new Entry(new User()), 'http://0.0.0.0');

        $this->assertEquals('http://0.0.0.0', $entry->getUrl());
        $this->assertEmpty($entry->getTitle());
        $this->assertEquals($this->fetchingErrorMessage, $entry->getContent());
        $this->assertEmpty($entry->getPreviewPicture());
        $this->assertEmpty($entry->getMimetype());
        $this->assertEmpty($entry->getLanguage());
        $this->assertEquals(0.0, $entry->getReadingTime());
        $this->assertEquals('0.0.0.0', $entry->getDomainName());
    }

    /**
     * With no html but Open Graph data, the OG title becomes the entry title
     * and the OG description is appended to the fetching error message.
     */
    public function testWithEmptyContentButOG()
    {
        $tagger = $this->getTaggerMock();
        $tagger->expects($this->once())
            ->method('tag');

        $graby = $this->getGrabyMock([
            'html' => false,
            'title' => '',
            'url' => '',
            'content_type' => '',
            'language' => '',
            'status' => '',
            'open_graph' => [
                'og_title' => 'my title',
                'og_description' => 'desc',
            ],
        ]);

        $proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage);
        $entry = $proxy->updateEntry(new Entry(new User()), 'http://domain.io');

        $this->assertEquals('http://domain.io', $entry->getUrl());
        $this->assertEquals('my title', $entry->getTitle());
        $this->assertEquals($this->fetchingErrorMessage.'<p><i>But we found a short description: </i></p>desc', $entry->getContent());
        $this->assertEmpty($entry->getPreviewPicture());
        $this->assertEmpty($entry->getLanguage());
        $this->assertEmpty($entry->getHttpStatus());
        $this->assertEmpty($entry->getMimetype());
        $this->assertEquals(0.0, $entry->getReadingTime());
        $this->assertEquals('domain.io', $entry->getDomainName());
    }

    /**
     * A complete fetch result populates the entry: the fetched URL replaces the
     * requested one and the OG image is used as preview picture.
     */
    public function testWithContent()
    {
        $tagger = $this->getTaggerMock();
        $tagger->expects($this->once())
            ->method('tag');

        $graby = $this->getGrabyMock([
            'html' => str_repeat('this is my content', 325),
            'title' => 'this is my title',
            'url' => 'http://1.1.1.1',
            'content_type' => 'text/html',
            'language' => 'fr',
            'status' => '200',
            'open_graph' => [
                'og_title' => 'my OG title',
                'og_description' => 'OG desc',
                'og_image' => 'http://3.3.3.3/cover.jpg',
            ],
        ]);

        $proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage);
        $entry = $proxy->updateEntry(new Entry(new User()), 'http://0.0.0.0');

        $this->assertEquals('http://1.1.1.1', $entry->getUrl());
        $this->assertEquals('this is my title', $entry->getTitle());
        $this->assertContains('this is my content', $entry->getContent());
        $this->assertEquals('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
        $this->assertEquals('text/html', $entry->getMimetype());
        $this->assertEquals('fr', $entry->getLanguage());
        $this->assertEquals('200', $entry->getHttpStatus());
        $this->assertEquals(4.0, $entry->getReadingTime());
        $this->assertEquals('1.1.1.1', $entry->getDomainName());
    }

    /**
     * Same as testWithContent() but with og_image set to false: the preview
     * picture must stay null.
     */
    public function testWithContentAndNoOgImage()
    {
        $tagger = $this->getTaggerMock();
        $tagger->expects($this->once())
            ->method('tag');

        $graby = $this->getGrabyMock([
            'html' => str_repeat('this is my content', 325),
            'title' => 'this is my title',
            'url' => 'http://1.1.1.1',
            'content_type' => 'text/html',
            'language' => 'fr',
            'status' => '200',
            'open_graph' => [
                'og_title' => 'my OG title',
                'og_description' => 'OG desc',
                'og_image' => false,
            ],
        ]);

        $proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage);
        $entry = $proxy->updateEntry(new Entry(new User()), 'http://0.0.0.0');

        $this->assertEquals('http://1.1.1.1', $entry->getUrl());
        $this->assertEquals('this is my title', $entry->getTitle());
        $this->assertContains('this is my content', $entry->getContent());
        $this->assertNull($entry->getPreviewPicture());
        $this->assertEquals('text/html', $entry->getMimetype());
        $this->assertEquals('fr', $entry->getLanguage());
        $this->assertEquals('200', $entry->getHttpStatus());
        $this->assertEquals(4.0, $entry->getReadingTime());
        $this->assertEquals('1.1.1.1', $entry->getDomainName());
    }

    /**
     * Content passed directly to updateEntry() is applied to the entry; no
     * fetchContent() stub is configured on the Graby mock here.
     */
    public function testWithForcedContent()
    {
        $tagger = $this->getTaggerMock();
        $tagger->expects($this->once())
            ->method('tag');

        // Plain mock on purpose: unlike getGrabyMock(), the original constructor is not disabled.
        $graby = $this->getMockBuilder(Graby::class)->getMock();

        $proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage);
        $entry = $proxy->updateEntry(new Entry(new User()), 'http://0.0.0.0', [
            'html' => str_repeat('this is my content', 325),
            'title' => 'this is my title',
            'url' => 'http://1.1.1.1',
            'content_type' => 'text/html',
            'language' => 'fr',
        ]);

        $this->assertEquals('http://1.1.1.1', $entry->getUrl());
        $this->assertEquals('this is my title', $entry->getTitle());
        $this->assertContains('this is my content', $entry->getContent());
        $this->assertEquals('text/html', $entry->getMimetype());
        $this->assertEquals('fr', $entry->getLanguage());
        $this->assertEquals(4.0, $entry->getReadingTime());
        $this->assertEquals('1.1.1.1', $entry->getDomainName());
    }

    /**
     * An exception thrown by the tagger must not escape updateEntry(); the
     * returned entry simply has no tags.
     */
    public function testTaggerThrowException()
    {
        $graby = $this->getMockBuilder(Graby::class)
            ->disableOriginalConstructor()
            ->getMock();

        $tagger = $this->getTaggerMock();
        $tagger->expects($this->once())
            ->method('tag')
            ->will($this->throwException(new \Exception()));

        $proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage);

        $entry = $proxy->updateEntry(new Entry(new User()), 'http://0.0.0.0', [
            'html' => str_repeat('this is my content', 325),
            'title' => 'this is my title',
            'url' => 'http://1.1.1.1',
            'content_type' => 'text/html',
            'language' => 'fr',
        ]);

        $this->assertCount(0, $entry->getTags());
    }

    /**
     * Data sets for testWithCrazyHtmlContent().
     *
     * @return array Each set is [html given as forced content, string that must not appear in the entry content]
     */
    public function dataForCrazyHtml()
    {
        return [
            'script and comment' => [
                '<strong>Script inside:</strong> <!--[if gte IE 4]><script>alert(\'lol\');</script><![endif]--><br />',
                'lol',
            ],
            'script' => [
                '<strong>Script inside:</strong><script>alert(\'lol\');</script>',
                'script',
            ],
        ];
    }

    /**
     * Forced html containing scripts, run through a real Graby instance, must
     * not leave the given string in the entry content.
     *
     * @dataProvider dataForCrazyHtml
     *
     * @param string $html          Html passed as forced content
     * @param string $escapedString String that must be absent from the entry content
     */
    public function testWithCrazyHtmlContent($html, $escapedString)
    {
        $tagger = $this->getTaggerMock();
        $tagger->expects($this->once())
            ->method('tag');

        $graby = new Graby();

        // Same four-argument constructor as every other test in this class. The
        // previous call passed an extra $this->getTagRepositoryMock(), a helper
        // this class never defined, so the test crashed before any assertion.
        $proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage);
        $entry = $proxy->updateEntry(
            new Entry(new User()),
            'http://1.1.1.1',
            [
                'html' => $html,
                'title' => 'this is my title',
                'url' => 'http://1.1.1.1',
                'content_type' => 'text/html',
                'language' => 'fr',
                'status' => '200',
                'open_graph' => [
                    'og_title' => 'my OG title',
                    'og_description' => 'OG desc',
                    'og_image' => 'http://3.3.3.3/cover.jpg',
                ],
            ]
        );

        $this->assertEquals('http://1.1.1.1', $entry->getUrl());
        $this->assertEquals('this is my title', $entry->getTitle());
        $this->assertNotContains($escapedString, $entry->getContent());
        $this->assertEquals('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
        $this->assertEquals('text/html', $entry->getMimetype());
        $this->assertEquals('fr', $entry->getLanguage());
        $this->assertEquals('200', $entry->getHttpStatus());
        $this->assertEquals('1.1.1.1', $entry->getDomainName());
    }

    /**
     * Build a Graby mock whose fetchContent() returns the given result on every call.
     *
     * Only fetchContent() is replaced and the original constructor is skipped.
     *
     * @param array $fetchResult Value returned by fetchContent()
     *
     * @return \PHPUnit_Framework_MockObject_MockObject
     */
    private function getGrabyMock(array $fetchResult)
    {
        $graby = $this->getMockBuilder(Graby::class)
            ->setMethods(['fetchContent'])
            ->disableOriginalConstructor()
            ->getMock();

        $graby->expects($this->any())
            ->method('fetchContent')
            ->willReturn($fetchResult);

        return $graby;
    }

    /**
     * Build a RuleBasedTagger mock with only tag() replaced and the original
     * constructor skipped; callers attach their own expectations.
     *
     * @return \PHPUnit_Framework_MockObject_MockObject
     */
    private function getTaggerMock()
    {
        return $this->getMockBuilder(RuleBasedTagger::class)
            ->setMethods(['tag'])
            ->disableOriginalConstructor()
            ->getMock();
    }

    /**
     * Logger handed to ContentProxy in every test.
     *
     * @return NullLogger
     */
    private function getLogger()
    {
        return new NullLogger();
    }
}