aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--src/Wallabag/CoreBundle/Helper/ContentProxy.php20
-rw-r--r--src/Wallabag/CoreBundle/Helper/DownloadImages.php29
-rw-r--r--tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php86
-rw-r--r--tests/Wallabag/ImportBundle/Controller/WallabagV1ControllerTest.php2
4 files changed, 123 insertions, 14 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
index bc257ffb..ca01dec8 100644
--- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php
+++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
@@ -12,8 +12,8 @@ use Wallabag\CoreBundle\Entity\Entry;
12use Wallabag\CoreBundle\Tools\Utils; 12use Wallabag\CoreBundle\Tools\Utils;
13 13
14/** 14/**
15 * This kind of proxy class take care of getting the content from an url 15 * This kind of proxy class takes care of getting the content from an url
16 * and update the entry with what it found. 16 * and updates the entry with what it found.
17 */ 17 */
18class ContentProxy 18class ContentProxy
19{ 19{
@@ -289,13 +289,25 @@ class ContentProxy
289 $this->updateLanguage($entry, $content['language']); 289 $this->updateLanguage($entry, $content['language']);
290 } 290 }
291 291
292 $previewPictureUrl = '';
292 if (!empty($content['open_graph']['og_image'])) { 293 if (!empty($content['open_graph']['og_image'])) {
293 $this->updatePreviewPicture($entry, $content['open_graph']['og_image']); 294 $previewPictureUrl = $content['open_graph']['og_image'];
294 } 295 }
295 296
296 // if content is an image, define it as a preview too 297 // if content is an image, define it as a preview too
297 if (!empty($content['content_type']) && \in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { 298 if (!empty($content['content_type']) && \in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) {
298 $this->updatePreviewPicture($entry, $content['url']); 299 $previewPictureUrl = $content['url'];
300 } elseif (empty($previewPictureUrl)) {
301 $this->logger->debug('Extracting images from content to provide a default preview picture');
302 $imagesUrls = DownloadImages::extractImagesUrlsFromHtml($content['html']);
303 $this->logger->debug(\count($imagesUrls) . ' pictures found');
304 if (!empty($imagesUrls)) {
305 $previewPictureUrl = $imagesUrls[0];
306 }
307 }
308
309 if (!empty($previewPictureUrl)) {
310 $this->updatePreviewPicture($entry, $previewPictureUrl);
299 } 311 }
300 312
301 if (!empty($content['content_type'])) { 313 if (!empty($content['content_type'])) {
diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
index 9a7e9828..c1645e45 100644
--- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php
+++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
@@ -31,23 +31,36 @@ class DownloadImages
31 } 31 }
32 32
33 /** 33 /**
34 * Process the html and extract image from it, save them to local and return the updated html. 34 * Process the html and extract images URLs from it.
35 * 35 *
36 * @param int $entryId ID of the entry
37 * @param string $html 36 * @param string $html
38 * @param string $url Used as a base path for relative image and folder
39 * 37 *
40 * @return string 38 * @return string[]
41 */ 39 */
42 public function processHtml($entryId, $html, $url) 40 public static function extractImagesUrlsFromHtml($html)
43 { 41 {
44 $crawler = new Crawler($html); 42 $crawler = new Crawler($html);
45 $imagesCrawler = $crawler 43 $imagesCrawler = $crawler
46 ->filterXpath('//img'); 44 ->filterXpath('//img');
47 $imagesUrls = $imagesCrawler 45 $imagesUrls = $imagesCrawler
48 ->extract(['src']); 46 ->extract(['src']);
49 $imagesSrcsetUrls = $this->getSrcsetUrls($imagesCrawler); 47 $imagesSrcsetUrls = self::getSrcsetUrls($imagesCrawler);
50 $imagesUrls = array_unique(array_merge($imagesUrls, $imagesSrcsetUrls)); 48
49 return array_unique(array_merge($imagesUrls, $imagesSrcsetUrls));
50 }
51
52 /**
53 * Process the html and extract image from it, save them to local and return the updated html.
54 *
55 * @param int $entryId ID of the entry
56 * @param string $html
57 * @param string $url Used as a base path for relative image and folder
58 *
59 * @return string
60 */
61 public function processHtml($entryId, $html, $url)
62 {
63 $imagesUrls = self::extractImagesUrlsFromHtml($html);
51 64
52 $relativePath = $this->getRelativePath($entryId); 65 $relativePath = $this->getRelativePath($entryId);
53 66
@@ -199,7 +212,7 @@ class DownloadImages
199 * 212 *
200 * @return array An array of urls 213 * @return array An array of urls
201 */ 214 */
202 private function getSrcsetUrls(Crawler $imagesCrawler) 215 private static function getSrcsetUrls(Crawler $imagesCrawler)
203 { 216 {
204 $urls = []; 217 $urls = [];
205 $iterator = $imagesCrawler 218 $iterator = $imagesCrawler
diff --git a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
index 508adb1b..c7caac1d 100644
--- a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
+++ b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
@@ -214,6 +214,90 @@ class ContentProxyTest extends TestCase
214 $this->assertSame('1.1.1.1', $entry->getDomainName()); 214 $this->assertSame('1.1.1.1', $entry->getDomainName());
215 } 215 }
216 216
217 public function testWithContentAndContentImage()
218 {
219 $tagger = $this->getTaggerMock();
220 $tagger->expects($this->once())
221 ->method('tag');
222
223 $graby = $this->getMockBuilder('Graby\Graby')
224 ->setMethods(['fetchContent'])
225 ->disableOriginalConstructor()
226 ->getMock();
227
228 $graby->expects($this->any())
229 ->method('fetchContent')
230 ->willReturn([
231 'html' => "<h1>Test</h1><p><img src='http://3.3.3.3/cover.jpg'/></p>",
232 'title' => 'this is my title',
233 'url' => 'http://1.1.1.1',
234 'content_type' => 'text/html',
235 'language' => 'fr',
236 'status' => '200',
237 'open_graph' => [
238 'og_title' => 'my OG title',
239 'og_description' => 'OG desc',
240 'og_image' => null,
241 ],
242 ]);
243
244 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
245 $entry = new Entry(new User());
246 $proxy->updateEntry($entry, 'http://0.0.0.0');
247
248 $this->assertSame('http://1.1.1.1', $entry->getUrl());
249 $this->assertSame('this is my title', $entry->getTitle());
250 $this->assertSame("<h1>Test</h1><p><img src='http://3.3.3.3/cover.jpg'/></p>", $entry->getContent());
251 $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
252 $this->assertSame('text/html', $entry->getMimetype());
253 $this->assertSame('fr', $entry->getLanguage());
254 $this->assertSame('200', $entry->getHttpStatus());
255 $this->assertSame(0.0, $entry->getReadingTime());
256 $this->assertSame('1.1.1.1', $entry->getDomainName());
257 }
258
259 public function testWithContentImageAndOgImage()
260 {
261 $tagger = $this->getTaggerMock();
262 $tagger->expects($this->once())
263 ->method('tag');
264
265 $graby = $this->getMockBuilder('Graby\Graby')
266 ->setMethods(['fetchContent'])
267 ->disableOriginalConstructor()
268 ->getMock();
269
270 $graby->expects($this->any())
271 ->method('fetchContent')
272 ->willReturn([
273 'html' => "<h1>Test</h1><p><img src='http://3.3.3.3/nevermind.jpg'/></p>",
274 'title' => 'this is my title',
275 'url' => 'http://1.1.1.1',
276 'content_type' => 'text/html',
277 'language' => 'fr',
278 'status' => '200',
279 'open_graph' => [
280 'og_title' => 'my OG title',
281 'og_description' => 'OG desc',
282 'og_image' => 'http://3.3.3.3/cover.jpg',
283 ],
284 ]);
285
286 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
287 $entry = new Entry(new User());
288 $proxy->updateEntry($entry, 'http://0.0.0.0');
289
290 $this->assertSame('http://1.1.1.1', $entry->getUrl());
291 $this->assertSame('this is my title', $entry->getTitle());
292 $this->assertSame("<h1>Test</h1><p><img src='http://3.3.3.3/nevermind.jpg'/></p>", $entry->getContent());
293 $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
294 $this->assertSame('text/html', $entry->getMimetype());
295 $this->assertSame('fr', $entry->getLanguage());
296 $this->assertSame('200', $entry->getHttpStatus());
297 $this->assertSame(0.0, $entry->getReadingTime());
298 $this->assertSame('1.1.1.1', $entry->getDomainName());
299 }
300
217 public function testWithContentAndBadLanguage() 301 public function testWithContentAndBadLanguage()
218 { 302 {
219 $tagger = $this->getTaggerMock(); 303 $tagger = $this->getTaggerMock();
@@ -415,7 +499,7 @@ class ContentProxyTest extends TestCase
415 499
416 $records = $handler->getRecords(); 500 $records = $handler->getRecords();
417 501
418 $this->assertCount(1, $records); 502 $this->assertCount(3, $records);
419 $this->assertContains('Error while defining date', $records[0]['message']); 503 $this->assertContains('Error while defining date', $records[0]['message']);
420 } 504 }
421 505
diff --git a/tests/Wallabag/ImportBundle/Controller/WallabagV1ControllerTest.php b/tests/Wallabag/ImportBundle/Controller/WallabagV1ControllerTest.php
index 1f57939d..2a8e7c89 100644
--- a/tests/Wallabag/ImportBundle/Controller/WallabagV1ControllerTest.php
+++ b/tests/Wallabag/ImportBundle/Controller/WallabagV1ControllerTest.php
@@ -121,7 +121,7 @@ class WallabagV1ControllerTest extends WallabagCoreTestCase
121 121
122 $this->assertInstanceOf('Wallabag\CoreBundle\Entity\Entry', $content); 122 $this->assertInstanceOf('Wallabag\CoreBundle\Entity\Entry', $content);
123 $this->assertEmpty($content->getMimetype(), 'Mimetype for http://www.framablog.org is empty'); 123 $this->assertEmpty($content->getMimetype(), 'Mimetype for http://www.framablog.org is empty');
124 $this->assertEmpty($content->getPreviewPicture(), 'Preview picture for http://www.framablog.org is empty'); 124 $this->assertSame($content->getPreviewPicture(), 'http://www.framablog.org/public/_img/framablog/wallaby_baby.jpg');
125 $this->assertEmpty($content->getLanguage(), 'Language for http://www.framablog.org is empty'); 125 $this->assertEmpty($content->getLanguage(), 'Language for http://www.framablog.org is empty');
126 126
127 $tags = $content->getTags(); 127 $tags = $content->getTags();