From: Kevin Decherf Date: Sun, 26 May 2019 15:47:44 +0000 (+0200) Subject: Merge pull request #3965 from nicofrand/previewPic X-Git-Url: https://git.immae.eu/?a=commitdiff_plain;h=5c0701ba41fd64ba471addb4a84af062277ab559;hp=cc9731bf2bc59c4a3802ac546ac3f76afb4aa5d6;p=github%2Fwallabag%2Fwallabag.git Merge pull request #3965 from nicofrand/previewPic Preview picture: use the 1st pic retrieved if no og:image set --- diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index bc257ffb..ca01dec8 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php @@ -12,8 +12,8 @@ use Wallabag\CoreBundle\Entity\Entry; use Wallabag\CoreBundle\Tools\Utils; /** - * This kind of proxy class take care of getting the content from an url - * and update the entry with what it found. + * This kind of proxy class takes care of getting the content from an url + * and updates the entry with what it found. */ class ContentProxy { @@ -289,13 +289,25 @@ class ContentProxy $this->updateLanguage($entry, $content['language']); } + $previewPictureUrl = ''; if (!empty($content['open_graph']['og_image'])) { - $this->updatePreviewPicture($entry, $content['open_graph']['og_image']); + $previewPictureUrl = $content['open_graph']['og_image']; } // if content is an image, define it as a preview too if (!empty($content['content_type']) && \in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { - $this->updatePreviewPicture($entry, $content['url']); + $previewPictureUrl = $content['url']; + } elseif (empty($previewPictureUrl)) { + $this->logger->debug('Extracting images from content to provide a default preview picture'); + $imagesUrls = DownloadImages::extractImagesUrlsFromHtml($content['html']); + $this->logger->debug(\count($imagesUrls) . ' pictures found'); + if (!empty($imagesUrls)) { + $previewPictureUrl = $imagesUrls[0]; + } + } + + if (!empty($previewPictureUrl)) { + $this->updatePreviewPicture($entry, $previewPictureUrl); } if (!empty($content['content_type'])) { diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php index 9a7e9828..c1645e45 100644 --- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php @@ -31,23 +31,36 @@ class DownloadImages } /** - * Process the html and extract image from it, save them to local and return the updated html. + * Process the html and extract images URLs from it. * - * @param int $entryId ID of the entry * @param string $html - * @param string $url Used as a base path for relative image and folder * - * @return string + * @return string[] */ - public function processHtml($entryId, $html, $url) + public static function extractImagesUrlsFromHtml($html) { $crawler = new Crawler($html); $imagesCrawler = $crawler ->filterXpath('//img'); $imagesUrls = $imagesCrawler ->extract(['src']); - $imagesSrcsetUrls = $this->getSrcsetUrls($imagesCrawler); - $imagesUrls = array_unique(array_merge($imagesUrls, $imagesSrcsetUrls)); + $imagesSrcsetUrls = self::getSrcsetUrls($imagesCrawler); + + return array_unique(array_merge($imagesUrls, $imagesSrcsetUrls)); + } + + /** + * Process the html and extract image from it, save them to local and return the updated html. + * + * @param int $entryId ID of the entry + * @param string $html + * @param string $url Used as a base path for relative image and folder + * + * @return string + */ + public function processHtml($entryId, $html, $url) + { + $imagesUrls = self::extractImagesUrlsFromHtml($html); $relativePath = $this->getRelativePath($entryId); @@ -199,7 +212,7 @@ class DownloadImages * * @return array An array of urls */ - private function getSrcsetUrls(Crawler $imagesCrawler) + private static function getSrcsetUrls(Crawler $imagesCrawler) { $urls = []; $iterator = $imagesCrawler diff --git a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php index 508adb1b..c7caac1d 100644 --- a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php +++ b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php @@ -214,6 +214,90 @@ class ContentProxyTest extends TestCase $this->assertSame('1.1.1.1', $entry->getDomainName()); } + public function testWithContentAndContentImage() + { + $tagger = $this->getTaggerMock(); + $tagger->expects($this->once()) + ->method('tag'); + + $graby = $this->getMockBuilder('Graby\Graby') + ->setMethods(['fetchContent']) + ->disableOriginalConstructor() + ->getMock(); + + $graby->expects($this->any()) + ->method('fetchContent') + ->willReturn([ + 'html' => "

Test

", + 'title' => 'this is my title', + 'url' => 'http://1.1.1.1', + 'content_type' => 'text/html', + 'language' => 'fr', + 'status' => '200', + 'open_graph' => [ + 'og_title' => 'my OG title', + 'og_description' => 'OG desc', + 'og_image' => null, + ], + ]); + + $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage); + $entry = new Entry(new User()); + $proxy->updateEntry($entry, 'http://0.0.0.0'); + + $this->assertSame('http://1.1.1.1', $entry->getUrl()); + $this->assertSame('this is my title', $entry->getTitle()); + $this->assertSame("

Test

", $entry->getContent()); + $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture()); + $this->assertSame('text/html', $entry->getMimetype()); + $this->assertSame('fr', $entry->getLanguage()); + $this->assertSame('200', $entry->getHttpStatus()); + $this->assertSame(0.0, $entry->getReadingTime()); + $this->assertSame('1.1.1.1', $entry->getDomainName()); + } + + public function testWithContentImageAndOgImage() + { + $tagger = $this->getTaggerMock(); + $tagger->expects($this->once()) + ->method('tag'); + + $graby = $this->getMockBuilder('Graby\Graby') + ->setMethods(['fetchContent']) + ->disableOriginalConstructor() + ->getMock(); + + $graby->expects($this->any()) + ->method('fetchContent') + ->willReturn([ + 'html' => "

Test

", + 'title' => 'this is my title', + 'url' => 'http://1.1.1.1', + 'content_type' => 'text/html', + 'language' => 'fr', + 'status' => '200', + 'open_graph' => [ + 'og_title' => 'my OG title', + 'og_description' => 'OG desc', + 'og_image' => 'http://3.3.3.3/cover.jpg', + ], + ]); + + $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage); + $entry = new Entry(new User()); + $proxy->updateEntry($entry, 'http://0.0.0.0'); + + $this->assertSame('http://1.1.1.1', $entry->getUrl()); + $this->assertSame('this is my title', $entry->getTitle()); + $this->assertSame("

Test

", $entry->getContent()); + $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture()); + $this->assertSame('text/html', $entry->getMimetype()); + $this->assertSame('fr', $entry->getLanguage()); + $this->assertSame('200', $entry->getHttpStatus()); + $this->assertSame(0.0, $entry->getReadingTime()); + $this->assertSame('1.1.1.1', $entry->getDomainName()); + } + public function testWithContentAndBadLanguage() { $tagger = $this->getTaggerMock(); @@ -415,7 +499,7 @@ class ContentProxyTest extends TestCase $records = $handler->getRecords(); - $this->assertCount(1, $records); + $this->assertCount(3, $records); $this->assertContains('Error while defining date', $records[0]['message']); } diff --git a/tests/Wallabag/ImportBundle/Controller/WallabagV1ControllerTest.php b/tests/Wallabag/ImportBundle/Controller/WallabagV1ControllerTest.php index 1f57939d..2a8e7c89 100644 --- a/tests/Wallabag/ImportBundle/Controller/WallabagV1ControllerTest.php +++ b/tests/Wallabag/ImportBundle/Controller/WallabagV1ControllerTest.php @@ -121,7 +121,7 @@ class WallabagV1ControllerTest extends WallabagCoreTestCase $this->assertInstanceOf('Wallabag\CoreBundle\Entity\Entry', $content); $this->assertEmpty($content->getMimetype(), 'Mimetype for http://www.framablog.org is empty'); - $this->assertEmpty($content->getPreviewPicture(), 'Preview picture for http://www.framablog.org is empty'); + $this->assertSame($content->getPreviewPicture(), 'http://www.framablog.org/public/_img/framablog/wallaby_baby.jpg'); $this->assertEmpty($content->getLanguage(), 'Language for http://www.framablog.org is empty'); $tags = $content->getTags();