diff options
author | Kevin Decherf <kevin@kdecherf.com> | 2019-05-26 17:47:44 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-05-26 17:47:44 +0200 |
commit | 5c0701ba41fd64ba471addb4a84af062277ab559 (patch) | |
tree | 230c6bf7455bd5a8a7f3cf487bc08efdb9b7e16e | |
parent | cc9731bf2bc59c4a3802ac546ac3f76afb4aa5d6 (diff) | |
parent | d99e6423f4bd54595a8a805dd1efd0bd94e8bb09 (diff) | |
download | wallabag-5c0701ba41fd64ba471addb4a84af062277ab559.tar.gz wallabag-5c0701ba41fd64ba471addb4a84af062277ab559.tar.zst wallabag-5c0701ba41fd64ba471addb4a84af062277ab559.zip |
Merge pull request #3965 from nicofrand/previewPic
Preview picture: use the 1st pic retrieved if no og:image set
4 files changed, 123 insertions, 14 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
index bc257ffb..ca01dec8 100644
--- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php
+++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php | |||
@@ -12,8 +12,8 @@ use Wallabag\CoreBundle\Entity\Entry; | |||
12 | use Wallabag\CoreBundle\Tools\Utils; | 12 | use Wallabag\CoreBundle\Tools\Utils; |
13 | 13 | ||
14 | /** | 14 | /** |
15 | * This kind of proxy class take care of getting the content from an url | 15 | * This kind of proxy class takes care of getting the content from an url |
16 | * and update the entry with what it found. | 16 | * and updates the entry with what it found. |
17 | */ | 17 | */ |
18 | class ContentProxy | 18 | class ContentProxy |
19 | { | 19 | { |
@@ -289,13 +289,25 @@ class ContentProxy | |||
289 | $this->updateLanguage($entry, $content['language']); | 289 | $this->updateLanguage($entry, $content['language']); |
290 | } | 290 | } |
291 | 291 | ||
292 | $previewPictureUrl = ''; | ||
292 | if (!empty($content['open_graph']['og_image'])) { | 293 | if (!empty($content['open_graph']['og_image'])) { |
293 | $this->updatePreviewPicture($entry, $content['open_graph']['og_image']); | 294 | $previewPictureUrl = $content['open_graph']['og_image']; |
294 | } | 295 | } |
295 | 296 | ||
296 | // if content is an image, define it as a preview too | 297 | // if content is an image, define it as a preview too |
297 | if (!empty($content['content_type']) && \in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { | 298 | if (!empty($content['content_type']) && \in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { |
298 | $this->updatePreviewPicture($entry, $content['url']); | 299 | $previewPictureUrl = $content['url']; |
300 | } elseif (empty($previewPictureUrl)) { | ||
301 | $this->logger->debug('Extracting images from content to provide a default preview picture'); | ||
302 | $imagesUrls = DownloadImages::extractImagesUrlsFromHtml($content['html']); | ||
303 | $this->logger->debug(\count($imagesUrls) . ' pictures found'); | ||
304 | if (!empty($imagesUrls)) { | ||
305 | $previewPictureUrl = $imagesUrls[0]; | ||
306 | } | ||
307 | } | ||
308 | |||
309 | if (!empty($previewPictureUrl)) { | ||
310 | $this->updatePreviewPicture($entry, $previewPictureUrl); | ||
299 | } | 311 | } |
300 | 312 | ||
301 | if (!empty($content['content_type'])) { | 313 | if (!empty($content['content_type'])) { |
diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
index 9a7e9828..c1645e45 100644
--- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php
+++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php | |||
@@ -31,23 +31,36 @@ class DownloadImages | |||
31 | } | 31 | } |
32 | 32 | ||
33 | /** | 33 | /** |
34 | * Process the html and extract image from it, save them to local and return the updated html. | 34 | * Process the html and extract images URLs from it. |
35 | * | 35 | * |
36 | * @param int $entryId ID of the entry | ||
37 | * @param string $html | 36 | * @param string $html |
38 | * @param string $url Used as a base path for relative image and folder | ||
39 | * | 37 | * |
40 | * @return string | 38 | * @return string[] |
41 | */ | 39 | */ |
42 | public function processHtml($entryId, $html, $url) | 40 | public static function extractImagesUrlsFromHtml($html) |
43 | { | 41 | { |
44 | $crawler = new Crawler($html); | 42 | $crawler = new Crawler($html); |
45 | $imagesCrawler = $crawler | 43 | $imagesCrawler = $crawler |
46 | ->filterXpath('//img'); | 44 | ->filterXpath('//img'); |
47 | $imagesUrls = $imagesCrawler | 45 | $imagesUrls = $imagesCrawler |
48 | ->extract(['src']); | 46 | ->extract(['src']); |
49 | $imagesSrcsetUrls = $this->getSrcsetUrls($imagesCrawler); | 47 | $imagesSrcsetUrls = self::getSrcsetUrls($imagesCrawler); |
50 | $imagesUrls = array_unique(array_merge($imagesUrls, $imagesSrcsetUrls)); | 48 | |
49 | return array_unique(array_merge($imagesUrls, $imagesSrcsetUrls)); | ||
50 | } | ||
51 | |||
52 | /** | ||
53 | * Process the html and extract image from it, save them to local and return the updated html. | ||
54 | * | ||
55 | * @param int $entryId ID of the entry | ||
56 | * @param string $html | ||
57 | * @param string $url Used as a base path for relative image and folder | ||
58 | * | ||
59 | * @return string | ||
60 | */ | ||
61 | public function processHtml($entryId, $html, $url) | ||
62 | { | ||
63 | $imagesUrls = self::extractImagesUrlsFromHtml($html); | ||
51 | 64 | ||
52 | $relativePath = $this->getRelativePath($entryId); | 65 | $relativePath = $this->getRelativePath($entryId); |
53 | 66 | ||
@@ -199,7 +212,7 @@ class DownloadImages | |||
199 | * | 212 | * |
200 | * @return array An array of urls | 213 | * @return array An array of urls |
201 | */ | 214 | */ |
202 | private function getSrcsetUrls(Crawler $imagesCrawler) | 215 | private static function getSrcsetUrls(Crawler $imagesCrawler) |
203 | { | 216 | { |
204 | $urls = []; | 217 | $urls = []; |
205 | $iterator = $imagesCrawler | 218 | $iterator = $imagesCrawler |
diff --git a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
index 508adb1b..c7caac1d 100644
--- a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
+++ b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php | |||
@@ -214,6 +214,90 @@ class ContentProxyTest extends TestCase | |||
214 | $this->assertSame('1.1.1.1', $entry->getDomainName()); | 214 | $this->assertSame('1.1.1.1', $entry->getDomainName()); |
215 | } | 215 | } |
216 | 216 | ||
217 | public function testWithContentAndContentImage() | ||
218 | { | ||
219 | $tagger = $this->getTaggerMock(); | ||
220 | $tagger->expects($this->once()) | ||
221 | ->method('tag'); | ||
222 | |||
223 | $graby = $this->getMockBuilder('Graby\Graby') | ||
224 | ->setMethods(['fetchContent']) | ||
225 | ->disableOriginalConstructor() | ||
226 | ->getMock(); | ||
227 | |||
228 | $graby->expects($this->any()) | ||
229 | ->method('fetchContent') | ||
230 | ->willReturn([ | ||
231 | 'html' => "<h1>Test</h1><p><img src='http://3.3.3.3/cover.jpg'/></p>", | ||
232 | 'title' => 'this is my title', | ||
233 | 'url' => 'http://1.1.1.1', | ||
234 | 'content_type' => 'text/html', | ||
235 | 'language' => 'fr', | ||
236 | 'status' => '200', | ||
237 | 'open_graph' => [ | ||
238 | 'og_title' => 'my OG title', | ||
239 | 'og_description' => 'OG desc', | ||
240 | 'og_image' => null, | ||
241 | ], | ||
242 | ]); | ||
243 | |||
244 | $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage); | ||
245 | $entry = new Entry(new User()); | ||
246 | $proxy->updateEntry($entry, 'http://0.0.0.0'); | ||
247 | |||
248 | $this->assertSame('http://1.1.1.1', $entry->getUrl()); | ||
249 | $this->assertSame('this is my title', $entry->getTitle()); | ||
250 | $this->assertSame("<h1>Test</h1><p><img src='http://3.3.3.3/cover.jpg'/></p>", $entry->getContent()); | ||
251 | $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture()); | ||
252 | $this->assertSame('text/html', $entry->getMimetype()); | ||
253 | $this->assertSame('fr', $entry->getLanguage()); | ||
254 | $this->assertSame('200', $entry->getHttpStatus()); | ||
255 | $this->assertSame(0.0, $entry->getReadingTime()); | ||
256 | $this->assertSame('1.1.1.1', $entry->getDomainName()); | ||
257 | } | ||
258 | |||
259 | public function testWithContentImageAndOgImage() | ||
260 | { | ||
261 | $tagger = $this->getTaggerMock(); | ||
262 | $tagger->expects($this->once()) | ||
263 | ->method('tag'); | ||
264 | |||
265 | $graby = $this->getMockBuilder('Graby\Graby') | ||
266 | ->setMethods(['fetchContent']) | ||
267 | ->disableOriginalConstructor() | ||
268 | ->getMock(); | ||
269 | |||
270 | $graby->expects($this->any()) | ||
271 | ->method('fetchContent') | ||
272 | ->willReturn([ | ||
273 | 'html' => "<h1>Test</h1><p><img src='http://3.3.3.3/nevermind.jpg'/></p>", | ||
274 | 'title' => 'this is my title', | ||
275 | 'url' => 'http://1.1.1.1', | ||
276 | 'content_type' => 'text/html', | ||
277 | 'language' => 'fr', | ||
278 | 'status' => '200', | ||
279 | 'open_graph' => [ | ||
280 | 'og_title' => 'my OG title', | ||
281 | 'og_description' => 'OG desc', | ||
282 | 'og_image' => 'http://3.3.3.3/cover.jpg', | ||
283 | ], | ||
284 | ]); | ||
285 | |||
286 | $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage); | ||
287 | $entry = new Entry(new User()); | ||
288 | $proxy->updateEntry($entry, 'http://0.0.0.0'); | ||
289 | |||
290 | $this->assertSame('http://1.1.1.1', $entry->getUrl()); | ||
291 | $this->assertSame('this is my title', $entry->getTitle()); | ||
292 | $this->assertSame("<h1>Test</h1><p><img src='http://3.3.3.3/nevermind.jpg'/></p>", $entry->getContent()); | ||
293 | $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture()); | ||
294 | $this->assertSame('text/html', $entry->getMimetype()); | ||
295 | $this->assertSame('fr', $entry->getLanguage()); | ||
296 | $this->assertSame('200', $entry->getHttpStatus()); | ||
297 | $this->assertSame(0.0, $entry->getReadingTime()); | ||
298 | $this->assertSame('1.1.1.1', $entry->getDomainName()); | ||
299 | } | ||
300 | |||
217 | public function testWithContentAndBadLanguage() | 301 | public function testWithContentAndBadLanguage() |
218 | { | 302 | { |
219 | $tagger = $this->getTaggerMock(); | 303 | $tagger = $this->getTaggerMock(); |
@@ -415,7 +499,7 @@ class ContentProxyTest extends TestCase | |||
415 | 499 | ||
416 | $records = $handler->getRecords(); | 500 | $records = $handler->getRecords(); |
417 | 501 | ||
418 | $this->assertCount(1, $records); | 502 | $this->assertCount(3, $records); |
419 | $this->assertContains('Error while defining date', $records[0]['message']); | 503 | $this->assertContains('Error while defining date', $records[0]['message']); |
420 | } | 504 | } |
421 | 505 | ||
diff --git a/tests/Wallabag/ImportBundle/Controller/WallabagV1ControllerTest.php b/tests/Wallabag/ImportBundle/Controller/WallabagV1ControllerTest.php
index 1f57939d..2a8e7c89 100644
--- a/tests/Wallabag/ImportBundle/Controller/WallabagV1ControllerTest.php
+++ b/tests/Wallabag/ImportBundle/Controller/WallabagV1ControllerTest.php | |||
@@ -121,7 +121,7 @@ class WallabagV1ControllerTest extends WallabagCoreTestCase | |||
121 | 121 | ||
122 | $this->assertInstanceOf('Wallabag\CoreBundle\Entity\Entry', $content); | 122 | $this->assertInstanceOf('Wallabag\CoreBundle\Entity\Entry', $content); |
123 | $this->assertEmpty($content->getMimetype(), 'Mimetype for http://www.framablog.org is empty'); | 123 | $this->assertEmpty($content->getMimetype(), 'Mimetype for http://www.framablog.org is empty'); |
124 | $this->assertEmpty($content->getPreviewPicture(), 'Preview picture for http://www.framablog.org is empty'); | 124 | $this->assertSame($content->getPreviewPicture(), 'http://www.framablog.org/public/_img/framablog/wallaby_baby.jpg'); |
125 | $this->assertEmpty($content->getLanguage(), 'Language for http://www.framablog.org is empty'); | 125 | $this->assertEmpty($content->getLanguage(), 'Language for http://www.framablog.org is empty'); |
126 | 126 | ||
127 | $tags = $content->getTags(); | 127 | $tags = $content->getTags(); |