public function processHtml($entryId, $html, $url)
{
$crawler = new Crawler($html);
- $result = $crawler
- ->filterXpath('//img')
+ $imagesCrawler = $crawler
+ ->filterXpath('//img');
+ $imagesUrls = $imagesCrawler
->extract(['src']);
+ $imagesSrcsetUrls = $this->getSrcsetUrls($imagesCrawler);
+ $imagesUrls = array_unique(array_merge($imagesUrls, $imagesSrcsetUrls));
$relativePath = $this->getRelativePath($entryId);
// download and save the image to the folder
- foreach ($result as $image) {
+ foreach ($imagesUrls as $image) {
$imagePath = $this->processSingleImage($entryId, $image, $url, $relativePath);
if (false === $imagePath) {
@rmdir($folderPath);
}
+ /**
+  * Get images urls from the srcset image attribute.
+  *
+  * A srcset value is a comma-separated list of candidates; each candidate is a URL
+  * optionally followed by whitespace and a descriptor (e.g. "image.jpg 530w").
+  * Only the URL part of every candidate is kept, empty candidates are ignored.
+  *
+  * NOTE: candidates are split on every comma, so a URL that itself contains a comma
+  * is not supported by this simple parser.
+  *
+  * @param Crawler $imagesCrawler Crawler holding the image nodes to inspect
+  *
+  * @return array An array of urls
+  */
+ protected function getSrcsetUrls(Crawler $imagesCrawler)
+ {
+     $urls = [];
+
+     foreach ($imagesCrawler as $image) {
+         $srcsetAttribute = $image->getAttribute('srcset');
+         if ('' === $srcsetAttribute) {
+             continue;
+         }
+
+         $srcsetUrls = [];
+         foreach (explode(',', $srcsetAttribute) as $candidate) {
+             // The descriptor can be separated from the url by any whitespace
+             // (tab, newline, several spaces), not only by a single space.
+             $candidateUrl = preg_split('/\s+/', trim($candidate), 2)[0];
+
+             // A trailing or doubled comma produces an empty candidate: there is nothing to download.
+             if ('' !== $candidateUrl) {
+                 $srcsetUrls[] = $candidateUrl;
+             }
+         }
+
+         // Urls of the current image are put in front of the ones already collected.
+         $urls = array_merge($srcsetUrls, $urls);
+     }
+
+     return $urls;
+ }
+
/**
* Set up the base folder where all images are going to be saved.
*/
$this->assertContains('http://wallabag.io/assets/images/9/b/9b0ead26/', $res, 'Content-Type was empty but data is ok for an image');
$this->assertContains('DownloadImages: Checking extension (alternative)', $logHandler->getRecords()[3]['message']);
}
+
+ /**
+  * An image carrying both a src and a srcset attribute must not leave any url
+  * of the original host in the processed html.
+  */
+ public function testProcessImageWithSrcset()
+ {
+     // Three queued jpeg responses: the markup below references three distinct image urls.
+     $fixturePath = __DIR__ . '/../fixtures/image-no-content-type.jpg';
+     $queuedResponses = [];
+     for ($i = 0; $i < 3; ++$i) {
+         $queuedResponses[] = new Response(200, ['content-type' => 'image/jpeg'], Stream::factory(file_get_contents($fixturePath)));
+     }
+
+     $httpClient = new Client();
+     $httpClient->getEmitter()->attach(new Mock($queuedResponses));
+
+     $recordsHandler = new TestHandler();
+     $download = new DownloadImages(
+         $httpClient,
+         sys_get_temp_dir() . '/wallabag_test',
+         'http://wallabag.io/',
+         new Logger('test', [$recordsHandler])
+     );
+
+     $html = '<p><img class="alignnone wp-image-1153" src="http://piketty.blog.lemonde.fr/files/2017/10/F1FR-530x375.jpg" alt="" width="628" height="444" srcset="http://piketty.blog.lemonde.fr/files/2017/10/F1FR-530x375.jpg 530w, http://piketty.blog.lemonde.fr/files/2017/10/F1FR-768x543.jpg 768w, http://piketty.blog.lemonde.fr/files/2017/10/F1FR-900x636.jpg 900w" sizes="(max-width: 628px) 100vw, 628px" /></p>';
+     $pageUrl = 'http://piketty.blog.lemonde.fr/2017/10/12/budget-2018-la-jeunesse-sacrifiee/';
+
+     $res = $download->processHtml(123, $html, $pageUrl);
+
+     $this->assertNotContains('http://piketty.blog.lemonde.fr/', $res, 'Image srcset attribute were not replaced');
+ }
}