diff options
author | Simounet <contact@simounet.net> | 2018-05-31 23:42:06 +0200 |
---|---|---|
committer | Simounet <contact@simounet.net> | 2018-06-01 13:49:16 +0200 |
commit | c15bb5ad72b1a9692682cf88318a70b7e650d34a (patch) | |
tree | 778558ec77177019596ccd2c356d24b1b6074936 /src | |
parent | 9707ac46613eea0c536d87f4c141d567fc2ef7d0 (diff) | |
download | wallabag-c15bb5ad72b1a9692682cf88318a70b7e650d34a.tar.gz wallabag-c15bb5ad72b1a9692682cf88318a70b7e650d34a.tar.zst wallabag-c15bb5ad72b1a9692682cf88318a70b7e650d34a.zip |
Fix srcset attribute on images downloaded
Diffstat (limited to 'src')
-rw-r--r-- | src/Wallabag/CoreBundle/Helper/DownloadImages.php | 36 |
1 file changed, 33 insertions, 3 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php index 252ba57c..9c9452dd 100644 --- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php | |||
@@ -42,14 +42,17 @@ class DownloadImages | |||
42 | public function processHtml($entryId, $html, $url) | 42 | public function processHtml($entryId, $html, $url) |
43 | { | 43 | { |
44 | $crawler = new Crawler($html); | 44 | $crawler = new Crawler($html); |
45 | $result = $crawler | 45 | $imagesCrawler = $crawler |
46 | ->filterXpath('//img') | 46 | ->filterXpath('//img'); |
47 | $imagesUrls = $imagesCrawler | ||
47 | ->extract(['src']); | 48 | ->extract(['src']); |
49 | $imagesSrcsetUrls = $this->getSrcsetUrls($imagesCrawler); | ||
50 | $imagesUrls = array_unique(array_merge($imagesUrls, $imagesSrcsetUrls)); | ||
48 | 51 | ||
49 | $relativePath = $this->getRelativePath($entryId); | 52 | $relativePath = $this->getRelativePath($entryId); |
50 | 53 | ||
51 | // download and save the image to the folder | 54 | // download and save the image to the folder |
52 | foreach ($result as $image) { | 55 | foreach ($imagesUrls as $image) { |
53 | $imagePath = $this->processSingleImage($entryId, $image, $url, $relativePath); | 56 | $imagePath = $this->processSingleImage($entryId, $image, $url, $relativePath); |
54 | 57 | ||
55 | if (false === $imagePath) { | 58 | if (false === $imagePath) { |
@@ -172,6 +175,33 @@ class DownloadImages | |||
172 | } | 175 | } |
173 | 176 | ||
174 | /** | 177 | /** |
178 | * Get images urls from the srcset image attribute. | ||
179 | * | ||
180 | * @param Crawler $imagesCrawler | ||
181 | * | ||
182 | * @return array An array of urls | ||
183 | */ | ||
184 | protected function getSrcsetUrls(Crawler $imagesCrawler) | ||
185 | { | ||
186 | $urls = []; | ||
187 | $iterator = $imagesCrawler | ||
188 | ->getIterator(); | ||
189 | while ($iterator->valid()) { | ||
190 | $srcsetAttribute = $iterator->current()->getAttribute('srcset'); | ||
191 | if ('' !== $srcsetAttribute) { | ||
192 | $srcset = array_map('trim', explode(',', $srcsetAttribute)); | ||
193 | $srcsetUrls = array_map(function ($src) { | ||
194 | return explode(' ', $src)[0]; | ||
195 | }, $srcset); | ||
196 | $urls = array_merge($srcsetUrls, $urls); | ||
197 | } | ||
198 | $iterator->next(); | ||
199 | } | ||
200 | |||
201 | return $urls; | ||
202 | } | ||
203 | |||
204 | /** | ||
175 | * Setup base folder where all images are going to be saved. | 205 | * Setup base folder where all images are going to be saved. |
176 | */ | 206 | */ |
177 | private function setFolder() | 207 | private function setFolder() |