diff options
Diffstat (limited to 'src/Wallabag/CoreBundle/Helper/DownloadImages.php')
-rw-r--r-- | src/Wallabag/CoreBundle/Helper/DownloadImages.php | 47 |
1 files changed, 43 insertions, 4 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php index 252ba57c..cc3dcfce 100644 --- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php | |||
@@ -42,14 +42,17 @@ class DownloadImages | |||
42 | public function processHtml($entryId, $html, $url) | 42 | public function processHtml($entryId, $html, $url) |
43 | { | 43 | { |
44 | $crawler = new Crawler($html); | 44 | $crawler = new Crawler($html); |
45 | $result = $crawler | 45 | $imagesCrawler = $crawler |
46 | ->filterXpath('//img') | 46 | ->filterXpath('//img'); |
47 | $imagesUrls = $imagesCrawler | ||
47 | ->extract(['src']); | 48 | ->extract(['src']); |
49 | $imagesSrcsetUrls = $this->getSrcsetUrls($imagesCrawler); | ||
50 | $imagesUrls = array_unique(array_merge($imagesUrls, $imagesSrcsetUrls)); | ||
48 | 51 | ||
49 | $relativePath = $this->getRelativePath($entryId); | 52 | $relativePath = $this->getRelativePath($entryId); |
50 | 53 | ||
51 | // download and save the image to the folder | 54 | // download and save the image to the folder |
52 | foreach ($result as $image) { | 55 | foreach ($imagesUrls as $image) { |
53 | $imagePath = $this->processSingleImage($entryId, $image, $url, $relativePath); | 56 | $imagePath = $this->processSingleImage($entryId, $image, $url, $relativePath); |
54 | 57 | ||
55 | if (false === $imagePath) { | 58 | if (false === $imagePath) { |
@@ -82,6 +85,10 @@ class DownloadImages | |||
82 | */ | 85 | */ |
83 | public function processSingleImage($entryId, $imagePath, $url, $relativePath = null) | 86 | public function processSingleImage($entryId, $imagePath, $url, $relativePath = null) |
84 | { | 87 | { |
88 | if (null === $imagePath) { | ||
89 | return false; | ||
90 | } | ||
91 | |||
85 | if (null === $relativePath) { | 92 | if (null === $relativePath) { |
86 | $relativePath = $this->getRelativePath($entryId); | 93 | $relativePath = $this->getRelativePath($entryId); |
87 | } | 94 | } |
@@ -172,6 +179,38 @@ class DownloadImages | |||
172 | } | 179 | } |
173 | 180 | ||
174 | /** | 181 | /** |
182 | * Collect the image URLs listed in the srcset attribute of every node of the given crawler. | |
183 | * | |
184 | * @param Crawler $imagesCrawler Crawler holding the nodes whose srcset attribute is read | |
185 | * | |
186 | * @return array URLs taken from the matched srcset candidates (not de-duplicated here) | |
187 | */ | |
188 | private function getSrcsetUrls(Crawler $imagesCrawler) | |
189 | { | |
190 | $urls = []; | |
191 | $iterator = $imagesCrawler | |
192 | ->getIterator(); | |
193 | while ($iterator->valid()) { | |
194 | $srcsetAttribute = $iterator->current()->getAttribute('srcset'); | |
195 | if ('' !== $srcsetAttribute) { | |
196 | // URL part: one or more characters that are not ", ' or whitespace | |
197 | // then optional whitespace (zero or more, so the separator is not strictly required) | |
198 | // then one or more integer descriptors (digits followed by w or x). NOTE(review): candidates without a descriptor never match, and decimal densities such as 1.5x do not match as intended - confirm this is acceptable | |
199 | $pattern = "/(?:[^\"'\s]+\s*(?:\d+[wx])+)/"; | |
200 | preg_match_all($pattern, $srcsetAttribute, $matches); | |
201 | $srcset = \call_user_func_array('array_merge', $matches); | |
202 | $srcsetUrls = array_map(function ($src) { | |
203 | return trim(explode(' ', $src, 2)[0]); | |
204 | }, $srcset); | |
205 | $urls = array_merge($srcsetUrls, $urls); | |
206 | } | |
207 | $iterator->next(); | |
208 | } | |
209 | | |
210 | return $urls; | |
211 | } | |
212 | |||
213 | /** | ||
175 | * Setup base folder where all images are going to be saved. | 214 | * Setup base folder where all images are going to be saved. |
176 | */ | 215 | */ |
177 | private function setFolder() | 216 | private function setFolder() |
@@ -269,7 +308,7 @@ class DownloadImages | |||
269 | $this->logger->debug('DownloadImages: Checking extension (alternative)', ['ext' => $ext]); | 308 | $this->logger->debug('DownloadImages: Checking extension (alternative)', ['ext' => $ext]); |
270 | } | 309 | } |
271 | 310 | ||
272 | if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) { | 311 | if (!\in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) { |
273 | $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: ' . $imagePath); | 312 | $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: ' . $imagePath); |
274 | 313 | ||
275 | return false; | 314 | return false; |