about | summary | refs | log | tree | commit | diff | homepage
path: root/src/Wallabag/CoreBundle/Helper/DownloadImages.php
diff options
context:
space:
mode:
Diffstat (limited to 'src/Wallabag/CoreBundle/Helper/DownloadImages.php')
-rw-r--r-- src/Wallabag/CoreBundle/Helper/DownloadImages.php 47
1 files changed, 43 insertions, 4 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
index 252ba57c..cc3dcfce 100644
--- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php
+++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
@@ -42,14 +42,17 @@ class DownloadImages
42 public function processHtml($entryId, $html, $url) 42 public function processHtml($entryId, $html, $url)
43 { 43 {
44 $crawler = new Crawler($html); 44 $crawler = new Crawler($html);
45 $result = $crawler 45 $imagesCrawler = $crawler
46 ->filterXpath('//img') 46 ->filterXpath('//img');
47 $imagesUrls = $imagesCrawler
47 ->extract(['src']); 48 ->extract(['src']);
49 $imagesSrcsetUrls = $this->getSrcsetUrls($imagesCrawler);
50 $imagesUrls = array_unique(array_merge($imagesUrls, $imagesSrcsetUrls));
48 51
49 $relativePath = $this->getRelativePath($entryId); 52 $relativePath = $this->getRelativePath($entryId);
50 53
51 // download and save the image to the folder 54 // download and save the image to the folder
52 foreach ($result as $image) { 55 foreach ($imagesUrls as $image) {
53 $imagePath = $this->processSingleImage($entryId, $image, $url, $relativePath); 56 $imagePath = $this->processSingleImage($entryId, $image, $url, $relativePath);
54 57
55 if (false === $imagePath) { 58 if (false === $imagePath) {
@@ -82,6 +85,10 @@ class DownloadImages
82 */ 85 */
83 public function processSingleImage($entryId, $imagePath, $url, $relativePath = null) 86 public function processSingleImage($entryId, $imagePath, $url, $relativePath = null)
84 { 87 {
88 if (null === $imagePath) {
89 return false;
90 }
91
85 if (null === $relativePath) { 92 if (null === $relativePath) {
86 $relativePath = $this->getRelativePath($entryId); 93 $relativePath = $this->getRelativePath($entryId);
87 } 94 }
@@ -172,6 +179,38 @@ class DownloadImages
172 } 179 }
173 180
/**
 * Get images urls from the srcset image attribute.
 *
 * Only candidates that carry a descriptor are collected: a width
 * descriptor ("640w") or a pixel density descriptor ("2x", "1.5x").
 * Candidates without a descriptor are ignored.
 *
 * @param Crawler $imagesCrawler Crawler whose nodes are the images to inspect
 *
 * @return array An array of urls
 */
private function getSrcsetUrls(Crawler $imagesCrawler)
{
    // Group 1 is the url: it can't contain a quote or a white space and
    // can't start with a comma (the candidate separator).
    // The url must be followed by at least one white space and then by a
    // descriptor: digits followed by "w", or an integer / decimal number
    // followed by "x".
    // The lookahead requires the descriptor to end the candidate (comma or
    // end of attribute), so a descriptor is never mistaken for a url.
    $pattern = '~([^"\'\s,][^"\'\s]*)\s+(?:\d+w|\d+(?:\.\d+)?x)(?=\s*(?:,|$))~';

    $urls = [];

    foreach ($imagesCrawler as $imageNode) {
        $srcsetAttribute = $imageNode->getAttribute('srcset');

        if ('' === $srcsetAttribute) {
            continue;
        }

        // preg_match_all returns false on error and 0 when nothing matched
        if (!preg_match_all($pattern, $srcsetAttribute, $matches)) {
            continue;
        }

        // Urls of the current image are placed before the ones already collected
        $urls = array_merge($matches[1], $urls);
    }

    return $urls;
}
212
213 /**
175 * Setup base folder where all images are going to be saved. 214 * Setup base folder where all images are going to be saved.
176 */ 215 */
177 private function setFolder() 216 private function setFolder()
@@ -269,7 +308,7 @@ class DownloadImages
269 $this->logger->debug('DownloadImages: Checking extension (alternative)', ['ext' => $ext]); 308 $this->logger->debug('DownloadImages: Checking extension (alternative)', ['ext' => $ext]);
270 } 309 }
271 310
272 if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) { 311 if (!\in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) {
273 $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: ' . $imagePath); 312 $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: ' . $imagePath);
274 313
275 return false; 314 return false;