]>
Commit | Line | Data |
---|---|---|
1 | <?php | |
2 | ||
3 | namespace Wallabag\CoreBundle\Helper; | |
4 | ||
5 | use Psr\Log\LoggerInterface as Logger; | |
6 | use Symfony\Component\DomCrawler\Crawler; | |
7 | ||
// Quality, on a 0-100 scale, used when downloaded pictures are re-encoded
// (passed as-is to imagejpeg() and scaled to the 0-9 range for imagepng()).
// Guarded so that a value defined earlier (e.g. by a configuration file) is
// kept instead of triggering a "constant already defined" warning.
if (!defined('REGENERATE_PICTURES_QUALITY')) {
    define('REGENERATE_PICTURES_QUALITY', 75);
}
9 | ||
/**
 * Downloads the images referenced by an HTML document into a local folder
 * and rewrites the document so that it points at the local copies.
 *
 * Every image is validated (extension, detected MIME type) and re-encoded
 * with GD before being written, so the bytes stored on disk are never the
 * raw bytes received from the remote server.
 */
class DownloadImages
{
    /** @var string Folder the images are written to. */
    private $folder;

    /** @var string URL of the document, used to resolve relative image links. */
    private $url;

    /** @var string HTML being processed. */
    private $html;

    /** @var Logger */
    private $logger;

    /**
     * @param string $html   HTML content to scan for <img> tags
     * @param string $url    URL the HTML was fetched from
     * @param Logger $logger
     */
    public function __construct($html, $url, Logger $logger)
    {
        $this->html = $html;
        $this->url = $url;
        // The logger is assigned first so setFolder() can report a failure.
        $this->logger = $logger;
        $this->setFolder();
    }

    /**
     * Sets the destination folder, creating it (and any missing parent) when
     * it does not exist yet.
     *
     * @param string $folder
     */
    public function setFolder($folder = 'assets/images')
    {
        // Recursive creation: the default value is a nested path. The trailing
        // is_dir() covers a concurrent creation between the check and mkdir().
        if (!file_exists($folder) && !mkdir($folder, 0777, true) && !is_dir($folder)) {
            if (null !== $this->logger) {
                $this->logger->log('error', 'Unable to create images folder '.$folder);
            }
        }

        $this->folder = $folder;
    }

    /**
     * Downloads every image of the document and replaces its link by the
     * local path. Images that cannot be fetched or validated keep their
     * original link.
     *
     * @return string the HTML with the links of the saved images rewritten
     */
    public function process()
    {
        $crawler = new Crawler($this->html);

        $this->logger->log('debug', 'Finding images inside document');
        // NOTE(review): assumes extract() yields one string per <img> when a
        // single attribute is requested - confirm against the Crawler version in use.
        $sources = $crawler
            ->filterXpath('//img')
            ->extract(array('src'));

        $seen = array();
        foreach ($sources as $image) {
            // Skip missing/empty src attributes and links already handled
            // (str_replace() below rewrites every occurrence at once).
            if (!is_string($image) || '' === trim($image) || isset($seen[$image])) {
                continue;
            }
            $seen[$image] = true;

            $absolutePath = self::getAbsoluteLink($image, $this->url);

            // The document is untrusted: only fetch over HTTP(S) so that a
            // crafted src can never make us read a local file or another wrapper.
            $scheme = strtolower((string) parse_url($absolutePath, PHP_URL_SCHEME));
            if ('http' !== $scheme && 'https' !== $scheme) {
                $this->logger->log('debug', 'Unsupported image link. Skipping '.$image);

                continue;
            }

            $filename = self::localFileName($absolutePath);
            if (null === $filename) {
                $this->logger->log('debug', 'Unable to find a file name. Skipping '.$image);

                continue;
            }

            // Download through the resolved URL so relative links work too.
            $file = file_get_contents($absolutePath);
            if (false === $file) {
                $this->logger->log('error', 'Unable to download image '.$absolutePath);

                continue;
            }

            // Only rewrite the link once the local copy really exists.
            if ($this->checks($file, $this->folder.'/'.$filename)) {
                $this->html = str_replace($image, $this->folder.'/'.rawurlencode($filename), $this->html);
            }
        }

        return $this->html;
    }

    /**
     * Validates the downloaded data and writes a re-encoded copy to disk.
     *
     * @param string $rawdata  raw bytes of the downloaded image
     * @param string $fullpath destination path (already decoded and sanitized)
     *
     * @return bool true when the image has been written, false otherwise
     */
    private function checks($rawdata, $fullpath)
    {
        // check extension
        $this->logger->log('debug','Checking extension');

        // Case-insensitive so that "PHOTO.JPG" is accepted as well.
        $file_ext = strtolower((string) strrchr(basename($fullpath), '.'));
        $whitelist = array('.jpg', '.jpeg', '.gif', '.png');
        if (!in_array($file_ext, $whitelist, true)) {
            $this->logger->log('debug','processed image with not allowed extension. Skipping '.$fullpath);

            return false;
        }

        // check headers
        $this->logger->log('debug','Checking headers');
        // Inspect the bytes already downloaded: no second request, and the
        // data that is validated is exactly the data that is written.
        $imageinfo = (is_string($rawdata) && '' !== $rawdata) ? getimagesizefromstring($rawdata) : false;
        $mime = (false !== $imageinfo && isset($imageinfo['mime'])) ? $imageinfo['mime'] : null;
        if (!in_array($mime, array('image/gif', 'image/jpeg', 'image/jpg', 'image/png'), true)) {
            $this->logger->log('debug','processed image with bad header. Skipping '.$fullpath);

            return false;
        }

        // regenerate image
        $this->logger->log('debug','regenerating image');
        $im = imagecreatefromstring($rawdata);
        if ($im === false) {
            $this->logger->log('error','error while regenerating image '.$fullpath);

            return false;
        }

        // Drop a previous copy only now that the new data is known to be valid.
        if (is_file($fullpath)) {
            unlink($fullpath);
        }

        $result = false;
        switch ($mime) {
            case 'image/gif':
                $this->logger->log('debug','Re-creating gif');
                $result = imagegif($im, $fullpath);
                break;
            case 'image/jpeg':
            case 'image/jpg':
                $this->logger->log('debug','Re-creating jpg');
                $result = imagejpeg($im, $fullpath, REGENERATE_PICTURES_QUALITY);
                break;
            case 'image/png':
                $this->logger->log('debug','Re-creating png');
                // Without these two calls GD discards the alpha channel.
                imagealphablending($im, false);
                imagesavealpha($im, true);
                // imagepng() expects a 0-9 compression level, not a percentage.
                $result = imagepng($im, $fullpath, (int) ceil(REGENERATE_PICTURES_QUALITY / 100 * 9));
                break;
        }
        imagedestroy($im);

        return $result;
    }

    /**
     * Builds the local file name for a remote image URL.
     *
     * The name is URL-decoded BEFORE the final basename() so that encoded
     * separators ("..%2F..%2Ffile.png") cannot escape the images folder.
     *
     * @param string $absoluteUrl
     *
     * @return string|null the file name, or null when none can be derived
     */
    private static function localFileName($absoluteUrl)
    {
        $path = (string) parse_url($absoluteUrl, PHP_URL_PATH);
        $name = basename(urldecode(basename($path)));

        if ('' === $name || '.' === $name || '..' === $name || false !== strpos($name, "\0")) {
            return null;
        }

        return $name;
    }

    /**
     * Resolves a link found in the document against the document URL.
     *
     * @param string $relativeLink link as written in the document
     * @param string $url          URL of the document
     *
     * @return string the absolute URL; the link is returned untouched when
     *                the document URL has no scheme/host to resolve against
     */
    private static function getAbsoluteLink($relativeLink, $url)
    {
        $relativeLink = (string) $relativeLink;
        if ('' === $relativeLink) {
            return $url;
        }

        /* return if already absolute URL */
        if ('' !== (string) parse_url($relativeLink, PHP_URL_SCHEME)) {
            return $relativeLink;
        }

        /* queries and anchors */
        if ('#' === $relativeLink[0] || '?' === $relativeLink[0]) {
            return $url.$relativeLink;
        }

        /* parse base URL; explicit lookups instead of extract() so that a
           missing component never leaves a variable undefined */
        $base = parse_url($url);
        if (!is_array($base) || !isset($base['scheme'], $base['host'])) {
            return $relativeLink;
        }
        $scheme = $base['scheme'];

        /* protocol-relative link ("//cdn.example.com/a.png") */
        if (0 === strpos($relativeLink, '//')) {
            return $scheme.':'.$relativeLink;
        }

        $host = $base['host'];
        if (isset($base['port'])) {
            $host .= ':'.$base['port'];
        }

        /* remove non-directory element from path */
        $path = preg_replace('#/[^/]*$#', '', isset($base['path']) ? $base['path'] : '');

        /* destroy path if relative url points to root */
        if ('/' === $relativeLink[0]) {
            $path = '';
        }

        /* dirty absolute URL */
        $abs = $host.$path.'/'.$relativeLink;

        /* replace '//' or '/./' or '/foo/../' with '/'; repeated until a
           pass performs no replacement ($n receives the replacement count) */
        $re = array('#(/\.?/)#', '#/(?!\.\.)[^/]+/\.\./#');
        for ($n = 1; $n > 0; $abs = preg_replace($re, '/', $abs, -1, $n)) {
        }

        /* absolute URL is ready! */
        return $scheme.'://'.$abs;
    }
}