]>
Commit | Line | Data |
---|---|---|
419214d7 TC |
1 | <?php |
2 | ||
3 | namespace Wallabag\CoreBundle\Helper; | |
4 | ||
5 | use Psr\Log\LoggerInterface as Logger; | |
6 | use Symfony\Component\DomCrawler\Crawler; | |
7 | ||
// Default configuration constants used by DownloadImages.
// Each one is only defined when nothing defined it earlier, so a value supplied
// by configuration (for example a custom BASE_URL) is kept and no
// "constant already defined" warning is raised.

// Quality (0-100) used when re-encoding downloaded pictures.
if (!defined('REGENERATE_PICTURES_QUALITY')) {
    define('REGENERATE_PICTURES_QUALITY', 75);
}
// Port treated as the default plain-HTTP port when building the base URL.
if (!defined('HTTP_PORT')) {
    define('HTTP_PORT', 80);
}
// Port treated as HTTPS when building the base URL.
if (!defined('SSL_PORT')) {
    define('SSL_PORT', 443);
}
// Explicit base URL of the application; an empty value means "auto-detect".
if (!defined('BASE_URL')) {
    define('BASE_URL', '');
}
419214d7 TC |
12 | |
/**
 * Downloads the images referenced by an HTML document into a local folder,
 * re-encodes them with GD and rewrites the document so that the successfully
 * stored images point at the local copies.
 */
class DownloadImages
{
    private $folder;
    private $url;
    private $html;
    private $fileName;
    private $logger;

    /**
     * @param string $html   HTML content to scan for <img> tags
     * @param string $url    URL of the page the HTML comes from (base for relative links)
     * @param Logger $logger logger receiving debug/error messages
     */
    public function __construct($html, $url, Logger $logger)
    {
        $this->html = $html;
        $this->url = $url;
        // The logger is assigned before setFolder() so that a failed directory
        // creation can be reported.
        $this->logger = $logger;
        $this->setFolder();
    }

    /**
     * Sets the folder images are stored in, creating it when it is missing.
     *
     * The folder is created recursively because the default value is nested.
     * A failed creation is logged but not fatal; the folder name is stored anyway.
     *
     * @param string $folder target folder
     */
    public function setFolder($folder = "assets/images")
    {
        // The trailing is_dir() covers the case where the folder appeared
        // between the first check and the mkdir() call.
        if (!is_dir($folder) && !mkdir($folder, 0777, true) && !is_dir($folder)) {
            if ($this->logger !== null) {
                $this->logger->log('error', 'Unable to create images folder '.$folder);
            }
        }
        $this->folder = $folder;
    }

    /**
     * Downloads every image found in the document and rewrites its link.
     *
     * A link is only rewritten when the image was downloaded, validated and
     * stored successfully; otherwise the original link is left untouched.
     *
     * @return string the (possibly rewritten) HTML
     */
    public function process()
    {
        $crawler = new Crawler($this->html);
        $this->logger->log('debug', 'Finding images inside document');
        $result = $crawler
            ->filterXpath('//img')
            ->extract(array('src'));

        foreach ($result as $image) {
            if (!is_string($image) || $image === '') {
                continue;
            }

            // Resolve the link first: a relative src cannot be downloaded as is.
            $absolute_path = self::getAbsoluteLink($image, $this->url);

            $file = file_get_contents($absolute_path);
            if ($file === false) {
                $this->logger->log('debug', 'Unable to download image. Skipping '.$absolute_path);
                continue;
            }

            $filename = basename((string) parse_url($absolute_path, PHP_URL_PATH));
            if ($filename === '') {
                $this->logger->log('debug', 'Unable to find a file name for image. Skipping '.$absolute_path);
                continue;
            }

            $fullpath = $this->folder."/".$filename;
            if ($this->checks($file, $fullpath, $absolute_path)) {
                $this->html = str_replace($image, self::getPocheUrl() . '/' . $fullpath, $this->html);
            }
        }

        return $this->html;
    }

    /**
     * Validates a downloaded image and writes a regenerated copy to disk.
     *
     * @param string $rawdata       raw bytes of the downloaded image
     * @param string $fullpath      destination path (folder + url-encoded file name)
     * @param string $absolute_path absolute URL of the image, used to read its headers
     *
     * @return bool true when the image was written, false when it was rejected
     */
    private function checks($rawdata, $fullpath, $absolute_path)
    {
        // Decode the file name only, then strip any directory part the decoding
        // may have produced ("%2F", "..%2F"), so that a crafted remote name can
        // never point outside of the target folder. Null bytes are dropped too.
        $name = basename(str_replace("\0", '', urldecode(basename($fullpath))));
        $fullpath = dirname($fullpath).'/'.$name;

        // check extension (case-insensitive)
        $this->logger->log('debug','Checking extension');

        $file_ext = strtolower((string) strrchr($name, '.'));
        $whitelist = array('.jpg', '.jpeg', '.gif', '.png');
        if (!in_array($file_ext, $whitelist, true)) {
            $this->logger->log('debug','processed image with not allowed extension. Skipping '.$fullpath);

            return false;
        }

        // check headers
        $this->logger->log('debug','Checking headers');
        $imageinfo = getimagesize($absolute_path);
        $mime = (is_array($imageinfo) && isset($imageinfo['mime'])) ? $imageinfo['mime'] : null;
        if (!in_array($mime, array('image/gif', 'image/jpeg', 'image/jpg', 'image/png'), true)) {
            $this->logger->log('debug','processed image with bad header. Skipping '.$fullpath);

            return false;
        }

        // regenerate image
        $this->logger->log('debug','regenerating image');
        $im = imagecreatefromstring($rawdata);
        if ($im === false) {
            $this->logger->log('error','error while regenerating image '.$fullpath);

            return false;
        }

        // Remove a previous copy only once the new image is known to be valid.
        if (file_exists($fullpath)) {
            unlink($fullpath);
        }

        $result = false;
        switch ($mime) {
            case 'image/gif':
                $result = imagegif($im, $fullpath);
                $this->logger->log('debug','Re-creating gif');
                break;
            case 'image/jpeg':
            case 'image/jpg':
                $result = imagejpeg($im, $fullpath, REGENERATE_PICTURES_QUALITY);
                $this->logger->log('debug','Re-creating jpg');
                break;
            case 'image/png':
                $this->logger->log('debug','Re-creating png');
                // Keep the alpha channel, otherwise transparency is lost on save.
                imagealphablending($im, false);
                imagesavealpha($im, true);
                // Map the 0-100 quality onto the 0-9 PNG compression level.
                $result = imagepng($im, $fullpath, (int) ceil(REGENERATE_PICTURES_QUALITY / 100 * 9));
                break;
        }
        imagedestroy($im);

        return (bool) $result;
    }

    /**
     * Resolves a link found in a document against the URL of that document.
     *
     * @param string $relativeLink link as written in the document
     * @param string $url          URL of the document
     *
     * @return string absolute URL; the link is returned unchanged when it is
     *                already absolute or when $url has no scheme/host
     */
    private static function getAbsoluteLink($relativeLink, $url)
    {
        $relativeLink = (string) $relativeLink;

        /* an empty link designates the document itself */
        if ($relativeLink === '') {
            return $url;
        }

        /* return if already absolute URL */
        if (parse_url($relativeLink, PHP_URL_SCHEME) != '') {
            return $relativeLink;
        }

        /* queries and anchors */
        if ($relativeLink[0] == '#' || $relativeLink[0] == '?') {
            return $url.$relativeLink;
        }

        /* parse base URL; without scheme and host nothing can be resolved */
        $base = parse_url($url);
        if (!is_array($base) || !isset($base['scheme'], $base['host'])) {
            return $relativeLink;
        }
        $scheme = $base['scheme'];

        /* protocol-relative link: only the scheme is inherited */
        if (strncmp($relativeLink, '//', 2) === 0) {
            return $scheme.':'.$relativeLink;
        }

        /* keep an explicit port of the base URL */
        $authority = $base['host'].(isset($base['port']) ? ':'.$base['port'] : '');

        /* remove non-directory element from path */
        $path = isset($base['path']) ? $base['path'] : '';
        $path = preg_replace('#/[^/]*$#', '', $path);

        /* destroy path if relative url points to root */
        if ($relativeLink[0] == '/') {
            $path = '';
        }

        /* dirty absolute URL */
        $abs = $authority.$path.'/'.$relativeLink;

        /* replace '//' or '/./' or '/foo/../' with '/' until nothing changes */
        $re = array('#(/\.?/)#', '#/(?!\.\.)[^/]+/\.\./#');
        do {
            $abs = preg_replace($re, '/', $abs, -1, $n);
        } while ($n > 0);

        /* absolute URL is ready! */
        return $scheme.'://'.$abs;
    }

    /**
     * Returns the base URL of the application.
     *
     * BASE_URL is returned when it is configured (non-empty); otherwise the URL
     * is built from the scheme, host and port found in $_SERVER, taking the
     * X-Forwarded-* headers into account.
     *
     * @return string base URL without trailing slash
     */
    public static function getPocheUrl()
    {
        // check if BASE_URL is configured
        if (BASE_URL) {
            return BASE_URL;
        }

        $port = isset($_SERVER["SERVER_PORT"]) ? $_SERVER["SERVER_PORT"] : null;

        // HTTPS detection: explicit flag, standard or custom SSL port, or proxy header.
        $https = (!empty($_SERVER['HTTPS']) && strtolower($_SERVER['HTTPS']) == 'on')
            || ($port !== null && ($port == '443' || $port == SSL_PORT))
            || (isset($_SERVER['HTTP_X_FORWARDED_PROTO']) && $_SERVER['HTTP_X_FORWARDED_PROTO'] == 'https');

        // The port is omitted when unknown or when it is the default one for the scheme.
        $defaultPort = $port === null
            || $port == '80'
            || $port == HTTP_PORT
            || ($https && ($port == '443' || $port == SSL_PORT));
        $serverport = $defaultPort ? '' : ':' . $port;

        if (isset($_SERVER["HTTP_X_FORWARDED_PORT"])) {
            $serverport = ':' . $_SERVER["HTTP_X_FORWARDED_PORT"];
        }

        if (isset($_SERVER['HTTP_X_FORWARDED_HOST'])) {
            $host = $_SERVER['HTTP_X_FORWARDED_HOST'];
        } elseif (isset($_SERVER['HTTP_HOST'])) {
            $host = $_SERVER['HTTP_HOST'];
        } else {
            // SERVER_NAME may be missing too (e.g. on the command line).
            $host = isset($_SERVER['SERVER_NAME']) ? $_SERVER['SERVER_NAME'] : '';
        }

        // The host already carries a port: do not append another one.
        if (strpos($host, ':') !== false) {
            $serverport = '';
        }

        return 'http' . ($https ? 's' : '') . '://' . $host . $serverport;
    }
}