]>
Commit | Line | Data |
---|---|---|
419214d7 TC |
1 | <?php |
2 | ||
3 | namespace Wallabag\CoreBundle\Helper; | |
4 | ||
5 | use Psr\Log\LoggerInterface as Logger; | |
6 | use Symfony\Component\DomCrawler\Crawler; | |
7 | ||
// Global configuration constants read by DownloadImages.
// Each definition is guarded with defined() so that a value already provided
// elsewhere (or a second load of this file) does not trigger a
// "constant already defined" diagnostic; the first definition wins.

// Quality used when re-encoding pictures: passed as-is to imagejpeg() and
// scaled to a 0-9 compression level for imagepng().
defined('REGENERATE_PICTURES_QUALITY') || define('REGENERATE_PICTURES_QUALITY', 75);
// Port for which getPocheUrl() omits the ":port" suffix on plain HTTP.
defined('HTTP_PORT') || define('HTTP_PORT', 80);
// Port that getPocheUrl() treats as HTTPS (and omits from the URL).
defined('SSL_PORT') || define('SSL_PORT', 443);
// When non-empty, getPocheUrl() returns this value instead of detecting the URL.
defined('BASE_URL') || define('BASE_URL', '');
419214d7 | 12 | |
156bf627 JB |
/**
 * Downloads the images referenced by an HTML document, re-encodes them with GD
 * into a local folder and rewrites the document so it points at the local copies.
 *
 * Relies on the global constants REGENERATE_PICTURES_QUALITY, HTTP_PORT,
 * SSL_PORT and BASE_URL.
 */
class DownloadImages
{
    /** @var string Folder (relative to the working directory) receiving the pictures */
    private $folder;
    /** @var string URL of the page the HTML comes from; base for relative links */
    private $url;
    /** @var string HTML being processed */
    private $html;
    /** @var mixed Not referenced anywhere in this class; kept for compatibility */
    private $fileName;
    /** @var Logger */
    private $logger;

    /**
     * @param string $html   HTML content whose images must be downloaded
     * @param string $url    URL of the page, used to resolve relative image links
     * @param Logger $logger
     */
    public function __construct($html, $url, Logger $logger)
    {
        $this->html = $html;
        $this->url = $url;
        $this->logger = $logger;
        $this->setFolder();
    }

    /**
     * Stores the destination folder, creating it when it does not exist yet.
     *
     * @param string $folder
     */
    public function setFolder($folder = 'assets/images')
    {
        // Recursive creation: the default 'assets/images' has a parent folder
        // that may be missing as well.
        if (!file_exists($folder)) {
            mkdir($folder, 0777, true);
        }
        $this->folder = $folder;
    }

    /**
     * Downloads every <img> of the document and rewrites the HTML so that each
     * successfully saved picture points to its local copy.
     *
     * Images that cannot be fetched or that fail validation keep their
     * original link.
     *
     * @return string The (possibly rewritten) HTML
     */
    public function process()
    {
        $crawler = new Crawler($this->html);

        $this->logger->log('debug', 'Finding images inside document');
        $sources = $crawler
            ->filterXpath('//img')
            ->extract(array('src'));

        // Loop invariant: public base URL of this application.
        $baseUrl = self::getPocheUrl();
        $replacements = array();

        foreach ($sources as $image) {
            // Skip <img> without a usable src and sources already handled.
            if (!is_string($image) || '' === trim($image) || isset($replacements[$image])) {
                continue;
            }

            // Resolve BEFORE fetching, otherwise relative links can never be downloaded.
            $absolute_path = self::getAbsoluteLink($image, $this->url);

            // The HTML is untrusted: only fetch over HTTP(S), never file://, php://, ...
            $scheme = strtolower((string) parse_url($absolute_path, PHP_URL_SCHEME));
            if ('http' !== $scheme && 'https' !== $scheme) {
                $this->logger->log('debug', 'Unsupported image scheme. Skipping '.$absolute_path);
                continue;
            }

            // Decode first, then strip directories, so that an encoded "../"
            // in the file name cannot escape the destination folder.
            $urlPath = (string) parse_url($absolute_path, PHP_URL_PATH);
            $filename = basename(rawurldecode(basename($urlPath)));
            if ('' === $filename) {
                continue;
            }

            $file = file_get_contents($absolute_path);
            if (false === $file) {
                $this->logger->log('error', 'Unable to download image '.$absolute_path);
                continue;
            }

            $fullpath = $this->folder.'/'.$filename;
            if (!$this->checks($file, $fullpath, $absolute_path)) {
                // Nothing was saved: keep the original link.
                continue;
            }

            $replacements[$image] = $baseUrl.'/'.$this->folder.'/'.rawurlencode($filename);
        }

        // One pass with strtr(): longest sources are matched first and text
        // that was already replaced is never rescanned, so a source that is a
        // substring of another (or of a new local URL) cannot corrupt the result.
        if (!empty($replacements)) {
            $this->html = strtr($this->html, $replacements);
        }

        return $this->html;
    }

    /**
     * Validates downloaded image data and writes a re-encoded copy to disk.
     *
     * Note: re-encoding keeps only the first frame of an animated GIF.
     *
     * @param string $rawdata       Raw bytes of the downloaded picture
     * @param string $fullpath      Destination path (folder + sanitized file name)
     * @param string $absolute_path Absolute URL the data came from (used in logs)
     *
     * @return bool True when the picture was written, false otherwise
     */
    private function checks($rawdata, $fullpath, $absolute_path)
    {
        // check extension (case-insensitive, on the file name only)
        $this->logger->log('debug', 'Checking extension');

        $file_ext = strtolower((string) strrchr(basename($fullpath), '.'));
        $whitelist = array('.jpg', '.jpeg', '.gif', '.png');
        if (!in_array($file_ext, $whitelist, true)) {
            $this->logger->log('debug', 'processed image with not allowed extension. Skipping '.$fullpath);

            return false;
        }

        // check headers on the bytes we actually hold, instead of downloading
        // the picture a second time
        $this->logger->log('debug', 'Checking headers');
        $imageinfo = getimagesizefromstring($rawdata);
        $allowedMimes = array('image/gif', 'image/jpeg', 'image/jpg', 'image/png');
        if (false === $imageinfo || !in_array($imageinfo['mime'], $allowedMimes, true)) {
            $this->logger->log('debug', 'processed image with bad header. Skipping '.$fullpath.' ('.$absolute_path.')');

            return false;
        }

        // regenerate image
        $this->logger->log('debug', 'regenerating image');
        $im = imagecreatefromstring($rawdata);
        if (false === $im) {
            $this->logger->log('error', 'error while regenerating image '.$fullpath);

            return false;
        }

        // The image writers below overwrite an existing file, so a previous
        // copy is only replaced once the new data has been validated.
        $result = false;
        switch ($imageinfo['mime']) {
            case 'image/gif':
                $result = imagegif($im, $fullpath);
                $this->logger->log('debug', 'Re-creating gif');
                break;
            case 'image/jpeg':
            case 'image/jpg':
                $result = imagejpeg($im, $fullpath, REGENERATE_PICTURES_QUALITY);
                $this->logger->log('debug', 'Re-creating jpg');
                break;
            case 'image/png':
                $this->logger->log('debug', 'Re-creating png');
                // Keep the alpha channel, otherwise transparency is flattened.
                imagealphablending($im, false);
                imagesavealpha($im, true);
                // imagepng() expects a 0-9 compression level, not a 0-100 quality.
                $result = imagepng($im, $fullpath, (int) ceil(REGENERATE_PICTURES_QUALITY / 100 * 9));
                break;
        }
        imagedestroy($im);

        return $result;
    }

    /**
     * Resolves a link found in the document against the URL of the page.
     *
     * @param string $relativeLink Link as found in the document
     * @param string $url          URL of the page containing the link
     *
     * @return string Absolute URL; the link itself when the base URL cannot be parsed
     */
    private static function getAbsoluteLink($relativeLink, $url)
    {
        $relativeLink = trim((string) $relativeLink);

        /* nothing to resolve */
        if ('' === $relativeLink) {
            return $url;
        }

        /* return if already absolute URL */
        if (parse_url($relativeLink, PHP_URL_SCHEME) != '') {
            return $relativeLink;
        }

        /* queries and anchors */
        if ('#' === $relativeLink[0] || '?' === $relativeLink[0]) {
            return $url.$relativeLink;
        }

        /* parse base URL explicitly: no extract(), so missing parts get defaults */
        $base = parse_url($url);
        if (false === $base || !isset($base['host'])) {
            return $relativeLink;
        }
        $scheme = isset($base['scheme']) ? $base['scheme'] : 'http';

        /* protocol-relative link: only the scheme comes from the base URL */
        if ('//' === substr($relativeLink, 0, 2)) {
            return $scheme.':'.$relativeLink;
        }

        /* keep a non-default port of the base URL */
        $host = $base['host'].(isset($base['port']) ? ':'.$base['port'] : '');
        $path = isset($base['path']) ? $base['path'] : '';

        /* remove non-directory element from path */
        $path = preg_replace('#/[^/]*$#', '', $path);

        /* destroy path if relative url points to root */
        if ('/' === $relativeLink[0]) {
            $path = '';
        }

        /* dirty absolute URL */
        $abs = $host.$path.'/'.$relativeLink;

        /* replace '//' or '/./' or '/foo/../' with '/' until nothing changes */
        $re = array('#(/\.?/)#', '#/(?!\.\.)[^/]+/\.\./#');
        do {
            $abs = preg_replace($re, '/', $abs, -1, $n);
        } while ($n > 0);

        /* absolute URL is ready! */
        return $scheme.'://'.$abs;
    }

    /**
     * Returns the public base URL of the application.
     *
     * BASE_URL is returned when it is non-empty; otherwise the URL is built
     * from $_SERVER (HTTPS flag, ports, X-Forwarded-* headers and host).
     *
     * @return string Base URL without trailing slash
     */
    public static function getPocheUrl()
    {
        // check if BASE_URL is configured
        if (BASE_URL) {
            return BASE_URL;
        }

        $port = isset($_SERVER['SERVER_PORT']) ? $_SERVER['SERVER_PORT'] : null;
        // Loose comparisons on purpose: the port may be a string or an integer.
        $isSslPort = null !== $port && ($port == '443' || $port == SSL_PORT);
        $isHttpPort = null !== $port && ($port == '80' || $port == HTTP_PORT);

        $https = (!empty($_SERVER['HTTPS']) && strtolower($_SERVER['HTTPS']) == 'on')
            || $isSslPort
            || (isset($_SERVER['HTTP_X_FORWARDED_PROTO']) && $_SERVER['HTTP_X_FORWARDED_PROTO'] == 'https');

        // The port is left out of the URL when unknown or when it is a default one.
        if (null === $port || $isHttpPort || ($https && $isSslPort)) {
            $serverport = '';
        } else {
            $serverport = ':'.$port;
        }

        // A port announced by a proxy wins over the local one.
        if (isset($_SERVER['HTTP_X_FORWARDED_PORT'])) {
            $serverport = ':'.$_SERVER['HTTP_X_FORWARDED_PORT'];
        }

        if (isset($_SERVER['HTTP_X_FORWARDED_HOST'])) {
            $host = $_SERVER['HTTP_X_FORWARDED_HOST'];
        } elseif (isset($_SERVER['HTTP_HOST'])) {
            $host = $_SERVER['HTTP_HOST'];
        } elseif (isset($_SERVER['SERVER_NAME'])) {
            $host = $_SERVER['SERVER_NAME'];
        } else {
            // No host information in $_SERVER: avoid an undefined index notice.
            $host = '';
        }

        // The host already carries a port: do not append another one.
        if (false !== strpos($host, ':')) {
            $serverport = '';
        }

        return 'http'.($https ? 's' : '').'://'.$host.$serverport;
    }
}