From 419214d7221e0821ef2b73eb2b3db816ed0cf173 Mon Sep 17 00:00:00 2001 From: Thomas Citharel Date: Tue, 28 Jun 2016 19:07:55 +0200 Subject: Download pictures successfully Needs to rewrite them properly (get base url) --- src/Wallabag/CoreBundle/Helper/DownloadImages.php | 147 ++++++++++++++++++++++ 1 file changed, 147 insertions(+) create mode 100644 src/Wallabag/CoreBundle/Helper/DownloadImages.php (limited to 'src/Wallabag/CoreBundle/Helper/DownloadImages.php') diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php new file mode 100644 index 00000000..32a9dbb2 --- /dev/null +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php @@ -0,0 +1,147 @@ +html = $html; + $this->url = $url; + $this->setFolder(); + $this->logger = $logger; + } + + public function setFolder($folder = "assets/images") { + // if folder doesn't exist, attempt to create one and store the folder name in property $folder + if(!file_exists($folder)) { + mkdir($folder); + } + $this->folder = $folder; + } + + public function process() { + //instantiate the symfony DomCrawler Component + $crawler = new Crawler($this->html); + // create an array of all scrapped image links + $this->logger->log('debug', 'Finding images inside document'); + $result = $crawler + ->filterXpath('//img') + ->extract(array('src')); + + // download and save the image to the folder + foreach ($result as $image) { + $file = file_get_contents($image); + + // Checks + $absolute_path = self::getAbsoluteLink($image, $this->url); + $filename = basename(parse_url($absolute_path, PHP_URL_PATH)); + $fullpath = $this->folder."/".$filename; + self::checks($file, $fullpath, $absolute_path); + $this->html = str_replace($image, $fullpath, $this->html); + } + + return $this->html; + } + + private function checks($rawdata, $fullpath, $absolute_path) { + $fullpath = urldecode($fullpath); + + if (file_exists($fullpath)) { + unlink($fullpath); + } + + // check extension + 
$this->logger->log('debug','Checking extension'); + + $file_ext = strrchr($fullpath, '.'); + $whitelist = array('.jpg', '.jpeg', '.gif', '.png'); + if (!(in_array($file_ext, $whitelist))) { + $this->logger->log('debug','processed image with not allowed extension. Skipping '.$fullpath); + + return false; + } + + // check headers + $this->logger->log('debug','Checking headers'); + $imageinfo = getimagesize($absolute_path); + if ($imageinfo['mime'] != 'image/gif' && $imageinfo['mime'] != 'image/jpeg' && $imageinfo['mime'] != 'image/jpg' && $imageinfo['mime'] != 'image/png') { + $this->logger->log('debug','processed image with bad header. Skipping '.$fullpath); + + return false; + } + + // regenerate image + $this->logger->log('debug','regenerating image'); + $im = imagecreatefromstring($rawdata); + if ($im === false) { + $this->logger->log('error','error while regenerating image '.$fullpath); + + return false; + } + + switch ($imageinfo['mime']) { + case 'image/gif': + $result = imagegif($im, $fullpath); + $this->logger->log('debug','Re-creating gif'); + break; + case 'image/jpeg': + case 'image/jpg': + $result = imagejpeg($im, $fullpath, REGENERATE_PICTURES_QUALITY); + $this->logger->log('debug','Re-creating jpg'); + break; + case 'image/png': + $this->logger->log('debug','Re-creating png'); + $result = imagepng($im, $fullpath, ceil(REGENERATE_PICTURES_QUALITY / 100 * 9)); + break; + } + imagedestroy($im); + + return $result; + } + + private static function getAbsoluteLink($relativeLink, $url) + { + /* return if already absolute URL */ + if (parse_url($relativeLink, PHP_URL_SCHEME) != '') { + return $relativeLink; + } + + /* queries and anchors */ + if ($relativeLink[0] == '#' || $relativeLink[0] == '?') { + return $url.$relativeLink; + } + + /* parse base URL and convert to local variables: + $scheme, $host, $path */ + extract(parse_url($url)); + + /* remove non-directory element from path */ + $path = preg_replace('#/[^/]*$#', '', $path); + + /* destroy path if 
relative url points to root */ + if ($relativeLink[0] == '/') { + $path = ''; + } + + /* dirty absolute URL */ + $abs = $host.$path.'/'.$relativeLink; + + /* replace '//' or '/./' or '/foo/../' with '/' */ + $re = array('#(/\.?/)#', '#/(?!\.\.)[^/]+/\.\./#'); + for ($n = 1; $n > 0; $abs = preg_replace($re, '/', $abs, -1, $n)) { + } + + /* absolute URL is ready! */ + return $scheme.'://'.$abs; + } +} -- cgit v1.2.3 From 94654765cca6771c2f54eeaa056b7e65f3353105 Mon Sep 17 00:00:00 2001 From: Thomas Citharel Date: Tue, 28 Jun 2016 22:06:00 +0200 Subject: Working --- src/Wallabag/CoreBundle/Helper/DownloadImages.php | 44 ++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) (limited to 'src/Wallabag/CoreBundle/Helper/DownloadImages.php') diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php index 32a9dbb2..14f0aa1b 100644 --- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php @@ -6,6 +6,9 @@ use Psr\Log\LoggerInterface as Logger; use Symfony\Component\DomCrawler\Crawler; define('REGENERATE_PICTURES_QUALITY', 75); +define('HTTP_PORT', 80); +define('SSL_PORT', 443); +define('BASE_URL',''); class DownloadImages { private $folder; @@ -47,7 +50,7 @@ class DownloadImages { $filename = basename(parse_url($absolute_path, PHP_URL_PATH)); $fullpath = $this->folder."/".$filename; self::checks($file, $fullpath, $absolute_path); - $this->html = str_replace($image, $fullpath, $this->html); + $this->html = str_replace($image, self::getPocheUrl() . '/' . $fullpath, $this->html); } return $this->html; @@ -144,4 +147,43 @@ class DownloadImages { /* absolute URL is ready! */ return $scheme.'://'.$abs; } + + public static function getPocheUrl() + { + $baseUrl = ""; + $https = (!empty($_SERVER['HTTPS']) + && (strtolower($_SERVER['HTTPS']) == 'on')) + || (isset($_SERVER["SERVER_PORT"]) + && $_SERVER["SERVER_PORT"] == '443') // HTTPS detection. 
+ || (isset($_SERVER["SERVER_PORT"]) //Custom HTTPS port detection + && $_SERVER["SERVER_PORT"] == SSL_PORT) + || (isset($_SERVER['HTTP_X_FORWARDED_PROTO']) + && $_SERVER['HTTP_X_FORWARDED_PROTO'] == 'https'); + $serverport = (!isset($_SERVER["SERVER_PORT"]) + || $_SERVER["SERVER_PORT"] == '80' + || $_SERVER["SERVER_PORT"] == HTTP_PORT + || ($https && $_SERVER["SERVER_PORT"] == '443') + || ($https && $_SERVER["SERVER_PORT"]==SSL_PORT) //Custom HTTPS port detection + ? '' : ':' . $_SERVER["SERVER_PORT"]); + + if (isset($_SERVER["HTTP_X_FORWARDED_PORT"])) { + $serverport = ':' . $_SERVER["HTTP_X_FORWARDED_PORT"]; + } + // $scriptname = str_replace('/index.php', '/', $_SERVER["SCRIPT_NAME"]); + // if (!isset($_SERVER["HTTP_HOST"])) { + // return $scriptname; + // } + $host = (isset($_SERVER['HTTP_X_FORWARDED_HOST']) ? $_SERVER['HTTP_X_FORWARDED_HOST'] : (isset($_SERVER['HTTP_HOST']) ? $_SERVER['HTTP_HOST'] : $_SERVER['SERVER_NAME'])); + if (strpos($host, ':') !== false) { + $serverport = ''; + } + // check if BASE_URL is configured + if(BASE_URL) { + $baseUrl = BASE_URL; + } else { + $baseUrl = 'http' . ($https ? 's' : '') . '://' . $host . 
$serverport; + } + return $baseUrl; + + } } -- cgit v1.2.3 From 156bf62758080153668a65db611c4241d0fc8a00 Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Sat, 22 Oct 2016 09:22:30 +0200 Subject: CS --- src/Wallabag/CoreBundle/Helper/DownloadImages.php | 77 ++++++++++++----------- 1 file changed, 41 insertions(+), 36 deletions(-) (limited to 'src/Wallabag/CoreBundle/Helper/DownloadImages.php') diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php index 14f0aa1b..e23e0c55 100644 --- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php @@ -8,31 +8,35 @@ use Symfony\Component\DomCrawler\Crawler; define('REGENERATE_PICTURES_QUALITY', 75); define('HTTP_PORT', 80); define('SSL_PORT', 443); -define('BASE_URL',''); +define('BASE_URL', ''); -class DownloadImages { +class DownloadImages +{ private $folder; private $url; private $html; private $fileName; private $logger; - public function __construct($html, $url, Logger $logger) { + public function __construct($html, $url, Logger $logger) + { $this->html = $html; $this->url = $url; $this->setFolder(); $this->logger = $logger; } - public function setFolder($folder = "assets/images") { + public function setFolder($folder = 'assets/images') + { // if folder doesn't exist, attempt to create one and store the folder name in property $folder - if(!file_exists($folder)) { + if (!file_exists($folder)) { mkdir($folder); } $this->folder = $folder; } - public function process() { + public function process() + { //instantiate the symfony DomCrawler Component $crawler = new Crawler($this->html); // create an array of all scrapped image links @@ -48,15 +52,16 @@ class DownloadImages { // Checks $absolute_path = self::getAbsoluteLink($image, $this->url); $filename = basename(parse_url($absolute_path, PHP_URL_PATH)); - $fullpath = $this->folder."/".$filename; + $fullpath = $this->folder.'/'.$filename; self::checks($file, 
$fullpath, $absolute_path); - $this->html = str_replace($image, self::getPocheUrl() . '/' . $fullpath, $this->html); + $this->html = str_replace($image, self::getPocheUrl().'/'.$fullpath, $this->html); } return $this->html; } - private function checks($rawdata, $fullpath, $absolute_path) { + private function checks($rawdata, $fullpath, $absolute_path) + { $fullpath = urldecode($fullpath); if (file_exists($fullpath)) { @@ -64,30 +69,30 @@ class DownloadImages { } // check extension - $this->logger->log('debug','Checking extension'); + $this->logger->log('debug', 'Checking extension'); $file_ext = strrchr($fullpath, '.'); $whitelist = array('.jpg', '.jpeg', '.gif', '.png'); if (!(in_array($file_ext, $whitelist))) { - $this->logger->log('debug','processed image with not allowed extension. Skipping '.$fullpath); + $this->logger->log('debug', 'processed image with not allowed extension. Skipping '.$fullpath); return false; } // check headers - $this->logger->log('debug','Checking headers'); + $this->logger->log('debug', 'Checking headers'); $imageinfo = getimagesize($absolute_path); if ($imageinfo['mime'] != 'image/gif' && $imageinfo['mime'] != 'image/jpeg' && $imageinfo['mime'] != 'image/jpg' && $imageinfo['mime'] != 'image/png') { - $this->logger->log('debug','processed image with bad header. Skipping '.$fullpath); + $this->logger->log('debug', 'processed image with bad header. 
Skipping '.$fullpath); return false; } // regenerate image - $this->logger->log('debug','regenerating image'); + $this->logger->log('debug', 'regenerating image'); $im = imagecreatefromstring($rawdata); if ($im === false) { - $this->logger->log('error','error while regenerating image '.$fullpath); + $this->logger->log('error', 'error while regenerating image '.$fullpath); return false; } @@ -95,15 +100,15 @@ class DownloadImages { switch ($imageinfo['mime']) { case 'image/gif': $result = imagegif($im, $fullpath); - $this->logger->log('debug','Re-creating gif'); + $this->logger->log('debug', 'Re-creating gif'); break; case 'image/jpeg': case 'image/jpg': $result = imagejpeg($im, $fullpath, REGENERATE_PICTURES_QUALITY); - $this->logger->log('debug','Re-creating jpg'); + $this->logger->log('debug', 'Re-creating jpg'); break; case 'image/png': - $this->logger->log('debug','Re-creating png'); + $this->logger->log('debug', 'Re-creating png'); $result = imagepng($im, $fullpath, ceil(REGENERATE_PICTURES_QUALITY / 100 * 9)); break; } @@ -150,24 +155,24 @@ class DownloadImages { public static function getPocheUrl() { - $baseUrl = ""; + $baseUrl = ''; $https = (!empty($_SERVER['HTTPS']) && (strtolower($_SERVER['HTTPS']) == 'on')) - || (isset($_SERVER["SERVER_PORT"]) - && $_SERVER["SERVER_PORT"] == '443') // HTTPS detection. - || (isset($_SERVER["SERVER_PORT"]) //Custom HTTPS port detection - && $_SERVER["SERVER_PORT"] == SSL_PORT) + || (isset($_SERVER['SERVER_PORT']) + && $_SERVER['SERVER_PORT'] == '443') // HTTPS detection. 
+ || (isset($_SERVER['SERVER_PORT']) //Custom HTTPS port detection + && $_SERVER['SERVER_PORT'] == SSL_PORT) || (isset($_SERVER['HTTP_X_FORWARDED_PROTO']) && $_SERVER['HTTP_X_FORWARDED_PROTO'] == 'https'); - $serverport = (!isset($_SERVER["SERVER_PORT"]) - || $_SERVER["SERVER_PORT"] == '80' - || $_SERVER["SERVER_PORT"] == HTTP_PORT - || ($https && $_SERVER["SERVER_PORT"] == '443') - || ($https && $_SERVER["SERVER_PORT"]==SSL_PORT) //Custom HTTPS port detection - ? '' : ':' . $_SERVER["SERVER_PORT"]); - - if (isset($_SERVER["HTTP_X_FORWARDED_PORT"])) { - $serverport = ':' . $_SERVER["HTTP_X_FORWARDED_PORT"]; + $serverport = (!isset($_SERVER['SERVER_PORT']) + || $_SERVER['SERVER_PORT'] == '80' + || $_SERVER['SERVER_PORT'] == HTTP_PORT + || ($https && $_SERVER['SERVER_PORT'] == '443') + || ($https && $_SERVER['SERVER_PORT'] == SSL_PORT) //Custom HTTPS port detection + ? '' : ':'.$_SERVER['SERVER_PORT']); + + if (isset($_SERVER['HTTP_X_FORWARDED_PORT'])) { + $serverport = ':'.$_SERVER['HTTP_X_FORWARDED_PORT']; } // $scriptname = str_replace('/index.php', '/', $_SERVER["SCRIPT_NAME"]); // if (!isset($_SERVER["HTTP_HOST"])) { @@ -178,12 +183,12 @@ class DownloadImages { $serverport = ''; } // check if BASE_URL is configured - if(BASE_URL) { + if (BASE_URL) { $baseUrl = BASE_URL; } else { - $baseUrl = 'http' . ($https ? 's' : '') . '://' . $host . $serverport; + $baseUrl = 'http'.($https ? 
's' : '').'://'.$host.$serverport; } - return $baseUrl; - + + return $baseUrl; } } -- cgit v1.2.3 From 7f55941856549a3f5f45c42fdc171d66ff7ee297 Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Sun, 30 Oct 2016 10:48:29 +0100 Subject: Use doctrine event to download images --- src/Wallabag/CoreBundle/Helper/DownloadImages.php | 248 +++++++++++----------- 1 file changed, 126 insertions(+), 122 deletions(-) (limited to 'src/Wallabag/CoreBundle/Helper/DownloadImages.php') diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php index e23e0c55..426cbe48 100644 --- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php @@ -2,193 +2,197 @@ namespace Wallabag\CoreBundle\Helper; -use Psr\Log\LoggerInterface as Logger; +use Psr\Log\LoggerInterface; use Symfony\Component\DomCrawler\Crawler; - -define('REGENERATE_PICTURES_QUALITY', 75); -define('HTTP_PORT', 80); -define('SSL_PORT', 443); -define('BASE_URL', ''); +use GuzzleHttp\Client; +use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser; class DownloadImages { - private $folder; - private $url; - private $html; - private $fileName; + const REGENERATE_PICTURES_QUALITY = 80; + + private $client; + private $baseFolder; private $logger; + private $mimeGuesser; - public function __construct($html, $url, Logger $logger) + public function __construct(Client $client, $baseFolder, LoggerInterface $logger) { - $this->html = $html; - $this->url = $url; - $this->setFolder(); + $this->client = $client; + $this->baseFolder = $baseFolder; $this->logger = $logger; + $this->mimeGuesser = new MimeTypeExtensionGuesser(); + + $this->setFolder(); } - public function setFolder($folder = 'assets/images') + /** + * Setup base folder where all images are going to be saved. 
+ */ + private function setFolder() { // if folder doesn't exist, attempt to create one and store the folder name in property $folder - if (!file_exists($folder)) { - mkdir($folder); + if (!file_exists($this->baseFolder)) { + mkdir($this->baseFolder, 0777, true); } - $this->folder = $folder; } - public function process() + /** + * Process the html and extract image from it, save them to local and return the updated html. + * + * @param string $html + * @param string $url Used as a base path for relative image and folder + * + * @return string + */ + public function processHtml($html, $url) { - //instantiate the symfony DomCrawler Component - $crawler = new Crawler($this->html); - // create an array of all scrapped image links - $this->logger->log('debug', 'Finding images inside document'); + $crawler = new Crawler($html); $result = $crawler ->filterXpath('//img') ->extract(array('src')); + $relativePath = $this->getRelativePath($url); + // download and save the image to the folder foreach ($result as $image) { - $file = file_get_contents($image); - - // Checks - $absolute_path = self::getAbsoluteLink($image, $this->url); - $filename = basename(parse_url($absolute_path, PHP_URL_PATH)); - $fullpath = $this->folder.'/'.$filename; - self::checks($file, $fullpath, $absolute_path); - $this->html = str_replace($image, self::getPocheUrl().'/'.$fullpath, $this->html); + $imagePath = $this->processSingleImage($image, $url, $relativePath); + + if (false === $imagePath) { + continue; + } + + $html = str_replace($image, $imagePath, $html); } - return $this->html; + return $html; } - private function checks($rawdata, $fullpath, $absolute_path) + /** + * Process a single image: + * - retrieve it + * - re-saved it (for security reason) + * - return the new local path. 
+ * + * @param string $imagePath Path to the image to retrieve + * @param string $url Url from where the image were found + * @param string $relativePath Relative local path to saved the image + * + * @return string Relative url to access the image from the web + */ + public function processSingleImage($imagePath, $url, $relativePath = null) { - $fullpath = urldecode($fullpath); - - if (file_exists($fullpath)) { - unlink($fullpath); + if (null == $relativePath) { + $relativePath = $this->getRelativePath($url); } - // check extension - $this->logger->log('debug', 'Checking extension'); + $folderPath = $this->baseFolder.'/'.$relativePath; - $file_ext = strrchr($fullpath, '.'); - $whitelist = array('.jpg', '.jpeg', '.gif', '.png'); - if (!(in_array($file_ext, $whitelist))) { - $this->logger->log('debug', 'processed image with not allowed extension. Skipping '.$fullpath); + // build image path + $absolutePath = $this->getAbsoluteLink($url, $imagePath); + if (false === $absolutePath) { + $this->logger->log('debug', 'Can not determine the absolute path for that image, skipping.'); return false; } - // check headers - $this->logger->log('debug', 'Checking headers'); - $imageinfo = getimagesize($absolute_path); - if ($imageinfo['mime'] != 'image/gif' && $imageinfo['mime'] != 'image/jpeg' && $imageinfo['mime'] != 'image/jpg' && $imageinfo['mime'] != 'image/png') { - $this->logger->log('debug', 'processed image with bad header. Skipping '.$fullpath); + $res = $this->client->get( + $absolutePath, + ['exceptions' => false] + ); + + $ext = $this->mimeGuesser->guess($res->getHeader('content-type')); + $this->logger->log('debug', 'Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); + if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'])) { + $this->logger->log('debug', 'Processed image with not allowed extension. 
Skipping '.$imagePath); return false; } + $hashImage = hash('crc32', $absolutePath); + $localPath = $folderPath.'/'.$hashImage.'.'.$ext; + + try { + $im = imagecreatefromstring($res->getBody()); + } catch (\Exception $e) { + $im = false; + } - // regenerate image - $this->logger->log('debug', 'regenerating image'); - $im = imagecreatefromstring($rawdata); if ($im === false) { - $this->logger->log('error', 'error while regenerating image '.$fullpath); + $this->logger->log('error', 'Error while regenerating image', ['path' => $localPath]); return false; } - switch ($imageinfo['mime']) { - case 'image/gif': - $result = imagegif($im, $fullpath); + switch ($ext) { + case 'gif': + $result = imagegif($im, $localPath); $this->logger->log('debug', 'Re-creating gif'); break; - case 'image/jpeg': - case 'image/jpg': - $result = imagejpeg($im, $fullpath, REGENERATE_PICTURES_QUALITY); + case 'jpeg': + case 'jpg': + $result = imagejpeg($im, $localPath, self::REGENERATE_PICTURES_QUALITY); $this->logger->log('debug', 'Re-creating jpg'); break; - case 'image/png': + case 'png': + $result = imagepng($im, $localPath, ceil(self::REGENERATE_PICTURES_QUALITY / 100 * 9)); $this->logger->log('debug', 'Re-creating png'); - $result = imagepng($im, $fullpath, ceil(REGENERATE_PICTURES_QUALITY / 100 * 9)); - break; } + imagedestroy($im); - return $result; + return '/assets/images/'.$relativePath.'/'.$hashImage.'.'.$ext; } - private static function getAbsoluteLink($relativeLink, $url) + /** + * Generate the folder where we are going to save images based on the entry url. 
+ * + * @param string $url + * + * @return string + */ + private function getRelativePath($url) { - /* return if already absolute URL */ - if (parse_url($relativeLink, PHP_URL_SCHEME) != '') { - return $relativeLink; - } + $hashUrl = hash('crc32', $url); + $relativePath = $hashUrl[0].'/'.$hashUrl[1].'/'.$hashUrl; + $folderPath = $this->baseFolder.'/'.$relativePath; - /* queries and anchors */ - if ($relativeLink[0] == '#' || $relativeLink[0] == '?') { - return $url.$relativeLink; + if (!file_exists($folderPath)) { + mkdir($folderPath, 0777, true); } - /* parse base URL and convert to local variables: - $scheme, $host, $path */ - extract(parse_url($url)); + $this->logger->log('debug', 'Folder used for that url', ['folder' => $folderPath, 'url' => $url]); - /* remove non-directory element from path */ - $path = preg_replace('#/[^/]*$#', '', $path); + return $relativePath; + } - /* destroy path if relative url points to root */ - if ($relativeLink[0] == '/') { - $path = ''; + /** + * Make an $url absolute based on the $base. + * + * @see Graby->makeAbsoluteStr + * + * @param string $base Base url + * @param string $url Url to make it absolute + * + * @return false|string + */ + private function getAbsoluteLink($base, $url) + { + if (preg_match('!^https?://!i', $url)) { + // already absolute + return $url; } - /* dirty absolute URL */ - $abs = $host.$path.'/'.$relativeLink; + $base = new \SimplePie_IRI($base); - /* replace '//' or '/./' or '/foo/../' with '/' */ - $re = array('#(/\.?/)#', '#/(?!\.\.)[^/]+/\.\./#'); - for ($n = 1; $n > 0; $abs = preg_replace($re, '/', $abs, -1, $n)) { + // remove '//' in URL path (causes URLs not to resolve properly) + if (isset($base->ipath)) { + $base->ipath = preg_replace('!//+!', '/', $base->ipath); } - /* absolute URL is ready! 
*/ - return $scheme.'://'.$abs; - } - - public static function getPocheUrl() - { - $baseUrl = ''; - $https = (!empty($_SERVER['HTTPS']) - && (strtolower($_SERVER['HTTPS']) == 'on')) - || (isset($_SERVER['SERVER_PORT']) - && $_SERVER['SERVER_PORT'] == '443') // HTTPS detection. - || (isset($_SERVER['SERVER_PORT']) //Custom HTTPS port detection - && $_SERVER['SERVER_PORT'] == SSL_PORT) - || (isset($_SERVER['HTTP_X_FORWARDED_PROTO']) - && $_SERVER['HTTP_X_FORWARDED_PROTO'] == 'https'); - $serverport = (!isset($_SERVER['SERVER_PORT']) - || $_SERVER['SERVER_PORT'] == '80' - || $_SERVER['SERVER_PORT'] == HTTP_PORT - || ($https && $_SERVER['SERVER_PORT'] == '443') - || ($https && $_SERVER['SERVER_PORT'] == SSL_PORT) //Custom HTTPS port detection - ? '' : ':'.$_SERVER['SERVER_PORT']); - - if (isset($_SERVER['HTTP_X_FORWARDED_PORT'])) { - $serverport = ':'.$_SERVER['HTTP_X_FORWARDED_PORT']; - } - // $scriptname = str_replace('/index.php', '/', $_SERVER["SCRIPT_NAME"]); - // if (!isset($_SERVER["HTTP_HOST"])) { - // return $scriptname; - // } - $host = (isset($_SERVER['HTTP_X_FORWARDED_HOST']) ? $_SERVER['HTTP_X_FORWARDED_HOST'] : (isset($_SERVER['HTTP_HOST']) ? $_SERVER['HTTP_HOST'] : $_SERVER['SERVER_NAME'])); - if (strpos($host, ':') !== false) { - $serverport = ''; - } - // check if BASE_URL is configured - if (BASE_URL) { - $baseUrl = BASE_URL; - } else { - $baseUrl = 'http'.($https ? 
's' : '').'://'.$host.$serverport; + if ($absolute = \SimplePie_IRI::absolutize($base, $url)) { + return $absolute->get_uri(); } - return $baseUrl; + return false; } } -- cgit v1.2.3 From 48656e0eaac006a80f21e9aec8900747fe76283a Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Sun, 30 Oct 2016 11:27:09 +0100 Subject: Fixing tests --- src/Wallabag/CoreBundle/Helper/DownloadImages.php | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'src/Wallabag/CoreBundle/Helper/DownloadImages.php') diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php index 426cbe48..004bb277 100644 --- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php @@ -91,20 +91,23 @@ class DownloadImages // build image path $absolutePath = $this->getAbsoluteLink($url, $imagePath); if (false === $absolutePath) { - $this->logger->log('debug', 'Can not determine the absolute path for that image, skipping.'); + $this->logger->log('error', 'Can not determine the absolute path for that image, skipping.'); return false; } - $res = $this->client->get( - $absolutePath, - ['exceptions' => false] - ); + try { + $res = $this->client->get($absolutePath); + } catch (\Exception $e) { + $this->logger->log('error', 'Can not retrieve image, skipping.', ['exception' => $e]); + + return false; + } $ext = $this->mimeGuesser->guess($res->getHeader('content-type')); $this->logger->log('debug', 'Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); - if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'])) { - $this->logger->log('debug', 'Processed image with not allowed extension. Skipping '.$imagePath); + if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) { + $this->logger->log('error', 'Processed image with not allowed extension. 
Skipping '.$imagePath); return false; } @@ -117,7 +120,7 @@ class DownloadImages $im = false; } - if ($im === false) { + if (false === $im) { $this->logger->log('error', 'Error while regenerating image', ['path' => $localPath]); return false; @@ -193,6 +196,8 @@ class DownloadImages return $absolute->get_uri(); } + $this->logger->log('error', 'Can not make an absolute link', ['base' => $base, 'url' => $url]); + return false; } } -- cgit v1.2.3 From 41ada277f066ea57947bce05bcda63962b7fea55 Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Sun, 30 Oct 2016 19:50:00 +0100 Subject: Add instance url to the downloaded images --- src/Wallabag/CoreBundle/Helper/DownloadImages.php | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'src/Wallabag/CoreBundle/Helper/DownloadImages.php') diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php index 004bb277..e7982c56 100644 --- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php @@ -15,6 +15,7 @@ class DownloadImages private $baseFolder; private $logger; private $mimeGuesser; + private $wallabagUrl; public function __construct(Client $client, $baseFolder, LoggerInterface $logger) { @@ -26,6 +27,17 @@ class DownloadImages $this->setFolder(); } + /** + * Since we can't inject CraueConfig service because it'll generate a circular reference when injected in the subscriber + * we use a different way to inject the current wallabag url. + * + * @param string $url Usually from `$config->get('wallabag_url')` + */ + public function setWallabagUrl($url) + { + $this->wallabagUrl = rtrim($url, '/'); + } + /** * Setup base folder where all images are going to be saved. 
*/ @@ -143,7 +155,7 @@ class DownloadImages imagedestroy($im); - return '/assets/images/'.$relativePath.'/'.$hashImage.'.'.$ext; + return $this->wallabagUrl.'/assets/images/'.$relativePath.'/'.$hashImage.'.'.$ext; } /** -- cgit v1.2.3 From e0597476d1d5f6a4a7d6ea9b76966465f3d22fb8 Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Tue, 1 Nov 2016 14:49:02 +0100 Subject: Use custom event instead of Doctrine ones MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This give us ability to use Entry ID to determine where to store images and it’s then more easy to remove them when we remove the entry. --- src/Wallabag/CoreBundle/Helper/DownloadImages.php | 84 ++++++++++++++--------- 1 file changed, 51 insertions(+), 33 deletions(-) (limited to 'src/Wallabag/CoreBundle/Helper/DownloadImages.php') diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php index e7982c56..c5298236 100644 --- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php @@ -6,6 +6,7 @@ use Psr\Log\LoggerInterface; use Symfony\Component\DomCrawler\Crawler; use GuzzleHttp\Client; use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser; +use Symfony\Component\Finder\Finder; class DownloadImages { @@ -17,27 +18,17 @@ class DownloadImages private $mimeGuesser; private $wallabagUrl; - public function __construct(Client $client, $baseFolder, LoggerInterface $logger) + public function __construct(Client $client, $baseFolder, $wallabagUrl, LoggerInterface $logger) { $this->client = $client; $this->baseFolder = $baseFolder; + $this->wallabagUrl = rtrim($wallabagUrl, '/'); $this->logger = $logger; $this->mimeGuesser = new MimeTypeExtensionGuesser(); $this->setFolder(); } - /** - * Since we can't inject CraueConfig service because it'll generate a circular reference when injected in the subscriber - * we use a different way to inject the current 
wallabag url. - * - * @param string $url Usually from `$config->get('wallabag_url')` - */ - public function setWallabagUrl($url) - { - $this->wallabagUrl = rtrim($url, '/'); - } - /** * Setup base folder where all images are going to be saved. */ @@ -52,23 +43,24 @@ class DownloadImages /** * Process the html and extract image from it, save them to local and return the updated html. * + * @param int $entryId ID of the entry * @param string $html - * @param string $url Used as a base path for relative image and folder + * @param string $url Used as a base path for relative image and folder * * @return string */ - public function processHtml($html, $url) + public function processHtml($entryId, $html, $url) { $crawler = new Crawler($html); $result = $crawler ->filterXpath('//img') ->extract(array('src')); - $relativePath = $this->getRelativePath($url); + $relativePath = $this->getRelativePath($entryId); // download and save the image to the folder foreach ($result as $image) { - $imagePath = $this->processSingleImage($image, $url, $relativePath); + $imagePath = $this->processSingleImage($entryId, $image, $url, $relativePath); if (false === $imagePath) { continue; @@ -86,24 +78,27 @@ class DownloadImages * - re-saved it (for security reason) * - return the new local path. 
* + * @param int $entryId ID of the entry * @param string $imagePath Path to the image to retrieve * @param string $url Url from where the image were found * @param string $relativePath Relative local path to saved the image * * @return string Relative url to access the image from the web */ - public function processSingleImage($imagePath, $url, $relativePath = null) + public function processSingleImage($entryId, $imagePath, $url, $relativePath = null) { - if (null == $relativePath) { - $relativePath = $this->getRelativePath($url); + if (null === $relativePath) { + $relativePath = $this->getRelativePath($entryId); } + $this->logger->debug('DownloadImages: working on image: '.$imagePath); + $folderPath = $this->baseFolder.'/'.$relativePath; // build image path $absolutePath = $this->getAbsoluteLink($url, $imagePath); if (false === $absolutePath) { - $this->logger->log('error', 'Can not determine the absolute path for that image, skipping.'); + $this->logger->error('DownloadImages: Can not determine the absolute path for that image, skipping.'); return false; } @@ -111,15 +106,15 @@ class DownloadImages try { $res = $this->client->get($absolutePath); } catch (\Exception $e) { - $this->logger->log('error', 'Can not retrieve image, skipping.', ['exception' => $e]); + $this->logger->error('DownloadImages: Can not retrieve image, skipping.', ['exception' => $e]); return false; } $ext = $this->mimeGuesser->guess($res->getHeader('content-type')); - $this->logger->log('debug', 'Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); + $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) { - $this->logger->log('error', 'Processed image with not allowed extension. Skipping '.$imagePath); + $this->logger->error('DownloadImages: Processed image with not allowed extension. 
Skipping '.$imagePath); return false; } @@ -133,7 +128,7 @@ class DownloadImages } if (false === $im) { - $this->logger->log('error', 'Error while regenerating image', ['path' => $localPath]); + $this->logger->error('DownloadImages: Error while regenerating image', ['path' => $localPath]); return false; } @@ -141,16 +136,16 @@ class DownloadImages switch ($ext) { case 'gif': $result = imagegif($im, $localPath); - $this->logger->log('debug', 'Re-creating gif'); + $this->logger->debug('DownloadImages: Re-creating gif'); break; case 'jpeg': case 'jpg': $result = imagejpeg($im, $localPath, self::REGENERATE_PICTURES_QUALITY); - $this->logger->log('debug', 'Re-creating jpg'); + $this->logger->debug('DownloadImages: Re-creating jpg'); break; case 'png': $result = imagepng($im, $localPath, ceil(self::REGENERATE_PICTURES_QUALITY / 100 * 9)); - $this->logger->log('debug', 'Re-creating png'); + $this->logger->debug('DownloadImages: Re-creating png'); } imagedestroy($im); @@ -158,24 +153,47 @@ class DownloadImages return $this->wallabagUrl.'/assets/images/'.$relativePath.'/'.$hashImage.'.'.$ext; } + /** + * Remove all images for the given entry id. + * + * @param int $entryId ID of the entry + */ + public function removeImages($entryId) + { + $relativePath = $this->getRelativePath($entryId); + $folderPath = $this->baseFolder.'/'.$relativePath; + + $finder = new Finder(); + $finder + ->files() + ->ignoreDotFiles(true) + ->in($folderPath); + + foreach ($finder as $file) { + @unlink($file->getRealPath()); + } + + @rmdir($folderPath); + } + /** * Generate the folder where we are going to save images based on the entry url. 
* - * @param string $url + * @param int $entryId ID of the entry * * @return string */ - private function getRelativePath($url) + private function getRelativePath($entryId) { - $hashUrl = hash('crc32', $url); - $relativePath = $hashUrl[0].'/'.$hashUrl[1].'/'.$hashUrl; + $hashId = hash('crc32', $entryId); + $relativePath = $hashId[0].'/'.$hashId[1].'/'.$hashId; $folderPath = $this->baseFolder.'/'.$relativePath; if (!file_exists($folderPath)) { mkdir($folderPath, 0777, true); } - $this->logger->log('debug', 'Folder used for that url', ['folder' => $folderPath, 'url' => $url]); + $this->logger->debug('DownloadImages: Folder used for that Entry id', ['folder' => $folderPath, 'entryId' => $entryId]); return $relativePath; } @@ -208,7 +226,7 @@ class DownloadImages return $absolute->get_uri(); } - $this->logger->log('error', 'Can not make an absolute link', ['base' => $base, 'url' => $url]); + $this->logger->error('DownloadImages: Can not make an absolute link', ['base' => $base, 'url' => $url]); return false; } -- cgit v1.2.3