From 1b93137e16694f52952c930848e1a7928e8a00a6 Mon Sep 17 00:00:00 2001 From: ArthurHoaro Date: Wed, 9 Nov 2016 18:57:02 +0100 Subject: Use web-thumbnailer to retrieve thumbnails * requires PHP 5.6 * use blazy on linklist since a lot more thumbs are retrieved * thumbnails can be disabled * thumbs size is now 120x120 * thumbs are now cropped to fit the expected size Fixes #345 #425 #487 #543 #588 #590 --- application/PageBuilder.php | 5 + application/Thumbnailer.php | 49 ++++ application/config/ConfigManager.php | 4 + assets/vintage/css/shaarli.css | 8 +- composer.json | 1 + inc/web-thumbnailer.json | 9 + index.php | 490 +++++---------------------------- tests/ThumbnailerTest.php | 51 ++++ tests/utils/config/configJson.json.php | 3 + tpl/vintage/configure.html | 12 + tpl/vintage/linklist.html | 11 +- tpl/vintage/picwall.html | 6 +- 12 files changed, 222 insertions(+), 427 deletions(-) create mode 100644 application/Thumbnailer.php create mode 100644 inc/web-thumbnailer.json create mode 100644 tests/ThumbnailerTest.php diff --git a/application/PageBuilder.php b/application/PageBuilder.php index a4483870..3dba7677 100644 --- a/application/PageBuilder.php +++ b/application/PageBuilder.php @@ -105,6 +105,11 @@ class PageBuilder if ($this->linkDB !== null) { $this->tpl->assign('tags', $this->linkDB->linksCountPerTag()); } + + $this->tpl->assign('thumbnails_enabled', $this->conf->get('thumbnails.enabled')); + $this->tpl->assign('thumbnails_width', $this->conf->get('thumbnails.width')); + $this->tpl->assign('thumbnails_height', $this->conf->get('thumbnails.height')); + // To be removed with a proper theme configuration. $this->tpl->assign('conf', $this->conf); } diff --git a/application/Thumbnailer.php b/application/Thumbnailer.php new file mode 100644 index 00000000..b669adae --- /dev/null +++ b/application/Thumbnailer.php @@ -0,0 +1,49 @@ +conf = $conf; + $this->wt = new WebThumbnailer(); + \WebThumbnailer\Application\ConfigManager::addFile('inc/web-thumbnailer.json'); + $this->wt->maxWidth($this->conf->get('thumbnails.width')) + ->maxHeight($this->conf->get('thumbnails.height')) + ->crop(true) + ->debug($this->conf->get('dev.debug', false)); + } + + /** + * Retrieve a thumbnail for given URL + * + * @param string $url where to look for a thumbnail. + * + * @return bool|string The thumbnail relative cache file path, or false if none has been found. + */ + public function get($url) + { + return $this->wt->thumbnail($url); + } +} diff --git a/application/config/ConfigManager.php b/application/config/ConfigManager.php index 82f4a368..124fedc2 100644 --- a/application/config/ConfigManager.php +++ b/application/config/ConfigManager.php @@ -319,6 +319,10 @@ class ConfigManager $this->setEmpty('general.enabled_plugins', self::$DEFAULT_PLUGINS); $this->setEmpty('general.default_note_title', 'Note: '); + $this->setEmpty('thumbnails.enabled', true); + $this->setEmpty('thumbnails.width', 120); + $this->setEmpty('thumbnails.height', 120); + $this->setEmpty('updates.check_updates', false); $this->setEmpty('updates.check_updates_branch', 'stable'); $this->setEmpty('updates.check_updates_interval', 86400); diff --git a/assets/vintage/css/shaarli.css b/assets/vintage/css/shaarli.css index c919339b..05e2c17e 100644 --- a/assets/vintage/css/shaarli.css +++ b/assets/vintage/css/shaarli.css @@ -701,8 +701,8 @@ a.bigbutton, #pageheader a.bigbutton { position: relative; display: table-cell; vertical-align: middle; - width: 90px; - height: 90px; + width: 120px; + height: 120px; overflow: hidden; text-align: center; float: left; @@ -739,9 +739,9 @@ a.bigbutton, #pageheader a.bigbutton { position: absolute; top: 0; left: 0; - width: 90px; + width: 120px; font-weight: bold; - font-size: 8pt; + font-size: 9pt; color: #fff; text-align: left; background-color: transparent; diff --git a/composer.json b/composer.json index 0d4c623c..983bf9e9 100644 --- a/composer.json +++ b/composer.json @@ -19,6 +19,7 @@ "shaarli/netscape-bookmark-parser": "^2.0", "erusev/parsedown": "^1.6", "slim/slim": "^3.0", + "arthurhoaro/web-thumbnailer": "dev-master", "pubsubhubbub/publisher": "dev-master", "gettext/gettext": "^4.4" }, diff --git a/inc/web-thumbnailer.json b/inc/web-thumbnailer.json new file mode 100644 index 00000000..8a19f070 --- /dev/null +++ b/inc/web-thumbnailer.json @@ -0,0 +1,9 @@ +{ + "settings": { + "default": { + "_comment": "infinite cache", + "cache_duration": -1, + "timeout": 60 + } + } +} \ No newline at end of file diff --git a/index.php b/index.php index ddd5dbf5..28dfd3b4 100644 --- a/index.php +++ b/index.php @@ -74,6 +74,7 @@ require_once 'application/Url.php'; require_once 'application/Utils.php'; require_once 'application/PluginManager.php'; require_once 'application/Router.php'; +require_once 'application/Thumbnailer.php'; require_once 'application/Updater.php'; use \Shaarli\Languages; use \Shaarli\ThemeUtils; @@ -601,22 +602,54 @@ function renderPage($conf, $pluginManager, $LINKSDB, $history, $sessionManager, // -------- Picture wall if ($targetPage == Router::$PAGE_PICWALL) { + if (! $conf->get('thumbnails.enabled')) { + header('Location: ?'); + exit; + } + // Optionally filter the results: $links = $LINKSDB->filterSearch($_GET); $linksToDisplay = array(); + $thumbnailer = new Thumbnailer($conf); + + + $cpt = 0; // Get only links which have a thumbnail. foreach($links as $link) { $permalink='?'.$link['shorturl']; - $thumb=lazyThumbnail($conf, $link['url'],$permalink); - if ($thumb!='') // Only output links which have a thumbnail. - { - $link['thumbnail']=$thumb; // Thumbnail HTML code. - $linksToDisplay[]=$link; // Add to array. + // Not a note, + // and (never retrieved yet or no valid cache file) + if ($link['url'][0] != '?' + && (! isset($link['thumbnail']) || ($link['thumbnail'] !== false && ! is_file($link['thumbnail']))) + ) { + $link['thumbnail'] = $thumbnailer->get($link['url']); + // FIXME! we really need to get rid of ArrayAccess... + $item = $LINKSDB[$link['linkdate']]; + $item['thumbnail'] = $link['thumbnail']; + $LINKSDB[$link['linkdate']] = $item; + $updateDB = true; + $cpt++; + } + + if (isset($link['thumbnail']) && $link['thumbnail'] !== false) { + $linksToDisplay[] = $link; // Add to array. + } + + // If we retrieved new thumbnails, we update the database every 20 links. + // Downloading everything the first time may take a very long time + if (!empty($updateDB) && $cpt == 20) { + $LINKSDB->save($conf->get('resource.page_cache')); + $updateDB = false; + $cpt = 0; } } + if (!empty($updateDB)) { + $LINKSDB->save($conf->get('resource.page_cache')); + } + $data = array( 'linksToDisplay' => $linksToDisplay, ); @@ -1008,6 +1041,7 @@ function renderPage($conf, $pluginManager, $LINKSDB, $history, $sessionManager, $conf->set('api.enabled', !empty($_POST['enableApi'])); $conf->set('api.secret', escape($_POST['apiSecret'])); $conf->set('translation.language', escape($_POST['language'])); + $conf->set('thumbnails.enabled', !empty($_POST['enableThumbnails'])); try { $conf->write($loginManager->isLoggedIn()); @@ -1148,6 +1182,11 @@ function renderPage($conf, $pluginManager, $LINKSDB, $history, $sessionManager, $link['title'] = $link['url']; } + if ($conf->get('thumbnails.enabled')) { + $thumbnailer = new Thumbnailer($conf); + $link['thumbnail'] = $thumbnailer->get($url); + } + $pluginManager->executeHooks('save_link', $link); $LINKSDB[$id] = $link; @@ -1549,6 +1588,11 @@ function buildLinkList($PAGE, $LINKSDB, $conf, $pluginManager, $loginManager) // Start index. $i = ($page-1) * $_SESSION['LINKS_PER_PAGE']; $end = $i + $_SESSION['LINKS_PER_PAGE']; + + if ($conf->get('thumbnails.enabled')) { + $thumbnailer = new Thumbnailer($conf); + } + $linkDisp = array(); while ($i<$end && $iget('thumbnails.enabled') && $link['url'][0] != '?' + && (! isset($link['thumbnail']) || ($link['thumbnail'] !== false && ! is_file($link['thumbnail']))) + ) { + $link['thumbnail'] = $thumbnailer->get($link['url']); + // FIXME! we really need to get rid of ArrayAccess... + $item = $LINKSDB[$keys[$i]]; + $item['thumbnail'] = $link['thumbnail']; + $LINKSDB[$keys[$i]] = $item; + $updateDB = true; + } + // Check for both signs of a note: starting with ? and 7 chars long. - if ($link['url'][0] === '?' && - strlen($link['url']) === 7) { + if ($link['url'][0] === '?' && strlen($link['url']) === 7) { $link['url'] = index_url($_SERVER) . $link['url']; } @@ -1579,6 +1636,11 @@ function buildLinkList($PAGE, $LINKSDB, $conf, $pluginManager, $loginManager) $i++; } + // If we retrieved new thumbnails, we update the database. + if (!empty($updateDB)) { + $LINKSDB->save($conf->get('resource.page_cache')); + } + // Compute paging navigation $searchtagsUrl = $searchtags === '' ? '' : '&searchtags=' . urlencode($searchtags); $searchtermUrl = empty($searchterm) ? '' : '&searchterm=' . urlencode($searchterm); @@ -1629,194 +1691,6 @@ function buildLinkList($PAGE, $LINKSDB, $conf, $pluginManager, $loginManager) return; } -/** - * Compute the thumbnail for a link. - * - * With a link to the original URL. - * Understands various services (youtube.com...) - * Input: $url = URL for which the thumbnail must be found. - * $href = if provided, this URL will be followed instead of $url - * Returns an associative array with thumbnail attributes (src,href,width,height,style,alt) - * Some of them may be missing. - * Return an empty array if no thumbnail available. - * - * @param ConfigManager $conf Configuration Manager instance. - * @param string $url - * @param string|bool $href - * - * @return array - */ -function computeThumbnail($conf, $url, $href = false) -{ - if (!$conf->get('thumbnail.enable_thumbnails')) return array(); - if ($href==false) $href=$url; - - // For most hosts, the URL of the thumbnail can be easily deduced from the URL of the link. - // (e.g. http://www.youtube.com/watch?v=spVypYk4kto ---> http://img.youtube.com/vi/spVypYk4kto/default.jpg ) - // ^^^^^^^^^^^ ^^^^^^^^^^^ - $domain = parse_url($url,PHP_URL_HOST); - if ($domain=='youtube.com' || $domain=='www.youtube.com') - { - parse_str(parse_url($url,PHP_URL_QUERY), $params); // Extract video ID and get thumbnail - if (!empty($params['v'])) return array('src'=>'https://img.youtube.com/vi/'.$params['v'].'/default.jpg', - 'href'=>$href,'width'=>'120','height'=>'90','alt'=>'YouTube thumbnail'); - } - if ($domain=='youtu.be') // Youtube short links - { - $path = parse_url($url,PHP_URL_PATH); - return array('src'=>'https://img.youtube.com/vi'.$path.'/default.jpg', - 'href'=>$href,'width'=>'120','height'=>'90','alt'=>'YouTube thumbnail'); - } - if ($domain=='pix.toile-libre.org') // pix.toile-libre.org image hosting - { - parse_str(parse_url($url,PHP_URL_QUERY), $params); // Extract image filename. - if (!empty($params) && !empty($params['img'])) return array('src'=>'http://pix.toile-libre.org/upload/thumb/'.urlencode($params['img']), - 'href'=>$href,'style'=>'max-width:120px; max-height:150px','alt'=>'pix.toile-libre.org thumbnail'); - } - - if ($domain=='imgur.com') - { - $path = parse_url($url,PHP_URL_PATH); - if (startsWith($path,'/a/')) return array(); // Thumbnails for albums are not available. - if (startsWith($path,'/r/')) return array('src'=>'https://i.imgur.com/'.basename($path).'s.jpg', - 'href'=>$href,'width'=>'90','height'=>'90','alt'=>'imgur.com thumbnail'); - if (startsWith($path,'/gallery/')) return array('src'=>'https://i.imgur.com'.substr($path,8).'s.jpg', - 'href'=>$href,'width'=>'90','height'=>'90','alt'=>'imgur.com thumbnail'); - - if (substr_count($path,'/')==1) return array('src'=>'https://i.imgur.com/'.substr($path,1).'s.jpg', - 'href'=>$href,'width'=>'90','height'=>'90','alt'=>'imgur.com thumbnail'); - } - if ($domain=='i.imgur.com') - { - $pi = pathinfo(parse_url($url,PHP_URL_PATH)); - if (!empty($pi['filename'])) return array('src'=>'https://i.imgur.com/'.$pi['filename'].'s.jpg', - 'href'=>$href,'width'=>'90','height'=>'90','alt'=>'imgur.com thumbnail'); - } - if ($domain=='dailymotion.com' || $domain=='www.dailymotion.com') - { - if (strpos($url,'dailymotion.com/video/')!==false) - { - $thumburl=str_replace('dailymotion.com/video/','dailymotion.com/thumbnail/video/',$url); - return array('src'=>$thumburl, - 'href'=>$href,'width'=>'120','style'=>'height:auto;','alt'=>'DailyMotion thumbnail'); - } - } - if (endsWith($domain,'.imageshack.us')) - { - $ext=strtolower(pathinfo($url,PATHINFO_EXTENSION)); - if ($ext=='jpg' || $ext=='jpeg' || $ext=='png' || $ext=='gif') - { - $thumburl = substr($url,0,strlen($url)-strlen($ext)).'th.'.$ext; - return array('src'=>$thumburl, - 'href'=>$href,'width'=>'120','style'=>'height:auto;','alt'=>'imageshack.us thumbnail'); - } - } - - // Some other hosts are SLOW AS HELL and usually require an extra HTTP request to get the thumbnail URL. - // So we deport the thumbnail generation in order not to slow down page generation - // (and we also cache the thumbnail) - - if (! $conf->get('thumbnail.enable_localcache')) return array(); // If local cache is disabled, no thumbnails for services which require the use a local cache. - - if ($domain=='flickr.com' || endsWith($domain,'.flickr.com') - || $domain=='vimeo.com' - || $domain=='ted.com' || endsWith($domain,'.ted.com') - || $domain=='xkcd.com' || endsWith($domain,'.xkcd.com') - ) - { - if ($domain=='vimeo.com') - { // Make sure this vimeo URL points to a video (/xxx... where xxx is numeric) - $path = parse_url($url,PHP_URL_PATH); - if (!preg_match('!/\d+.+?!',$path)) return array(); // This is not a single video URL. - } - if ($domain=='xkcd.com' || endsWith($domain,'.xkcd.com')) - { // Make sure this URL points to a single comic (/xxx... where xxx is numeric) - $path = parse_url($url,PHP_URL_PATH); - if (!preg_match('!/\d+.+?!',$path)) return array(); - } - if ($domain=='ted.com' || endsWith($domain,'.ted.com')) - { // Make sure this TED URL points to a video (/talks/...) - $path = parse_url($url,PHP_URL_PATH); - if ("/talks/" !== substr($path,0,7)) return array(); // This is not a single video URL. - } - $sign = hash_hmac('sha256', $url, $conf->get('credentials.salt')); // We use the salt to sign data (it's random, secret, and specific to each installation) - return array('src'=>index_url($_SERVER).'?do=genthumbnail&hmac='.$sign.'&url='.urlencode($url), - 'href'=>$href,'width'=>'120','style'=>'height:auto;','alt'=>'thumbnail'); - } - - // For all other, we try to make a thumbnail of links ending with .jpg/jpeg/png/gif - // Technically speaking, we should download ALL links and check their Content-Type to see if they are images. - // But using the extension will do. - $ext=strtolower(pathinfo($url,PATHINFO_EXTENSION)); - if ($ext=='jpg' || $ext=='jpeg' || $ext=='png' || $ext=='gif') - { - $sign = hash_hmac('sha256', $url, $conf->get('credentials.salt')); // We use the salt to sign data (it's random, secret, and specific to each installation) - return array('src'=>index_url($_SERVER).'?do=genthumbnail&hmac='.$sign.'&url='.urlencode($url), - 'href'=>$href,'width'=>'120','style'=>'height:auto;','alt'=>'thumbnail'); - } - return array(); // No thumbnail. - -} - - -// Returns the HTML code to display a thumbnail for a link -// with a link to the original URL. -// Understands various services (youtube.com...) -// Input: $url = URL for which the thumbnail must be found. -// $href = if provided, this URL will be followed instead of $url -// Returns '' if no thumbnail available. -function thumbnail($url,$href=false) -{ - // FIXME! - global $conf; - $t = computeThumbnail($conf, $url,$href); - if (count($t)==0) return ''; // Empty array = no thumbnail for this URL. - - $html=''; - - // Lazy image - $html.='get('credentials.salt')); - if ($sign!=$_GET['hmac']) die('Naughty boy!'); - - $cacheDir = $conf->get('resource.thumbnails_cache', 'cache'); - // Let's see if we don't already have the image for this URL in the cache. - $thumbname=hash('sha1',$_GET['url']).'.jpg'; - if (is_file($cacheDir .'/'. $thumbname)) - { // We have the thumbnail, just serve it: - header('Content-Type: image/jpeg'); - echo file_get_contents($cacheDir .'/'. $thumbname); - return; - } - // We may also serve a blank image (if service did not respond) - $blankname=hash('sha1',$_GET['url']).'.gif'; - if (is_file($cacheDir .'/'. $blankname)) - { - header('Content-Type: image/gif'); - echo file_get_contents($cacheDir .'/'. $blankname); - return; - } - - // Otherwise, generate the thumbnail. - $url = $_GET['url']; - $domain = parse_url($url,PHP_URL_HOST); - - if ($domain=='flickr.com' || endsWith($domain,'.flickr.com')) - { - // Crude replacement to handle new flickr domain policy (They prefer www. now) - $url = str_replace('http://flickr.com/','http://www.flickr.com/',$url); - - // Is this a link to an image, or to a flickr page ? - $imageurl=''; - if (endsWith(parse_url($url, PHP_URL_PATH), '.jpg')) - { // This is a direct link to an image. e.g. http://farm1.staticflickr.com/5/5921913_ac83ed27bd_o.jpg - preg_match('!(http://farm\d+\.staticflickr\.com/\d+/\d+_\w+_)\w.jpg!',$url,$matches); - if (!empty($matches[1])) $imageurl=$matches[1].'m.jpg'; - } - else // This is a flickr page (html) - { - // Get the flickr html page. - list($headers, $content) = get_http_response($url, 20); - if (strpos($headers[0], '200 OK') !== false) - { - // flickr now nicely provides the URL of the thumbnail in each flickr page. - preg_match('! - if ($imageurl=='') - { - preg_match('! tag on that page - // http://www.ted.com/talks/mikko_hypponen_fighting_viruses_defending_the_net.html - // - list($headers, $content) = get_http_response($url, 5); - if (strpos($headers[0], '200 OK') !== false) { - // Extract the link to the thumbnail - preg_match('!link rel="image_src" href="(http://images.ted.com/images/ted/.+_\d+x\d+\.jpg)"!', $content, $matches); - if (!empty($matches[1])) - { // Let's download the image. - $imageurl=$matches[1]; - // No control on image size, so wait long enough - list($headers, $content) = get_http_response($imageurl, 20); - if (strpos($headers[0], '200 OK') !== false) { - $filepath = $cacheDir .'/'. $thumbname; - file_put_contents($filepath, $content); // Save image to cache. - if (resizeImage($filepath)) - { - header('Content-Type: image/jpeg'); - echo file_get_contents($filepath); - return; - } - } - } - } - } - - elseif ($domain=='xkcd.com' || endsWith($domain,'.xkcd.com')) - { - // There is no thumbnail available for xkcd comics, so download the whole image and resize it. - // http://xkcd.com/327/ - // <BLABLA> - list($headers, $content) = get_http_response($url, 5); - if (strpos($headers[0], '200 OK') !== false) { - // Extract the link to the thumbnail - preg_match('! diff --git a/tpl/vintage/configure.html b/tpl/vintage/configure.html index 479284eb..e47c71ad 100644 --- a/tpl/vintage/configure.html +++ b/tpl/vintage/configure.html @@ -128,6 +128,18 @@ + + Enable thumbnails + + + + + diff --git a/tpl/vintage/linklist.html b/tpl/vintage/linklist.html index 1ca51be3..9bdafa8c 100644 --- a/tpl/vintage/linklist.html +++ b/tpl/vintage/linklist.html @@ -80,7 +80,16 @@ {loop="$links"} -
{$value.url|thumbnail}
+ {if="$thumbnails_enabled && !empty($value.thumbnail)"} + + {/if}