SSL detection: add support for `X-Forwarded-Proto`

[github/shaarli/Shaarli.git] / index.php
diff --git a/index.php b/index.php

index 74f95497ef39883de67a0d500e3156ccf34210dc..7818ee88755c54329cfeb86d1b56ac8434b14f68 100755 (executable)
--- a/index.php
+++ b/index.php
@@ -1,5 +1,5 @@
  <?php
-// Shaarli 0.5.0 - Shaare your links...
+// Shaarli 0.5.3 - Shaare your links...
  // The personal, minimalist, super-fast, no-database Delicious clone. By sebsauvage.net
  // http://sebsauvage.net/wiki/doku.php?id=php:shaarli
  // Licence: http://www.opensource.org/licenses/zlib-license.php
@@ -39,23 +39,10 @@ $GLOBALS['config']['ARCHIVE_ORG'] = false; // For each link, add a link to an ar
  $GLOBALS['config']['ENABLE_RSS_PERMALINKS'] = true;  // Enable RSS permalinks by default. This corresponds to the default behavior of shaarli before this was added as an option.
  $GLOBALS['config']['HIDE_PUBLIC_LINKS'] = false;
  // -----------------------------------------------------------------------------------------------
-define('shaarli_version','0.5.0');
+define('shaarli_version','0.5.3');
  // http://server.com/x/shaarli --> /shaarli/
  define('WEB_PATH', substr($_SERVER["REQUEST_URI"], 0, 1+strrpos($_SERVER["REQUEST_URI"], '/', 0)));
  
-// Force cookie path (but do not change lifetime)
-$cookie=session_get_cookie_params();
-$cookiedir = ''; if(dirname($_SERVER['SCRIPT_NAME'])!='/') $cookiedir=dirname($_SERVER["SCRIPT_NAME"]).'/';
-session_set_cookie_params($cookie['lifetime'],$cookiedir,$_SERVER['SERVER_NAME']); // Set default cookie expiration and path.
-
-// Set session parameters on server side.
-define('INACTIVITY_TIMEOUT',3600); // (in seconds). If the user does not access any page within this time, his/her session is considered expired.
-ini_set('session.use_cookies', 1);       // Use cookies to store session.
-ini_set('session.use_only_cookies', 1);  // Force cookies for session (phpsessionID forbidden in URL).
-ini_set('session.use_trans_sid', false); // Prevent PHP form using sessionID in URL if cookies are disabled.
-session_name('shaarli');
-if (session_id() == '') session_start();  // Start session if needed (Some server auto-start sessions).
-
  // PHP Settings
  ini_set('max_input_time','60');  // High execution time in case of problematic imports/exports.
  ini_set('memory_limit', '128M');  // Try to set max upload file size and read (May not work on some hosts).
@@ -72,6 +59,7 @@ if (is_file($GLOBALS['config']['CONFIG_FILE'])) {
  // Shaarli library
  require_once 'application/Cache.php';
  require_once 'application/CachedPage.php';
+require_once 'application/HttpUtils.php';
  require_once 'application/LinkDB.php';
  require_once 'application/TimeZone.php';
  require_once 'application/Url.php';
@@ -87,6 +75,36 @@ try {
      exit;
  }
  
+// Force cookie path (but do not change lifetime)
+$cookie = session_get_cookie_params();
+$cookiedir = '';
+if (dirname($_SERVER['SCRIPT_NAME']) != '/') {
+    $cookiedir = dirname($_SERVER["SCRIPT_NAME"]).'/';
+}
+// Set default cookie expiration and path.
+session_set_cookie_params($cookie['lifetime'], $cookiedir, $_SERVER['SERVER_NAME']);
+// Set session parameters on server side.
+// If the user does not access any page within this time, his/her session is considered expired.
+define('INACTIVITY_TIMEOUT', 3600); // in seconds.
+// Use cookies to store session.
+ini_set('session.use_cookies', 1);
+// Force cookies for session (phpsessionID forbidden in URL).
+ini_set('session.use_only_cookies', 1);
+// Prevent PHP from using sessionID in URL if cookies are disabled.
+ini_set('session.use_trans_sid', false);
+
+session_name('shaarli');
+// Start session if needed (Some server auto-start sessions).
+if (session_id() == '') {
+    session_start();
+}
+
+// Regenerate the session ID if the one supplied in the cookie is invalid.
+if (isset($_COOKIE['shaarli']) && !is_session_id_valid($_COOKIE['shaarli'])) {
+    session_regenerate_id(true);
+    $_COOKIE['shaarli'] = session_id();
+}
+
  include "inc/rain.tpl.class.php"; //include Rain TPL
  raintpl::$tpl_dir = $GLOBALS['config']['RAINTPL_TPL']; // template directory
  raintpl::$cache_dir = $GLOBALS['config']['RAINTPL_TMP']; // cache directory
@@ -192,9 +210,11 @@ function checkUpdate()
      // Get latest version number at most once a day.
      if (!is_file($GLOBALS['config']['UPDATECHECK_FILENAME']) || (filemtime($GLOBALS['config']['UPDATECHECK_FILENAME'])<time()-($GLOBALS['config']['UPDATECHECK_INTERVAL'])))
      {
-        $version=shaarli_version;
-        list($httpstatus,$headers,$data) = getHTTP('https://raw.githubusercontent.com/shaarli/Shaarli/master/shaarli_version.php',2);
-        if (strpos($httpstatus,'200 OK')!==false) $version=str_replace(' */ ?>','',str_replace('<?php /* ','',$data));
+        $version = shaarli_version;
+        list($headers, $data) = get_http_url('https://raw.githubusercontent.com/shaarli/Shaarli/master/shaarli_version.php', 2);
+        if (strpos($headers[0], '200 OK') !== false) {
+            $version = str_replace(' */ ?>', '', str_replace('<?php /* ', '', $data));
+        }
          // If failed, never mind. We don't want to bother the user with that.
          file_put_contents($GLOBALS['config']['UPDATECHECK_FILENAME'],$version); // touch file date
      }
@@ -443,7 +463,7 @@ if (isset($_POST['login']))
  // You can append $_SERVER['SCRIPT_NAME'] to get the current script URL.
  function serverUrl()
  {
-    $https = (!empty($_SERVER['HTTPS']) && (strtolower($_SERVER['HTTPS'])=='on')) || $_SERVER["SERVER_PORT"]=='443'; // HTTPS detection.
+    $https = (!empty($_SERVER['HTTPS']) && (strtolower($_SERVER['HTTPS'])=='on')) || $_SERVER["SERVER_PORT"]=='443' || (!empty($_SERVER['HTTP_X_FORWARDED_PROTO']) && $_SERVER['HTTP_X_FORWARDED_PROTO'] == 'https'); // HTTPS detection.
      $serverport = ($_SERVER["SERVER_PORT"]=='80' || ($https && $_SERVER["SERVER_PORT"]=='443') ? '' : ':'.$_SERVER["SERVER_PORT"]);
      return 'http'.($https?'s':'').'://'.$_SERVER['SERVER_NAME'].$serverport;
  }
@@ -518,53 +538,6 @@ function linkdate2iso8601($linkdate)
      return date('c',linkdate2timestamp($linkdate)); // 'c' is for ISO 8601 date format.
  }
  
-// Parse HTTP response headers and return an associative array.
-function http_parse_headers_shaarli( $headers )
-{
-    $res=array();
-    foreach($headers as $header)
-    {
-        $i = strpos($header,': ');
-        if ($i!==false)
-        {
-            $key=substr($header,0,$i);
-            $value=substr($header,$i+2,strlen($header)-$i-2);
-            $res[$key]=$value;
-        }
-    }
-    return $res;
-}
-
-/* GET an URL.
-   Input: $url : URL to get (http://...)
-          $timeout : Network timeout (will wait this many seconds for an anwser before giving up).
-   Output: An array.  [0] = HTTP status message (e.g. "HTTP/1.1 200 OK") or error message
-                      [1] = associative array containing HTTP response headers (e.g. echo getHTTP($url)[1]['Content-Type'])
-                      [2] = data
-    Example: list($httpstatus,$headers,$data) = getHTTP('http://sebauvage.net/');
-             if (strpos($httpstatus,'200 OK')!==false)
-                 echo 'Data type: '.htmlspecialchars($headers['Content-Type']);
-             else
-                 echo 'There was an error: '.htmlspecialchars($httpstatus)
-*/
-function getHTTP($url,$timeout=30)
-{
-    try
-    {
-        $options = array('http'=>array('method'=>'GET','timeout' => $timeout, 'user_agent' => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:23.0) Gecko/20100101 Firefox/23.0')); // Force network timeout
-        $context = stream_context_create($options);
-        $data=file_get_contents($url,false,$context,-1, 4000000); // We download at most 4 Mb from source.
-        if (!$data) { return array('HTTP Error',array(),''); }
-        $httpStatus=$http_response_header[0]; // e.g. "HTTP/1.1 200 OK"
-        $responseHeaders=http_parse_headers_shaarli($http_response_header);
-        return array($httpStatus,$responseHeaders,$data);
-    }
-    catch (Exception $e)  // getHTTP *can* fail silently (we don't care if the title cannot be fetched)
-    {
-        return array($e->getMessage(),'','');
-    }
-}
-
  // Extract title from an HTML document.
  // (Returns an empty string if not found.)
  function html_extract_title($html)
@@ -1481,55 +1454,61 @@ function renderPage()
  
      // -------- User want to post a new link: Display link edit form.
      if (isset($_GET['post'])) {
-        $url = new Url($_GET['post']);
-        $url->cleanup();
+        $url = cleanup_url($_GET['post']);
  
          $link_is_new = false;
-        $link = $LINKSDB->getLinkFromUrl($url); // Check if URL is not already in database (in this case, we will edit the existing link)
+        // Check if URL is not already in database (in this case, we will edit the existing link)
+        $link = $LINKSDB->getLinkFromUrl($url);
          if (!$link)
          {
-            $link_is_new = true;  // This is a new link
+            $link_is_new = true;
              $linkdate = strval(date('Ymd_His'));
-            $title = (empty($_GET['title']) ? '' : $_GET['title'] ); // Get title if it was provided in URL (by the bookmarklet).
-            $description = (empty($_GET['description']) ? '' : $_GET['description']); // Get description if it was provided in URL (by the bookmarklet). [Bronco added that]
-            $tags = (empty($_GET['tags']) ? '' : $_GET['tags'] ); // Get tags if it was provided in URL
-            $private = (!empty($_GET['private']) && $_GET['private'] === "1" ? 1 : 0); // Get private if it was provided in URL
-            if (($url!='') && parse_url($url,PHP_URL_SCHEME)=='') $url = 'http://'.$url;
-            // If this is an HTTP link, we try go get the page to extract the title (otherwise we will to straight to the edit form.)
-            if (empty($title) && parse_url($url,PHP_URL_SCHEME)=='http')
-            {
-                list($status,$headers,$data) = getHTTP($url,4); // Short timeout to keep the application responsive.
+            // Get title if it was provided in URL (by the bookmarklet).
+            $title = (empty($_GET['title']) ? '' : $_GET['title'] );
+            // Get description if it was provided in URL (by the bookmarklet). [Bronco added that]
+            $description = (empty($_GET['description']) ? '' : $_GET['description']);
+            $tags = (empty($_GET['tags']) ? '' : $_GET['tags'] );
+            $private = (!empty($_GET['private']) && $_GET['private'] === "1" ? 1 : 0);
+            // If this is an HTTP(S) link, we try to get the page to extract the title (otherwise we will go straight to the edit form.)
+            if (empty($title) && strpos(get_url_scheme($url), 'http') !== false) {
+                // Short timeout to keep the application responsive
+                list($headers, $data) = get_http_url($url, 4);
                  // FIXME: Decode charset according to specified in either 1) HTTP response headers or 2) <head> in html
-                if (strpos($status,'200 OK')!==false)
-                                        {
-                        // Look for charset in html header.
-                                               preg_match('#<meta .*charset=.*>#Usi', $data, $meta);
-
-                                               // If found, extract encoding.
-                                               if (!empty($meta[0]))
-                                               {
-                                                       // Get encoding specified in header.
-                                                       preg_match('#charset="?(.*)"#si', $meta[0], $enc);
-                                                       // If charset not found, use utf-8.
-                                                       $html_charset = (!empty($enc[1])) ? strtolower($enc[1]) : 'utf-8';
-                                               }
-                                               else { $html_charset = 'utf-8'; }
-
-                                               // Extract title
-                                               $title = html_extract_title($data);
-                                               if (!empty($title))
-                                               {
-                                                       // Re-encode title in utf-8 if necessary.
-                                                       $title = ($html_charset == 'iso-8859-1') ? utf8_encode($title) : $title;
-                                               }
-                                       }
+                if (strpos($headers[0], '200 OK') !== false) {
+                    // Look for charset in html header.
+                    preg_match('#<meta .*charset=.*>#Usi', $data, $meta);
+
+                    // If found, extract encoding.
+                    if (!empty($meta[0])) {
+                        // Get encoding specified in header.
+                        preg_match('#charset="?(.*)"#si', $meta[0], $enc);
+                        // If charset not found, use utf-8.
+                        $html_charset = (!empty($enc[1])) ? strtolower($enc[1]) : 'utf-8';
+                    }
+                    else {
+                        $html_charset = 'utf-8';
+                    }
+
+                    // Extract title
+                    $title = html_extract_title($data);
+                    if (!empty($title)) {
+                        // Re-encode title in utf-8 if necessary.
+                        $title = ($html_charset == 'iso-8859-1') ? utf8_encode($title) : $title;
+                    }
+                }
              }
-            if ($url=='') // In case of empty URL, this is just a text (with a link that points to itself)
-            {
-                $url='?'.smallHash($linkdate);
-                $title='Note: ';
+            if ($url == '') {
+                $url = '?' . smallHash($linkdate);
+                $title = 'Note: ';
              }
-            $link = array('linkdate'=>$linkdate,'title'=>$title,'url'=>$url,'description'=>$description,'tags'=>$tags,'private'=>$private);
+            $link = array(
+                'linkdate' => $linkdate,
+                'title' => $title,
+                'url' => $url,
+                'description' => $description,
+                'tags' => $tags,
+                'private' => $private
+            );
          }
  
          $PAGE = new pageBuilder;
@@ -2163,8 +2142,9 @@ function genThumbnail()
          }
          else // This is a flickr page (html)
          {
-            list($httpstatus,$headers,$data) = getHTTP($url,20); // Get the flickr html page.
-            if (strpos($httpstatus,'200 OK')!==false)
+            // Get the flickr html page.
+            list($headers, $data) = get_http_url($url, 20);
+            if (strpos($headers[0], '200 OK') !== false)
              {
                  // flickr now nicely provides the URL of the thumbnail in each flickr page.
                  preg_match('!<link rel=\"image_src\" href=\"(.+?)\"!',$data,$matches);
@@ -2183,9 +2163,9 @@ function genThumbnail()
  
          if ($imageurl!='')
          {   // Let's download the image.
-            list($httpstatus,$headers,$data) = getHTTP($imageurl,10); // Image is 240x120, so 10 seconds to download should be enough.
-            if (strpos($httpstatus,'200 OK')!==false)
-            {
+            // Image is 240x120, so 10 seconds to download should be enough.
+            list($headers, $data) = get_http_url($imageurl, 10);
+            if (strpos($headers[0], '200 OK') !== false) {
                  file_put_contents($GLOBALS['config']['CACHEDIR'].'/'.$thumbname,$data); // Save image to cache.
                  header('Content-Type: image/jpeg');
                  echo $data;
@@ -2199,15 +2179,13 @@ function genThumbnail()
          // This is more complex: we have to perform a HTTP request, then parse the result.
          // Maybe we should deport this to JavaScript ? Example: http://stackoverflow.com/questions/1361149/get-img-thumbnails-from-vimeo/4285098#4285098
          $vid = substr(parse_url($url,PHP_URL_PATH),1);
-        list($httpstatus,$headers,$data) = getHTTP('https://vimeo.com/api/v2/video/'.escape($vid).'.php',5);
-        if (strpos($httpstatus,'200 OK')!==false)
-        {
+        list($headers, $data) = get_http_url('https://vimeo.com/api/v2/video/'.escape($vid).'.php', 5);
+        if (strpos($headers[0], '200 OK') !== false) {
              $t = unserialize($data);
              $imageurl = $t[0]['thumbnail_medium'];
              // Then we download the image and serve it to our client.
-            list($httpstatus,$headers,$data) = getHTTP($imageurl,10);
-            if (strpos($httpstatus,'200 OK')!==false)
-            {
+            list($headers, $data) = get_http_url($imageurl, 10);
+            if (strpos($headers[0], '200 OK') !== false) {
                  file_put_contents($GLOBALS['config']['CACHEDIR'].'/'.$thumbname,$data); // Save image to cache.
                  header('Content-Type: image/jpeg');
                  echo $data;
@@ -2221,17 +2199,16 @@ function genThumbnail()
          // The thumbnail for TED talks is located in the <link rel="image_src" [...]> tag on that page
          // http://www.ted.com/talks/mikko_hypponen_fighting_viruses_defending_the_net.html
          // <link rel="image_src" href="http://images.ted.com/images/ted/28bced335898ba54d4441809c5b1112ffaf36781_389x292.jpg" />
-        list($httpstatus,$headers,$data) = getHTTP($url,5);
-        if (strpos($httpstatus,'200 OK')!==false)
-        {
+        list($headers, $data) = get_http_url($url, 5);
+        if (strpos($headers[0], '200 OK') !== false) {
              // Extract the link to the thumbnail
              preg_match('!link rel="image_src" href="(http://images.ted.com/images/ted/.+_\d+x\d+\.jpg)"!',$data,$matches);
              if (!empty($matches[1]))
              {   // Let's download the image.
                  $imageurl=$matches[1];
-                list($httpstatus,$headers,$data) = getHTTP($imageurl,20); // No control on image size, so wait long enough.
-                if (strpos($httpstatus,'200 OK')!==false)
-                {
+                // No control on image size, so wait long enough
+                list($headers, $data) = get_http_url($imageurl, 20);
+                if (strpos($headers[0], '200 OK') !== false) {
                      $filepath=$GLOBALS['config']['CACHEDIR'].'/'.$thumbname;
                      file_put_contents($filepath,$data); // Save image to cache.
                      if (resizeImage($filepath))
@@ -2250,17 +2227,16 @@ function genThumbnail()
          // There is no thumbnail available for xkcd comics, so download the whole image and resize it.
          // http://xkcd.com/327/
          // <img src="http://imgs.xkcd.com/comics/exploits_of_a_mom.png" title="<BLABLA>" alt="<BLABLA>" />
-        list($httpstatus,$headers,$data) = getHTTP($url,5);
-        if (strpos($httpstatus,'200 OK')!==false)
-        {
+        list($headers, $data) = get_http_url($url, 5);
+        if (strpos($headers[0], '200 OK') !== false) {
              // Extract the link to the thumbnail
              preg_match('!<img src="(http://imgs.xkcd.com/comics/.*)" title="[^s]!',$data,$matches);
              if (!empty($matches[1]))
              {   // Let's download the image.
                  $imageurl=$matches[1];
-                list($httpstatus,$headers,$data) = getHTTP($imageurl,20); // No control on image size, so wait long enough.
-                if (strpos($httpstatus,'200 OK')!==false)
-                {
+                // No control on image size, so wait long enough
+                list($headers, $data) = get_http_url($imageurl, 20);
+                if (strpos($headers[0], '200 OK') !== false) {
                      $filepath=$GLOBALS['config']['CACHEDIR'].'/'.$thumbname;
                      file_put_contents($filepath,$data); // Save image to cache.
                      if (resizeImage($filepath))
@@ -2277,9 +2253,9 @@ function genThumbnail()
      else
      {
          // For all other domains, we try to download the image and make a thumbnail.
-        list($httpstatus,$headers,$data) = getHTTP($url,30);  // We allow 30 seconds max to download (and downloads are limited to 4 Mb)
-        if (strpos($httpstatus,'200 OK')!==false)
-        {
+        // We allow 30 seconds max to download (and downloads are limited to 4 Mb)
+        list($headers, $data) = get_http_url($url, 30);
+        if (strpos($headers[0], '200 OK') !== false) {
              $filepath=$GLOBALS['config']['CACHEDIR'].'/'.$thumbname;
              file_put_contents($filepath,$data); // Save image to cache.
              if (resizeImage($filepath))