]> git.immae.eu Git - github/shaarli/Shaarli.git/blobdiff - index.php
Merge pull request #126 from Alkarex/Milliseconds
[github/shaarli/Shaarli.git] / index.php
index 8b5c912ac5c0ffebcce6bea401d61046808da963..b402eba9b2f7f006441bd8b65bc19293bf778d70 100644 (file)
--- a/index.php
+++ b/index.php
@@ -566,7 +566,7 @@ function getHTTP($url,$timeout=30)
 {
     try
     {
-        $options = array('http'=>array('method'=>'GET','timeout' => $timeout)); // Force network timeout
+        $options = array('http'=>array('method'=>'GET','timeout' => $timeout, 'user_agent' => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:23.0) Gecko/20100101 Firefox/23.0')); // Force network timeout
         $context = stream_context_create($options);
         $data=file_get_contents($url,false,$context,-1, 4000000); // We download at most 4 Mb from source.
         if (!$data) { return array('HTTP Error',array(),''); }
@@ -1545,8 +1545,29 @@ function renderPage()
             {
                 list($status,$headers,$data) = getHTTP($url,4); // Short timeout to keep the application responsive.
                 // FIXME: Decode charset according to specified in either 1) HTTP response headers or 2) <head> in html
-                if (strpos($status,'200 OK')!==false) $title=html_entity_decode(html_extract_title($data),ENT_QUOTES,'UTF-8');
-
+                if (strpos($status,'200 OK')!==false)
+                                        {
+                        // Look for charset in html header.
+                                               preg_match('#<meta .*charset=.*>#Usi', $data, $meta);
+                                               // If found, extract encoding.
+                                               if (!empty($meta[0]))
+                                               {
+                                                       // Get encoding specified in header.
+                                                       preg_match('#charset="?(.*)"#si', $meta[0], $enc);
+                                                       // If charset not found, use utf-8.
+                                                       $html_charset = (!empty($enc[1])) ? strtolower($enc[1]) : 'utf-8';
+                                               }
+                                               else { $html_charset = 'utf-8'; }
+                                               // Extract title
+                                               $title = html_extract_title($data);
+                                               if (!empty($title))
+                                               {
+                                                       // Re-encode title in utf-8 if necessary.
+                                                       $title = ($html_charset == 'iso-8859-1') ? utf8_encode($title) : $title;
+                                               }
+                                       }
             }
             if ($url=='') $url='?'.smallHash($linkdate); // In case of empty URL, this is just a text (with a link that point to itself)
             $link = array('linkdate'=>$linkdate,'title'=>$title,'url'=>$url,'description'=>$description,'tags'=>$tags,'private'=>0);
@@ -1674,7 +1695,11 @@ function importFile()
                 {
                     $attr=$m[1]; $value=$m[2];
                     if ($attr=='HREF') $link['url']=html_entity_decode($value,ENT_QUOTES,'UTF-8');
-                    elseif ($attr=='ADD_DATE') $raw_add_date=intval($value);
+                    elseif ($attr=='ADD_DATE')
+                    {
+                        $raw_add_date=intval($value);
+                        if ($raw_add_date>30000000000) $raw_add_date/=1000;    //If larger than year 2920, then was likely stored in milliseconds instead of seconds
+                    }
                     elseif ($attr=='PRIVATE') $link['private']=($value=='0'?0:1);
                     elseif ($attr=='TAGS') $link['tags']=html_entity_decode(str_replace(',',' ',$value),ENT_QUOTES,'UTF-8');
                 }
@@ -2419,4 +2444,4 @@ if (isset($_SERVER["QUERY_STRING"]) && startswith($_SERVER["QUERY_STRING"],'do=d
 if (isset($_SERVER["QUERY_STRING"]) && startswith($_SERVER["QUERY_STRING"],'ws=')) { processWS(); exit; } // Webservices (for jQuery/jQueryUI)
 if (!isset($_SESSION['LINKS_PER_PAGE'])) $_SESSION['LINKS_PER_PAGE']=$GLOBALS['config']['LINKS_PER_PAGE'];
 renderPage();
-?>
\ No newline at end of file
+?>