aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--application/HttpUtils.php52
-rwxr-xr-xindex.php110
-rw-r--r--tests/HttpUtilsTest.php38
3 files changed, 122 insertions, 78 deletions
diff --git a/application/HttpUtils.php b/application/HttpUtils.php
new file mode 100644
index 00000000..175333ae
--- /dev/null
+++ b/application/HttpUtils.php
@@ -0,0 +1,52 @@
1<?php
/**
 * GET an HTTP URL to retrieve its content
 *
 * The resource is fetched with a single request; the response headers are
 * taken from that same request (no second round-trip is made).
 *
 * @param string $url      URL to get (http://...)
 * @param int    $timeout  network timeout (in seconds)
 * @param int    $maxBytes maximum downloaded bytes (default: 4 MiB)
 *
 * @return array HTTP response headers, downloaded content
 *
 * Output format:
 *  [0] = associative array containing HTTP response headers
 *        (status lines are stored under numeric keys, starting at 0)
 *  [1] = URL content (downloaded data)
 *
 * On failure:
 *  [0] = array(0 => 'HTTP Error')
 *  [1] = the error message when one is available, else an empty string
 *
 * Example:
 *  list($headers, $data) = get_http_url('http://sebauvage.net/');
 *  if (strpos($headers[0], '200 OK') !== false) {
 *      echo 'Data type: '.htmlspecialchars($headers['Content-Type']);
 *  } else {
 *      echo 'There was an error: '.htmlspecialchars($headers[0]);
 *  }
 *
 * @see http://php.net/manual/en/function.file-get-contents.php
 * @see http://php.net/manual/en/function.stream-context-create.php
 * @see http://php.net/manual/en/reserved.variables.httpresponseheader.php
 */
function get_http_url($url, $timeout = 30, $maxBytes = 4194304)
{
    $options = array(
        'http' => array(
            'method' => 'GET',
            'timeout' => $timeout,
            'user_agent' => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:23.0)'
                         .' Gecko/20100101 Firefox/23.0'
        )
    );

    $context = stream_context_create($options);

    // file_get_contents() signals failure with an E_WARNING and a `false`
    // return value, not with an exception. Capture the warning text locally
    // so the caller gets the reason of the failure without any output or log
    // noise; when several warnings are raised the last one is kept.
    $lastWarning = '';
    set_error_handler(
        function ($errno, $errstr) use (&$lastWarning) {
            $lastWarning = $errstr;
            return true;
        },
        E_WARNING
    );

    try {
        // Offset must be 0: since PHP 7.1 a negative offset means "seek from
        // the end of the stream", which is not possible on remote streams.
        $content = file_get_contents($url, false, $context, 0, $maxBytes);
    } catch (Exception $exc) {
        // Only reachable if something in the stream layer throws.
        restore_error_handler();
        return array(array(0 => 'HTTP Error'), $exc->getMessage());
    }
    restore_error_handler();

    if ($content === false) {
        return array(array(0 => 'HTTP Error'), $lastWarning);
    }

    // An empty body is reported as an error (historical behaviour); strict
    // comparison is used so that a body consisting of "0" is not rejected.
    // $http_response_header is populated in this scope by the HTTP wrapper;
    // it stays unset when no HTTP exchange took place.
    if ($content === '' || empty($http_response_header)) {
        return array(array(0 => 'HTTP Error'), '');
    }

    return array(parse_http_response_headers($http_response_header), $content);
}

/**
 * Convert raw HTTP header lines to an associative array
 *
 * The layout mirrors get_headers($url, 1): lines without a colon (status
 * lines) are appended under numeric keys, "Name: value" lines are stored
 * under their name, and a name seen several times yields an array of values.
 *
 * @param array $lines raw header lines, e.g. array('HTTP/1.1 200 OK', ...)
 *
 * @return array parsed headers
 */
function parse_http_response_headers(array $lines)
{
    $headers = array();

    foreach ($lines as $line) {
        $pos = strpos($line, ':');
        if ($pos === false) {
            $headers[] = $line;
            continue;
        }

        $name = substr($line, 0, $pos);
        $value = trim(substr($line, $pos + 1));

        if (!isset($headers[$name])) {
            $headers[$name] = $value;
        } elseif (is_array($headers[$name])) {
            $headers[$name][] = $value;
        } else {
            $headers[$name] = array($headers[$name], $value);
        }
    }

    return $headers;
}
diff --git a/index.php b/index.php
index 8863cc29..e39cff38 100755
--- a/index.php
+++ b/index.php
@@ -59,6 +59,7 @@ if (is_file($GLOBALS['config']['CONFIG_FILE'])) {
59// Shaarli library 59// Shaarli library
60require_once 'application/Cache.php'; 60require_once 'application/Cache.php';
61require_once 'application/CachedPage.php'; 61require_once 'application/CachedPage.php';
62require_once 'application/HttpUtils.php';
62require_once 'application/LinkDB.php'; 63require_once 'application/LinkDB.php';
63require_once 'application/TimeZone.php'; 64require_once 'application/TimeZone.php';
64require_once 'application/Url.php'; 65require_once 'application/Url.php';
@@ -209,9 +210,11 @@ function checkUpdate()
209 // Get latest version number at most once a day. 210 // Get latest version number at most once a day.
210 if (!is_file($GLOBALS['config']['UPDATECHECK_FILENAME']) || (filemtime($GLOBALS['config']['UPDATECHECK_FILENAME'])<time()-($GLOBALS['config']['UPDATECHECK_INTERVAL']))) 211 if (!is_file($GLOBALS['config']['UPDATECHECK_FILENAME']) || (filemtime($GLOBALS['config']['UPDATECHECK_FILENAME'])<time()-($GLOBALS['config']['UPDATECHECK_INTERVAL'])))
211 { 212 {
212 $version=shaarli_version; 213 $version = shaarli_version;
213 list($httpstatus,$headers,$data) = getHTTP('https://raw.githubusercontent.com/shaarli/Shaarli/master/shaarli_version.php',2); 214 list($headers, $data) = get_http_url('https://raw.githubusercontent.com/shaarli/Shaarli/master/shaarli_version.php', 2);
214 if (strpos($httpstatus,'200 OK')!==false) $version=str_replace(' */ ?>','',str_replace('<?php /* ','',$data)); 215 if (strpos($headers[0], '200 OK') !== false) {
216 $version = str_replace(' */ ?>', '', str_replace('<?php /* ', '', $data));
217 }
215 // If failed, never mind. We don't want to bother the user with that. 218 // If failed, never mind. We don't want to bother the user with that.
216 file_put_contents($GLOBALS['config']['UPDATECHECK_FILENAME'],$version); // touch file date 219 file_put_contents($GLOBALS['config']['UPDATECHECK_FILENAME'],$version); // touch file date
217 } 220 }
@@ -535,53 +538,6 @@ function linkdate2iso8601($linkdate)
535 return date('c',linkdate2timestamp($linkdate)); // 'c' is for ISO 8601 date format. 538 return date('c',linkdate2timestamp($linkdate)); // 'c' is for ISO 8601 date format.
536} 539}
537 540
// Parse HTTP response headers and return an associative array.
// Input:  $headers : list of raw header lines (e.g. $http_response_header).
// Output: array mapping header names to their values. Lines without a colon
//         (such as the "HTTP/1.1 200 OK" status line) are ignored; if a header
//         name appears several times, the last value wins.
function http_parse_headers_shaarli( $headers )
{
    $res=array();
    // Nothing to parse (e.g. the request failed before any header was received).
    if (!is_array($headers))
    {
        return $res;
    }
    foreach($headers as $header)
    {
        // Split on the first colon only: the whitespace after it is optional
        // in HTTP, and values (URLs, dates) may contain colons themselves.
        $i = strpos($header,':');
        if ($i!==false)
        {
            $key=substr($header,0,$i);
            $value=trim(substr($header,$i+1));
            $res[$key]=$value;
        }
    }
    return $res;
}
554
/* GET an URL.
   Input: $url : URL to get (http://...)
          $timeout : Network timeout (will wait this many seconds for an answer before giving up).
   Output: An array.  [0] = HTTP status message (e.g. "HTTP/1.1 200 OK") or error message
                      [1] = associative array containing HTTP response headers (e.g. echo getHTTP($url)[1]['Content-Type'])
                            (always an array; empty on error)
                      [2] = data (empty string on error)
   Example: list($httpstatus,$headers,$data) = getHTTP('http://sebauvage.net/');
            if (strpos($httpstatus,'200 OK')!==false)
                echo 'Data type: '.htmlspecialchars($headers['Content-Type']);
            else
                echo 'There was an error: '.htmlspecialchars($httpstatus)
*/
function getHTTP($url,$timeout=30)
{
    try
    {
        $options = array('http'=>array('method'=>'GET','timeout' => $timeout, 'user_agent' => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:23.0) Gecko/20100101 Firefox/23.0')); // Force network timeout
        $context = stream_context_create($options);
        // We download at most 4 Mb from source.
        // The offset is 0: since PHP 7.1 a negative offset seeks from the end of the stream, which remote streams cannot do.
        $data=file_get_contents($url,false,$context,0, 4000000);
        // Failed or empty download (strict checks, so that a body of "0" is not mistaken for a failure),
        // or no HTTP exchange took place at all ($http_response_header is only set by the HTTP wrapper).
        if ($data===false || $data==='' || empty($http_response_header)) { return array('HTTP Error',array(),''); }
        $httpStatus=$http_response_header[0]; // e.g. "HTTP/1.1 200 OK"
        $responseHeaders=http_parse_headers_shaarli($http_response_header);
        return array($httpStatus,$responseHeaders,$data);
    }
    catch (Exception $e)  // getHTTP *can* fail silently (we don't care if the title cannot be fetched)
    {
        // Headers are returned as an empty array, like in the other error case.
        return array($e->getMessage(),array(),'');
    }
}
584
585// Extract title from an HTML document. 541// Extract title from an HTML document.
586// (Returns an empty string if not found.) 542// (Returns an empty string if not found.)
587function html_extract_title($html) 543function html_extract_title($html)
@@ -1516,9 +1472,10 @@ function renderPage()
1516 $private = (!empty($_GET['private']) && $_GET['private'] === "1" ? 1 : 0); 1472 $private = (!empty($_GET['private']) && $_GET['private'] === "1" ? 1 : 0);
1517 // If this is an HTTP(S) link, we try go get the page to extract the title (otherwise we will to straight to the edit form.) 1473 // If this is an HTTP(S) link, we try go get the page to extract the title (otherwise we will to straight to the edit form.)
1518 if (empty($title) && strpos($url->getScheme(), 'http') !== false) { 1474 if (empty($title) && strpos($url->getScheme(), 'http') !== false) {
1519 list($status,$headers,$data) = getHTTP($url,4); // Short timeout to keep the application responsive. 1475 // Short timeout to keep the application responsive
1476 list($headers, $data) = get_http_url($url, 4);
1520 // FIXME: Decode charset according to specified in either 1) HTTP response headers or 2) <head> in html 1477 // FIXME: Decode charset according to specified in either 1) HTTP response headers or 2) <head> in html
1521 if (strpos($status,'200 OK')!==false) { 1478 if (strpos($headers[0], '200 OK') !== false) {
1522 // Look for charset in html header. 1479 // Look for charset in html header.
1523 preg_match('#<meta .*charset=.*>#Usi', $data, $meta); 1480 preg_match('#<meta .*charset=.*>#Usi', $data, $meta);
1524 1481
@@ -2186,8 +2143,9 @@ function genThumbnail()
2186 } 2143 }
2187 else // This is a flickr page (html) 2144 else // This is a flickr page (html)
2188 { 2145 {
2189 list($httpstatus,$headers,$data) = getHTTP($url,20); // Get the flickr html page. 2146 // Get the flickr html page.
2190 if (strpos($httpstatus,'200 OK')!==false) 2147 list($headers, $data) = get_http_url($url, 20);
2148 if (strpos($headers[0], '200 OK') !== false)
2191 { 2149 {
2192 // flickr now nicely provides the URL of the thumbnail in each flickr page. 2150 // flickr now nicely provides the URL of the thumbnail in each flickr page.
2193 preg_match('!<link rel=\"image_src\" href=\"(.+?)\"!',$data,$matches); 2151 preg_match('!<link rel=\"image_src\" href=\"(.+?)\"!',$data,$matches);
@@ -2206,9 +2164,9 @@ function genThumbnail()
2206 2164
2207 if ($imageurl!='') 2165 if ($imageurl!='')
2208 { // Let's download the image. 2166 { // Let's download the image.
2209 list($httpstatus,$headers,$data) = getHTTP($imageurl,10); // Image is 240x120, so 10 seconds to download should be enough. 2167 // Image is 240x120, so 10 seconds to download should be enough.
2210 if (strpos($httpstatus,'200 OK')!==false) 2168 list($headers, $data) = get_http_url($imageurl, 10);
2211 { 2169 if (strpos($headers[0], '200 OK') !== false) {
2212 file_put_contents($GLOBALS['config']['CACHEDIR'].'/'.$thumbname,$data); // Save image to cache. 2170 file_put_contents($GLOBALS['config']['CACHEDIR'].'/'.$thumbname,$data); // Save image to cache.
2213 header('Content-Type: image/jpeg'); 2171 header('Content-Type: image/jpeg');
2214 echo $data; 2172 echo $data;
@@ -2222,15 +2180,13 @@ function genThumbnail()
2222 // This is more complex: we have to perform a HTTP request, then parse the result. 2180 // This is more complex: we have to perform a HTTP request, then parse the result.
2223 // Maybe we should deport this to JavaScript ? Example: http://stackoverflow.com/questions/1361149/get-img-thumbnails-from-vimeo/4285098#4285098 2181 // Maybe we should deport this to JavaScript ? Example: http://stackoverflow.com/questions/1361149/get-img-thumbnails-from-vimeo/4285098#4285098
2224 $vid = substr(parse_url($url,PHP_URL_PATH),1); 2182 $vid = substr(parse_url($url,PHP_URL_PATH),1);
2225 list($httpstatus,$headers,$data) = getHTTP('https://vimeo.com/api/v2/video/'.escape($vid).'.php',5); 2183 list($headers, $data) = get_http_url('https://vimeo.com/api/v2/video/'.escape($vid).'.php', 5);
2226 if (strpos($httpstatus,'200 OK')!==false) 2184 if (strpos($headers[0], '200 OK') !== false) {
2227 {
2228 $t = unserialize($data); 2185 $t = unserialize($data);
2229 $imageurl = $t[0]['thumbnail_medium']; 2186 $imageurl = $t[0]['thumbnail_medium'];
2230 // Then we download the image and serve it to our client. 2187 // Then we download the image and serve it to our client.
2231 list($httpstatus,$headers,$data) = getHTTP($imageurl,10); 2188 list($headers, $data) = get_http_url($imageurl, 10);
2232 if (strpos($httpstatus,'200 OK')!==false) 2189 if (strpos($headers[0], '200 OK') !== false) {
2233 {
2234 file_put_contents($GLOBALS['config']['CACHEDIR'].'/'.$thumbname,$data); // Save image to cache. 2190 file_put_contents($GLOBALS['config']['CACHEDIR'].'/'.$thumbname,$data); // Save image to cache.
2235 header('Content-Type: image/jpeg'); 2191 header('Content-Type: image/jpeg');
2236 echo $data; 2192 echo $data;
@@ -2244,17 +2200,16 @@ function genThumbnail()
2244 // The thumbnail for TED talks is located in the <link rel="image_src" [...]> tag on that page 2200 // The thumbnail for TED talks is located in the <link rel="image_src" [...]> tag on that page
2245 // http://www.ted.com/talks/mikko_hypponen_fighting_viruses_defending_the_net.html 2201 // http://www.ted.com/talks/mikko_hypponen_fighting_viruses_defending_the_net.html
2246 // <link rel="image_src" href="http://images.ted.com/images/ted/28bced335898ba54d4441809c5b1112ffaf36781_389x292.jpg" /> 2202 // <link rel="image_src" href="http://images.ted.com/images/ted/28bced335898ba54d4441809c5b1112ffaf36781_389x292.jpg" />
2247 list($httpstatus,$headers,$data) = getHTTP($url,5); 2203 list($headers, $data) = get_http_url($url, 5);
2248 if (strpos($httpstatus,'200 OK')!==false) 2204 if (strpos($headers[0], '200 OK') !== false) {
2249 {
2250 // Extract the link to the thumbnail 2205 // Extract the link to the thumbnail
2251 preg_match('!link rel="image_src" href="(http://images.ted.com/images/ted/.+_\d+x\d+\.jpg)"!',$data,$matches); 2206 preg_match('!link rel="image_src" href="(http://images.ted.com/images/ted/.+_\d+x\d+\.jpg)"!',$data,$matches);
2252 if (!empty($matches[1])) 2207 if (!empty($matches[1]))
2253 { // Let's download the image. 2208 { // Let's download the image.
2254 $imageurl=$matches[1]; 2209 $imageurl=$matches[1];
2255 list($httpstatus,$headers,$data) = getHTTP($imageurl,20); // No control on image size, so wait long enough. 2210 // No control on image size, so wait long enough
2256 if (strpos($httpstatus,'200 OK')!==false) 2211 list($headers, $data) = get_http_url($imageurl, 20);
2257 { 2212 if (strpos($headers[0], '200 OK') !== false) {
2258 $filepath=$GLOBALS['config']['CACHEDIR'].'/'.$thumbname; 2213 $filepath=$GLOBALS['config']['CACHEDIR'].'/'.$thumbname;
2259 file_put_contents($filepath,$data); // Save image to cache. 2214 file_put_contents($filepath,$data); // Save image to cache.
2260 if (resizeImage($filepath)) 2215 if (resizeImage($filepath))
@@ -2273,17 +2228,16 @@ function genThumbnail()
2273 // There is no thumbnail available for xkcd comics, so download the whole image and resize it. 2228 // There is no thumbnail available for xkcd comics, so download the whole image and resize it.
2274 // http://xkcd.com/327/ 2229 // http://xkcd.com/327/
2275 // <img src="http://imgs.xkcd.com/comics/exploits_of_a_mom.png" title="<BLABLA>" alt="<BLABLA>" /> 2230 // <img src="http://imgs.xkcd.com/comics/exploits_of_a_mom.png" title="<BLABLA>" alt="<BLABLA>" />
2276 list($httpstatus,$headers,$data) = getHTTP($url,5); 2231 list($headers, $data) = get_http_url($url, 5);
2277 if (strpos($httpstatus,'200 OK')!==false) 2232 if (strpos($headers[0], '200 OK') !== false) {
2278 {
2279 // Extract the link to the thumbnail 2233 // Extract the link to the thumbnail
2280 preg_match('!<img src="(http://imgs.xkcd.com/comics/.*)" title="[^s]!',$data,$matches); 2234 preg_match('!<img src="(http://imgs.xkcd.com/comics/.*)" title="[^s]!',$data,$matches);
2281 if (!empty($matches[1])) 2235 if (!empty($matches[1]))
2282 { // Let's download the image. 2236 { // Let's download the image.
2283 $imageurl=$matches[1]; 2237 $imageurl=$matches[1];
2284 list($httpstatus,$headers,$data) = getHTTP($imageurl,20); // No control on image size, so wait long enough. 2238 // No control on image size, so wait long enough
2285 if (strpos($httpstatus,'200 OK')!==false) 2239 list($headers, $data) = get_http_url($imageurl, 20);
2286 { 2240 if (strpos($headers[0], '200 OK') !== false) {
2287 $filepath=$GLOBALS['config']['CACHEDIR'].'/'.$thumbname; 2241 $filepath=$GLOBALS['config']['CACHEDIR'].'/'.$thumbname;
2288 file_put_contents($filepath,$data); // Save image to cache. 2242 file_put_contents($filepath,$data); // Save image to cache.
2289 if (resizeImage($filepath)) 2243 if (resizeImage($filepath))
@@ -2300,9 +2254,9 @@ function genThumbnail()
2300 else 2254 else
2301 { 2255 {
2302 // For all other domains, we try to download the image and make a thumbnail. 2256 // For all other domains, we try to download the image and make a thumbnail.
2303 list($httpstatus,$headers,$data) = getHTTP($url,30); // We allow 30 seconds max to download (and downloads are limited to 4 Mb) 2257 // We allow 30 seconds max to download (and downloads are limited to 4 Mb)
2304 if (strpos($httpstatus,'200 OK')!==false) 2258 list($headers, $data) = get_http_url($url, 30);
2305 { 2259 if (strpos($headers[0], '200 OK') !== false) {
2306 $filepath=$GLOBALS['config']['CACHEDIR'].'/'.$thumbname; 2260 $filepath=$GLOBALS['config']['CACHEDIR'].'/'.$thumbname;
2307 file_put_contents($filepath,$data); // Save image to cache. 2261 file_put_contents($filepath,$data); // Save image to cache.
2308 if (resizeImage($filepath)) 2262 if (resizeImage($filepath))
diff --git a/tests/HttpUtilsTest.php b/tests/HttpUtilsTest.php
new file mode 100644
index 00000000..76092b80
--- /dev/null
+++ b/tests/HttpUtilsTest.php
@@ -0,0 +1,38 @@
1<?php
2/**
3 * HttpUtils' tests
4 */
5
6require_once 'application/HttpUtils.php';
7
/**
 * Unitary tests for get_http_url()
 *
 * Both tests exercise failure paths only: they check that the headers carry
 * the 'HTTP Error' marker and that the returned content holds the reason of
 * the failure.
 */
class GetHttpUrlTest extends PHPUnit_Framework_TestCase
{
    /**
     * Get an invalid local URL
     *
     * The message is matched case-insensitively: PHP 8 reports
     * "Failed to open stream" where older versions use a lowercase "failed".
     */
    public function testGetInvalidLocalUrl()
    {
        list($headers, $content) = get_http_url('/non/existent', 1);
        $this->assertEquals('HTTP Error', $headers[0]);
        $this->assertRegexp(
            '/failed to open stream: No such file or directory/i',
            $content
        );
    }

    /**
     * Get an invalid remote URL
     *
     * The resolver's wording depends on the platform ("Name or service not
     * known" comes from glibc), so the "getaddrinfo" marker that PHP puts in
     * its name-resolution warnings is accepted as well.
     */
    public function testGetInvalidRemoteUrl()
    {
        list($headers, $content) = get_http_url('http://non.existent', 1);
        $this->assertEquals('HTTP Error', $headers[0]);
        $this->assertRegexp(
            '/Name or service not known|getaddrinfo/i',
            $content
        );
    }
}