about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
author ArthurHoaro <arthur@hoa.ro> 2016-01-04 10:45:54 +0100
committer ArthurHoaro <arthur@hoa.ro> 2016-01-11 21:19:31 +0100
commit 1557cefbd76257ceb830f65806831b490faf0acc (patch)
tree 787f6d8fdabe8ea2fc0c37b61d616e667cdfbda5
parent c0a50f3663e207d5df007e0fa321219c1b32d6ea (diff)
download Shaarli-1557cefbd76257ceb830f65806831b490faf0acc.tar.gz
Shaarli-1557cefbd76257ceb830f65806831b490faf0acc.tar.zst
Shaarli-1557cefbd76257ceb830f65806831b490faf0acc.zip
Fixes #410 - Retrieve title fails in multiple cases
* `get_http_url()` renamed to `get_http_response()`.
* Use the same HTTP context to retrieve response headers and content.
* Follow HTTP 301 and 302 redirections to retrieve the title (default max 3 redirections).
* Add `LinkUtils` to extract titles and charset.
* Try to retrieve charset from HTTP headers first (new), then HTML content.
* Use mbstring to re-encode title if necessary.
-rw-r--r-- application/ApplicationUtils.php 2
-rwxr-xr-x [-rw-r--r--] application/HttpUtils.php 49
-rwxr-xr-x application/LinkUtils.php 79
-rwxr-xr-x [-rw-r--r--] application/Url.php 11
-rw-r--r-- index.php 87
-rw-r--r-- tests/HttpUtils/GetHttpUrlTest.php 26
-rw-r--r-- tests/LinkUtilsTest.php 85
-rw-r--r-- tests/Url/UrlTest.php 18
8 files changed, 285 insertions, 72 deletions
diff --git a/application/ApplicationUtils.php b/application/ApplicationUtils.php
index 274331e1..978fc9da 100644
--- a/application/ApplicationUtils.php
+++ b/application/ApplicationUtils.php
@@ -19,7 +19,7 @@ class ApplicationUtils
19 */ 19 */
20 public static function getLatestGitVersionCode($url, $timeout=2) 20 public static function getLatestGitVersionCode($url, $timeout=2)
21 { 21 {
22 list($headers, $data) = get_http_url($url, $timeout); 22 list($headers, $data) = get_http_response($url, $timeout);
23 23
24 if (strpos($headers[0], '200 OK') === false) { 24 if (strpos($headers[0], '200 OK') === false) {
25 error_log('Failed to retrieve ' . $url); 25 error_log('Failed to retrieve ' . $url);
diff --git a/application/HttpUtils.php b/application/HttpUtils.php
index 499220c5..e2c1cb47 100644..100755
--- a/application/HttpUtils.php
+++ b/application/HttpUtils.php
@@ -13,7 +13,7 @@
13 * [1] = URL content (downloaded data) 13 * [1] = URL content (downloaded data)
14 * 14 *
15 * Example: 15 * Example:
16 * list($headers, $data) = get_http_url('http://sebauvage.net/'); 16 * list($headers, $data) = get_http_response('http://sebauvage.net/');
17 * if (strpos($headers[0], '200 OK') !== false) { 17 * if (strpos($headers[0], '200 OK') !== false) {
18 * echo 'Data type: '.htmlspecialchars($headers['Content-Type']); 18 * echo 'Data type: '.htmlspecialchars($headers['Content-Type']);
19 * } else { 19 * } else {
@@ -24,31 +24,66 @@
24 * @see http://php.net/manual/en/function.stream-context-create.php 24 * @see http://php.net/manual/en/function.stream-context-create.php
25 * @see http://php.net/manual/en/function.get-headers.php 25 * @see http://php.net/manual/en/function.get-headers.php
26 */ 26 */
27function get_http_url($url, $timeout = 30, $maxBytes = 4194304) 27function get_http_response($url, $timeout = 30, $maxBytes = 4194304)
28{ 28{
29 $urlObj = new Url($url);
30 if (! filter_var($url, FILTER_VALIDATE_URL) || ! $urlObj->isHttp()) {
31 return array(array(0 => 'Invalid HTTP Url'), false);
32 }
33
29 $options = array( 34 $options = array(
30 'http' => array( 35 'http' => array(
31 'method' => 'GET', 36 'method' => 'GET',
32 'timeout' => $timeout, 37 'timeout' => $timeout,
33 'user_agent' => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:23.0)' 38 'user_agent' => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:23.0)'
34 .' Gecko/20100101 Firefox/23.0' 39 .' Gecko/20100101 Firefox/23.0',
40 'request_fulluri' => true,
35 ) 41 )
36 ); 42 );
37 43
38 $context = stream_context_create($options); 44 $context = stream_context_create($options);
45 stream_context_set_default($options);
46
47 list($headers, $finalUrl) = get_redirected_headers($urlObj->cleanup());
48 if (! $headers || strpos($headers[0], '200 OK') === false) {
49 return array($headers, false);
50 }
39 51
40 try { 52 try {
41 // TODO: catch Exception in calling code (thumbnailer) 53 // TODO: catch Exception in calling code (thumbnailer)
42 $content = file_get_contents($url, false, $context, -1, $maxBytes); 54 $content = file_get_contents($finalUrl, false, $context, -1, $maxBytes);
43 } catch (Exception $exc) { 55 } catch (Exception $exc) {
44 return array(array(0 => 'HTTP Error'), $exc->getMessage()); 56 return array(array(0 => 'HTTP Error'), $exc->getMessage());
45 } 57 }
46 58
47 if (!$content) { 59 return array($headers, $content);
48 return array(array(0 => 'HTTP Error'), ''); 60}
61
62/**
63 * Retrieve HTTP headers, following n redirections (temporary and permanent).
64 *
65 * @param string $url initial URL to reach.
66 * @param int $redirectionLimit max redirection follow..
67 *
68 * @return array
69 */
70function get_redirected_headers($url, $redirectionLimit = 3)
71{
72 $headers = get_headers($url, 1);
73
74 // Headers found, redirection found, and limit not reached.
75 if ($redirectionLimit-- > 0
76 && !empty($headers)
77 && (strpos($headers[0], '301') !== false || strpos($headers[0], '302') !== false)
78 && !empty($headers['Location'])) {
79
80 $redirection = is_array($headers['Location']) ? end($headers['Location']) : $headers['Location'];
81 if ($redirection != $url) {
82 return get_redirected_headers($redirection, $redirectionLimit);
83 }
49 } 84 }
50 85
51 return array(get_headers($url, 1), $content); 86 return array($headers, $url);
52} 87}
53 88
54/** 89/**
diff --git a/application/LinkUtils.php b/application/LinkUtils.php
new file mode 100755
index 00000000..26dd6b67
--- /dev/null
+++ b/application/LinkUtils.php
@@ -0,0 +1,79 @@
1<?php
2
3/**
4 * Extract title from an HTML document.
5 *
6 * @param string $html HTML content where to look for a title.
7 *
8 * @return bool|string Extracted title if found, false otherwise.
9 */
10function html_extract_title($html)
11{
12 if (preg_match('!<title>(.*)</title>!is', $html, $matches)) {
13 return trim(str_replace("\n", ' ', $matches[1]));
14 }
15 return false;
16}
17
18/**
19 * Determine charset from downloaded page.
20 * Priority:
21 * 1. HTTP headers (Content type).
22 * 2. HTML content page (tag <meta charset>).
23 * 3. Use a default charset (default: UTF-8).
24 *
25 * @param array $headers HTTP headers array.
26 * @param string $htmlContent HTML content where to look for charset.
27 * @param string $defaultCharset Default charset to apply if other methods failed.
28 *
29 * @return string Determined charset.
30 */
31function get_charset($headers, $htmlContent, $defaultCharset = 'utf-8')
32{
33 if ($charset = headers_extract_charset($headers)) {
34 return $charset;
35 }
36
37 if ($charset = html_extract_charset($htmlContent)) {
38 return $charset;
39 }
40
41 return $defaultCharset;
42}
43
44/**
45 * Extract charset from HTTP headers if it's defined.
46 *
47 * @param array $headers HTTP headers array.
48 *
49 * @return bool|string Charset string if found (lowercase), false otherwise.
50 */
51function headers_extract_charset($headers)
52{
53 if (! empty($headers['Content-Type']) && strpos($headers['Content-Type'], 'charset=') !== false) {
54 preg_match('/charset="?([^; ]+)/i', $headers['Content-Type'], $match);
55 if (! empty($match[1])) {
56 return strtolower(trim($match[1]));
57 }
58 }
59
60 return false;
61}
62
63/**
64 * Extract charset HTML content (tag <meta charset>).
65 *
66 * @param string $html HTML content where to look for charset.
67 *
68 * @return bool|string Charset string if found, false otherwise.
69 */
70function html_extract_charset($html)
71{
72 // Get encoding specified in HTML header.
73 preg_match('#<meta .*charset="?([^">/]+)"? */?>#Usi', $html, $enc);
74 if (!empty($enc[1])) {
75 return strtolower($enc[1]);
76 }
77
78 return false;
79}
diff --git a/application/Url.php b/application/Url.php
index d80c9c58..a4ac2e73 100644..100755
--- a/application/Url.php
+++ b/application/Url.php
@@ -118,7 +118,7 @@ class Url
118 */ 118 */
119 public function __construct($url) 119 public function __construct($url)
120 { 120 {
121 $this->parts = parse_url($url); 121 $this->parts = parse_url(trim($url));
122 122
123 if (!empty($url) && empty($this->parts['scheme'])) { 123 if (!empty($url) && empty($this->parts['scheme'])) {
124 $this->parts['scheme'] = 'http'; 124 $this->parts['scheme'] = 'http';
@@ -201,4 +201,13 @@ class Url
201 } 201 }
202 return $this->parts['scheme']; 202 return $this->parts['scheme'];
203 } 203 }
204
205 /**
206 * Test if the Url is an HTTP one.
207 *
208 * @return true is HTTP, false otherwise.
209 */
210 public function isHttp() {
211 return strpos(strtolower($this->parts['scheme']), 'http') !== false;
212 }
204} 213}
diff --git a/index.php b/index.php
index cd83600b..600b2f55 100644
--- a/index.php
+++ b/index.php
@@ -152,6 +152,7 @@ require_once 'application/FileUtils.php';
152require_once 'application/HttpUtils.php'; 152require_once 'application/HttpUtils.php';
153require_once 'application/LinkDB.php'; 153require_once 'application/LinkDB.php';
154require_once 'application/LinkFilter.php'; 154require_once 'application/LinkFilter.php';
155require_once 'application/LinkUtils.php';
155require_once 'application/TimeZone.php'; 156require_once 'application/TimeZone.php';
156require_once 'application/Url.php'; 157require_once 'application/Url.php';
157require_once 'application/Utils.php'; 158require_once 'application/Utils.php';
@@ -578,13 +579,6 @@ function linkdate2iso8601($linkdate)
578 return date('c',linkdate2timestamp($linkdate)); // 'c' is for ISO 8601 date format. 579 return date('c',linkdate2timestamp($linkdate)); // 'c' is for ISO 8601 date format.
579} 580}
580 581
581// Extract title from an HTML document.
582// (Returns an empty string if not found.)
583function html_extract_title($html)
584{
585 return preg_match('!<title>(.*?)</title>!is', $html, $matches) ? trim(str_replace("\n",' ', $matches[1])) : '' ;
586}
587
588// ------------------------------------------------------------------------------------------ 582// ------------------------------------------------------------------------------------------
589// Token management for XSRF protection 583// Token management for XSRF protection
590// Token should be used in any form which acts on data (create,update,delete,import...). 584// Token should be used in any form which acts on data (create,update,delete,import...).
@@ -1642,7 +1636,7 @@ function renderPage()
1642 1636
1643 // -------- User want to post a new link: Display link edit form. 1637 // -------- User want to post a new link: Display link edit form.
1644 if (isset($_GET['post'])) { 1638 if (isset($_GET['post'])) {
1645 $url = cleanup_url($_GET['post']); 1639 $url = cleanup_url(escape($_GET['post']));
1646 1640
1647 $link_is_new = false; 1641 $link_is_new = false;
1648 // Check if URL is not already in database (in this case, we will edit the existing link) 1642 // Check if URL is not already in database (in this case, we will edit the existing link)
@@ -1660,35 +1654,24 @@ function renderPage()
1660 // If this is an HTTP(S) link, we try go get the page to extract the title (otherwise we will to straight to the edit form.) 1654 // If this is an HTTP(S) link, we try go get the page to extract the title (otherwise we will to straight to the edit form.)
1661 if (empty($title) && strpos(get_url_scheme($url), 'http') !== false) { 1655 if (empty($title) && strpos(get_url_scheme($url), 'http') !== false) {
1662 // Short timeout to keep the application responsive 1656 // Short timeout to keep the application responsive
1663 list($headers, $data) = get_http_url($url, 4); 1657 list($headers, $content) = get_http_response($url, 4);
1664 // FIXME: Decode charset according to specified in either 1) HTTP response headers or 2) <head> in html
1665 if (strpos($headers[0], '200 OK') !== false) { 1658 if (strpos($headers[0], '200 OK') !== false) {
1666 // Look for charset in html header. 1659 // Retrieve charset.
1667 preg_match('#<meta .*charset=.*>#Usi', $data, $meta); 1660 $charset = get_charset($headers, $content);
1668 1661 // Extract title.
1669 // If found, extract encoding. 1662 $title = html_extract_title($content);
1670 if (!empty($meta[0])) { 1663 // Re-encode title in utf-8 if necessary.
1671 // Get encoding specified in header. 1664 if (! empty($title) && $charset != 'utf-8') {
1672 preg_match('#charset="?(.*)"#si', $meta[0], $enc); 1665 $title = mb_convert_encoding($title, $charset, 'utf-8');
1673 // If charset not found, use utf-8.
1674 $html_charset = (!empty($enc[1])) ? strtolower($enc[1]) : 'utf-8';
1675 }
1676 else {
1677 $html_charset = 'utf-8';
1678 }
1679
1680 // Extract title
1681 $title = html_extract_title($data);
1682 if (!empty($title)) {
1683 // Re-encode title in utf-8 if necessary.
1684 $title = ($html_charset == 'iso-8859-1') ? utf8_encode($title) : $title;
1685 } 1666 }
1686 } 1667 }
1687 } 1668 }
1669
1688 if ($url == '') { 1670 if ($url == '') {
1689 $url = '?' . smallHash($linkdate); 1671 $url = '?' . smallHash($linkdate);
1690 $title = 'Note: '; 1672 $title = 'Note: ';
1691 } 1673 }
1674
1692 $link = array( 1675 $link = array(
1693 'linkdate' => $linkdate, 1676 'linkdate' => $linkdate,
1694 'title' => $title, 1677 'title' => $title,
@@ -2314,11 +2297,11 @@ function genThumbnail()
2314 else // This is a flickr page (html) 2297 else // This is a flickr page (html)
2315 { 2298 {
2316 // Get the flickr html page. 2299 // Get the flickr html page.
2317 list($headers, $data) = get_http_url($url, 20); 2300 list($headers, $content) = get_http_response($url, 20);
2318 if (strpos($headers[0], '200 OK') !== false) 2301 if (strpos($headers[0], '200 OK') !== false)
2319 { 2302 {
2320 // flickr now nicely provides the URL of the thumbnail in each flickr page. 2303 // flickr now nicely provides the URL of the thumbnail in each flickr page.
2321 preg_match('!<link rel=\"image_src\" href=\"(.+?)\"!',$data,$matches); 2304 preg_match('!<link rel=\"image_src\" href=\"(.+?)\"!', $content, $matches);
2322 if (!empty($matches[1])) $imageurl=$matches[1]; 2305 if (!empty($matches[1])) $imageurl=$matches[1];
2323 2306
2324 // In albums (and some other pages), the link rel="image_src" is not provided, 2307 // In albums (and some other pages), the link rel="image_src" is not provided,
@@ -2326,7 +2309,7 @@ function genThumbnail()
2326 // <meta property="og:image" content="http://farm4.staticflickr.com/3398/3239339068_25d13535ff_z.jpg" /> 2309 // <meta property="og:image" content="http://farm4.staticflickr.com/3398/3239339068_25d13535ff_z.jpg" />
2327 if ($imageurl=='') 2310 if ($imageurl=='')
2328 { 2311 {
2329 preg_match('!<meta property=\"og:image\" content=\"(.+?)\"!',$data,$matches); 2312 preg_match('!<meta property=\"og:image\" content=\"(.+?)\"!', $content, $matches);
2330 if (!empty($matches[1])) $imageurl=$matches[1]; 2313 if (!empty($matches[1])) $imageurl=$matches[1];
2331 } 2314 }
2332 } 2315 }
@@ -2335,11 +2318,12 @@ function genThumbnail()
2335 if ($imageurl!='') 2318 if ($imageurl!='')
2336 { // Let's download the image. 2319 { // Let's download the image.
2337 // Image is 240x120, so 10 seconds to download should be enough. 2320 // Image is 240x120, so 10 seconds to download should be enough.
2338 list($headers, $data) = get_http_url($imageurl, 10); 2321 list($headers, $content) = get_http_response($imageurl, 10);
2339 if (strpos($headers[0], '200 OK') !== false) { 2322 if (strpos($headers[0], '200 OK') !== false) {
2340 file_put_contents($GLOBALS['config']['CACHEDIR'].'/'.$thumbname,$data); // Save image to cache. 2323 // Save image to cache.
2324 file_put_contents($GLOBALS['config']['CACHEDIR'].'/' . $thumbname, $content);
2341 header('Content-Type: image/jpeg'); 2325 header('Content-Type: image/jpeg');
2342 echo $data; 2326 echo $content;
2343 return; 2327 return;
2344 } 2328 }
2345 } 2329 }
@@ -2350,16 +2334,17 @@ function genThumbnail()
2350 // This is more complex: we have to perform a HTTP request, then parse the result. 2334 // This is more complex: we have to perform a HTTP request, then parse the result.
2351 // Maybe we should deport this to JavaScript ? Example: http://stackoverflow.com/questions/1361149/get-img-thumbnails-from-vimeo/4285098#4285098 2335 // Maybe we should deport this to JavaScript ? Example: http://stackoverflow.com/questions/1361149/get-img-thumbnails-from-vimeo/4285098#4285098
2352 $vid = substr(parse_url($url,PHP_URL_PATH),1); 2336 $vid = substr(parse_url($url,PHP_URL_PATH),1);
2353 list($headers, $data) = get_http_url('https://vimeo.com/api/v2/video/'.escape($vid).'.php', 5); 2337 list($headers, $content) = get_http_response('https://vimeo.com/api/v2/video/'.escape($vid).'.php', 5);
2354 if (strpos($headers[0], '200 OK') !== false) { 2338 if (strpos($headers[0], '200 OK') !== false) {
2355 $t = unserialize($data); 2339 $t = unserialize($content);
2356 $imageurl = $t[0]['thumbnail_medium']; 2340 $imageurl = $t[0]['thumbnail_medium'];
2357 // Then we download the image and serve it to our client. 2341 // Then we download the image and serve it to our client.
2358 list($headers, $data) = get_http_url($imageurl, 10); 2342 list($headers, $content) = get_http_response($imageurl, 10);
2359 if (strpos($headers[0], '200 OK') !== false) { 2343 if (strpos($headers[0], '200 OK') !== false) {
2360 file_put_contents($GLOBALS['config']['CACHEDIR'].'/'.$thumbname,$data); // Save image to cache. 2344 // Save image to cache.
2345 file_put_contents($GLOBALS['config']['CACHEDIR'] . '/' . $thumbname, $content);
2361 header('Content-Type: image/jpeg'); 2346 header('Content-Type: image/jpeg');
2362 echo $data; 2347 echo $content;
2363 return; 2348 return;
2364 } 2349 }
2365 } 2350 }
@@ -2370,18 +2355,18 @@ function genThumbnail()
2370 // The thumbnail for TED talks is located in the <link rel="image_src" [...]> tag on that page 2355 // The thumbnail for TED talks is located in the <link rel="image_src" [...]> tag on that page
2371 // http://www.ted.com/talks/mikko_hypponen_fighting_viruses_defending_the_net.html 2356 // http://www.ted.com/talks/mikko_hypponen_fighting_viruses_defending_the_net.html
2372 // <link rel="image_src" href="http://images.ted.com/images/ted/28bced335898ba54d4441809c5b1112ffaf36781_389x292.jpg" /> 2357 // <link rel="image_src" href="http://images.ted.com/images/ted/28bced335898ba54d4441809c5b1112ffaf36781_389x292.jpg" />
2373 list($headers, $data) = get_http_url($url, 5); 2358 list($headers, $content) = get_http_response($url, 5);
2374 if (strpos($headers[0], '200 OK') !== false) { 2359 if (strpos($headers[0], '200 OK') !== false) {
2375 // Extract the link to the thumbnail 2360 // Extract the link to the thumbnail
2376 preg_match('!link rel="image_src" href="(http://images.ted.com/images/ted/.+_\d+x\d+\.jpg)"!',$data,$matches); 2361 preg_match('!link rel="image_src" href="(http://images.ted.com/images/ted/.+_\d+x\d+\.jpg)"!', $content, $matches);
2377 if (!empty($matches[1])) 2362 if (!empty($matches[1]))
2378 { // Let's download the image. 2363 { // Let's download the image.
2379 $imageurl=$matches[1]; 2364 $imageurl=$matches[1];
2380 // No control on image size, so wait long enough 2365 // No control on image size, so wait long enough
2381 list($headers, $data) = get_http_url($imageurl, 20); 2366 list($headers, $content) = get_http_response($imageurl, 20);
2382 if (strpos($headers[0], '200 OK') !== false) { 2367 if (strpos($headers[0], '200 OK') !== false) {
2383 $filepath=$GLOBALS['config']['CACHEDIR'].'/'.$thumbname; 2368 $filepath=$GLOBALS['config']['CACHEDIR'].'/'.$thumbname;
2384 file_put_contents($filepath,$data); // Save image to cache. 2369 file_put_contents($filepath, $content); // Save image to cache.
2385 if (resizeImage($filepath)) 2370 if (resizeImage($filepath))
2386 { 2371 {
2387 header('Content-Type: image/jpeg'); 2372 header('Content-Type: image/jpeg');
@@ -2398,18 +2383,19 @@ function genThumbnail()
2398 // There is no thumbnail available for xkcd comics, so download the whole image and resize it. 2383 // There is no thumbnail available for xkcd comics, so download the whole image and resize it.
2399 // http://xkcd.com/327/ 2384 // http://xkcd.com/327/
2400 // <img src="http://imgs.xkcd.com/comics/exploits_of_a_mom.png" title="<BLABLA>" alt="<BLABLA>" /> 2385 // <img src="http://imgs.xkcd.com/comics/exploits_of_a_mom.png" title="<BLABLA>" alt="<BLABLA>" />
2401 list($headers, $data) = get_http_url($url, 5); 2386 list($headers, $content) = get_http_response($url, 5);
2402 if (strpos($headers[0], '200 OK') !== false) { 2387 if (strpos($headers[0], '200 OK') !== false) {
2403 // Extract the link to the thumbnail 2388 // Extract the link to the thumbnail
2404 preg_match('!<img src="(http://imgs.xkcd.com/comics/.*)" title="[^s]!',$data,$matches); 2389 preg_match('!<img src="(http://imgs.xkcd.com/comics/.*)" title="[^s]!', $content, $matches);
2405 if (!empty($matches[1])) 2390 if (!empty($matches[1]))
2406 { // Let's download the image. 2391 { // Let's download the image.
2407 $imageurl=$matches[1]; 2392 $imageurl=$matches[1];
2408 // No control on image size, so wait long enough 2393 // No control on image size, so wait long enough
2409 list($headers, $data) = get_http_url($imageurl, 20); 2394 list($headers, $content) = get_http_response($imageurl, 20);
2410 if (strpos($headers[0], '200 OK') !== false) { 2395 if (strpos($headers[0], '200 OK') !== false) {
2411 $filepath=$GLOBALS['config']['CACHEDIR'].'/'.$thumbname; 2396 $filepath=$GLOBALS['config']['CACHEDIR'].'/'.$thumbname;
2412 file_put_contents($filepath,$data); // Save image to cache. 2397 // Save image to cache.
2398 file_put_contents($filepath, $content);
2413 if (resizeImage($filepath)) 2399 if (resizeImage($filepath))
2414 { 2400 {
2415 header('Content-Type: image/jpeg'); 2401 header('Content-Type: image/jpeg');
@@ -2425,10 +2411,11 @@ function genThumbnail()
2425 { 2411 {
2426 // For all other domains, we try to download the image and make a thumbnail. 2412 // For all other domains, we try to download the image and make a thumbnail.
2427 // We allow 30 seconds max to download (and downloads are limited to 4 Mb) 2413 // We allow 30 seconds max to download (and downloads are limited to 4 Mb)
2428 list($headers, $data) = get_http_url($url, 30); 2414 list($headers, $content) = get_http_response($url, 30);
2429 if (strpos($headers[0], '200 OK') !== false) { 2415 if (strpos($headers[0], '200 OK') !== false) {
2430 $filepath=$GLOBALS['config']['CACHEDIR'].'/'.$thumbname; 2416 $filepath=$GLOBALS['config']['CACHEDIR'].'/'.$thumbname;
2431 file_put_contents($filepath,$data); // Save image to cache. 2417 // Save image to cache.
2418 file_put_contents($filepath, $content);
2432 if (resizeImage($filepath)) 2419 if (resizeImage($filepath))
2433 { 2420 {
2434 header('Content-Type: image/jpeg'); 2421 header('Content-Type: image/jpeg');
diff --git a/tests/HttpUtils/GetHttpUrlTest.php b/tests/HttpUtils/GetHttpUrlTest.php
index 76092b80..fd293505 100644
--- a/tests/HttpUtils/GetHttpUrlTest.php
+++ b/tests/HttpUtils/GetHttpUrlTest.php
@@ -6,7 +6,7 @@
6require_once 'application/HttpUtils.php'; 6require_once 'application/HttpUtils.php';
7 7
8/** 8/**
9 * Unitary tests for get_http_url() 9 * Unitary tests for get_http_response()
10 */ 10 */
11class GetHttpUrlTest extends PHPUnit_Framework_TestCase 11class GetHttpUrlTest extends PHPUnit_Framework_TestCase
12{ 12{
@@ -15,12 +15,15 @@ class GetHttpUrlTest extends PHPUnit_Framework_TestCase
15 */ 15 */
16 public function testGetInvalidLocalUrl() 16 public function testGetInvalidLocalUrl()
17 { 17 {
18 list($headers, $content) = get_http_url('/non/existent', 1); 18 // Local
19 $this->assertEquals('HTTP Error', $headers[0]); 19 list($headers, $content) = get_http_response('/non/existent', 1);
20 $this->assertRegexp( 20 $this->assertEquals('Invalid HTTP Url', $headers[0]);
21 '/failed to open stream: No such file or directory/', 21 $this->assertFalse($content);
22 $content 22
23 ); 23 // Non HTTP
24 list($headers, $content) = get_http_response('ftp://save.tld/mysave', 1);
25 $this->assertEquals('Invalid HTTP Url', $headers[0]);
26 $this->assertFalse($content);
24 } 27 }
25 28
26 /** 29 /**
@@ -28,11 +31,8 @@ class GetHttpUrlTest extends PHPUnit_Framework_TestCase
28 */ 31 */
29 public function testGetInvalidRemoteUrl() 32 public function testGetInvalidRemoteUrl()
30 { 33 {
31 list($headers, $content) = get_http_url('http://non.existent', 1); 34 list($headers, $content) = @get_http_response('http://non.existent', 1);
32 $this->assertEquals('HTTP Error', $headers[0]); 35 $this->assertFalse($headers);
33 $this->assertRegexp( 36 $this->assertFalse($content);
34 '/Name or service not known/',
35 $content
36 );
37 } 37 }
38} 38}
diff --git a/tests/LinkUtilsTest.php b/tests/LinkUtilsTest.php
new file mode 100644
index 00000000..c2257590
--- /dev/null
+++ b/tests/LinkUtilsTest.php
@@ -0,0 +1,85 @@
1<?php
2
3require_once 'application/LinkUtils.php';
4
5/**
6* Class LinkUtilsTest.
7*/
8class LinkUtilsTest extends PHPUnit_Framework_TestCase
9{
10 /**
11 * Test html_extract_title() when the title is found.
12 */
13 public function testHtmlExtractExistentTitle()
14 {
15 $title = 'Read me please.';
16 $html = '<html><meta>stuff</meta><title>'. $title .'</title></html>';
17 $this->assertEquals($title, html_extract_title($html));
18 }
19
20 /**
21 * Test html_extract_title() when the title is not found.
22 */
23 public function testHtmlExtractNonExistentTitle()
24 {
25 $html = '<html><meta>stuff</meta></html>';
26 $this->assertFalse(html_extract_title($html));
27 }
28
29 /**
30 * Test get_charset() with all priorities.
31 */
32 public function testGetCharset()
33 {
34 $headers = array('Content-Type' => 'text/html; charset=Headers');
35 $html = '<html><meta>stuff</meta><meta charset="Html"/></html>';
36 $default = 'default';
37 $this->assertEquals('headers', get_charset($headers, $html, $default));
38 $this->assertEquals('html', get_charset(array(), $html, $default));
39 $this->assertEquals($default, get_charset(array(), '', $default));
40 $this->assertEquals('utf-8', get_charset(array(), ''));
41 }
42
43 /**
44 * Test headers_extract_charset() when the charset is found.
45 */
46 public function testHeadersExtractExistentCharset()
47 {
48 $charset = 'x-MacCroatian';
49 $headers = array('Content-Type' => 'text/html; charset='. $charset);
50 $this->assertEquals(strtolower($charset), headers_extract_charset($headers));
51 }
52
53 /**
54 * Test headers_extract_charset() when the charset is not found.
55 */
56 public function testHeadersExtractNonExistentCharset()
57 {
58 $headers = array();
59 $this->assertFalse(headers_extract_charset($headers));
60
61 $headers = array('Content-Type' => 'text/html');
62 $this->assertFalse(headers_extract_charset($headers));
63 }
64
65 /**
66 * Test html_extract_charset() when the charset is found.
67 */
68 public function testHtmlExtractExistentCharset()
69 {
70 $charset = 'x-MacCroatian';
71 $html = '<html><meta>stuff2</meta><meta charset="'. $charset .'"/></html>';
72 $this->assertEquals(strtolower($charset), html_extract_charset($html));
73 }
74
75 /**
76 * Test html_extract_charset() when the charset is not found.
77 */
78 public function testHtmlExtractNonExistentCharset()
79 {
80 $html = '<html><meta>stuff</meta></html>';
81 $this->assertFalse(html_extract_charset($html));
82 $html = '<html><meta>stuff</meta><meta charset=""/></html>';
83 $this->assertFalse(html_extract_charset($html));
84 }
85}
diff --git a/tests/Url/UrlTest.php b/tests/Url/UrlTest.php
index af6daaa4..425327ed 100644
--- a/tests/Url/UrlTest.php
+++ b/tests/Url/UrlTest.php
@@ -156,4 +156,22 @@ class UrlTest extends PHPUnit_Framework_TestCase
156 $this->assertEquals($strOn, add_trailing_slash($strOn)); 156 $this->assertEquals($strOn, add_trailing_slash($strOn));
157 $this->assertEquals($strOn, add_trailing_slash($strOff)); 157 $this->assertEquals($strOn, add_trailing_slash($strOff));
158 } 158 }
159
160 /**
161 * Test valid HTTP url.
162 */
163 function testUrlIsHttp()
164 {
165 $url = new Url(self::$baseUrl);
166 $this->assertTrue($url->isHttp());
167 }
168
169 /**
170 * Test non HTTP url.
171 */
172 function testUrlIsNotHttp()
173 {
174 $url = new Url('ftp://save.tld/mysave');
175 $this->assertFalse($url->isHttp());
176 }
159} 177}