diff options
author | VirtualTam <virtualtam@flibidi.net> | 2016-01-11 21:47:00 +0100 |
---|---|---|
committer | VirtualTam <virtualtam@flibidi.net> | 2016-01-11 21:47:00 +0100 |
commit | 92ba7b573f2833bd35c7eb2fc7fdbeb1a0ac7b44 (patch) | |
tree | 787f6d8fdabe8ea2fc0c37b61d616e667cdfbda5 | |
parent | c0a50f3663e207d5df007e0fa321219c1b32d6ea (diff) | |
parent | 1557cefbd76257ceb830f65806831b490faf0acc (diff) | |
download | Shaarli-92ba7b573f2833bd35c7eb2fc7fdbeb1a0ac7b44.tar.gz Shaarli-92ba7b573f2833bd35c7eb2fc7fdbeb1a0ac7b44.tar.zst Shaarli-92ba7b573f2833bd35c7eb2fc7fdbeb1a0ac7b44.zip |
Merge pull request #432 from ArthurHoaro/title-retrieve
Fixes #410 - Retrieve title fails in multiple cases
-rw-r--r-- | application/ApplicationUtils.php | 2 | ||||
-rwxr-xr-x[-rw-r--r--] | application/HttpUtils.php | 49 | ||||
-rwxr-xr-x | application/LinkUtils.php | 79 | ||||
-rwxr-xr-x[-rw-r--r--] | application/Url.php | 11 | ||||
-rw-r--r-- | index.php | 87 | ||||
-rw-r--r-- | tests/HttpUtils/GetHttpUrlTest.php | 26 | ||||
-rw-r--r-- | tests/LinkUtilsTest.php | 85 | ||||
-rw-r--r-- | tests/Url/UrlTest.php | 18 |
8 files changed, 285 insertions, 72 deletions
diff --git a/application/ApplicationUtils.php b/application/ApplicationUtils.php index 274331e1..978fc9da 100644 --- a/application/ApplicationUtils.php +++ b/application/ApplicationUtils.php | |||
@@ -19,7 +19,7 @@ class ApplicationUtils | |||
19 | */ | 19 | */ |
20 | public static function getLatestGitVersionCode($url, $timeout=2) | 20 | public static function getLatestGitVersionCode($url, $timeout=2) |
21 | { | 21 | { |
22 | list($headers, $data) = get_http_url($url, $timeout); | 22 | list($headers, $data) = get_http_response($url, $timeout); |
23 | 23 | ||
24 | if (strpos($headers[0], '200 OK') === false) { | 24 | if (strpos($headers[0], '200 OK') === false) { |
25 | error_log('Failed to retrieve ' . $url); | 25 | error_log('Failed to retrieve ' . $url); |
diff --git a/application/HttpUtils.php b/application/HttpUtils.php index 499220c5..e2c1cb47 100644..100755 --- a/application/HttpUtils.php +++ b/application/HttpUtils.php | |||
@@ -13,7 +13,7 @@ | |||
13 | * [1] = URL content (downloaded data) | 13 | * [1] = URL content (downloaded data) |
14 | * | 14 | * |
15 | * Example: | 15 | * Example: |
16 | * list($headers, $data) = get_http_url('http://sebauvage.net/'); | 16 | * list($headers, $data) = get_http_response('http://sebauvage.net/'); |
17 | * if (strpos($headers[0], '200 OK') !== false) { | 17 | * if (strpos($headers[0], '200 OK') !== false) { |
18 | * echo 'Data type: '.htmlspecialchars($headers['Content-Type']); | 18 | * echo 'Data type: '.htmlspecialchars($headers['Content-Type']); |
19 | * } else { | 19 | * } else { |
@@ -24,31 +24,66 @@ | |||
24 | * @see http://php.net/manual/en/function.stream-context-create.php | 24 | * @see http://php.net/manual/en/function.stream-context-create.php |
25 | * @see http://php.net/manual/en/function.get-headers.php | 25 | * @see http://php.net/manual/en/function.get-headers.php |
26 | */ | 26 | */ |
27 | function get_http_url($url, $timeout = 30, $maxBytes = 4194304) | 27 | function get_http_response($url, $timeout = 30, $maxBytes = 4194304) |
28 | { | 28 | { |
29 | $urlObj = new Url($url); | ||
30 | if (! filter_var($url, FILTER_VALIDATE_URL) || ! $urlObj->isHttp()) { | ||
31 | return array(array(0 => 'Invalid HTTP Url'), false); | ||
32 | } | ||
33 | |||
29 | $options = array( | 34 | $options = array( |
30 | 'http' => array( | 35 | 'http' => array( |
31 | 'method' => 'GET', | 36 | 'method' => 'GET', |
32 | 'timeout' => $timeout, | 37 | 'timeout' => $timeout, |
33 | 'user_agent' => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:23.0)' | 38 | 'user_agent' => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:23.0)' |
34 | .' Gecko/20100101 Firefox/23.0' | 39 | .' Gecko/20100101 Firefox/23.0', |
40 | 'request_fulluri' => true, | ||
35 | ) | 41 | ) |
36 | ); | 42 | ); |
37 | 43 | ||
38 | $context = stream_context_create($options); | 44 | $context = stream_context_create($options); |
45 | stream_context_set_default($options); | ||
46 | |||
47 | list($headers, $finalUrl) = get_redirected_headers($urlObj->cleanup()); | ||
48 | if (! $headers || strpos($headers[0], '200 OK') === false) { | ||
49 | return array($headers, false); | ||
50 | } | ||
39 | 51 | ||
40 | try { | 52 | try { |
41 | // TODO: catch Exception in calling code (thumbnailer) | 53 | // TODO: catch Exception in calling code (thumbnailer) |
42 | $content = file_get_contents($url, false, $context, -1, $maxBytes); | 54 | $content = file_get_contents($finalUrl, false, $context, -1, $maxBytes); |
43 | } catch (Exception $exc) { | 55 | } catch (Exception $exc) { |
44 | return array(array(0 => 'HTTP Error'), $exc->getMessage()); | 56 | return array(array(0 => 'HTTP Error'), $exc->getMessage()); |
45 | } | 57 | } |
46 | 58 | ||
47 | if (!$content) { | 59 | return array($headers, $content); |
48 | return array(array(0 => 'HTTP Error'), ''); | 60 | } |
61 | |||
62 | /** | ||
63 | * Retrieve HTTP headers, following n redirections (temporary and permanent). | ||
64 | * | ||
65 | * @param string $url initial URL to reach. | ||
66 | * @param int $redirectionLimit maximum number of redirections to follow. | |
67 | * | ||
68 | * @return array | ||
69 | */ | ||
70 | function get_redirected_headers($url, $redirectionLimit = 3) | ||
71 | { | ||
72 | $headers = get_headers($url, 1); | ||
73 | |||
74 | // Headers found, redirection found, and limit not reached. | ||
75 | if ($redirectionLimit-- > 0 | ||
76 | && !empty($headers) | ||
77 | && (strpos($headers[0], '301') !== false || strpos($headers[0], '302') !== false) | ||
78 | && !empty($headers['Location'])) { | ||
79 | |||
80 | $redirection = is_array($headers['Location']) ? end($headers['Location']) : $headers['Location']; | ||
81 | if ($redirection != $url) { | ||
82 | return get_redirected_headers($redirection, $redirectionLimit); | ||
83 | } | ||
49 | } | 84 | } |
50 | 85 | ||
51 | return array(get_headers($url, 1), $content); | 86 | return array($headers, $url); |
52 | } | 87 | } |
53 | 88 | ||
54 | /** | 89 | /** |
diff --git a/application/LinkUtils.php b/application/LinkUtils.php new file mode 100755 index 00000000..26dd6b67 --- /dev/null +++ b/application/LinkUtils.php | |||
@@ -0,0 +1,79 @@ | |||
1 | <?php | ||
2 | |||
3 | /** | ||
4 | * Extract title from an HTML document. | ||
5 | * | ||
6 | * @param string $html HTML content where to look for a title. | ||
7 | * | ||
8 | * @return bool|string Extracted title if found, false otherwise. | ||
9 | */ | ||
10 | function html_extract_title($html) | ||
11 | { | ||
12 | if (preg_match('!<title>(.*)</title>!is', $html, $matches)) { | ||
13 | return trim(str_replace("\n", ' ', $matches[1])); | ||
14 | } | ||
15 | return false; | ||
16 | } | ||
17 | |||
18 | /** | ||
19 | * Determine charset from downloaded page. | ||
20 | * Priority: | ||
21 | * 1. HTTP headers (Content type). | ||
22 | * 2. HTML content page (tag <meta charset>). | ||
23 | * 3. Use a default charset (default: UTF-8). | ||
24 | * | ||
25 | * @param array $headers HTTP headers array. | ||
26 | * @param string $htmlContent HTML content where to look for charset. | ||
27 | * @param string $defaultCharset Default charset to apply if other methods failed. | ||
28 | * | ||
29 | * @return string Determined charset. | ||
30 | */ | ||
31 | function get_charset($headers, $htmlContent, $defaultCharset = 'utf-8') | ||
32 | { | ||
33 | if ($charset = headers_extract_charset($headers)) { | ||
34 | return $charset; | ||
35 | } | ||
36 | |||
37 | if ($charset = html_extract_charset($htmlContent)) { | ||
38 | return $charset; | ||
39 | } | ||
40 | |||
41 | return $defaultCharset; | ||
42 | } | ||
43 | |||
44 | /** | ||
45 | * Extract charset from HTTP headers if it's defined. | ||
46 | * | ||
47 | * @param array $headers HTTP headers array. | ||
48 | * | ||
49 | * @return bool|string Charset string if found (lowercase), false otherwise. | ||
50 | */ | ||
51 | function headers_extract_charset($headers) | ||
52 | { | ||
53 | if (! empty($headers['Content-Type']) && strpos($headers['Content-Type'], 'charset=') !== false) { | ||
54 | preg_match('/charset="?([^; ]+)/i', $headers['Content-Type'], $match); | ||
55 | if (! empty($match[1])) { | ||
56 | return strtolower(trim($match[1])); | ||
57 | } | ||
58 | } | ||
59 | |||
60 | return false; | ||
61 | } | ||
62 | |||
63 | /** | ||
64 | * Extract charset from HTML content (tag <meta charset>). | |
65 | * | ||
66 | * @param string $html HTML content where to look for charset. | ||
67 | * | ||
68 | * @return bool|string Charset string if found, false otherwise. | ||
69 | */ | ||
70 | function html_extract_charset($html) | ||
71 | { | ||
72 | // Get encoding specified in HTML header. | ||
73 | preg_match('#<meta .*charset="?([^">/]+)"? */?>#Usi', $html, $enc); | ||
74 | if (!empty($enc[1])) { | ||
75 | return strtolower($enc[1]); | ||
76 | } | ||
77 | |||
78 | return false; | ||
79 | } | ||
diff --git a/application/Url.php b/application/Url.php index d80c9c58..a4ac2e73 100644..100755 --- a/application/Url.php +++ b/application/Url.php | |||
@@ -118,7 +118,7 @@ class Url | |||
118 | */ | 118 | */ |
119 | public function __construct($url) | 119 | public function __construct($url) |
120 | { | 120 | { |
121 | $this->parts = parse_url($url); | 121 | $this->parts = parse_url(trim($url)); |
122 | 122 | ||
123 | if (!empty($url) && empty($this->parts['scheme'])) { | 123 | if (!empty($url) && empty($this->parts['scheme'])) { |
124 | $this->parts['scheme'] = 'http'; | 124 | $this->parts['scheme'] = 'http'; |
@@ -201,4 +201,13 @@ class Url | |||
201 | } | 201 | } |
202 | return $this->parts['scheme']; | 202 | return $this->parts['scheme']; |
203 | } | 203 | } |
204 | |||
205 | /** | ||
206 | * Test if the Url is an HTTP one. | ||
207 | * | ||
208 | * @return bool true if the URL is HTTP, false otherwise. | |
209 | */ | ||
210 | public function isHttp() { | ||
211 | return strpos(strtolower($this->parts['scheme']), 'http') !== false; | ||
212 | } | ||
204 | } | 213 | } |
@@ -152,6 +152,7 @@ require_once 'application/FileUtils.php'; | |||
152 | require_once 'application/HttpUtils.php'; | 152 | require_once 'application/HttpUtils.php'; |
153 | require_once 'application/LinkDB.php'; | 153 | require_once 'application/LinkDB.php'; |
154 | require_once 'application/LinkFilter.php'; | 154 | require_once 'application/LinkFilter.php'; |
155 | require_once 'application/LinkUtils.php'; | ||
155 | require_once 'application/TimeZone.php'; | 156 | require_once 'application/TimeZone.php'; |
156 | require_once 'application/Url.php'; | 157 | require_once 'application/Url.php'; |
157 | require_once 'application/Utils.php'; | 158 | require_once 'application/Utils.php'; |
@@ -578,13 +579,6 @@ function linkdate2iso8601($linkdate) | |||
578 | return date('c',linkdate2timestamp($linkdate)); // 'c' is for ISO 8601 date format. | 579 | return date('c',linkdate2timestamp($linkdate)); // 'c' is for ISO 8601 date format. |
579 | } | 580 | } |
580 | 581 | ||
581 | // Extract title from an HTML document. | ||
582 | // (Returns an empty string if not found.) | ||
583 | function html_extract_title($html) | ||
584 | { | ||
585 | return preg_match('!<title>(.*?)</title>!is', $html, $matches) ? trim(str_replace("\n",' ', $matches[1])) : '' ; | ||
586 | } | ||
587 | |||
588 | // ------------------------------------------------------------------------------------------ | 582 | // ------------------------------------------------------------------------------------------ |
589 | // Token management for XSRF protection | 583 | // Token management for XSRF protection |
590 | // Token should be used in any form which acts on data (create,update,delete,import...). | 584 | // Token should be used in any form which acts on data (create,update,delete,import...). |
@@ -1642,7 +1636,7 @@ function renderPage() | |||
1642 | 1636 | ||
1643 | // -------- User want to post a new link: Display link edit form. | 1637 | // -------- User want to post a new link: Display link edit form. |
1644 | if (isset($_GET['post'])) { | 1638 | if (isset($_GET['post'])) { |
1645 | $url = cleanup_url($_GET['post']); | 1639 | $url = cleanup_url(escape($_GET['post'])); |
1646 | 1640 | ||
1647 | $link_is_new = false; | 1641 | $link_is_new = false; |
1648 | // Check if URL is not already in database (in this case, we will edit the existing link) | 1642 | // Check if URL is not already in database (in this case, we will edit the existing link) |
@@ -1660,35 +1654,24 @@ function renderPage() | |||
1660 | // If this is an HTTP(S) link, we try go get the page to extract the title (otherwise we will to straight to the edit form.) | 1654 | // If this is an HTTP(S) link, we try go get the page to extract the title (otherwise we will to straight to the edit form.) |
1661 | if (empty($title) && strpos(get_url_scheme($url), 'http') !== false) { | 1655 | if (empty($title) && strpos(get_url_scheme($url), 'http') !== false) { |
1662 | // Short timeout to keep the application responsive | 1656 | // Short timeout to keep the application responsive |
1663 | list($headers, $data) = get_http_url($url, 4); | 1657 | list($headers, $content) = get_http_response($url, 4); |
1664 | // FIXME: Decode charset according to specified in either 1) HTTP response headers or 2) <head> in html | ||
1665 | if (strpos($headers[0], '200 OK') !== false) { | 1658 | if (strpos($headers[0], '200 OK') !== false) { |
1666 | // Look for charset in html header. | 1659 | // Retrieve charset. |
1667 | preg_match('#<meta .*charset=.*>#Usi', $data, $meta); | 1660 | $charset = get_charset($headers, $content); |
1668 | 1661 | // Extract title. | |
1669 | // If found, extract encoding. | 1662 | $title = html_extract_title($content); |
1670 | if (!empty($meta[0])) { | 1663 | // Re-encode title in utf-8 if necessary. |
1671 | // Get encoding specified in header. | 1664 | if (! empty($title) && $charset != 'utf-8') { |
1672 | preg_match('#charset="?(.*)"#si', $meta[0], $enc); | 1665 | $title = mb_convert_encoding($title, $charset, 'utf-8'); |
1673 | // If charset not found, use utf-8. | ||
1674 | $html_charset = (!empty($enc[1])) ? strtolower($enc[1]) : 'utf-8'; | ||
1675 | } | ||
1676 | else { | ||
1677 | $html_charset = 'utf-8'; | ||
1678 | } | ||
1679 | |||
1680 | // Extract title | ||
1681 | $title = html_extract_title($data); | ||
1682 | if (!empty($title)) { | ||
1683 | // Re-encode title in utf-8 if necessary. | ||
1684 | $title = ($html_charset == 'iso-8859-1') ? utf8_encode($title) : $title; | ||
1685 | } | 1666 | } |
1686 | } | 1667 | } |
1687 | } | 1668 | } |
1669 | |||
1688 | if ($url == '') { | 1670 | if ($url == '') { |
1689 | $url = '?' . smallHash($linkdate); | 1671 | $url = '?' . smallHash($linkdate); |
1690 | $title = 'Note: '; | 1672 | $title = 'Note: '; |
1691 | } | 1673 | } |
1674 | |||
1692 | $link = array( | 1675 | $link = array( |
1693 | 'linkdate' => $linkdate, | 1676 | 'linkdate' => $linkdate, |
1694 | 'title' => $title, | 1677 | 'title' => $title, |
@@ -2314,11 +2297,11 @@ function genThumbnail() | |||
2314 | else // This is a flickr page (html) | 2297 | else // This is a flickr page (html) |
2315 | { | 2298 | { |
2316 | // Get the flickr html page. | 2299 | // Get the flickr html page. |
2317 | list($headers, $data) = get_http_url($url, 20); | 2300 | list($headers, $content) = get_http_response($url, 20); |
2318 | if (strpos($headers[0], '200 OK') !== false) | 2301 | if (strpos($headers[0], '200 OK') !== false) |
2319 | { | 2302 | { |
2320 | // flickr now nicely provides the URL of the thumbnail in each flickr page. | 2303 | // flickr now nicely provides the URL of the thumbnail in each flickr page. |
2321 | preg_match('!<link rel=\"image_src\" href=\"(.+?)\"!',$data,$matches); | 2304 | preg_match('!<link rel=\"image_src\" href=\"(.+?)\"!', $content, $matches); |
2322 | if (!empty($matches[1])) $imageurl=$matches[1]; | 2305 | if (!empty($matches[1])) $imageurl=$matches[1]; |
2323 | 2306 | ||
2324 | // In albums (and some other pages), the link rel="image_src" is not provided, | 2307 | // In albums (and some other pages), the link rel="image_src" is not provided, |
@@ -2326,7 +2309,7 @@ function genThumbnail() | |||
2326 | // <meta property="og:image" content="http://farm4.staticflickr.com/3398/3239339068_25d13535ff_z.jpg" /> | 2309 | // <meta property="og:image" content="http://farm4.staticflickr.com/3398/3239339068_25d13535ff_z.jpg" /> |
2327 | if ($imageurl=='') | 2310 | if ($imageurl=='') |
2328 | { | 2311 | { |
2329 | preg_match('!<meta property=\"og:image\" content=\"(.+?)\"!',$data,$matches); | 2312 | preg_match('!<meta property=\"og:image\" content=\"(.+?)\"!', $content, $matches); |
2330 | if (!empty($matches[1])) $imageurl=$matches[1]; | 2313 | if (!empty($matches[1])) $imageurl=$matches[1]; |
2331 | } | 2314 | } |
2332 | } | 2315 | } |
@@ -2335,11 +2318,12 @@ function genThumbnail() | |||
2335 | if ($imageurl!='') | 2318 | if ($imageurl!='') |
2336 | { // Let's download the image. | 2319 | { // Let's download the image. |
2337 | // Image is 240x120, so 10 seconds to download should be enough. | 2320 | // Image is 240x120, so 10 seconds to download should be enough. |
2338 | list($headers, $data) = get_http_url($imageurl, 10); | 2321 | list($headers, $content) = get_http_response($imageurl, 10); |
2339 | if (strpos($headers[0], '200 OK') !== false) { | 2322 | if (strpos($headers[0], '200 OK') !== false) { |
2340 | file_put_contents($GLOBALS['config']['CACHEDIR'].'/'.$thumbname,$data); // Save image to cache. | 2323 | // Save image to cache. |
2324 | file_put_contents($GLOBALS['config']['CACHEDIR'].'/' . $thumbname, $content); | ||
2341 | header('Content-Type: image/jpeg'); | 2325 | header('Content-Type: image/jpeg'); |
2342 | echo $data; | 2326 | echo $content; |
2343 | return; | 2327 | return; |
2344 | } | 2328 | } |
2345 | } | 2329 | } |
@@ -2350,16 +2334,17 @@ function genThumbnail() | |||
2350 | // This is more complex: we have to perform a HTTP request, then parse the result. | 2334 | // This is more complex: we have to perform a HTTP request, then parse the result. |
2351 | // Maybe we should deport this to JavaScript ? Example: http://stackoverflow.com/questions/1361149/get-img-thumbnails-from-vimeo/4285098#4285098 | 2335 | // Maybe we should deport this to JavaScript ? Example: http://stackoverflow.com/questions/1361149/get-img-thumbnails-from-vimeo/4285098#4285098 |
2352 | $vid = substr(parse_url($url,PHP_URL_PATH),1); | 2336 | $vid = substr(parse_url($url,PHP_URL_PATH),1); |
2353 | list($headers, $data) = get_http_url('https://vimeo.com/api/v2/video/'.escape($vid).'.php', 5); | 2337 | list($headers, $content) = get_http_response('https://vimeo.com/api/v2/video/'.escape($vid).'.php', 5); |
2354 | if (strpos($headers[0], '200 OK') !== false) { | 2338 | if (strpos($headers[0], '200 OK') !== false) { |
2355 | $t = unserialize($data); | 2339 | $t = unserialize($content); |
2356 | $imageurl = $t[0]['thumbnail_medium']; | 2340 | $imageurl = $t[0]['thumbnail_medium']; |
2357 | // Then we download the image and serve it to our client. | 2341 | // Then we download the image and serve it to our client. |
2358 | list($headers, $data) = get_http_url($imageurl, 10); | 2342 | list($headers, $content) = get_http_response($imageurl, 10); |
2359 | if (strpos($headers[0], '200 OK') !== false) { | 2343 | if (strpos($headers[0], '200 OK') !== false) { |
2360 | file_put_contents($GLOBALS['config']['CACHEDIR'].'/'.$thumbname,$data); // Save image to cache. | 2344 | // Save image to cache. |
2345 | file_put_contents($GLOBALS['config']['CACHEDIR'] . '/' . $thumbname, $content); | ||
2361 | header('Content-Type: image/jpeg'); | 2346 | header('Content-Type: image/jpeg'); |
2362 | echo $data; | 2347 | echo $content; |
2363 | return; | 2348 | return; |
2364 | } | 2349 | } |
2365 | } | 2350 | } |
@@ -2370,18 +2355,18 @@ function genThumbnail() | |||
2370 | // The thumbnail for TED talks is located in the <link rel="image_src" [...]> tag on that page | 2355 | // The thumbnail for TED talks is located in the <link rel="image_src" [...]> tag on that page |
2371 | // http://www.ted.com/talks/mikko_hypponen_fighting_viruses_defending_the_net.html | 2356 | // http://www.ted.com/talks/mikko_hypponen_fighting_viruses_defending_the_net.html |
2372 | // <link rel="image_src" href="http://images.ted.com/images/ted/28bced335898ba54d4441809c5b1112ffaf36781_389x292.jpg" /> | 2357 | // <link rel="image_src" href="http://images.ted.com/images/ted/28bced335898ba54d4441809c5b1112ffaf36781_389x292.jpg" /> |
2373 | list($headers, $data) = get_http_url($url, 5); | 2358 | list($headers, $content) = get_http_response($url, 5); |
2374 | if (strpos($headers[0], '200 OK') !== false) { | 2359 | if (strpos($headers[0], '200 OK') !== false) { |
2375 | // Extract the link to the thumbnail | 2360 | // Extract the link to the thumbnail |
2376 | preg_match('!link rel="image_src" href="(http://images.ted.com/images/ted/.+_\d+x\d+\.jpg)"!',$data,$matches); | 2361 | preg_match('!link rel="image_src" href="(http://images.ted.com/images/ted/.+_\d+x\d+\.jpg)"!', $content, $matches); |
2377 | if (!empty($matches[1])) | 2362 | if (!empty($matches[1])) |
2378 | { // Let's download the image. | 2363 | { // Let's download the image. |
2379 | $imageurl=$matches[1]; | 2364 | $imageurl=$matches[1]; |
2380 | // No control on image size, so wait long enough | 2365 | // No control on image size, so wait long enough |
2381 | list($headers, $data) = get_http_url($imageurl, 20); | 2366 | list($headers, $content) = get_http_response($imageurl, 20); |
2382 | if (strpos($headers[0], '200 OK') !== false) { | 2367 | if (strpos($headers[0], '200 OK') !== false) { |
2383 | $filepath=$GLOBALS['config']['CACHEDIR'].'/'.$thumbname; | 2368 | $filepath=$GLOBALS['config']['CACHEDIR'].'/'.$thumbname; |
2384 | file_put_contents($filepath,$data); // Save image to cache. | 2369 | file_put_contents($filepath, $content); // Save image to cache. |
2385 | if (resizeImage($filepath)) | 2370 | if (resizeImage($filepath)) |
2386 | { | 2371 | { |
2387 | header('Content-Type: image/jpeg'); | 2372 | header('Content-Type: image/jpeg'); |
@@ -2398,18 +2383,19 @@ function genThumbnail() | |||
2398 | // There is no thumbnail available for xkcd comics, so download the whole image and resize it. | 2383 | // There is no thumbnail available for xkcd comics, so download the whole image and resize it. |
2399 | // http://xkcd.com/327/ | 2384 | // http://xkcd.com/327/ |
2400 | // <img src="http://imgs.xkcd.com/comics/exploits_of_a_mom.png" title="<BLABLA>" alt="<BLABLA>" /> | 2385 | // <img src="http://imgs.xkcd.com/comics/exploits_of_a_mom.png" title="<BLABLA>" alt="<BLABLA>" /> |
2401 | list($headers, $data) = get_http_url($url, 5); | 2386 | list($headers, $content) = get_http_response($url, 5); |
2402 | if (strpos($headers[0], '200 OK') !== false) { | 2387 | if (strpos($headers[0], '200 OK') !== false) { |
2403 | // Extract the link to the thumbnail | 2388 | // Extract the link to the thumbnail |
2404 | preg_match('!<img src="(http://imgs.xkcd.com/comics/.*)" title="[^s]!',$data,$matches); | 2389 | preg_match('!<img src="(http://imgs.xkcd.com/comics/.*)" title="[^s]!', $content, $matches); |
2405 | if (!empty($matches[1])) | 2390 | if (!empty($matches[1])) |
2406 | { // Let's download the image. | 2391 | { // Let's download the image. |
2407 | $imageurl=$matches[1]; | 2392 | $imageurl=$matches[1]; |
2408 | // No control on image size, so wait long enough | 2393 | // No control on image size, so wait long enough |
2409 | list($headers, $data) = get_http_url($imageurl, 20); | 2394 | list($headers, $content) = get_http_response($imageurl, 20); |
2410 | if (strpos($headers[0], '200 OK') !== false) { | 2395 | if (strpos($headers[0], '200 OK') !== false) { |
2411 | $filepath=$GLOBALS['config']['CACHEDIR'].'/'.$thumbname; | 2396 | $filepath=$GLOBALS['config']['CACHEDIR'].'/'.$thumbname; |
2412 | file_put_contents($filepath,$data); // Save image to cache. | 2397 | // Save image to cache. |
2398 | file_put_contents($filepath, $content); | ||
2413 | if (resizeImage($filepath)) | 2399 | if (resizeImage($filepath)) |
2414 | { | 2400 | { |
2415 | header('Content-Type: image/jpeg'); | 2401 | header('Content-Type: image/jpeg'); |
@@ -2425,10 +2411,11 @@ function genThumbnail() | |||
2425 | { | 2411 | { |
2426 | // For all other domains, we try to download the image and make a thumbnail. | 2412 | // For all other domains, we try to download the image and make a thumbnail. |
2427 | // We allow 30 seconds max to download (and downloads are limited to 4 Mb) | 2413 | // We allow 30 seconds max to download (and downloads are limited to 4 Mb) |
2428 | list($headers, $data) = get_http_url($url, 30); | 2414 | list($headers, $content) = get_http_response($url, 30); |
2429 | if (strpos($headers[0], '200 OK') !== false) { | 2415 | if (strpos($headers[0], '200 OK') !== false) { |
2430 | $filepath=$GLOBALS['config']['CACHEDIR'].'/'.$thumbname; | 2416 | $filepath=$GLOBALS['config']['CACHEDIR'].'/'.$thumbname; |
2431 | file_put_contents($filepath,$data); // Save image to cache. | 2417 | // Save image to cache. |
2418 | file_put_contents($filepath, $content); | ||
2432 | if (resizeImage($filepath)) | 2419 | if (resizeImage($filepath)) |
2433 | { | 2420 | { |
2434 | header('Content-Type: image/jpeg'); | 2421 | header('Content-Type: image/jpeg'); |
diff --git a/tests/HttpUtils/GetHttpUrlTest.php b/tests/HttpUtils/GetHttpUrlTest.php index 76092b80..fd293505 100644 --- a/tests/HttpUtils/GetHttpUrlTest.php +++ b/tests/HttpUtils/GetHttpUrlTest.php | |||
@@ -6,7 +6,7 @@ | |||
6 | require_once 'application/HttpUtils.php'; | 6 | require_once 'application/HttpUtils.php'; |
7 | 7 | ||
8 | /** | 8 | /** |
9 | * Unitary tests for get_http_url() | 9 | * Unitary tests for get_http_response() |
10 | */ | 10 | */ |
11 | class GetHttpUrlTest extends PHPUnit_Framework_TestCase | 11 | class GetHttpUrlTest extends PHPUnit_Framework_TestCase |
12 | { | 12 | { |
@@ -15,12 +15,15 @@ class GetHttpUrlTest extends PHPUnit_Framework_TestCase | |||
15 | */ | 15 | */ |
16 | public function testGetInvalidLocalUrl() | 16 | public function testGetInvalidLocalUrl() |
17 | { | 17 | { |
18 | list($headers, $content) = get_http_url('/non/existent', 1); | 18 | // Local |
19 | $this->assertEquals('HTTP Error', $headers[0]); | 19 | list($headers, $content) = get_http_response('/non/existent', 1); |
20 | $this->assertRegexp( | 20 | $this->assertEquals('Invalid HTTP Url', $headers[0]); |
21 | '/failed to open stream: No such file or directory/', | 21 | $this->assertFalse($content); |
22 | $content | 22 | |
23 | ); | 23 | // Non HTTP |
24 | list($headers, $content) = get_http_response('ftp://save.tld/mysave', 1); | ||
25 | $this->assertEquals('Invalid HTTP Url', $headers[0]); | ||
26 | $this->assertFalse($content); | ||
24 | } | 27 | } |
25 | 28 | ||
26 | /** | 29 | /** |
@@ -28,11 +31,8 @@ class GetHttpUrlTest extends PHPUnit_Framework_TestCase | |||
28 | */ | 31 | */ |
29 | public function testGetInvalidRemoteUrl() | 32 | public function testGetInvalidRemoteUrl() |
30 | { | 33 | { |
31 | list($headers, $content) = get_http_url('http://non.existent', 1); | 34 | list($headers, $content) = @get_http_response('http://non.existent', 1); |
32 | $this->assertEquals('HTTP Error', $headers[0]); | 35 | $this->assertFalse($headers); |
33 | $this->assertRegexp( | 36 | $this->assertFalse($content); |
34 | '/Name or service not known/', | ||
35 | $content | ||
36 | ); | ||
37 | } | 37 | } |
38 | } | 38 | } |
diff --git a/tests/LinkUtilsTest.php b/tests/LinkUtilsTest.php new file mode 100644 index 00000000..c2257590 --- /dev/null +++ b/tests/LinkUtilsTest.php | |||
@@ -0,0 +1,85 @@ | |||
1 | <?php | ||
2 | |||
3 | require_once 'application/LinkUtils.php'; | ||
4 | |||
5 | /** | ||
6 | * Class LinkUtilsTest. | ||
7 | */ | ||
8 | class LinkUtilsTest extends PHPUnit_Framework_TestCase | ||
9 | { | ||
10 | /** | ||
11 | * Test html_extract_title() when the title is found. | ||
12 | */ | ||
13 | public function testHtmlExtractExistentTitle() | ||
14 | { | ||
15 | $title = 'Read me please.'; | ||
16 | $html = '<html><meta>stuff</meta><title>'. $title .'</title></html>'; | ||
17 | $this->assertEquals($title, html_extract_title($html)); | ||
18 | } | ||
19 | |||
20 | /** | ||
21 | * Test html_extract_title() when the title is not found. | ||
22 | */ | ||
23 | public function testHtmlExtractNonExistentTitle() | ||
24 | { | ||
25 | $html = '<html><meta>stuff</meta></html>'; | ||
26 | $this->assertFalse(html_extract_title($html)); | ||
27 | } | ||
28 | |||
29 | /** | ||
30 | * Test get_charset() with all priorities. | ||
31 | */ | ||
32 | public function testGetCharset() | ||
33 | { | ||
34 | $headers = array('Content-Type' => 'text/html; charset=Headers'); | ||
35 | $html = '<html><meta>stuff</meta><meta charset="Html"/></html>'; | ||
36 | $default = 'default'; | ||
37 | $this->assertEquals('headers', get_charset($headers, $html, $default)); | ||
38 | $this->assertEquals('html', get_charset(array(), $html, $default)); | ||
39 | $this->assertEquals($default, get_charset(array(), '', $default)); | ||
40 | $this->assertEquals('utf-8', get_charset(array(), '')); | ||
41 | } | ||
42 | |||
43 | /** | ||
44 | * Test headers_extract_charset() when the charset is found. | ||
45 | */ | ||
46 | public function testHeadersExtractExistentCharset() | ||
47 | { | ||
48 | $charset = 'x-MacCroatian'; | ||
49 | $headers = array('Content-Type' => 'text/html; charset='. $charset); | ||
50 | $this->assertEquals(strtolower($charset), headers_extract_charset($headers)); | ||
51 | } | ||
52 | |||
53 | /** | ||
54 | * Test headers_extract_charset() when the charset is not found. | ||
55 | */ | ||
56 | public function testHeadersExtractNonExistentCharset() | ||
57 | { | ||
58 | $headers = array(); | ||
59 | $this->assertFalse(headers_extract_charset($headers)); | ||
60 | |||
61 | $headers = array('Content-Type' => 'text/html'); | ||
62 | $this->assertFalse(headers_extract_charset($headers)); | ||
63 | } | ||
64 | |||
65 | /** | ||
66 | * Test html_extract_charset() when the charset is found. | ||
67 | */ | ||
68 | public function testHtmlExtractExistentCharset() | ||
69 | { | ||
70 | $charset = 'x-MacCroatian'; | ||
71 | $html = '<html><meta>stuff2</meta><meta charset="'. $charset .'"/></html>'; | ||
72 | $this->assertEquals(strtolower($charset), html_extract_charset($html)); | ||
73 | } | ||
74 | |||
75 | /** | ||
76 | * Test html_extract_charset() when the charset is not found. | ||
77 | */ | ||
78 | public function testHtmlExtractNonExistentCharset() | ||
79 | { | ||
80 | $html = '<html><meta>stuff</meta></html>'; | ||
81 | $this->assertFalse(html_extract_charset($html)); | ||
82 | $html = '<html><meta>stuff</meta><meta charset=""/></html>'; | ||
83 | $this->assertFalse(html_extract_charset($html)); | ||
84 | } | ||
85 | } | ||
diff --git a/tests/Url/UrlTest.php b/tests/Url/UrlTest.php index af6daaa4..425327ed 100644 --- a/tests/Url/UrlTest.php +++ b/tests/Url/UrlTest.php | |||
@@ -156,4 +156,22 @@ class UrlTest extends PHPUnit_Framework_TestCase | |||
156 | $this->assertEquals($strOn, add_trailing_slash($strOn)); | 156 | $this->assertEquals($strOn, add_trailing_slash($strOn)); |
157 | $this->assertEquals($strOn, add_trailing_slash($strOff)); | 157 | $this->assertEquals($strOn, add_trailing_slash($strOff)); |
158 | } | 158 | } |
159 | |||
160 | /** | ||
161 | * Test valid HTTP url. | ||
162 | */ | ||
163 | function testUrlIsHttp() | ||
164 | { | ||
165 | $url = new Url(self::$baseUrl); | ||
166 | $this->assertTrue($url->isHttp()); | ||
167 | } | ||
168 | |||
169 | /** | ||
170 | * Test non HTTP url. | ||
171 | */ | ||
172 | function testUrlIsNotHttp() | ||
173 | { | ||
174 | $url = new Url('ftp://save.tld/mysave'); | ||
175 | $this->assertFalse($url->isHttp()); | ||
176 | } | ||
159 | } | 177 | } |