aboutsummaryrefslogtreecommitdiffhomepage
path: root/application
diff options
context:
space:
mode:
Diffstat (limited to 'application')
-rw-r--r--application/HttpUtils.php60
-rw-r--r--application/LinkFilter.php2
-rw-r--r--application/LinkUtils.php6
-rw-r--r--application/NetscapeBookmarkUtils.php47
-rw-r--r--application/Url.php42
5 files changed, 143 insertions, 14 deletions
diff --git a/application/HttpUtils.php b/application/HttpUtils.php
index af7cb371..0e1ce879 100644
--- a/application/HttpUtils.php
+++ b/application/HttpUtils.php
@@ -27,7 +27,9 @@
27function get_http_response($url, $timeout = 30, $maxBytes = 4194304) 27function get_http_response($url, $timeout = 30, $maxBytes = 4194304)
28{ 28{
29 $urlObj = new Url($url); 29 $urlObj = new Url($url);
30 if (! filter_var($url, FILTER_VALIDATE_URL) || ! $urlObj->isHttp()) { 30 $cleanUrl = $urlObj->indToAscii();
31
32 if (! filter_var($cleanUrl, FILTER_VALIDATE_URL) || ! $urlObj->isHttp()) {
31 return array(array(0 => 'Invalid HTTP Url'), false); 33 return array(array(0 => 'Invalid HTTP Url'), false);
32 } 34 }
33 35
@@ -35,22 +37,27 @@ function get_http_response($url, $timeout = 30, $maxBytes = 4194304)
35 'http' => array( 37 'http' => array(
36 'method' => 'GET', 38 'method' => 'GET',
37 'timeout' => $timeout, 39 'timeout' => $timeout,
38 'user_agent' => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:23.0)' 40 'user_agent' => 'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:45.0)'
39 .' Gecko/20100101 Firefox/23.0', 41 .' Gecko/20100101 Firefox/45.0',
40 'request_fulluri' => true, 42 'accept_language' => substr(setlocale(LC_COLLATE, 0), 0, 2) . ',en-US;q=0.7,en;q=0.3',
41 ) 43 )
42 ); 44 );
43 45
44 $context = stream_context_create($options);
45 stream_context_set_default($options); 46 stream_context_set_default($options);
47 list($headers, $finalUrl) = get_redirected_headers($cleanUrl);
48 if (! $headers || strpos($headers[0], '200 OK') === false) {
49 $options['http']['request_fulluri'] = true;
50 stream_context_set_default($options);
51 list($headers, $finalUrl) = get_redirected_headers($cleanUrl);
52 }
46 53
47 list($headers, $finalUrl) = get_redirected_headers($urlObj->cleanup());
48 if (! $headers || strpos($headers[0], '200 OK') === false) { 54 if (! $headers || strpos($headers[0], '200 OK') === false) {
49 return array($headers, false); 55 return array($headers, false);
50 } 56 }
51 57
52 try { 58 try {
53 // TODO: catch Exception in calling code (thumbnailer) 59 // TODO: catch Exception in calling code (thumbnailer)
60 $context = stream_context_create($options);
54 $content = file_get_contents($finalUrl, false, $context, -1, $maxBytes); 61 $content = file_get_contents($finalUrl, false, $context, -1, $maxBytes);
55 } catch (Exception $exc) { 62 } catch (Exception $exc) {
56 return array(array(0 => 'HTTP Error'), $exc->getMessage()); 63 return array(array(0 => 'HTTP Error'), $exc->getMessage());
@@ -60,16 +67,19 @@ function get_http_response($url, $timeout = 30, $maxBytes = 4194304)
60} 67}
61 68
62/** 69/**
63 * Retrieve HTTP headers, following n redirections (temporary and permanent). 70 * Retrieve HTTP headers, following n redirections (temporary and permanent ones).
64 * 71 *
65 * @param string $url initial URL to reach. 72 * @param string $url initial URL to reach.
66 * @param int $redirectionLimit max redirection follow.. 73 * @param int $redirectionLimit max redirection follow..
67 * 74 *
68 * @return array 75 * @return array HTTP headers, or false if it failed.
69 */ 76 */
70function get_redirected_headers($url, $redirectionLimit = 3) 77function get_redirected_headers($url, $redirectionLimit = 3)
71{ 78{
72 $headers = get_headers($url, 1); 79 $headers = get_headers($url, 1);
80 if (!empty($headers['location']) && empty($headers['Location'])) {
81 $headers['Location'] = $headers['location'];
82 }
73 83
74 // Headers found, redirection found, and limit not reached. 84 // Headers found, redirection found, and limit not reached.
75 if ($redirectionLimit-- > 0 85 if ($redirectionLimit-- > 0
@@ -79,6 +89,7 @@ function get_redirected_headers($url, $redirectionLimit = 3)
79 89
80 $redirection = is_array($headers['Location']) ? end($headers['Location']) : $headers['Location']; 90 $redirection = is_array($headers['Location']) ? end($headers['Location']) : $headers['Location'];
81 if ($redirection != $url) { 91 if ($redirection != $url) {
92 $redirection = getAbsoluteUrl($url, $redirection);
82 return get_redirected_headers($redirection, $redirectionLimit); 93 return get_redirected_headers($redirection, $redirectionLimit);
83 } 94 }
84 } 95 }
@@ -87,6 +98,35 @@ function get_redirected_headers($url, $redirectionLimit = 3)
87} 98}
88 99
/**
 * Get an absolute URL from a complete one, and another absolute/relative URL.
 *
 * Dot segments ("./", "../") in $newUrl are not resolved; they are appended as is.
 *
 * @param string $originalUrl The original complete URL.
 * @param string $newUrl      The new one, absolute or relative.
 *
 * @return string Final URL:
 *   - $newUrl if it was already an absolute URL.
 *   - $newUrl prefixed with the scheme of $originalUrl if it is protocol-relative ("//host/path").
 *   - if it was relative, absolute URL from $originalUrl path.
 *   - $newUrl unchanged if $originalUrl has no scheme or host to resolve against.
 */
function getAbsoluteUrl($originalUrl, $newUrl)
{
    $newScheme = parse_url($newUrl, PHP_URL_SCHEME);
    // Already an absolute URL.
    if (!empty($newScheme)) {
        return $newUrl;
    }

    $parts = parse_url($originalUrl);
    // Without a scheme and a host there is nothing to resolve against.
    if (empty($parts['scheme']) || empty($parts['host'])) {
        return $newUrl;
    }

    // Protocol-relative URL: only the scheme is inherited from the original URL.
    if (strncmp($newUrl, '//', 2) === 0) {
        return $parts['scheme'] . ':' . $newUrl;
    }

    $final = $parts['scheme'] . '://' . $parts['host'];
    if (!empty($parts['port'])) {
        // The port must be separated from the host by a colon.
        $final .= ':' . $parts['port'];
    }
    $final .= '/';

    $isRootRelative = $newUrl !== '' && $newUrl[0] === '/';
    if (!$isRootRelative && !empty($parts['path'])) {
        // Keep the directory part of the original path (up to and including
        // its last slash), without leading slashes as one was already added.
        $lastSlash = strrpos($parts['path'], '/');
        if ($lastSlash !== false) {
            $final .= ltrim(substr($parts['path'], 0, $lastSlash + 1), '/');
        }
    }

    $final .= ltrim($newUrl, '/');
    return $final;
}
128
129/**
90 * Returns the server's base URL: scheme://domain.tld[:port] 130 * Returns the server's base URL: scheme://domain.tld[:port]
91 * 131 *
92 * @param array $server the $_SERVER array 132 * @param array $server the $_SERVER array
diff --git a/application/LinkFilter.php b/application/LinkFilter.php
index 5e0d8015..e693b284 100644
--- a/application/LinkFilter.php
+++ b/application/LinkFilter.php
@@ -322,7 +322,7 @@ class LinkFilter
322 $tagsOut = $casesensitive ? $tags : mb_convert_case($tags, MB_CASE_LOWER, 'UTF-8'); 322 $tagsOut = $casesensitive ? $tags : mb_convert_case($tags, MB_CASE_LOWER, 'UTF-8');
323 $tagsOut = str_replace(',', ' ', $tagsOut); 323 $tagsOut = str_replace(',', ' ', $tagsOut);
324 324
325 return array_filter(explode(' ', trim($tagsOut)), 'strlen'); 325 return array_values(array_filter(explode(' ', trim($tagsOut)), 'strlen'));
326 } 326 }
327} 327}
328 328
diff --git a/application/LinkUtils.php b/application/LinkUtils.php
index d8dc8b5e..2df76ba8 100644
--- a/application/LinkUtils.php
+++ b/application/LinkUtils.php
@@ -9,8 +9,8 @@
9 */ 9 */
function html_extract_title($html)
{
    $found = array();

    // The <title> tag may carry attributes; the match is case-insensitive
    // and the title text may span several lines.
    if (! preg_match('!<title.*?>(.*?)</title>!is', $html, $found)) {
        return false;
    }

    // Line feeds are dropped (not replaced by spaces), then the result is trimmed.
    $title = str_replace("\n", '', $found[1]);
    return trim($title);
}
@@ -70,7 +70,7 @@ function headers_extract_charset($headers)
70function html_extract_charset($html) 70function html_extract_charset($html)
71{ 71{
72 // Get encoding specified in HTML header. 72 // Get encoding specified in HTML header.
73 preg_match('#<meta .*charset="?([^">/]+)"? */?>#Usi', $html, $enc); 73 preg_match('#<meta .*charset=["\']?([^";\'>/]+)["\']? */?>#Usi', $html, $enc);
74 if (!empty($enc[1])) { 74 if (!empty($enc[1])) {
75 return strtolower($enc[1]); 75 return strtolower($enc[1]);
76 } 76 }
diff --git a/application/NetscapeBookmarkUtils.php b/application/NetscapeBookmarkUtils.php
new file mode 100644
index 00000000..8a296705
--- /dev/null
+++ b/application/NetscapeBookmarkUtils.php
@@ -0,0 +1,47 @@
1<?php
2
/**
 * Utilities to import and export bookmarks using the Netscape format
 */
class NetscapeBookmarkUtils
{

    /**
     * Filters links and adds Netscape-formatted fields
     *
     * Added fields:
     * - timestamp  link addition date, using the Unix epoch format
     * - taglist    comma-separated tag list
     *
     * @param LinkDB $linkDb    The link datastore
     * @param string $selection Which links to export: (all|private|public)
     *
     * @throws Exception Invalid export selection, or a link date that cannot be parsed
     *
     * @return array The links to be exported, with additional fields
     */
    public static function filterAndFormat($linkDb, $selection)
    {
        // see tpl/export.html for possible values
        // Strict comparison: a loose in_array() would accept values such as true.
        if (! in_array($selection, array('all', 'public', 'private'), true)) {
            $shown = is_scalar($selection) ? $selection : gettype($selection);
            throw new Exception('Invalid export selection: "'.$shown.'"');
        }

        $bookmarkLinks = array();

        foreach ($linkDb as $link) {
            if ($link['private'] != 0 && $selection === 'public') {
                continue;
            }
            if ($link['private'] == 0 && $selection === 'private') {
                continue;
            }

            $date = DateTime::createFromFormat(LinkDB::LINK_DATE_FORMAT, $link['linkdate']);
            // createFromFormat() returns false when the date does not match the
            // format; fail with a readable message instead of a fatal error.
            if ($date === false) {
                throw new Exception('Invalid link date: "'.$link['linkdate'].'"');
            }

            $link['timestamp'] = $date->getTimestamp();
            $link['taglist'] = str_replace(' ', ',', $link['tags']);
            $bookmarkLinks[] = $link;
        }

        return $bookmarkLinks;
    }
}
diff --git a/application/Url.php b/application/Url.php
index af38c4d9..61a30a78 100644
--- a/application/Url.php
+++ b/application/Url.php
@@ -62,7 +62,21 @@ function add_trailing_slash($url)
62{ 62{
63 return $url . (!endsWith($url, '/') ? '/' : ''); 63 return $url . (!endsWith($url, '/') ? '/' : '');
64} 64}
/**
 * Converts an URL with an IDN host to a ASCII one.
 *
 * The input URL is returned unchanged when there is nothing to convert or
 * when the conversion cannot be done: no host in the URL, idn_to_ascii()
 * (PHP-intl) or the \http\Url class (pecl_http) not available, or the host
 * could not be converted.
 *
 * @param string $url Input URL.
 *
 * @return string converted URL.
 */
function url_with_idn_to_ascii($url)
{
    $parts = parse_url($url);
    if (empty($parts['host'])) {
        return $url;
    }
    if (! function_exists('idn_to_ascii') || ! class_exists('http\Url')) {
        return $url;
    }

    $asciiHost = idn_to_ascii($parts['host']);
    // idn_to_ascii() returns false on failure: keep the URL as it was given.
    if ($asciiHost === false) {
        return $url;
    }
    $parts['host'] = $asciiHost;

    $httpUrl = new \http\Url($parts);
    return $httpUrl->toString();
}
66/** 80/**
67 * URL representation and cleanup utilities 81 * URL representation and cleanup utilities
68 * 82 *
@@ -221,6 +235,22 @@ class Url
221 } 235 }
222 236
/**
 * Converts an URL with an International Domain Name host to a ASCII one.
 * This requires PHP-intl. If it's not available, just returns this->cleanup().
 *
 * The result of cleanup() is also returned unchanged when the URL has no
 * host, when the host cannot be converted, or when it is already ASCII.
 *
 * @return string converted cleaned up URL.
 */
public function indToAscii()
{
    $out = $this->cleanup();
    if (! function_exists('idn_to_ascii') || empty($this->parts['host'])) {
        return $out;
    }

    $host = $this->parts['host'];
    $asciiHost = idn_to_ascii($host);
    // idn_to_ascii() returns false on failure; substituting it into the URL
    // would strip the host altogether.
    if ($asciiHost === false || $asciiHost === $host) {
        return $out;
    }

    // Replace only the first occurrence found after the scheme separator, so
    // that the same text appearing later (path, query) is left untouched.
    $schemeEnd = strpos($out, '://');
    $searchFrom = ($schemeEnd === false) ? 0 : $schemeEnd + 3;
    $hostPos = strpos($out, $host, $searchFrom);
    if ($hostPos === false) {
        return $out;
    }

    return substr_replace($out, $asciiHost, $hostPos, strlen($host));
}
252
253 /**
224 * Get URL scheme. 254 * Get URL scheme.
225 * 255 *
226 * @return string the URL scheme or false if none is provided. 256 * @return string the URL scheme or false if none is provided.
@@ -233,6 +263,18 @@ class Url
233 } 263 }
234 264
/**
 * Host accessor.
 *
 * @return string the host stored in the parsed URL parts, or false when it is missing or empty.
 */
public function getHost()
{
    return empty($this->parts['host']) ? false : $this->parts['host'];
}
276
277 /**
236 * Test if the Url is an HTTP one. 278 * Test if the Url is an HTTP one.
237 * 279 *
238 * @return true is HTTP, false otherwise. 280 * @return true is HTTP, false otherwise.