diff options
Diffstat (limited to 'application')
-rw-r--r-- | application/ApplicationUtils.php | 2 | ||||
-rw-r--r-- | application/HttpUtils.php | 49 | ||||
-rw-r--r-- | application/LinkDB.php | 118 | ||||
-rw-r--r-- | application/LinkFilter.php | 259 | ||||
-rw-r--r-- | application/LinkUtils.php | 79 | ||||
-rw-r--r-- | application/Url.php | 23 | ||||
-rw-r--r-- | application/Utils.php | 30 |
7 files changed, 441 insertions, 119 deletions
diff --git a/application/ApplicationUtils.php b/application/ApplicationUtils.php index 274331e1..978fc9da 100644 --- a/application/ApplicationUtils.php +++ b/application/ApplicationUtils.php | |||
@@ -19,7 +19,7 @@ class ApplicationUtils | |||
19 | */ | 19 | */ |
20 | public static function getLatestGitVersionCode($url, $timeout=2) | 20 | public static function getLatestGitVersionCode($url, $timeout=2) |
21 | { | 21 | { |
22 | list($headers, $data) = get_http_url($url, $timeout); | 22 | list($headers, $data) = get_http_response($url, $timeout); |
23 | 23 | ||
24 | if (strpos($headers[0], '200 OK') === false) { | 24 | if (strpos($headers[0], '200 OK') === false) { |
25 | error_log('Failed to retrieve ' . $url); | 25 | error_log('Failed to retrieve ' . $url); |
diff --git a/application/HttpUtils.php b/application/HttpUtils.php index 499220c5..e2c1cb47 100644 --- a/application/HttpUtils.php +++ b/application/HttpUtils.php | |||
@@ -13,7 +13,7 @@ | |||
13 | * [1] = URL content (downloaded data) | 13 | * [1] = URL content (downloaded data) |
14 | * | 14 | * |
15 | * Example: | 15 | * Example: |
16 | * list($headers, $data) = get_http_url('http://sebauvage.net/'); | 16 | * list($headers, $data) = get_http_response('http://sebauvage.net/'); |
17 | * if (strpos($headers[0], '200 OK') !== false) { | 17 | * if (strpos($headers[0], '200 OK') !== false) { |
18 | * echo 'Data type: '.htmlspecialchars($headers['Content-Type']); | 18 | * echo 'Data type: '.htmlspecialchars($headers['Content-Type']); |
19 | * } else { | 19 | * } else { |
@@ -24,31 +24,66 @@ | |||
24 | * @see http://php.net/manual/en/function.stream-context-create.php | 24 | * @see http://php.net/manual/en/function.stream-context-create.php |
25 | * @see http://php.net/manual/en/function.get-headers.php | 25 | * @see http://php.net/manual/en/function.get-headers.php |
26 | */ | 26 | */ |
27 | function get_http_url($url, $timeout = 30, $maxBytes = 4194304) | 27 | function get_http_response($url, $timeout = 30, $maxBytes = 4194304) |
28 | { | 28 | { |
29 | $urlObj = new Url($url); | ||
30 | if (! filter_var($url, FILTER_VALIDATE_URL) || ! $urlObj->isHttp()) { | ||
31 | return array(array(0 => 'Invalid HTTP Url'), false); | ||
32 | } | ||
33 | |||
29 | $options = array( | 34 | $options = array( |
30 | 'http' => array( | 35 | 'http' => array( |
31 | 'method' => 'GET', | 36 | 'method' => 'GET', |
32 | 'timeout' => $timeout, | 37 | 'timeout' => $timeout, |
33 | 'user_agent' => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:23.0)' | 38 | 'user_agent' => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:23.0)' |
34 | .' Gecko/20100101 Firefox/23.0' | 39 | .' Gecko/20100101 Firefox/23.0', |
40 | 'request_fulluri' => true, | ||
35 | ) | 41 | ) |
36 | ); | 42 | ); |
37 | 43 | ||
38 | $context = stream_context_create($options); | 44 | $context = stream_context_create($options); |
45 | stream_context_set_default($options); | ||
46 | |||
47 | list($headers, $finalUrl) = get_redirected_headers($urlObj->cleanup()); | ||
48 | if (! $headers || strpos($headers[0], '200 OK') === false) { | ||
49 | return array($headers, false); | ||
50 | } | ||
39 | 51 | ||
40 | try { | 52 | try { |
41 | // TODO: catch Exception in calling code (thumbnailer) | 53 | // TODO: catch Exception in calling code (thumbnailer) |
42 | $content = file_get_contents($url, false, $context, -1, $maxBytes); | 54 | $content = file_get_contents($finalUrl, false, $context, -1, $maxBytes); |
43 | } catch (Exception $exc) { | 55 | } catch (Exception $exc) { |
44 | return array(array(0 => 'HTTP Error'), $exc->getMessage()); | 56 | return array(array(0 => 'HTTP Error'), $exc->getMessage()); |
45 | } | 57 | } |
46 | 58 | ||
47 | if (!$content) { | 59 | return array($headers, $content); |
48 | return array(array(0 => 'HTTP Error'), ''); | 60 | } |
61 | |||
62 | /** | ||
63 | * Retrieve HTTP headers, following n redirections (temporary and permanent). | ||
64 | * | ||
65 | * @param string $url initial URL to reach. | ||
66 | * @param int $redirectionLimit maximum number of redirections to follow. | ||
67 | * | ||
68 | * @return array | ||
69 | */ | ||
70 | function get_redirected_headers($url, $redirectionLimit = 3) | ||
71 | { | ||
72 | $headers = get_headers($url, 1); | ||
73 | |||
74 | // Headers found, redirection found, and limit not reached. | ||
75 | if ($redirectionLimit-- > 0 | ||
76 | && !empty($headers) | ||
77 | && (strpos($headers[0], '301') !== false || strpos($headers[0], '302') !== false) | ||
78 | && !empty($headers['Location'])) { | ||
79 | |||
80 | $redirection = is_array($headers['Location']) ? end($headers['Location']) : $headers['Location']; | ||
81 | if ($redirection != $url) { | ||
82 | return get_redirected_headers($redirection, $redirectionLimit); | ||
83 | } | ||
49 | } | 84 | } |
50 | 85 | ||
51 | return array(get_headers($url, 1), $content); | 86 | return array($headers, $url); |
52 | } | 87 | } |
53 | 88 | ||
54 | /** | 89 | /** |
diff --git a/application/LinkDB.php b/application/LinkDB.php index f771ac8b..19ca6435 100644 --- a/application/LinkDB.php +++ b/application/LinkDB.php | |||
@@ -17,8 +17,10 @@ | |||
17 | * - private: Is this link private? 0=no, other value=yes | 17 | * - private: Is this link private? 0=no, other value=yes |
18 | * - tags: tags attached to this entry (separated by spaces) | 18 | * - tags: tags attached to this entry (separated by spaces) |
19 | * - title Title of the link | 19 | * - title Title of the link |
20 | * - url URL of the link. Can be absolute or relative. | 20 | * - url URL of the link. Used for displayable links (no redirector, relative, etc.). |
21 | * Can be absolute or relative. | ||
21 | * Relative URLs are permalinks (e.g.'?m-ukcw') | 22 | * Relative URLs are permalinks (e.g.'?m-ukcw') |
23 | * - real_url Absolute processed URL. | ||
22 | * | 24 | * |
23 | * Implements 3 interfaces: | 25 | * Implements 3 interfaces: |
24 | * - ArrayAccess: behaves like an associative array; | 26 | * - ArrayAccess: behaves like an associative array; |
@@ -332,114 +334,20 @@ You use the community supported version of the original Shaarli project, by Seba | |||
332 | } | 334 | } |
333 | 335 | ||
334 | /** | 336 | /** |
335 | * Returns the list of links corresponding to a full-text search | 337 | * Filter links. |
336 | * | 338 | * |
337 | * Searches: | 339 | * @param string $type Type of filter. |
338 | * - in the URLs, title and description; | 340 | * @param mixed $request Search request, string or array. |
339 | * - are case-insensitive. | 341 | * @param bool $casesensitive Optional: Perform case sensitive filter |
342 | * @param bool $privateonly Optional: Returns private links only if true. | ||
340 | * | 343 | * |
341 | * Example: | 344 | * @return array filtered links |
342 | * print_r($mydb->filterFulltext('hollandais')); | ||
343 | * | ||
344 | * mb_convert_case($val, MB_CASE_LOWER, 'UTF-8') | ||
345 | * - allows to perform searches on Unicode text | ||
346 | * - see https://github.com/shaarli/Shaarli/issues/75 for examples | ||
347 | */ | ||
348 | public function filterFulltext($searchterms) | ||
349 | { | ||
350 | // FIXME: explode(' ',$searchterms) and perform a AND search. | ||
351 | // FIXME: accept double-quotes to search for a string "as is"? | ||
352 | $filtered = array(); | ||
353 | $search = mb_convert_case($searchterms, MB_CASE_LOWER, 'UTF-8'); | ||
354 | $keys = array('title', 'description', 'url', 'tags'); | ||
355 | |||
356 | foreach ($this->_links as $link) { | ||
357 | $found = false; | ||
358 | |||
359 | foreach ($keys as $key) { | ||
360 | if (strpos(mb_convert_case($link[$key], MB_CASE_LOWER, 'UTF-8'), | ||
361 | $search) !== false) { | ||
362 | $found = true; | ||
363 | } | ||
364 | } | ||
365 | |||
366 | if ($found) { | ||
367 | $filtered[$link['linkdate']] = $link; | ||
368 | } | ||
369 | } | ||
370 | krsort($filtered); | ||
371 | return $filtered; | ||
372 | } | ||
373 | |||
374 | /** | ||
375 | * Returns the list of links associated with a given list of tags | ||
376 | * | ||
377 | * You can specify one or more tags, separated by space or a comma, e.g. | ||
378 | * print_r($mydb->filterTags('linux programming')); | ||
379 | */ | 345 | */ |
380 | public function filterTags($tags, $casesensitive=false) | 346 | public function filter($type, $request, $casesensitive = false, $privateonly = false) |
381 | { | 347 | { |
382 | // Same as above, we use UTF-8 conversion to handle various graphemes (i.e. cyrillic, or greek) | 348 | $linkFilter = new LinkFilter($this->_links); |
383 | // FIXME: is $casesensitive ever true? | 349 | $requestFilter = is_array($request) ? implode(' ', $request) : $request; |
384 | $t = str_replace( | 350 | return $linkFilter->filter($type, trim($requestFilter), $casesensitive, $privateonly); |
385 | ',', ' ', | ||
386 | ($casesensitive ? $tags : mb_convert_case($tags, MB_CASE_LOWER, 'UTF-8')) | ||
387 | ); | ||
388 | |||
389 | $searchtags = explode(' ', $t); | ||
390 | $filtered = array(); | ||
391 | |||
392 | foreach ($this->_links as $l) { | ||
393 | $linktags = explode( | ||
394 | ' ', | ||
395 | ($casesensitive ? $l['tags']:mb_convert_case($l['tags'], MB_CASE_LOWER, 'UTF-8')) | ||
396 | ); | ||
397 | |||
398 | if (count(array_intersect($linktags, $searchtags)) == count($searchtags)) { | ||
399 | $filtered[$l['linkdate']] = $l; | ||
400 | } | ||
401 | } | ||
402 | krsort($filtered); | ||
403 | return $filtered; | ||
404 | } | ||
405 | |||
406 | |||
407 | /** | ||
408 | * Returns the list of articles for a given day, chronologically sorted | ||
409 | * | ||
410 | * Day must be in the form 'YYYYMMDD' (e.g. '20120125'), e.g. | ||
411 | * print_r($mydb->filterDay('20120125')); | ||
412 | */ | ||
413 | public function filterDay($day) | ||
414 | { | ||
415 | if (! checkDateFormat('Ymd', $day)) { | ||
416 | throw new Exception('Invalid date format'); | ||
417 | } | ||
418 | |||
419 | $filtered = array(); | ||
420 | foreach ($this->_links as $l) { | ||
421 | if (startsWith($l['linkdate'], $day)) { | ||
422 | $filtered[$l['linkdate']] = $l; | ||
423 | } | ||
424 | } | ||
425 | ksort($filtered); | ||
426 | return $filtered; | ||
427 | } | ||
428 | |||
429 | /** | ||
430 | * Returns the article corresponding to a smallHash | ||
431 | */ | ||
432 | public function filterSmallHash($smallHash) | ||
433 | { | ||
434 | $filtered = array(); | ||
435 | foreach ($this->_links as $l) { | ||
436 | if ($smallHash == smallHash($l['linkdate'])) { | ||
437 | // Yes, this is ugly and slow | ||
438 | $filtered[$l['linkdate']] = $l; | ||
439 | return $filtered; | ||
440 | } | ||
441 | } | ||
442 | return $filtered; | ||
443 | } | 351 | } |
444 | 352 | ||
445 | /** | 353 | /** |
diff --git a/application/LinkFilter.php b/application/LinkFilter.php new file mode 100644 index 00000000..cf647371 --- /dev/null +++ b/application/LinkFilter.php | |||
@@ -0,0 +1,259 @@ | |||
1 | <?php | ||
2 | |||
3 | /** | ||
4 | * Class LinkFilter. | ||
5 | * | ||
6 | * Perform search and filter operation on link data list. | ||
7 | */ | ||
8 | class LinkFilter | ||
9 | { | ||
10 | /** | ||
11 | * @var string permalinks. | ||
12 | */ | ||
13 | public static $FILTER_HASH = 'permalink'; | ||
14 | |||
15 | /** | ||
16 | * @var string text search. | ||
17 | */ | ||
18 | public static $FILTER_TEXT = 'fulltext'; | ||
19 | |||
20 | /** | ||
21 | * @var string tag filter. | ||
22 | */ | ||
23 | public static $FILTER_TAG = 'tags'; | ||
24 | |||
25 | /** | ||
26 | * @var string filter by day. | ||
27 | */ | ||
28 | public static $FILTER_DAY = 'FILTER_DAY'; | ||
29 | |||
30 | /** | ||
31 | * @var array all available links. | ||
32 | */ | ||
33 | private $links; | ||
34 | |||
35 | /** | ||
36 | * @param array $links initialization. | ||
37 | */ | ||
38 | public function __construct($links) | ||
39 | { | ||
40 | $this->links = $links; | ||
41 | } | ||
42 | |||
43 | /** | ||
44 | * Filter links according to parameters. | ||
45 | * | ||
46 | * @param string $type Type of filter (eg. tags, permalink, etc.). | ||
47 | * @param string $request Filter content. | ||
48 | * @param bool $casesensitive Optional: Perform case sensitive filter if true. | ||
49 | * @param bool $privateonly Optional: Only returns private links if true. | ||
50 | * | ||
51 | * @return array filtered link list. | ||
52 | */ | ||
53 | public function filter($type, $request, $casesensitive = false, $privateonly = false) | ||
54 | { | ||
55 | switch($type) { | ||
56 | case self::$FILTER_HASH: | ||
57 | return $this->filterSmallHash($request); | ||
58 | break; | ||
59 | case self::$FILTER_TEXT: | ||
60 | return $this->filterFulltext($request, $privateonly); | ||
61 | break; | ||
62 | case self::$FILTER_TAG: | ||
63 | return $this->filterTags($request, $casesensitive, $privateonly); | ||
64 | break; | ||
65 | case self::$FILTER_DAY: | ||
66 | return $this->filterDay($request); | ||
67 | break; | ||
68 | default: | ||
69 | return $this->noFilter($privateonly); | ||
70 | } | ||
71 | } | ||
72 | |||
73 | /** | ||
74 | * Unknown filter, but handle private only. | ||
75 | * | ||
76 | * @param bool $privateonly returns private link only if true. | ||
77 | * | ||
78 | * @return array filtered links. | ||
79 | */ | ||
80 | private function noFilter($privateonly = false) | ||
81 | { | ||
82 | if (! $privateonly) { | ||
83 | krsort($this->links); | ||
84 | return $this->links; | ||
85 | } | ||
86 | |||
87 | $out = array(); | ||
88 | foreach ($this->links as $value) { | ||
89 | if ($value['private']) { | ||
90 | $out[$value['linkdate']] = $value; | ||
91 | } | ||
92 | } | ||
93 | |||
94 | krsort($out); | ||
95 | return $out; | ||
96 | } | ||
97 | |||
98 | /** | ||
99 | * Returns the shaare corresponding to a smallHash. | ||
100 | * | ||
101 | * @param string $smallHash permalink hash. | ||
102 | * | ||
103 | * @return array $filtered array containing permalink data. | ||
104 | */ | ||
105 | private function filterSmallHash($smallHash) | ||
106 | { | ||
107 | $filtered = array(); | ||
108 | foreach ($this->links as $l) { | ||
109 | if ($smallHash == smallHash($l['linkdate'])) { | ||
110 | // Yes, this is ugly and slow | ||
111 | $filtered[$l['linkdate']] = $l; | ||
112 | return $filtered; | ||
113 | } | ||
114 | } | ||
115 | return $filtered; | ||
116 | } | ||
117 | |||
118 | /** | ||
119 | * Returns the list of links corresponding to a full-text search | ||
120 | * | ||
121 | * Searches: | ||
122 | * - in the URLs, titles, descriptions and tags; | ||
123 | * - are case-insensitive. | ||
124 | * | ||
125 | * Example: | ||
126 | * print_r($mydb->filterFulltext('hollandais')); | ||
127 | * | ||
128 | * mb_convert_case($val, MB_CASE_LOWER, 'UTF-8') | ||
129 | * - allows to perform searches on Unicode text | ||
130 | * - see https://github.com/shaarli/Shaarli/issues/75 for examples | ||
131 | * | ||
132 | * @param string $searchterms search query. | ||
133 | * @param bool $privateonly return only private links if true. | ||
134 | * | ||
135 | * @return array search results. | ||
136 | */ | ||
137 | private function filterFulltext($searchterms, $privateonly = false) | ||
138 | { | ||
139 | // FIXME: explode(' ',$searchterms) and perform a AND search. | ||
140 | // FIXME: accept double-quotes to search for a string "as is"? | ||
141 | $filtered = array(); | ||
142 | $search = mb_convert_case($searchterms, MB_CASE_LOWER, 'UTF-8'); | ||
143 | $explodedSearch = explode(' ', trim($search)); | ||
144 | $keys = array('title', 'description', 'url', 'tags'); | ||
145 | |||
146 | // Iterate over every stored link. | ||
147 | foreach ($this->links as $link) { | ||
148 | $found = false; | ||
149 | |||
150 | // ignore non private links when 'privateonly' is on. | ||
151 | if (! $link['private'] && $privateonly === true) { | ||
152 | continue; | ||
153 | } | ||
154 | |||
155 | // Iterate over searchable link fields. | ||
156 | foreach ($keys as $key) { | ||
157 | // Search full expression. | ||
158 | if (strpos( | ||
159 | mb_convert_case($link[$key], MB_CASE_LOWER, 'UTF-8'), | ||
160 | $search | ||
161 | ) !== false) { | ||
162 | $found = true; | ||
163 | } | ||
164 | |||
165 | if ($found) { | ||
166 | break; | ||
167 | } | ||
168 | } | ||
169 | |||
170 | if ($found) { | ||
171 | $filtered[$link['linkdate']] = $link; | ||
172 | } | ||
173 | } | ||
174 | |||
175 | krsort($filtered); | ||
176 | return $filtered; | ||
177 | } | ||
178 | |||
179 | /** | ||
180 | * Returns the list of links associated with a given list of tags | ||
181 | * | ||
182 | * You can specify one or more tags, separated by space or a comma, e.g. | ||
183 | * print_r($mydb->filterTags('linux programming')); | ||
184 | * | ||
185 | * @param string $tags list of tags separated by commas or blank spaces. | ||
186 | * @param bool $casesensitive ignore case if false. | ||
187 | * @param bool $privateonly returns private links only. | ||
188 | * | ||
189 | * @return array filtered links. | ||
190 | */ | ||
191 | public function filterTags($tags, $casesensitive = false, $privateonly = false) | ||
192 | { | ||
193 | $searchtags = $this->tagsStrToArray($tags, $casesensitive); | ||
194 | $filtered = array(); | ||
195 | |||
196 | foreach ($this->links as $l) { | ||
197 | // ignore non private links when 'privateonly' is on. | ||
198 | if (! $l['private'] && $privateonly === true) { | ||
199 | continue; | ||
200 | } | ||
201 | |||
202 | $linktags = $this->tagsStrToArray($l['tags'], $casesensitive); | ||
203 | |||
204 | if (count(array_intersect($linktags, $searchtags)) == count($searchtags)) { | ||
205 | $filtered[$l['linkdate']] = $l; | ||
206 | } | ||
207 | } | ||
208 | krsort($filtered); | ||
209 | return $filtered; | ||
210 | } | ||
211 | |||
212 | /** | ||
213 | * Returns the list of articles for a given day, chronologically sorted | ||
214 | * | ||
215 | * Day must be in the form 'YYYYMMDD' (e.g. '20120125'), e.g. | ||
216 | * print_r($mydb->filterDay('20120125')); | ||
217 | * | ||
218 | * @param string $day day to filter. | ||
219 | * | ||
220 | * @return array all links matching the given day. | ||
221 | * | ||
222 | * @throws Exception if date format is invalid. | ||
223 | */ | ||
224 | public function filterDay($day) | ||
225 | { | ||
226 | if (! checkDateFormat('Ymd', $day)) { | ||
227 | throw new Exception('Invalid date format'); | ||
228 | } | ||
229 | |||
230 | $filtered = array(); | ||
231 | foreach ($this->links as $l) { | ||
232 | if (startsWith($l['linkdate'], $day)) { | ||
233 | $filtered[$l['linkdate']] = $l; | ||
234 | } | ||
235 | } | ||
236 | ksort($filtered); | ||
237 | return $filtered; | ||
238 | } | ||
239 | |||
240 | /** | ||
241 | * Convert a list of tags (str) to an array. Also | ||
242 | * - handle case sensitivity. | ||
243 | * - accepts spaces and commas as separators. | ||
244 | * - remove private tags for loggedout users. | ||
245 | * | ||
246 | * @param string $tags string containing a list of tags. | ||
247 | * @param bool $casesensitive will convert everything to lowercase if false. | ||
248 | * | ||
249 | * @return array list of tags extracted from the string. | ||
250 | */ | ||
251 | public function tagsStrToArray($tags, $casesensitive) | ||
252 | { | ||
253 | // We use UTF-8 conversion to handle various graphemes (i.e. cyrillic, or greek) | ||
254 | $tagsOut = $casesensitive ? $tags : mb_convert_case($tags, MB_CASE_LOWER, 'UTF-8'); | ||
255 | $tagsOut = str_replace(',', ' ', $tagsOut); | ||
256 | |||
257 | return explode(' ', trim($tagsOut)); | ||
258 | } | ||
259 | } | ||
diff --git a/application/LinkUtils.php b/application/LinkUtils.php new file mode 100644 index 00000000..26dd6b67 --- /dev/null +++ b/application/LinkUtils.php | |||
@@ -0,0 +1,79 @@ | |||
1 | <?php | ||
2 | |||
3 | /** | ||
4 | * Extract title from an HTML document. | ||
5 | * | ||
6 | * @param string $html HTML content where to look for a title. | ||
7 | * | ||
8 | * @return bool|string Extracted title if found, false otherwise. | ||
9 | */ | ||
10 | function html_extract_title($html) | ||
11 | { | ||
12 | if (preg_match('!<title>(.*)</title>!is', $html, $matches)) { | ||
13 | return trim(str_replace("\n", ' ', $matches[1])); | ||
14 | } | ||
15 | return false; | ||
16 | } | ||
17 | |||
18 | /** | ||
19 | * Determine charset from downloaded page. | ||
20 | * Priority: | ||
21 | * 1. HTTP headers (Content type). | ||
22 | * 2. HTML content page (tag <meta charset>). | ||
23 | * 3. Use a default charset (default: UTF-8). | ||
24 | * | ||
25 | * @param array $headers HTTP headers array. | ||
26 | * @param string $htmlContent HTML content where to look for charset. | ||
27 | * @param string $defaultCharset Default charset to apply if other methods failed. | ||
28 | * | ||
29 | * @return string Determined charset. | ||
30 | */ | ||
31 | function get_charset($headers, $htmlContent, $defaultCharset = 'utf-8') | ||
32 | { | ||
33 | if ($charset = headers_extract_charset($headers)) { | ||
34 | return $charset; | ||
35 | } | ||
36 | |||
37 | if ($charset = html_extract_charset($htmlContent)) { | ||
38 | return $charset; | ||
39 | } | ||
40 | |||
41 | return $defaultCharset; | ||
42 | } | ||
43 | |||
44 | /** | ||
45 | * Extract charset from HTTP headers if it's defined. | ||
46 | * | ||
47 | * @param array $headers HTTP headers array. | ||
48 | * | ||
49 | * @return bool|string Charset string if found (lowercase), false otherwise. | ||
50 | */ | ||
51 | function headers_extract_charset($headers) | ||
52 | { | ||
53 | if (! empty($headers['Content-Type']) && strpos($headers['Content-Type'], 'charset=') !== false) { | ||
54 | preg_match('/charset="?([^; ]+)/i', $headers['Content-Type'], $match); | ||
55 | if (! empty($match[1])) { | ||
56 | return strtolower(trim($match[1])); | ||
57 | } | ||
58 | } | ||
59 | |||
60 | return false; | ||
61 | } | ||
62 | |||
63 | /** | ||
64 | * Extract charset from HTML content (tag <meta charset>). | ||
65 | * | ||
66 | * @param string $html HTML content where to look for charset. | ||
67 | * | ||
68 | * @return bool|string Charset string if found, false otherwise. | ||
69 | */ | ||
70 | function html_extract_charset($html) | ||
71 | { | ||
72 | // Get encoding specified in HTML header. | ||
73 | preg_match('#<meta .*charset="?([^">/]+)"? */?>#Usi', $html, $enc); | ||
74 | if (!empty($enc[1])) { | ||
75 | return strtolower($enc[1]); | ||
76 | } | ||
77 | |||
78 | return false; | ||
79 | } | ||
diff --git a/application/Url.php b/application/Url.php index af43b457..a4ac2e73 100644 --- a/application/Url.php +++ b/application/Url.php | |||
@@ -52,6 +52,18 @@ function get_url_scheme($url) | |||
52 | } | 52 | } |
53 | 53 | ||
54 | /** | 54 | /** |
55 | * Adds a trailing slash at the end of URL if necessary. | ||
56 | * | ||
57 | * @param string $url URL to check/edit. | ||
58 | * | ||
59 | * @return string $url URL with a trailing slash. | ||
60 | */ | ||
61 | function add_trailing_slash($url) | ||
62 | { | ||
63 | return $url . (!endsWith($url, '/') ? '/' : ''); | ||
64 | } | ||
65 | |||
66 | /** | ||
55 | * URL representation and cleanup utilities | 67 | * URL representation and cleanup utilities |
56 | * | 68 | * |
57 | * Form | 69 | * Form |
@@ -106,7 +118,7 @@ class Url | |||
106 | */ | 118 | */ |
107 | public function __construct($url) | 119 | public function __construct($url) |
108 | { | 120 | { |
109 | $this->parts = parse_url($url); | 121 | $this->parts = parse_url(trim($url)); |
110 | 122 | ||
111 | if (!empty($url) && empty($this->parts['scheme'])) { | 123 | if (!empty($url) && empty($this->parts['scheme'])) { |
112 | $this->parts['scheme'] = 'http'; | 124 | $this->parts['scheme'] = 'http'; |
@@ -189,4 +201,13 @@ class Url | |||
189 | } | 201 | } |
190 | return $this->parts['scheme']; | 202 | return $this->parts['scheme']; |
191 | } | 203 | } |
204 | |||
205 | /** | ||
206 | * Test if the Url is an HTTP one. | ||
207 | * | ||
208 | * @return bool true if the Url is an HTTP one, false otherwise. | ||
209 | */ | ||
210 | public function isHttp() { | ||
211 | return strpos(strtolower($this->parts['scheme']), 'http') !== false; | ||
212 | } | ||
192 | } | 213 | } |
diff --git a/application/Utils.php b/application/Utils.php index ac8bfbfc..10d60698 100644 --- a/application/Utils.php +++ b/application/Utils.php | |||
@@ -4,6 +4,24 @@ | |||
4 | */ | 4 | */ |
5 | 5 | ||
6 | /** | 6 | /** |
7 | * Logs a message to a text file | ||
8 | * | ||
9 | * The log format is compatible with fail2ban. | ||
10 | * | ||
11 | * @param string $logFile where to write the logs | ||
12 | * @param string $clientIp the client's remote IPv4/IPv6 address | ||
13 | * @param string $message the message to log | ||
14 | */ | ||
15 | function logm($logFile, $clientIp, $message) | ||
16 | { | ||
17 | file_put_contents( | ||
18 | $logFile, | ||
19 | date('Y/m/d H:i:s').' - '.$clientIp.' - '.strval($message).PHP_EOL, | ||
20 | FILE_APPEND | ||
21 | ); | ||
22 | } | ||
23 | |||
24 | /** | ||
7 | * Returns the small hash of a string, using RFC 4648 base64url format | 25 | * Returns the small hash of a string, using RFC 4648 base64url format |
8 | * | 26 | * |
9 | * Small hashes: | 27 | * Small hashes: |
@@ -64,12 +82,14 @@ function sanitizeLink(&$link) | |||
64 | 82 | ||
65 | /** | 83 | /** |
66 | * Checks if a string represents a valid date | 84 | * Checks if a string represents a valid date |
85 | |||
86 | * @param string $format The expected DateTime format of the string | ||
87 | * @param string $string A string-formatted date | ||
88 | * | ||
89 | * @return bool whether the string is a valid date | ||
67 | * | 90 | * |
68 | * @param string a string-formatted date | 91 | * @see http://php.net/manual/en/class.datetime.php |
69 | * @param format the expected DateTime format of the string | 92 | * @see http://php.net/manual/en/datetime.createfromformat.php |
70 | * @return whether the string is a valid date | ||
71 | * @see http://php.net/manual/en/class.datetime.php | ||
72 | * @see http://php.net/manual/en/datetime.createfromformat.php | ||
73 | */ | 93 | */ |
74 | function checkDateFormat($format, $string) | 94 | function checkDateFormat($format, $string) |
75 | { | 95 | { |