]>
Commit | Line | Data |
---|---|---|
1557cefb A |
1 | <?php |
2 | ||
/**
 * Pull the content of the first <title> element out of an HTML document.
 *
 * The match is case-insensitive and may span several lines; line feeds inside
 * the title are removed and surrounding whitespace is trimmed.
 *
 * @param string $html HTML content where to look for a title.
 *
 * @return bool|string Extracted title if found, false otherwise.
 */
function html_extract_title($html)
{
    $found = preg_match('!<title.*?>(.*?)</title>!is', $html, $matches);
    if (! $found) {
        return false;
    }

    // Collapse a multi-line title onto a single line before trimming it.
    $singleLine = str_replace("\n", '', $matches[1]);

    return trim($singleLine);
}
17 | ||
/**
 * Determine the charset of a downloaded page.
 *
 * Sources are tried in this order, the first non-empty result wins:
 *   1. HTTP headers (Content-Type).
 *   2. HTML content (tag <meta charset>).
 *   3. The provided default charset (default: UTF-8).
 *
 * @param array  $headers        HTTP headers array.
 * @param string $htmlContent    HTML content where to look for charset.
 * @param string $defaultCharset Default charset to apply if other methods failed.
 *
 * @return string Determined charset.
 */
function get_charset($headers, $htmlContent, $defaultCharset = 'utf-8')
{
    $fromHeaders = headers_extract_charset($headers);
    if ($fromHeaders) {
        return $fromHeaders;
    }

    // The HTML is only inspected when the headers gave nothing.
    $fromHtml = html_extract_charset($htmlContent);
    if ($fromHtml) {
        return $fromHtml;
    }

    return $defaultCharset;
}
43 | ||
/**
 * Extract charset from HTTP headers if it's defined.
 *
 * The `charset` parameter of the Content-Type header is matched
 * case-insensitively, and its value may be bare or double-quoted
 * (e.g. `charset=UTF-8`, `Charset=utf-8` or `charset="utf-8"`).
 *
 * @param array $headers HTTP headers array, read through its 'Content-Type' key.
 *
 * @return bool|string Charset string if found (lowercase, without quotes), false otherwise.
 */
function headers_extract_charset($headers)
{
    if (empty($headers['Content-Type'])) {
        return false;
    }

    // The regex alone decides whether a charset is present: it is case-insensitive,
    // and excluding '"' from the value keeps the closing quote of charset="..."
    // out of the captured charset.
    if (preg_match('/charset="?([^;" ]+)/i', $headers['Content-Type'], $match)
        && ! empty($match[1])
    ) {
        return strtolower(trim($match[1]));
    }

    return false;
}
62 | ||
/**
 * Extract the charset declared in HTML content through a <meta> tag.
 *
 * @param string $html HTML content where to look for charset.
 *
 * @return bool|string Charset string if found (lowercase), false otherwise.
 */
function html_extract_charset($html)
{
    // Look for a charset=... attribute inside a <meta> tag, quoted or not.
    $metaPattern = '#<meta .*charset=["\']?([^";\'>/]+)["\']? */?>#Usi';
    preg_match($metaPattern, $html, $found);

    return empty($found[1]) ? false : strtolower($found[1]);
}
141a86c5 A |
80 | |
/**
 * Count private links in given linklist.
 *
 * A link is counted when its 'private' entry loosely equals true.
 *
 * @param array|Countable $links Linklist.
 *
 * @return int Number of private links.
 */
function count_private($links)
{
    $privateCount = 0;

    foreach ($links as $entry) {
        if ($entry['private'] == true) {
            $privateCount++;
        }
    }

    return $privateCount;
}
9ccca401 A |
97 | |
/**
 * In a string, converts URLs to clickable links.
 *
 * Recognized URLs start with http(s)://, ftp://, file://, apt: or magnet:.
 *
 * @param string $text       input string.
 * @param string $redirector if a redirector is set, use it to generate links:
 *                           the href becomes the redirector followed by the
 *                           URL-encoded target.
 *
 * @return string returns $text with all links converted to HTML links.
 *
 * @see Function inspired from http://www.php.net/manual/en/function.preg-replace.php#85722
 */
function text2clickable($text, $redirector = '')
{
    $urlPattern = '!(((?:https?|ftp|file)://|apt:|magnet:)\S+[[:alnum:]]/?)!si';
    $useRedirector = ! empty($redirector);

    return preg_replace_callback(
        $urlPattern,
        function ($found) use ($redirector, $useRedirector) {
            $url = $found[1];
            // With a redirector, the final URL is urlencoded and appended to it.
            $target = $useRedirector ? $redirector . urlencode($url) : $url;

            return '<a href="' . $target . '">' . $url . '</a>';
        },
        $text
    );
}
124 | ||
/**
 * Auto-link hashtags.
 *
 * A hashtag is a '#' found at the start of a line or after a whitespace,
 * followed by one or more tag characters.
 *
 * @param string $description Given description.
 * @param string $indexUrl    Root URL.
 *
 * @return string Description with auto-linked hashtags.
 */
function hashtag_autolink($description, $indexUrl = '')
{
    /*
     * Unicode-aware tag characters, see http://stackoverflow.com/a/35498078/1484919
     *   \p{Pc} - connector punctuation (matches underscore)
     *   \p{N}  - numeric character in any script
     *   \p{L}  - letter from any language
     *   \p{Mn} - non-spacing marks (accents, umlauts, etc)
     */
    $hashtagPattern = '/(^|\s)#([\p{Pc}\p{N}\p{L}\p{Mn}]+)/mui';

    // $1 is the leading boundary (kept as is), $2 is the tag without its '#'.
    $href = $indexUrl . '?addtag=$2';
    $anchor = '<a href="' . $href . '" title="Hashtag $2">#$2</a>';

    return preg_replace($hashtagPattern, '$1' . $anchor, $description);
}
146 | ||
/**
 * This function inserts &nbsp; where relevant so that multiple spaces are properly displayed in HTML
 * even in the absence of <pre> (This is used in description to keep text formatting).
 *
 * A space is replaced by the &nbsp; entity when it is the first character of a
 * line or when it directly follows another space.
 *
 * @param string $text input text.
 *
 * @return string formatted text.
 */
function space2nbsp($text)
{
    // The entity is spelled out explicitly: a literal space here would make the
    // function a no-op, and a raw non-breaking space byte is encoding-dependent.
    return preg_replace('/(^| ) /m', '$1&nbsp;', $text);
}
159 | ||
/**
 * Format Shaarli's description.
 *
 * Steps, in order: URLs become links, hashtags become links, spaces go through
 * space2nbsp(), and newlines get <br> tags.
 *
 * @param string $description shaare's description.
 * @param string $redirector  if a redirector is set, use it to generate links.
 * @param string $indexUrl    URL to Shaarli's index.
 *
 * @return string formatted description.
 */
function format_description($description, $redirector = '', $indexUrl = '')
{
    $withLinks = text2clickable($description, $redirector);
    $withHashtags = hashtag_autolink($withLinks, $indexUrl);
    $withSpaces = space2nbsp($withHashtags);

    return nl2br($withSpaces);
}
d592daea A |
172 | |
/**
 * Generate a small hash for a link.
 *
 * The hash is computed by smallHash() from the creation date, formatted with
 * LinkDB::LINK_DATE_FORMAT, immediately followed by the link ID.
 *
 * @param DateTime $date Link creation date.
 * @param int      $id   Link ID.
 *
 * @return string the small hash generated from link data.
 */
function link_small_hash($date, $id)
{
    $formattedDate = $date->format(LinkDB::LINK_DATE_FORMAT);

    return smallHash($formattedDate . $id);
}