]>
Commit | Line | Data |
---|---|---|
24619534 | 1 | <?php |
6f87a197 | 2 | /** |
3 | * poche, a read it later open source system | |
4 | * | |
5 | * @category poche | |
6 | * @author Nicolas Lœuillet <support@inthepoche.com> | |
7 | * @copyright 2013 | |
8 | * @license http://www.wtfpl.net/ see COPYING file | |
9 | */ | |
24619534 | 10 | |
/**
 * Builds the URL of the current request (scheme, host and request URI).
 *
 * Used to generate the poche URL for the bookmarklet.
 *
 * @return string URL assembled from $_SERVER['HTTPS'], $_SERVER['HTTP_HOST']
 *                and $_SERVER['REQUEST_URI']
 */
function get_poche_url()
{
    // HTTPS counts as enabled only when the server variable is set
    // and is neither "off" nor empty.
    $https = isset($_SERVER['HTTPS']) ? $_SERVER['HTTPS'] : null;
    $is_secure = ($https !== null && $https != "off" && $https != "");

    $scheme = $is_secure ? "https" : "http";

    return $scheme . "://" . $_SERVER['HTTP_HOST'] . $_SERVER['REQUEST_URI'];
}
25 | ||
/**
 * Retrieves the content located at a URL.
 *
 * Uses cURL when the extension is loaded, otherwise falls back to
 * file_get_contents() with an HTTP stream context. The content is only
 * returned when the final HTTP status is 200 or 301. When a charset is
 * declared in a <meta> tag, its name is lower-cased in the returned markup.
 *
 * NOTE(review): TLS certificate verification is deliberately disabled in
 * both transports, which exposes downloads to man-in-the-middle tampering.
 *
 * @param string $url     URL to download
 * @param int    $timeout maximum duration of the request, in seconds
 *
 * @return string|false the downloaded content, or FALSE on failure
 */
function get_external_file($url, $timeout = 15)
{
    // spoofing FireFox 18.0
    $useragent = "Mozilla/5.0 (Windows NT 5.1; rv:18.0) Gecko/20100101 Firefox/18.0";

    $data = false;
    $httpcode = 0;

    if (extension_loaded('curl')) {
        // Fetch feed from URL
        $curl = curl_init();
        curl_setopt($curl, CURLOPT_URL, $url);
        curl_setopt($curl, CURLOPT_TIMEOUT, $timeout);
        curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($curl, CURLOPT_HEADER, false);

        // FOR SSL do not verify certificate
        curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, FALSE);
        curl_setopt($curl, CURLOPT_AUTOREFERER, TRUE);

        // FeedBurner requires a proper USER-AGENT...
        // CURL_HTTP_VERSION_1_1 is a value for CURLOPT_HTTP_VERSION, not an option id.
        curl_setopt($curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
        curl_setopt($curl, CURLOPT_ENCODING, "gzip, deflate");
        curl_setopt($curl, CURLOPT_USERAGENT, $useragent);

        $data = curl_exec($curl);
        $httpcode = (int) curl_getinfo($curl, CURLINFO_HTTP_CODE);

        curl_close($curl);
    } else {
        // create http context and add timeout and user-agent
        $context = stream_context_create(array(
            'http' => array(
                'timeout' => $timeout,
                'header' => "User-Agent: " . $useragent, /* spoof Mozilla Firefox */
                'follow_location' => true
            ),
            // FOR SSL do not verify certificate
            'ssl' => array(
                'verify_peer' => false,
                'allow_self_signed' => true
            )
        ));

        // We download at most 4 MB from source. The offset must be 0:
        // a negative offset seeks from the end of the stream on PHP >= 7.1,
        // which remote streams do not support.
        $data = @file_get_contents($url, false, $context, 0, 4000000);

        // When redirects are followed the header list contains one status line
        // per response; keep the last one so that the decision is based on the
        // final response, as in the cURL branch.
        if (isset($http_response_header) && is_array($http_response_header)) {
            foreach ($http_response_header as $header_line) {
                if (preg_match('#^HTTP/\S+\s+(\d{3})#i', $header_line, $status)) {
                    $httpcode = (int) $status[1];
                }
            }
        }
    }

    // "&&" / "||" are required here: with "and" / "or" the assignment would
    // bind first and the status test would be ignored.
    $httpcodeOK = ($httpcode == 200 || $httpcode == 301);

    // if the transfer failed or the response is not OK
    if ($data === false || $data === null || !$httpcodeOK) {
        return FALSE;
    }

    // look for a charset declared in a meta tag
    $declared_charset = '';
    if (preg_match('#<meta .*charset=.*>#Usi', $data, $meta) && !empty($meta[0])) {
        if (preg_match('#charset=["\']?([^"\'\s;/>]+)#i', $meta[0], $enc)) {
            $declared_charset = $enc[1];
        }
    }

    // Normalise the declared charset name to lower case. Nothing is rewritten
    // when no charset was found, otherwise every bare "charset=" in the
    // document would be turned into "charset=utf-8".
    if ($declared_charset !== '') {
        $html_charset = strtolower($declared_charset);
        $data = str_replace('charset=' . $declared_charset, 'charset=' . $html_charset, $data);
    }

    return $data;
}
107 | ||
/**
 * Prepares a URL and retrieves its readable content before insertion
 * into the database.
 *
 * The URL is trimmed and entity-decoded, tracking suffixes are removed,
 * the page is downloaded, converted to UTF-8, optionally repaired with tidy
 * and finally handed to Readability.
 *
 * @param string $url URL submitted by the user
 *
 * @return array|false array with the keys 'title' and 'content', or FALSE
 *                     when the page could not be fetched or parsed
 */
function prepare_url($url)
{
    $parametres = array();
    $url = html_entity_decode(trim($url));

    // We remove the annoying parameters added by FeedBurner and GoogleFeedProxy (?utm_source=...)
    // from shaarli, by sebsauvage
    foreach (array('&utm_source=', '?utm_source=', '#xtor=RSS-') as $marker) {
        $i = strpos($url, $marker);
        if ($i !== false) {
            $url = substr($url, 0, $i);
        }
    }

    $raw = get_external_file($url, 15);

    // If nothing could be retrieved and the URL has no http(s) scheme,
    // try again with an explicit "http://" prefix.
    if (!preg_match('!^https?://!i', $url) && (!is_string($raw) || strlen($raw) <= 0)) {
        $url = 'http://' . $url;
        $raw = get_external_file($url, 15);
    }

    // Stop before tidy: tidy turns an empty input into a non-empty skeleton
    // document, which would hide the download failure.
    if (!is_string($raw) || strlen($raw) <= 0) {
        return FALSE;
    }

    $html = Encoding::toUTF8($raw);

    if (function_exists('tidy_parse_string')) {
        $tidy = tidy_parse_string($html, array(), 'UTF8');
        if ($tidy) {
            $tidy->cleanRepair();
            $html = $tidy->value;
        }
    }

    if (isset($html) and strlen($html) > 0)
    {
        $r = new Readability($html, $url);

        $r->convertLinksToFootnotes = CONVERT_LINKS_FOOTNOTES;
        $r->revertForcedParagraphElements = REVERT_FORCED_PARAGRAPH_ELEMENTS;

        if ($r->init())
        {
            $parametres['title'] = $r->articleTitle->innerHTML;
            $parametres['content'] = $r->articleContent->innerHTML;
            return $parametres;
        }
    }

    return FALSE;
}
154 | ||
/**
 * Rewrites the image URLs found in the body of an article so that they
 * point to a local copy stored in the article's assets directory.
 *
 * Every <img> tag with a double-quoted src attribute is considered, whether
 * its URL is relative or absolute. An image is only stored locally when its
 * file name carries a known image extension: the name comes from a remote,
 * untrusted page and is written under the assets directory, so arbitrary
 * extensions (for example ".php") must not be accepted. Images that are
 * skipped or cannot be downloaded keep their original URL.
 *
 * @param string $content HTML content of the article
 * @param string $url     URL of the article, used to resolve relative links
 * @param mixed  $id      identifier of the article (names the assets directory)
 *
 * @return string the content with the src of downloaded images replaced
 */
function filtre_picture($content, $url, $id)
{
    $allowed_extensions = array('jpg', 'jpeg', 'png', 'gif', 'bmp', 'webp', 'ico');

    $matches = array();
    preg_match_all('#<\s*(img)[^>]+src="([^"]*)"[^>]*>#Si', $content, $matches, PREG_SET_ORDER);

    foreach ($matches as $link)
    {
        $src = trim($link[2]);
        if ($src === '') {
            // nothing to resolve
            continue;
        }

        $absolute_path = get_absolute_link($src, $url);
        $filename = basename((string) parse_url($absolute_path, PHP_URL_PATH));
        $extension = strtolower(pathinfo($filename, PATHINFO_EXTENSION));

        if (!in_array($extension, $allowed_extensions, true)) {
            continue;
        }

        $directory = create_assets_directory($id);
        $fullpath = $directory . '/' . $filename;
        download_pictures($absolute_path, $fullpath);

        // Only point the article at the local copy when a non-empty file
        // is really there.
        clearstatcache();
        if (is_file($fullpath) && filesize($fullpath) > 0) {
            // Replace the exact attribute rather than the bare URL, so that a
            // URL which is a substring of another one is left untouched.
            $content = str_replace('src="' . $link[2] . '"', 'src="' . $fullpath . '"', $content);
        }
    }

    return $content;
}
179 | ||
/**
 * Returns the absolute URL of a link found in a page.
 *
 * @param string $relative_link link as written in the page (relative,
 *                              protocol-relative or already absolute)
 * @param string $url           URL of the page containing the link
 *
 * @return string the absolute URL; $relative_link is returned unchanged when
 *                it already has a scheme or when $url cannot be parsed into
 *                a scheme and a host
 */
function get_absolute_link($relative_link, $url)
{
    $relative_link = (string) $relative_link;

    /* an empty reference designates the page itself */
    if ($relative_link === '') return $url;

    /* return if already absolute URL */
    if (parse_url($relative_link, PHP_URL_SCHEME) != '') return $relative_link;

    /* queries and anchors */
    if ($relative_link[0] == '#' || $relative_link[0] == '?') return $url . $relative_link;

    $base = parse_url($url);
    if (!is_array($base) || !isset($base['scheme']) || !isset($base['host'])) {
        return $relative_link;
    }

    /* protocol-relative link ("//host/path"): only the scheme is inherited */
    if (substr($relative_link, 0, 2) == '//') return $base['scheme'] . ':' . $relative_link;

    /* keep the port of the base URL, if any */
    $authority = $base['host'];
    if (isset($base['port'])) {
        $authority .= ':' . $base['port'];
    }

    /* remove non-directory element from path */
    $path = isset($base['path']) ? $base['path'] : '';
    $path = preg_replace('#/[^/]*$#', '', $path);

    /* destroy path if relative url points to root */
    if ($relative_link[0] == '/') $path = '';

    /* keep query string and fragment out of the path normalisation */
    $cut = strcspn($relative_link, '?#');
    $link_path = substr($relative_link, 0, $cut);
    $link_suffix = (string) substr($relative_link, $cut);

    /* dirty absolute URL */
    $abs = $authority . $path . '/' . $link_path;

    /* replace '//' or '/./' or '/foo/../' with '/' until nothing changes */
    $re = array('#(/\.?/)#', '#/(?!\.\.)[^/]+/\.\./#');
    do {
        $abs = preg_replace($re, '/', $abs, -1, $n);
    } while ($n > 0);

    /* absolute URL is ready! */
    return $base['scheme'] . '://' . $abs . $link_suffix;
}
211 | ||
/**
 * Downloads a picture and stores it on disk.
 *
 * An existing file at $fullpath is only replaced once the download has
 * succeeded, so a failed download neither deletes a previous copy nor
 * leaves an empty file behind.
 *
 * @param string $absolute_path absolute URL of the picture
 * @param string $fullpath      path of the file to write
 *
 * @return bool TRUE when the picture was downloaded and fully written,
 *              FALSE otherwise
 */
function download_pictures($absolute_path, $fullpath)
{
    $rawdata = get_external_file($absolute_path);

    if (!is_string($rawdata) || $rawdata === '') {
        return false;
    }

    if (file_exists($fullpath)) {
        unlink($fullpath);
    }

    $fp = fopen($fullpath, 'x');
    if ($fp === false) {
        return false;
    }

    $written = fwrite($fp, $rawdata);
    fclose($fp);

    return $written === strlen($rawdata);
}
227 | ||
/**
 * Creates the media directory of an article.
 *
 * The assets root (ABS_PATH) is created first when missing, then the
 * directory of the article inside it. Both are created with mode 0705.
 *
 * @param mixed $id identifier of the article, appended to ABS_PATH
 *
 * @return string path of the article directory
 */
function create_assets_directory($id)
{
    $article_directory = ABS_PATH . $id;

    // parent first, then the article's own directory
    foreach (array(ABS_PATH, $article_directory) as $dir) {
        if (!is_dir($dir)) {
            mkdir($dir, 0705);
        }
    }

    return $article_directory;
}
245 | ||
/**
 * Deletes a directory (used for an article's pictures) and everything
 * it contains.
 *
 * @param string $directory path of the directory to delete
 *
 * @return bool|null result of rmdir() on $directory, or NULL when
 *                   $directory is not a directory
 */
function remove_directory($directory)
{
    if (!is_dir($directory)) {
        return;
    }

    $entries = array_diff(scandir($directory), array('.', '..'));
    foreach ($entries as $entry) {
        $path = "$directory/$entry";
        if (is_dir($path)) {
            // sub-directories are emptied recursively
            remove_directory($path);
        } else {
            unlink($path);
        }
    }

    return rmdir($directory);
}
259 | ||
/**
 * Renders the templates of the requested view.
 *
 * Relies on the global template engine ($tpl) and entry store ($store).
 *
 * @param string $view      'export', 'config', 'view', or any other value,
 *                          which is passed to the store to list entries
 * @param mixed  $id        identifier of the entry shown by the 'view' view
 * @param string $full_head 'yes' to draw head, home, js and footer around
 *                          the entries list
 */
function display_view($view, $id = 0, $full_head = 'yes')
{
    global $tpl, $store, $msg;

    if ($view == 'export') {
        $tpl->assign('export', myTool::renderJson($store->retrieveAll()));
        $tpl->draw('export');
        logm('export view');
        return;
    }

    if ($view == 'config') {
        $tpl->assign('load_all_js', 0);
        foreach (array('head', 'home', 'config', 'js', 'footer') as $template) {
            $tpl->draw($template);
        }
        logm('config view');
        return;
    }

    if ($view == 'view') {
        $entry = $store->retrieveOneById($id);

        if ($entry != NULL) {
            $content = $entry['content'];
            // tidy, when available, re-indents the stored markup for display
            if (function_exists('tidy_parse_string')) {
                $tidy = tidy_parse_string($content, array('indent'=>true, 'show-body-only' => true), 'UTF8');
                $tidy->cleanRepair();
                $content = $tidy->value;
            }

            $tpl->assign('id', $entry['id']);
            $tpl->assign('url', $entry['url']);
            $tpl->assign('title', $entry['title']);
            $tpl->assign('content', $content);
            $tpl->assign('is_fav', $entry['is_fav']);
            $tpl->assign('is_read', $entry['is_read']);
            $tpl->assign('load_all_js', 0);
            $tpl->draw('view');
        } else {
            logm('error in view call : entry is NULL');
        }

        logm('view link #' . $id);
        return;
    }

    // home view (default)
    $with_layout = ($full_head == 'yes');

    $tpl->assign('entries', $store->getEntriesByView($view));

    if ($with_layout) {
        $tpl->assign('load_all_js', 1);
        $tpl->draw('head');
        $tpl->draw('home');
    }

    $tpl->draw('entries');

    if ($with_layout) {
        $tpl->draw('js');
        $tpl->draw('footer');
    }
}
326 | ||
/**
 * Runs an action on the store (add, delete, toggle favorite, toggle archive)
 * and records the outcome in the global message handler.
 *
 * @param string $action 'add', 'delete', 'toggle_fav' or 'toggle_archive';
 *                       any other value does nothing
 * @param string $url    URL of the link (used by 'add')
 * @param mixed  $id     identifier of the entry (used by the other actions)
 */
function action_to_do($action, $url, $id = 0)
{
    global $store, $msg;

    switch ($action)
    {
        case 'add':
            // Nothing to add. "break" is used because this switch is not
            // inside a loop; "continue" here raises a warning since PHP 7.3.
            if ($url == '') {
                break;
            }

            if (MyTool::isUrl($url)) {
                if ($parametres_url = prepare_url($url)) {
                    if ($store->add($url, $parametres_url['title'], $parametres_url['content'])) {
                        $last_id = $store->getLastId();
                        if (DOWNLOAD_PICTURES) {
                            // NOTE(review): the rewritten content is not written back
                            // to the store here, so the saved article presumably still
                            // references the remote pictures — confirm against the store API.
                            $content = filtre_picture($parametres_url['content'], $url, $last_id);
                        }
                        $msg->add('s', 'the link has been added successfully');
                    }
                    else {
                        $msg->add('e', 'error during insertion : the link wasn\'t added');
                    }
                }
                else {
                    $msg->add('e', 'error during url preparation : the link wasn\'t added');
                    logm('error during url preparation');
                }
            }
            else {
                $msg->add('e', 'error during url preparation : the link is not valid');
                logm($url . ' is not a valid url');
            }

            logm('add link ' . $url);
            break;
        case 'delete':
            if ($store->deleteById($id)) {
                // also drop the pictures stored for this entry
                remove_directory(ABS_PATH . $id);
                $msg->add('s', 'the link has been deleted successfully');
                logm('delete link #' . $id);
            }
            else {
                $msg->add('e', 'the link wasn\'t deleted');
                logm('error : can\'t delete link #' . $id);
            }
            break;
        case 'toggle_fav' :
            $store->favoriteById($id);
            $msg->add('s', 'the favorite toggle has been done successfully');
            logm('mark as favorite link #' . $id);
            break;
        case 'toggle_archive' :
            $store->archiveById($id);
            $msg->add('s', 'the archive toggle has been done successfully');
            logm('archive link #' . $id);
            break;
        default:
            break;
    }
}
390 | ||
/**
 * Appends a line to ./log.txt (relative to the current working directory).
 *
 * Each line has the form "date - remote address - message".
 *
 * @param mixed $message message to log; converted to a string
 */
function logm($message)
{
    // REMOTE_ADDR is not defined when the script runs outside a web request.
    $remote_addr = isset($_SERVER["REMOTE_ADDR"]) ? $_SERVER["REMOTE_ADDR"] : '-';

    // Messages may embed user-supplied URLs: keep each entry on a single line
    // so that a message cannot forge additional log lines.
    $message = str_replace(array("\r\n", "\r", "\n"), ' ', strval($message));

    $t = strval(date('Y/m/d_H:i:s')).' - '.$remote_addr.' - '.$message."\n";

    // LOCK_EX prevents concurrent requests from interleaving their lines.
    file_put_contents('./log.txt', $t, FILE_APPEND | LOCK_EX);
}