aboutsummaryrefslogtreecommitdiffhomepage
path: root/index.php
diff options
context:
space:
mode:
authorArthurHoaro <arthur@hoa.ro>2017-09-30 11:04:13 +0200
committerArthurHoaro <arthur@hoa.ro>2017-10-28 14:35:49 +0200
commitd65342e304f92643ba922200953cfebc51e1e482 (patch)
tree3097c77bb4dd0590c4644422b5dc4369a4186eb7 /index.php
parenta59bbf50d7530d7e82a91896a210b9da49cb1568 (diff)
downloadShaarli-d65342e304f92643ba922200953cfebc51e1e482.tar.gz
Shaarli-d65342e304f92643ba922200953cfebc51e1e482.tar.zst
Shaarli-d65342e304f92643ba922200953cfebc51e1e482.zip
Extract the title/charset during page download, and check content type
Use CURLOPT_WRITEFUNCTION to check the response code and content type (only HTML is allowed). Also extract the title and charset while the page is being downloaded chunk by chunk, and stop the download once everything has been extracted. Closes #579
Diffstat (limited to 'index.php')
-rw-r--r--index.php14
1 files changed, 4 insertions, 10 deletions
diff --git a/index.php b/index.php
index fb00a9fa..ac51038d 100644
--- a/index.php
+++ b/index.php
@@ -1428,16 +1428,10 @@ function renderPage($conf, $pluginManager, $LINKSDB, $history)
1428 // If this is an HTTP(S) link, we try go get the page to extract the title (otherwise we will to straight to the edit form.) 1428 // If this is an HTTP(S) link, we try go get the page to extract the title (otherwise we will to straight to the edit form.)
1429 if (empty($title) && strpos(get_url_scheme($url), 'http') !== false) { 1429 if (empty($title) && strpos(get_url_scheme($url), 'http') !== false) {
1430 // Short timeout to keep the application responsive 1430 // Short timeout to keep the application responsive
1431 list($headers, $content) = get_http_response($url, 4); 1431 // The callback will fill $charset and $title with data from the downloaded page.
1432 if (strpos($headers[0], '200 OK') !== false) { 1432 get_http_response($url, 25, 4194304, get_curl_download_callback($charset, $title));
1433 // Retrieve charset. 1433 if (! empty($title) && strtolower($charset) != 'utf-8') {
1434 $charset = get_charset($headers, $content); 1434 $title = mb_convert_encoding($title, 'utf-8', $charset);
1435 // Extract title.
1436 $title = html_extract_title($content);
1437 // Re-encode title in utf-8 if necessary.
1438 if (! empty($title) && strtolower($charset) != 'utf-8') {
1439 $title = mb_convert_encoding($title, 'utf-8', $charset);
1440 }
1441 } 1435 }
1442 } 1436 }
1443 1437