diff options
author | ArthurHoaro <arthur@hoa.ro> | 2017-09-30 11:04:13 +0200 |
---|---|---|
committer | ArthurHoaro <arthur@hoa.ro> | 2017-10-28 14:35:49 +0200 |
commit | d65342e304f92643ba922200953cfebc51e1e482 (patch) | |
tree | 3097c77bb4dd0590c4644422b5dc4369a4186eb7 /index.php | |
parent | a59bbf50d7530d7e82a91896a210b9da49cb1568 (diff) | |
download | Shaarli-d65342e304f92643ba922200953cfebc51e1e482.tar.gz Shaarli-d65342e304f92643ba922200953cfebc51e1e482.tar.zst Shaarli-d65342e304f92643ba922200953cfebc51e1e482.zip |
Extract the title/charset during page download, and check content type
Use CURLOPT_WRITEFUNCTION to check the response code and content type (only allow HTML).
Also extract the title and charset while the page is being downloaded chunk by chunk, and stop the download as soon as both have been extracted.
Closes #579
Diffstat (limited to 'index.php')
-rw-r--r-- | index.php | 14 |
1 files changed, 4 insertions, 10 deletions
@@ -1428,16 +1428,10 @@ function renderPage($conf, $pluginManager, $LINKSDB, $history) | |||
1428 | // If this is an HTTP(S) link, we try go get the page to extract the title (otherwise we will to straight to the edit form.) | 1428 | // If this is an HTTP(S) link, we try go get the page to extract the title (otherwise we will to straight to the edit form.) |
1429 | if (empty($title) && strpos(get_url_scheme($url), 'http') !== false) { | 1429 | if (empty($title) && strpos(get_url_scheme($url), 'http') !== false) { |
1430 | // Short timeout to keep the application responsive | 1430 | // Short timeout to keep the application responsive |
1431 | list($headers, $content) = get_http_response($url, 4); | 1431 | // The callback will fill $charset and $title with data from the downloaded page. |
1432 | if (strpos($headers[0], '200 OK') !== false) { | 1432 | get_http_response($url, 25, 4194304, get_curl_download_callback($charset, $title)); |
1433 | // Retrieve charset. | 1433 | if (! empty($title) && strtolower($charset) != 'utf-8') { |
1434 | $charset = get_charset($headers, $content); | 1434 | $title = mb_convert_encoding($title, 'utf-8', $charset); |
1435 | // Extract title. | ||
1436 | $title = html_extract_title($content); | ||
1437 | // Re-encode title in utf-8 if necessary. | ||
1438 | if (! empty($title) && strtolower($charset) != 'utf-8') { | ||
1439 | $title = mb_convert_encoding($title, 'utf-8', $charset); | ||
1440 | } | ||
1441 | } | 1435 | } |
1442 | } | 1436 | } |
1443 | 1437 | ||