From d65342e304f92643ba922200953cfebc51e1e482 Mon Sep 17 00:00:00 2001 From: ArthurHoaro Date: Sat, 30 Sep 2017 11:04:13 +0200 Subject: Extract the title/charset during page download, and check content type Use CURLOPT_WRITEFUNCTION to check the response code and content type (only allow HTML). Also extract the title and charset during downloading chunk of data, and stop it when everything has been extracted. Closes #579 --- index.php | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'index.php') diff --git a/index.php b/index.php index fb00a9fa..ac51038d 100644 --- a/index.php +++ b/index.php @@ -1428,16 +1428,10 @@ function renderPage($conf, $pluginManager, $LINKSDB, $history) // If this is an HTTP(S) link, we try go get the page to extract the title (otherwise we will to straight to the edit form.) if (empty($title) && strpos(get_url_scheme($url), 'http') !== false) { // Short timeout to keep the application responsive - list($headers, $content) = get_http_response($url, 4); - if (strpos($headers[0], '200 OK') !== false) { - // Retrieve charset. - $charset = get_charset($headers, $content); - // Extract title. - $title = html_extract_title($content); - // Re-encode title in utf-8 if necessary. - if (! empty($title) && strtolower($charset) != 'utf-8') { - $title = mb_convert_encoding($title, 'utf-8', $charset); - } + // The callback will fill $charset and $title with data from the downloaded page. + get_http_response($url, 25, 4194304, get_curl_download_callback($charset, $title)); + if (! empty($title) && strtolower($charset) != 'utf-8') { + $title = mb_convert_encoding($title, 'utf-8', $charset); } } -- cgit v1.2.3