From 5334090be04e66da5cb5c3ad487604b3733c5cac Mon Sep 17 00:00:00 2001 From: ArthurHoaro Date: Thu, 15 Oct 2020 11:20:33 +0200 Subject: Improve metadata retrieval (performance and accuracy) - Use dedicated function to download headers to avoid applying multiple regexps on headers - Also try to extract title from meta tags --- application/http/HttpAccess.php | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'application/http/HttpAccess.php') diff --git a/application/http/HttpAccess.php b/application/http/HttpAccess.php index 81d9e076..646a5264 100644 --- a/application/http/HttpAccess.php +++ b/application/http/HttpAccess.php @@ -14,9 +14,14 @@ namespace Shaarli\Http; */ class HttpAccess { - public function getHttpResponse($url, $timeout = 30, $maxBytes = 4194304, $curlWriteFunction = null) - { - return get_http_response($url, $timeout, $maxBytes, $curlWriteFunction); + public function getHttpResponse( + $url, + $timeout = 30, + $maxBytes = 4194304, + $curlHeaderFunction = null, + $curlWriteFunction = null + ) { + return get_http_response($url, $timeout, $maxBytes, $curlHeaderFunction, $curlWriteFunction); } public function getCurlDownloadCallback( @@ -24,16 +29,19 @@ class HttpAccess &$title, &$description, &$keywords, - $retrieveDescription, - $curlGetInfo = 'curl_getinfo' + $retrieveDescription ) { return get_curl_download_callback( $charset, $title, $description, $keywords, - $retrieveDescription, - $curlGetInfo + $retrieveDescription ); } + + public function getCurlHeaderCallback(&$charset, $curlGetInfo = 'curl_getinfo') + { + return get_curl_header_callback($charset, $curlGetInfo); + } } -- cgit v1.2.3