diff options
author | ArthurHoaro <arthur@hoa.ro> | 2017-09-30 11:04:13 +0200 |
---|---|---|
committer | ArthurHoaro <arthur@hoa.ro> | 2017-10-28 14:35:49 +0200 |
commit | d65342e304f92643ba922200953cfebc51e1e482 (patch) | |
tree | 3097c77bb4dd0590c4644422b5dc4369a4186eb7 /application/HttpUtils.php | |
parent | a59bbf50d7530d7e82a91896a210b9da49cb1568 (diff) | |
download | Shaarli-d65342e304f92643ba922200953cfebc51e1e482.tar.gz Shaarli-d65342e304f92643ba922200953cfebc51e1e482.tar.zst Shaarli-d65342e304f92643ba922200953cfebc51e1e482.zip |
Extract the title/charset during page download, and check content type
Use CURLOPT_WRITEFUNCTION to check the response code and content type (only allow HTML).
Also extract the title and charset during downloading chunk of data, and stop it when everything has been extracted.
Closes #579
Diffstat (limited to 'application/HttpUtils.php')
-rw-r--r-- | application/HttpUtils.php | 14 |
1 files changed, 10 insertions, 4 deletions
diff --git a/application/HttpUtils.php b/application/HttpUtils.php index 00835966..2edf5ce2 100644 --- a/application/HttpUtils.php +++ b/application/HttpUtils.php | |||
@@ -3,9 +3,11 @@ | |||
3 | * GET an HTTP URL to retrieve its content | 3 | * GET an HTTP URL to retrieve its content |
4 | * Uses the cURL library or a fallback method | 4 | * Uses the cURL library or a fallback method |
5 | * | 5 | * |
6 | * @param string $url URL to get (http://...) | 6 | * @param string $url URL to get (http://...) |
7 | * @param int $timeout network timeout (in seconds) | 7 | * @param int $timeout network timeout (in seconds) |
8 | * @param int $maxBytes maximum downloaded bytes (default: 4 MiB) | 8 | * @param int $maxBytes maximum downloaded bytes (default: 4 MiB) |
9 | * @param callable|string $curlWriteFunction Optional callback called during the download (cURL CURLOPT_WRITEFUNCTION). | ||
10 | * Can be used to add download conditions on the headers (response code, content type, etc.). | ||
9 | * | 11 | * |
10 | * @return array HTTP response headers, downloaded content | 12 | * @return array HTTP response headers, downloaded content |
11 | * | 13 | * |
@@ -29,7 +31,7 @@ | |||
29 | * @see http://stackoverflow.com/q/9183178 | 31 | * @see http://stackoverflow.com/q/9183178 |
30 | * @see http://stackoverflow.com/q/1462720 | 32 | * @see http://stackoverflow.com/q/1462720 |
31 | */ | 33 | */ |
32 | function get_http_response($url, $timeout = 30, $maxBytes = 4194304) | 34 | function get_http_response($url, $timeout = 30, $maxBytes = 4194304, $curlWriteFunction = null) |
33 | { | 35 | { |
34 | $urlObj = new Url($url); | 36 | $urlObj = new Url($url); |
35 | $cleanUrl = $urlObj->idnToAscii(); | 37 | $cleanUrl = $urlObj->idnToAscii(); |
@@ -75,6 +77,10 @@ function get_http_response($url, $timeout = 30, $maxBytes = 4194304) | |||
75 | curl_setopt($ch, CURLOPT_TIMEOUT, $timeout); | 77 | curl_setopt($ch, CURLOPT_TIMEOUT, $timeout); |
76 | curl_setopt($ch, CURLOPT_USERAGENT, $userAgent); | 78 | curl_setopt($ch, CURLOPT_USERAGENT, $userAgent); |
77 | 79 | ||
80 | if (is_callable($curlWriteFunction)) { | ||
81 | curl_setopt($ch, CURLOPT_WRITEFUNCTION, $curlWriteFunction); | ||
82 | } | ||
83 | |||
78 | // Max download size management | 84 | // Max download size management |
79 | curl_setopt($ch, CURLOPT_BUFFERSIZE, 1024); | 85 | curl_setopt($ch, CURLOPT_BUFFERSIZE, 1024); |
80 | curl_setopt($ch, CURLOPT_NOPROGRESS, false); | 86 | curl_setopt($ch, CURLOPT_NOPROGRESS, false); |