aboutsummaryrefslogtreecommitdiffhomepage
path: root/inc/3rdparty/libraries/humble-http-agent
diff options
context:
space:
mode:
authorNicolas Lœuillet <nicolas.loeuillet@gmail.com>2013-12-06 09:45:27 +0100
committerNicolas Lœuillet <nicolas.loeuillet@gmail.com>2013-12-06 09:45:27 +0100
commit42c80841c846610be280218d53fcde06b0f0063b (patch)
tree26f7b26af6ca27ec8d3d7b8579e93cfe8a85be22 /inc/3rdparty/libraries/humble-http-agent
parent59cc585271a5f253b15617d97e26a29403a929dc (diff)
downloadwallabag-42c80841c846610be280218d53fcde06b0f0063b.tar.gz
wallabag-42c80841c846610be280218d53fcde06b0f0063b.tar.zst
wallabag-42c80841c846610be280218d53fcde06b0f0063b.zip
[change] we now use Full-Text RSS 3.1, thank you so much @fivefilters
Diffstat (limited to 'inc/3rdparty/libraries/humble-http-agent')
-rw-r--r--inc/3rdparty/libraries/humble-http-agent/CookieJar.php404
-rw-r--r--inc/3rdparty/libraries/humble-http-agent/HumbleHttpAgent.php779
-rw-r--r--inc/3rdparty/libraries/humble-http-agent/RollingCurl.php402
-rw-r--r--inc/3rdparty/libraries/humble-http-agent/SimplePie_HumbleHttpAgent.php79
4 files changed, 1664 insertions, 0 deletions
diff --git a/inc/3rdparty/libraries/humble-http-agent/CookieJar.php b/inc/3rdparty/libraries/humble-http-agent/CookieJar.php
new file mode 100644
index 00000000..83e94f14
--- /dev/null
+++ b/inc/3rdparty/libraries/humble-http-agent/CookieJar.php
@@ -0,0 +1,404 @@
1<?php
2/**
3 * Cookie Jar
4 *
5 * PHP class for handling cookies, as defined by the Netscape spec:
6 * <http://curl.haxx.se/rfc/cookie_spec.html>
7 *
8 * This class should be used to handle cookies (storing cookies from HTTP response messages, and
9 * sending out cookies in HTTP request messages). This has been adapted for FiveFilters.org
10 * from the original version used in HTTP Navigator. See http://www.keyvan.net/code/http-navigator/
11 *
12 * This class is mainly based on Cookies.pm <http://search.cpan.org/author/GAAS/libwww-perl-5.65/
13 * lib/HTTP/Cookies.pm> from the libwww-perl collection <http://www.linpro.no/lwp/>.
14 * Unlike Cookies.pm, this class only supports the Netscape cookie spec, not RFC 2965.
15 *
16 * @version 0.5
17 * @date 2011-03-15
18 * @see http://php.net/HttpRequestPool
19 * @author Keyvan Minoukadeh
20 * @copyright 2011 Keyvan Minoukadeh
21 * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
22 */
23
class CookieJar
{
    /**
     * Cookies - array containing all cookies.
     *
     * <pre>
     * Cookies are stored like this:
     * [domain][path][name] = array
     * where array is:
     * 0 => value, 1 => secure, 2 => expires
     * </pre>
     * Domain keys are stored lower-cased (host names are case-insensitive).
     * @var array
     */
    public $cookies = array();

    /**
     * When true, debug() echoes diagnostic messages.
     * @var bool
     */
    public $debug = false;

    /**
     * Constructor
     */
    public function __construct() {
    }

    /**
     * Echo a diagnostic message (only when $this->debug is true).
     *
     * @param string $msg message to print
     * @param string $file optional file name, printed together with $line
     * @param int $line optional line number, printed together with $file
     * @return void
     */
    protected function debug($msg, $file=null, $line=null) {
        if (!$this->debug) return;
        $mem = round(memory_get_usage()/1024, 2);
        $memPeak = round(memory_get_peak_usage()/1024, 2);
        echo '* ',$msg;
        if (isset($file, $line)) echo " ($file line $line)";
        echo ' - mem used: ',$mem," (peak: $memPeak)\n";
        // ob_flush() raises a notice when there is no active output buffer
        if (ob_get_level() > 0) ob_flush();
        flush();
    }

    /**
     * Get matching cookies
     *
     * Builds the value of a "Cookie" request header for the given URL from the
     * cookies currently held in the jar.
     *
     * @param string $url absolute URL of the request (scheme and host are required)
     * @return string|false "name=value; name=value" (empty string if nothing matches),
     *                      or false if $url has no scheme or host
     */
    public function getMatchingCookies($url)
    {
        $parts = @parse_url($url);
        if (!is_array($parts) || !isset($parts['scheme'], $parts['host'])) {
            return false;
        }
        // host names are case-insensitive; jar keys are lower-case
        $domain = strtolower($parts['host']);
        if (strpos($domain, '.') === false) $domain .= '.local';
        // a URL without a path component (e.g. http://example.com) requests "/"
        $request_path = (isset($parts['path']) && $parts['path'] != '') ? $parts['path'] : '/';
        $request_secure = (strtolower($parts['scheme']) == 'https');
        unset($parts);
        // RFC 2965 notes:
        // If multiple cookies satisfy the criteria above, they are ordered in
        // the Cookie header such that those with more specific Path attributes
        // precede those with less specific. Ordering with respect to other
        // attributes (e.g., Domain) is unspecified.
        $now = time();
        $matched_cookies = array();
        // domain - walk from the full host name towards the top-level domain
        $this->debug('Finding matching domains for '.$domain, __FILE__, __LINE__);
        while (strpos($domain, '.') !== false) {
            if (isset($this->cookies[$domain])) {
                $this->debug(' domain match found: '.$domain);
                $cookies = $this->cookies[$domain];
                // paths - find matching paths starting from most specific (longest)
                $this->debug(' - Finding matching paths for '.$request_path);
                $paths = array_keys($cookies);
                usort($paths, array($this, '_cmp_length'));
                foreach ($paths as $path) {
                    // skip cookie paths the request path does not path-match
                    if (!$this->_path_match($request_path, $path)) continue;
                    $this->debug(' path match found: '.$path);
                    foreach ($cookies[$path] as $name => $values) {
                        // secure cookies are only sent over https
                        if ($values[1] && !$request_secure) continue;
                        // skip non-session cookies which have expired
                        if (is_int($values[2]) && ($values[2] < $now)) continue;
                        $this->debug(' cookie match: '.$name.'='.$values[0]);
                        $matched_cookies[] = $name.'='.$values[0];
                    }
                }
            }
            $domain = $this->_reduce_domain($domain);
        }
        return implode('; ', $matched_cookies);
    }

    /**
     * Parse Set-Cookie values and store the acceptable cookies in the jar.
     *
     * @param string $url absolute URL of the request which produced the headers
     * @param array|string $set_cookies one or more "Set-Cookie" header values
     * @return void
     * @see extractCookies()
     */
    public function storeCookies($url, $set_cookies)
    {
        if (empty($set_cookies)) return;
        if (!is_array($set_cookies)) $set_cookies = array($set_cookies);
        $param = @parse_url($url);
        if (!is_array($param) || !isset($param['host'])) return;
        $request_host = strtolower($param['host']);
        if (strpos($request_host, '.') === false) $request_host .= '.local';
        $request_path = isset($param['path']) ? $param['path'] : '';
        if ($request_path == '') $request_path = '/';
        //
        // loop through set-cookie headers
        //
        foreach ($set_cookies as $set_cookie) {
            $this->debug('Parsing: '.$set_cookie);
            // temporary cookie store (before adding to jar)
            $tmp_cookie = array();
            foreach (explode(';', $set_cookie) as $x => $pair) {
                $key_val = explode('=', $pair, 2);
                if (count($key_val) != 2) {
                    // if the first param isn't a name=value pair, continue to the
                    // next set-cookie header
                    if ($x == 0) continue 2;
                    // check for secure flag
                    if (strtolower(trim($key_val[0])) == 'secure') $tmp_cookie['secure'] = true;
                    continue;
                }
                list($key, $val) = array_map('trim', $key_val);
                // first name=value pair is the cookie name and value; they are stored
                // under 'name' and 'value' to avoid conflicts with later parameters.
                if ($x == 0) {
                    $tmp_cookie = array('name'=>$key, 'value'=>$val);
                    continue;
                }
                $key = strtolower($key);
                if (in_array($key, array('expires', 'path', 'domain', 'secure'))) {
                    // domains are case-insensitive; normalise so lookups match
                    $tmp_cookie[$key] = ($key == 'domain') ? strtolower($val) : $val;
                }
            }
            //
            // set cookie
            //
            // check domain
            if (isset($tmp_cookie['domain']) && ($tmp_cookie['domain'] != $request_host) &&
                    ($tmp_cookie['domain'] != ".$request_host")) {
                $domain = $tmp_cookie['domain'];
                if ((strpos($domain, '.') === false) && ($domain != 'local')) {
                    $this->debug(' - domain "'.$domain.'" has no dot and is not a local domain');
                    continue;
                }
                if (preg_match('/\.[0-9]+$/', $domain)) {
                    $this->debug(' - domain "'.$domain.'" appears to be an ip address');
                    continue;
                }
                if (substr($domain, 0, 1) != '.') $domain = ".$domain";
                if (!$this->_domain_match($request_host, $domain)) {
                    $this->debug(' - request host "'.$request_host.'" does not domain-match "'.$domain.'"');
                    continue;
                }
            } else {
                // if domain is not specified in the set-cookie header, domain will
                // default to the request host
                $domain = $request_host;
            }
            // check path
            if (isset($tmp_cookie['path']) && ($tmp_cookie['path'] != '')) {
                $path = urldecode($tmp_cookie['path']);
                if (!$this->_path_match($request_path, $path)) {
                    $this->debug(' - request path "'.$request_path.'" does not path-match "'.$path.'"');
                    continue;
                }
            } else {
                // default path: the request path up to (not including) the last slash
                $path = substr($request_path, 0, strrpos($request_path, '/'));
                if ($path == '') $path = '/';
            }
            // check if secure
            $secure = isset($tmp_cookie['secure']);
            // check expiry: strtotime() returns false when the date cannot be parsed
            // (and may return a negative value); both cases are treated as "no
            // expiry", i.e. a session cookie, rather than as a deletion request.
            $expires = null;
            if (isset($tmp_cookie['expires'])) {
                $parsed = strtotime($tmp_cookie['expires']);
                if ($parsed !== false && $parsed >= 0) $expires = $parsed;
            }
            // set cookie
            $this->set_cookie($domain, $path, $tmp_cookie['name'], $tmp_cookie['value'], $secure, $expires);
        }
    }

    /**
     * Extract Set-Cookie values from raw HTTP response headers.
     *
     * Parsing stops at the first empty line after the first line. A line starting
     * with whitespace directly after a Set-Cookie header is treated as a folded
     * continuation of that header (RFC 2616 section 2.2) and appended to it with a
     * single space.
     *
     * @param string $h raw response headers
     * @return array list of Set-Cookie header values (without the header name)
     */
    public function extractCookies($h) {
        $headers = array();
        $last_match = false;
        $first_line = true;
        foreach (explode("\n", $h) as $line) {
            $line = rtrim($line);
            if (!$first_line) {
                // an empty line marks the end of the headers
                // (some servers do not use CRLF, so CR is optional)
                if (trim($line) == '') {
                    break;
                }
                // continuation line: append to previous Set-Cookie value
                if ($last_match && preg_match('/^\s+(.*)/', $line, $match)) {
                    $headers[count($headers)-1] .= ' '.rtrim($match[1]);
                    continue;
                }
            }
            $first_line = false;

            // split header name and value
            if (preg_match('/^Set-Cookie\s*:\s*(.*)/i', $line, $match)) {
                $headers[] = rtrim($match[1]);
                $last_match = true;
            } else {
                $last_match = false;
            }
        }
        return $headers;
    }

    /**
     * Set Cookie
     * @param string $domain
     * @param string $path
     * @param string $name cookie name
     * @param string $value cookie value
     * @param bool $secure
     * @param int $expires expiry time (null if session cookie, <= 0 will delete cookie)
     * @return void
     */
    public function set_cookie($domain, $path, $name, $value, $secure=false, $expires=null)
    {
        if ($domain == '' || $path == '' || $name == '') return;
        // jar keys are lower-case so lookups are case-insensitive on the domain
        $domain = strtolower($domain);
        // check if cookie needs to go
        if (isset($expires) && ($expires <= 0)) {
            unset($this->cookies[$domain][$path][$name]);
            return;
        }
        if ($value == '') return;
        $this->cookies[$domain][$path][$name] = array($value, $secure, $expires);
    }

    /**
     * Clear cookies - [domain [,path [,name]]] - call method with no arguments to clear all cookies.
     * @param string $domain
     * @param string $path
     * @param string $name
     * @return void
     */
    public function clear($domain=null, $path=null, $name=null)
    {
        if (!isset($domain)) {
            $this->cookies = array();
            return;
        }
        $domain = strtolower($domain);
        if (!isset($path)) {
            unset($this->cookies[$domain]);
        } elseif (!isset($name)) {
            unset($this->cookies[$domain][$path]);
        } else {
            unset($this->cookies[$domain][$path][$name]);
        }
    }

    /**
     * Compare string length - used for sorting (longest string first)
     * @return int
     */
    public function _cmp_length($a, $b)
    {
        $la = strlen($a); $lb = strlen($b);
        if ($la == $lb) return 0;
        return ($la > $lb) ? -1 : 1;
    }

    /**
     * Reduce domain: strips a leading dot if there is one, otherwise strips the
     * first label (keeping the dot), e.g. www.example.com -> .example.com -> example.com
     * @param string $domain
     * @return string
     */
    public function _reduce_domain($domain)
    {
        if ($domain == '') return '';
        if (substr($domain, 0, 1) == '.') return substr($domain, 1);
        return substr($domain, strpos($domain, '.'));
    }

    /**
     * Path match - check if path1 path-matches path2
     *
     * From RFC 2965:
     * <i>For two strings that represent paths, P1 and P2, P1 path-matches P2
     * if P2 is a prefix of P1 (including the case where P1 and P2 string-
     * compare equal). Thus, the string /tec/waldo path-matches /tec.</i>
     * @param string $path1
     * @param string $path2
     * @return bool
     */
    public function _path_match($path1, $path2)
    {
        return (substr($path1, 0, strlen($path2)) == $path2);
    }

    /**
     * Domain match - check if domain1 domain-matches domain2
     *
     * domain1 is repeatedly reduced (see _reduce_domain()) and compared,
     * case-insensitively, with domain2 until it matches or has no dot left.
     *
     * A few extracts from RFC 2965:
     * - A Set-Cookie2 from request-host y.x.foo.com for Domain=.foo.com
     *   would be rejected, because H is y.x and contains a dot.
     *
     * - A Set-Cookie2 from request-host x.foo.com for Domain=.foo.com
     *   would be accepted.
     *
     * - A Set-Cookie2 with Domain=.com or Domain=.com., will always be
     *   rejected, because there is no embedded dot.
     *
     * - A Set-Cookie2 from request-host example for Domain=.local will
     *   be accepted, because the effective host name for the request-
     *   host is example.local, and example.local domain-matches .local.
     *
     * The first point is not enforced by this implementation.
     *
     * @param string $domain1
     * @param string $domain2
     * @return bool
     */
    public function _domain_match($domain1, $domain2)
    {
        $domain1 = strtolower($domain1);
        $domain2 = strtolower($domain2);
        while (strpos($domain1, '.') !== false) {
            if ($domain1 == $domain2) return true;
            $domain1 = $this->_reduce_domain($domain1);
        }
        return false;
    }
}
404?> \ No newline at end of file
diff --git a/inc/3rdparty/libraries/humble-http-agent/HumbleHttpAgent.php b/inc/3rdparty/libraries/humble-http-agent/HumbleHttpAgent.php
new file mode 100644
index 00000000..e4f1b3b3
--- /dev/null
+++ b/inc/3rdparty/libraries/humble-http-agent/HumbleHttpAgent.php
@@ -0,0 +1,779 @@
1<?php
2/**
3 * Humble HTTP Agent
4 *
5 * This class is designed to take advantage of parallel HTTP requests
6 * offered by PHP's PECL HTTP extension or the curl_multi_* functions.
7 * For environments which do not have these options, it reverts to standard sequential
8 * requests (using file_get_contents())
9 *
10 * @version 1.1
11 * @date 2012-08-20
12 * @see http://php.net/HttpRequestPool
13 * @author Keyvan Minoukadeh
14 * @copyright 2011-2012 Keyvan Minoukadeh
15 * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
16 */
17
18class HumbleHttpAgent
19{
20 const METHOD_REQUEST_POOL = 1;
21 const METHOD_CURL_MULTI = 2;
22 const METHOD_FILE_GET_CONTENTS = 4;
23 //const UA_BROWSER = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0.1) Gecko/20100101 Firefox/4.0.1';
24 const UA_BROWSER = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.92 Safari/535.2';
25 const UA_PHP = 'PHP/5.2';
26 const REF_GOOGLE = 'http://www.google.co.uk/url?sa=t&source=web&cd=1';
27
28 protected $requests = array();
29 protected $redirectQueue = array();
30 protected $requestOptions;
31 protected $maxParallelRequests = 5;
32 protected $cache = null; //TODO
33 protected $httpContext;
34 protected $minimiseMemoryUse = false; //TODO
35 protected $method;
36 protected $cookieJar;
37 public $debug = false;
38 public $debugVerbose = false;
39 public $rewriteHashbangFragment = true; // see http://code.google.com/web/ajaxcrawling/docs/specification.html
40 public $maxRedirects = 5;
41 public $userAgentMap = array();
42 public $rewriteUrls = array();
43 public $userAgentDefault;
44 public $referer;
45 //public $userAgent = 'Mozilla/5.0';
46
47 // Prevent certain file/mime types
48 // HTTP responses which match these content types will
49 // be returned without body.
50 public $headerOnlyTypes = array();
51 // URLs ending with one of these extensions will
52 // prompt Humble HTTP Agent to send a HEAD request first
53 // to see if returned content type matches $headerOnlyTypes.
54 public $headerOnlyClues = array('pdf','mp3','zip','exe','gif','gzip','gz','jpeg','jpg','mpg','mpeg','png','ppt','mov');
55 // AJAX triggers to search for.
56 // for AJAX sites, e.g. Blogger with its dynamic views templates.
57 public $ajaxTriggers = array("<meta name='fragment' content='!'",'<meta name="fragment" content="!"',"<meta content='!' name='fragment'",'<meta content="!" name="fragment"');
58
59 //TODO: set max file size
60 //TODO: normalise headers
61
/**
 * Set up defaults, choose the transport method and create the cookie jar.
 *
 * @param array|null $requestOptions options merged over the defaults
 *        ('timeout' => 15, 'redirect' => 0)
 * @param int|null $method one of the METHOD_* constants; any other value
 *        selects the first available of HttpRequestPool, curl_multi, file_get_contents
 */
public function __construct($requestOptions=null, $method=null) {
    $this->userAgentDefault = self::UA_BROWSER;
    $this->referer = self::REF_GOOGLE;
    // set the request method - only exact METHOD_* values are accepted
    // (a loose in_array() would also accept true or strings such as "1abc")
    $supported = array(self::METHOD_REQUEST_POOL, self::METHOD_CURL_MULTI, self::METHOD_FILE_GET_CONTENTS);
    if (is_numeric($method) && (int)$method == $method && in_array((int)$method, $supported, true)) {
        $this->method = (int)$method;
    } elseif (class_exists('HttpRequestPool')) {
        $this->method = self::METHOD_REQUEST_POOL;
    } elseif (function_exists('curl_multi_init')) {
        $this->method = self::METHOD_CURL_MULTI;
    } else {
        $this->method = self::METHOD_FILE_GET_CONTENTS;
    }
    if ($this->method == self::METHOD_CURL_MULTI) {
        require_once(dirname(__FILE__).'/RollingCurl.php');
    }
    // create cookie jar
    $this->cookieJar = new CookieJar();
    // set request options (redirect must be 0)
    $this->requestOptions = array(
        'timeout' => 15,
        'redirect' => 0 // we handle redirects manually so we can rewrite the new hashbang URLs that are creeping up over the web
        // TODO: test onprogress?
    );
    if (is_array($requestOptions)) {
        $this->requestOptions = array_merge($this->requestOptions, $requestOptions);
    }
    // stream context options mirroring the request options
    $this->httpContext = array(
        'http' => array(
            'ignore_errors' => true,
            'timeout' => $this->requestOptions['timeout'],
            'max_redirects' => $this->requestOptions['redirect'],
            'header' => "Accept: */*\r\n"
        )
    );
}
100
/**
 * Echo a diagnostic message when $this->debug is enabled; memory usage is
 * appended when $this->debugVerbose is also enabled.
 *
 * @param string $msg message to print
 * @return void
 */
protected function debug($msg) {
    if (!$this->debug) return;
    echo '* ',$msg;
    if ($this->debugVerbose) {
        $mem = round(memory_get_usage()/1024, 2);
        $memPeak = round(memory_get_peak_usage()/1024, 2);
        echo ' - mem used: ',$mem," (peak: $memPeak)";
    }
    echo "\n";
    // ob_flush() raises a notice when there is no active output buffer
    if (ob_get_level() > 0) ob_flush();
    flush();
}
112
/**
 * Pick the User-Agent for a URL.
 *
 * $this->userAgentMap is consulted first with the host name (a leading "www."
 * removed), then with the parent domain prefixed by a dot (e.g. ".example.org");
 * $this->userAgentDefault is used when neither key exists or the URL has no host.
 *
 * @param string $url
 * @param bool $asArray return array('User-Agent' => ...) instead of a header line
 * @return string|array
 */
protected function getUserAgent($url, $asArray=false) {
    $ua = $this->userAgentDefault;
    $host = @parse_url($url, PHP_URL_HOST);
    // parse_url() gives null/false when there is no usable host
    if (is_string($host) && $host !== '') {
        if (strtolower(substr($host, 0, 4)) == 'www.') {
            $host = substr($host, 4);
        }
        if ($host) {
            $try = array($host);
            $split = explode('.', $host);
            if (count($split) > 1) {
                array_shift($split);
                $try[] = '.'.implode('.', $split);
            }
            foreach ($try as $h) {
                if (isset($this->userAgentMap[$h])) {
                    $ua = $this->userAgentMap[$h];
                    break;
                }
            }
        }
    }
    if ($asArray) {
        return array('User-Agent' => $ua);
    }
    return 'User-Agent: '.$ua;
}
139
/**
 * Rewrite a hashbang URL (fragment starting with "!") so that the part after
 * "#!" is moved into an "_escaped_fragment_" query parameter.
 * See http://code.google.com/web/ajaxcrawling/docs/specification.html
 *
 * @param string $url
 * @return string rewritten URL, or $url unchanged when it has no hashbang fragment
 */
public function rewriteHashbangFragment($url) {
    // return $url if there's no '#!'
    if (strpos($url, '#!') === false) return $url;
    // split $url and rewrite
    // TODO: is SimplePie_IRI included?
    $iri = new SimplePie_IRI($url);
    // '#!' may appear later inside an ordinary fragment (e.g. "#a#!b"); only a
    // fragment which itself starts with '!' is a hashbang fragment
    if (substr((string)$iri->fragment, 0, 1) !== '!') return $url;
    $fragment = substr($iri->fragment, 1); // strip '!'
    $iri->fragment = null;
    if (isset($iri->query)) {
        parse_str($iri->query, $query);
    } else {
        $query = array();
    }
    $query['_escaped_fragment_'] = (string)$fragment;
    $iri->query = str_replace('%2F', '/', http_build_query($query)); // needed for some sites
    return $iri->get_iri();
}
157
/**
 * Build the "_escaped_fragment_" URL for a page which contains one of the
 * AJAX trigger strings listed in $this->ajaxTriggers.
 *
 * @param string $url URL the HTML was fetched from
 * @param string $html HTML (or its beginning) to search, case-insensitively
 * @return string|false $url with an empty "_escaped_fragment_" query parameter
 *                      added, or false if $html is empty or has no trigger
 */
public function getUglyURL($url, $html) {
    if ($html == '') return false;
    $found = false;
    foreach ($this->ajaxTriggers as $string) {
        // stripos() returns 0 for a match at the very start, so compare with false
        if (stripos($html, $string) !== false) {
            $found = true;
            break;
        }
    }
    if (!$found) return false;
    $iri = new SimplePie_IRI($url);
    if (isset($iri->query)) {
        parse_str($iri->query, $query);
    } else {
        $query = array();
    }
    $query['_escaped_fragment_'] = '';
    $iri->query = str_replace('%2F', '/', http_build_query($query)); // needed for some sites
    return $iri->get_iri();
}
178
/**
 * Drop the fragment from a URL: everything from the first '#' onwards.
 *
 * @param string $url
 * @return string $url itself when it contains no '#'
 */
public function removeFragment($url) {
    $hashAt = strpos($url, '#');
    return ($hashAt === false) ? $url : substr($url, 0, $hashAt);
}
187
/**
 * Apply the first applicable rule from $this->rewriteUrls to a URL.
 *
 * Each rule maps a search string to an array of replacement pairs; the first
 * rule whose search string occurs in $url and whose action is an array is
 * applied with strtr(). Non-array actions are ignored.
 *
 * @param string $url
 * @return string rewritten URL, or $url unchanged when no rule applies
 */
public function rewriteUrls($url) {
    foreach ($this->rewriteUrls as $needle => $replacements) {
        if (strpos($url, $needle) === false || !is_array($replacements)) {
            continue;
        }
        return strtr($url, $replacements);
    }
    return $url;
}
198
/**
 * Switch debug output on or off.
 *
 * @param bool $bool any value; it is converted to a boolean
 * @return void
 */
public function enableDebug($bool=true) {
    $this->debug = $bool ? true : false;
}
202
/**
 * Set the minimiseMemoryUse flag.
 *
 * @param bool $bool any value; it is converted to a boolean, as enableDebug() does
 * @return void
 */
public function minimiseMemoryUse($bool = true) {
    $this->minimiseMemoryUse = (bool)$bool;
}
206
/**
 * Set how many URLs are requested in parallel per batch.
 *
 * The value is converted to an integer and raised to at least 1: batches are
 * cut from the URL list with array_splice(), and a size of 0 would never
 * shrink the list.
 *
 * @param int $max
 * @return void
 */
public function setMaxParallelRequests($max) {
    $this->maxParallelRequests = max(1, (int)$max);
}
210
/**
 * Sanitise and validate an http(s) URL.
 *
 * @param string $url
 * @return string|false the sanitised URL, or false if it is not a valid
 *                      URL starting with http:// or https://
 */
public function validateUrl($url) {
    $url = filter_var($url, FILTER_SANITIZE_URL);
    // FILTER_FLAG_SCHEME_REQUIRED is not passed: FILTER_VALIDATE_URL already
    // requires a scheme, and the constant no longer exists in PHP 8.
    $test = filter_var($url, FILTER_VALIDATE_URL);
    // deal with bug http://bugs.php.net/51192 (present in PHP 5.2.13 and PHP 5.3.2)
    if ($test === false) {
        $test = filter_var(strtr($url, '-', '_'), FILTER_VALIDATE_URL);
    }
    if ($test !== false && $test !== null && preg_match('!^https?://!', $url)) {
        return $url;
    }
    return false;
}
224
/**
 * Fetch all URLs, then keep re-fetching whatever is left in the redirect
 * queue until it is empty or $this->maxRedirects rounds have been made.
 *
 * @param array $urls
 * @return void
 */
public function fetchAll(array $urls) {
    $this->fetchAllOnce($urls, false);
    for ($round = 1; !empty($this->redirectQueue) && $round <= $this->maxRedirects; $round++) {
        $this->debug("Following redirects #$round...");
        $this->fetchAllOnce($this->redirectQueue, true);
    }
}
233
234 // fetch all URLs without following redirects
235 public function fetchAllOnce(array $urls, $isRedirect=false) {
236 if (!$isRedirect) $urls = array_unique($urls);
237 if (empty($urls)) return;
238
239 //////////////////////////////////////////////////////
240 // parallel (HttpRequestPool)
241 if ($this->method == self::METHOD_REQUEST_POOL) {
242 $this->debug('Starting parallel fetch (HttpRequestPool)');
243 try {
244 while (count($urls) > 0) {
245 $this->debug('Processing set of '.min($this->maxParallelRequests, count($urls)));
246 $subset = array_splice($urls, 0, $this->maxParallelRequests);
247 $pool = new HttpRequestPool();
248 foreach ($subset as $orig => $url) {
249 if (!$isRedirect) $orig = $url;
250 unset($this->redirectQueue[$orig]);
251 $this->debug("...$url");
252 if (!$isRedirect && isset($this->requests[$url])) {
253 $this->debug("......in memory");
254 /*
255 } elseif ($this->isCached($url)) {
256 $this->debug("......is cached");
257 if (!$this->minimiseMemoryUse) {
258 $this->requests[$url] = $this->getCached($url);
259 }
260 */
261 } else {
262 $this->debug("......adding to pool");
263 $req_url = $this->rewriteUrls($url);
264 $req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url;
265 $req_url = $this->removeFragment($req_url);
266 if (!empty($this->headerOnlyTypes) && !isset($this->requests[$orig]['wrongGuess']) && $this->possibleUnsupportedType($req_url)) {
267 $_meth = HttpRequest::METH_HEAD;
268 } else {
269 $_meth = HttpRequest::METH_GET;
270 unset($this->requests[$orig]['wrongGuess']);
271 }
272 $httpRequest = new HttpRequest($req_url, $_meth, $this->requestOptions);
273 // send cookies, if we have any
274 if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
275 $this->debug("......sending cookies: $cookies");
276 $httpRequest->addHeaders(array('Cookie' => $cookies));
277 }
278 //$httpRequest->addHeaders(array('User-Agent' => $this->userAgent));
279 $httpRequest->addHeaders($this->getUserAgent($req_url, true));
280 // add referer for picky sites
281 $httpRequest->addheaders(array('Referer' => $this->referer));
282 $this->requests[$orig] = array('headers'=>null, 'body'=>null, 'httpRequest'=>$httpRequest);
283 $this->requests[$orig]['original_url'] = $orig;
284 $pool->attach($httpRequest);
285 }
286 }
287 // did we get anything into the pool?
288 if (count($pool) > 0) {
289 $this->debug('Sending request...');
290 try {
291 $pool->send();
292 } catch (HttpRequestPoolException $e) {
293 // do nothing
294 }
295 $this->debug('Received responses');
296 foreach($subset as $orig => $url) {
297 if (!$isRedirect) $orig = $url;
298 $request = $this->requests[$orig]['httpRequest'];
299 //$this->requests[$orig]['headers'] = $this->headersToString($request->getResponseHeader());
300 // getResponseHeader() doesn't return status line, so, for consistency...
301 $this->requests[$orig]['headers'] = substr($request->getRawResponseMessage(), 0, $request->getResponseInfo('header_size'));
302 // check content type
303 // TODO: use getResponseHeader('content-type') or getResponseInfo()
304 if ($this->headerOnlyType($this->requests[$orig]['headers'])) {
305 $this->requests[$orig]['body'] = '';
306 $_header_only_type = true;
307 $this->debug('Header only type returned');
308 } else {
309 $this->requests[$orig]['body'] = $request->getResponseBody();
310 $_header_only_type = false;
311 }
312 $this->requests[$orig]['effective_url'] = $request->getResponseInfo('effective_url');
313 $this->requests[$orig]['status_code'] = $status_code = $request->getResponseCode();
314 // is redirect?
315 if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && $request->getResponseHeader('location')) {
316 $redirectURL = $request->getResponseHeader('location');
317 if (!preg_match('!^https?://!i', $redirectURL)) {
318 $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
319 }
320 if ($this->validateURL($redirectURL)) {
321 $this->debug('Redirect detected. Valid URL: '.$redirectURL);
322 // store any cookies
323 $cookies = $request->getResponseHeader('set-cookie');
324 if ($cookies && !is_array($cookies)) $cookies = array($cookies);
325 if ($cookies) $this->cookieJar->storeCookies($url, $cookies);
326 $this->redirectQueue[$orig] = $redirectURL;
327 } else {
328 $this->debug('Redirect detected. Invalid URL: '.$redirectURL);
329 }
330 } elseif (!$_header_only_type && $request->getMethod() === HttpRequest::METH_HEAD) {
331 // the response content-type did not match our 'header only' types,
332 // but we'd issues a HEAD request because we assumed it would. So
333 // let's queue a proper GET request for this item...
334 $this->debug('Wrong guess at content-type, queing GET request');
335 $this->requests[$orig]['wrongGuess'] = true;
336 $this->redirectQueue[$orig] = $this->requests[$orig]['effective_url'];
337 } elseif (strpos($this->requests[$orig]['effective_url'], '_escaped_fragment_') === false) {
338 // check for <meta name='fragment' content='!'/>
339 // for AJAX sites, e.g. Blogger with its dynamic views templates.
340 // Based on Google's spec: https://developers.google.com/webmasters/ajax-crawling/docs/specification
341 if (isset($this->requests[$orig]['body'])) {
342 $redirectURL = $this->getUglyURL($this->requests[$orig]['effective_url'], substr($this->requests[$orig]['body'], 0, 4000));
343 if ($redirectURL) {
344 $this->debug('AJAX trigger (meta name="fragment" content="!") found. Queueing '.$redirectURL);
345 $this->redirectQueue[$orig] = $redirectURL;
346 }
347 }
348 }
349 //die($url.' -multi- '.$request->getResponseInfo('effective_url'));
350 $pool->detach($request);
351 unset($this->requests[$orig]['httpRequest'], $request);
352 /*
353 if ($this->minimiseMemoryUse) {
354 if ($this->cache($url)) {
355 unset($this->requests[$url]);
356 }
357 }
358 */
359 }
360 }
361 }
362 } catch (HttpException $e) {
363 $this->debug($e);
364 return false;
365 }
366 }
367
368 //////////////////////////////////////////////////////////
369 // parallel (curl_multi_*)
370 elseif ($this->method == self::METHOD_CURL_MULTI) {
371 $this->debug('Starting parallel fetch (curl_multi_*)');
372 while (count($urls) > 0) {
373 $this->debug('Processing set of '.min($this->maxParallelRequests, count($urls)));
374 $subset = array_splice($urls, 0, $this->maxParallelRequests);
375 $pool = new RollingCurl(array($this, 'handleCurlResponse'));
376 $pool->window_size = count($subset);
377
378 foreach ($subset as $orig => $url) {
379 if (!$isRedirect) $orig = $url;
380 unset($this->redirectQueue[$orig]);
381 $this->debug("...$url");
382 if (!$isRedirect && isset($this->requests[$url])) {
383 $this->debug("......in memory");
384 /*
385 } elseif ($this->isCached($url)) {
386 $this->debug("......is cached");
387 if (!$this->minimiseMemoryUse) {
388 $this->requests[$url] = $this->getCached($url);
389 }
390 */
391 } else {
392 $this->debug("......adding to pool");
393 $req_url = $this->rewriteUrls($url);
394 $req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url;
395 $req_url = $this->removeFragment($req_url);
396 if (!empty($this->headerOnlyTypes) && !isset($this->requests[$orig]['wrongGuess']) && $this->possibleUnsupportedType($req_url)) {
397 $_meth = 'HEAD';
398 } else {
399 $_meth = 'GET';
400 unset($this->requests[$orig]['wrongGuess']);
401 }
402 $headers = array();
403 //$headers[] = 'User-Agent: '.$this->userAgent;
404 $headers[] = $this->getUserAgent($req_url);
405 // add referer for picky sites
406 $headers[] = 'Referer: '.$this->referer;
407 // send cookies, if we have any
408 if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
409 $this->debug("......sending cookies: $cookies");
410 $headers[] = 'Cookie: '.$cookies;
411 }
412 $httpRequest = new RollingCurlRequest($req_url, $_meth, null, $headers, array(
413 CURLOPT_CONNECTTIMEOUT => $this->requestOptions['timeout'],
414 CURLOPT_TIMEOUT => $this->requestOptions['timeout']
415 ));
416 $httpRequest->set_original_url($orig);
417 $this->requests[$orig] = array('headers'=>null, 'body'=>null, 'httpRequest'=>$httpRequest);
418 $this->requests[$orig]['original_url'] = $orig; // TODO: is this needed anymore?
419 $pool->add($httpRequest);
420 }
421 }
422 // did we get anything into the pool?
423 if (count($pool) > 0) {
424 $this->debug('Sending request...');
425 $pool->execute(); // this will call handleCurlResponse() and populate $this->requests[$orig]
426 $this->debug('Received responses');
427 foreach($subset as $orig => $url) {
428 if (!$isRedirect) $orig = $url;
429 // $this->requests[$orig]['headers']
430 // $this->requests[$orig]['body']
431 // $this->requests[$orig]['effective_url']
432 // check content type
433 if ($this->headerOnlyType($this->requests[$orig]['headers'])) {
434 $this->requests[$orig]['body'] = '';
435 $_header_only_type = true;
436 $this->debug('Header only type returned');
437 } else {
438 $_header_only_type = false;
439 }
440 $status_code = $this->requests[$orig]['status_code'];
441 if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) {
442 $redirectURL = $this->requests[$orig]['location'];
443 if (!preg_match('!^https?://!i', $redirectURL)) {
444 $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
445 }
446 if ($this->validateURL($redirectURL)) {
447 $this->debug('Redirect detected. Valid URL: '.$redirectURL);
448 // store any cookies
449 $cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']);
450 if (!empty($cookies)) $this->cookieJar->storeCookies($url, $cookies);
451 $this->redirectQueue[$orig] = $redirectURL;
452 } else {
453 $this->debug('Redirect detected. Invalid URL: '.$redirectURL);
454 }
455 } elseif (!$_header_only_type && $this->requests[$orig]['method'] == 'HEAD') {
456 // the response content-type did not match our 'header only' types,
457 // but we'd issued a HEAD request because we assumed it would. So
458 // let's queue a proper GET request for this item...
459 $this->debug('Wrong guess at content-type, queing GET request');
460 $this->requests[$orig]['wrongGuess'] = true;
461 $this->redirectQueue[$orig] = $this->requests[$orig]['effective_url'];
462 } elseif (strpos($this->requests[$orig]['effective_url'], '_escaped_fragment_') === false) {
463 // check for <meta name='fragment' content='!'/>
464 // for AJAX sites, e.g. Blogger with its dynamic views templates.
465 // Based on Google's spec: https://developers.google.com/webmasters/ajax-crawling/docs/specification
466 if (isset($this->requests[$orig]['body'])) {
467 $redirectURL = $this->getUglyURL($this->requests[$orig]['effective_url'], substr($this->requests[$orig]['body'], 0, 4000));
468 if ($redirectURL) {
469 $this->debug('AJAX trigger (meta name="fragment" content="!") found. Queueing '.$redirectURL);
470 $this->redirectQueue[$orig] = $redirectURL;
471 }
472 }
473 }
474 // die($url.' -multi- '.$request->getResponseInfo('effective_url'));
475 unset($this->requests[$orig]['httpRequest'], $this->requests[$orig]['method']);
476 }
477 }
478 }
479 }
480
481 //////////////////////////////////////////////////////
482 // sequential (file_get_contents)
483 else {
484 $this->debug('Starting sequential fetch (file_get_contents)');
485 $this->debug('Processing set of '.count($urls));
486 foreach ($urls as $orig => $url) {
487 if (!$isRedirect) $orig = $url;
488 unset($this->redirectQueue[$orig]);
489 $this->debug("...$url");
490 if (!$isRedirect && isset($this->requests[$url])) {
491 $this->debug("......in memory");
492 /*
493 } elseif ($this->isCached($url)) {
494 $this->debug("......is cached");
495 if (!$this->minimiseMemoryUse) {
496 $this->requests[$url] = $this->getCached($url);
497 }
498 */
499 } else {
500 $this->debug("Sending request for $url");
501 $this->requests[$orig]['original_url'] = $orig;
502 $req_url = $this->rewriteUrls($url);
503 $req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url;
504 $req_url = $this->removeFragment($req_url);
505 // send cookies, if we have any
506 $httpContext = $this->httpContext;
507 $httpContext['http']['header'] .= $this->getUserAgent($req_url)."\r\n";
508 // add referer for picky sites
509 $httpContext['http']['header'] .= 'Referer: '.$this->referer."\r\n";
510 if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
511 $this->debug("......sending cookies: $cookies");
512 $httpContext['http']['header'] .= 'Cookie: '.$cookies."\r\n";
513 }
514 if (false !== ($html = @file_get_contents($req_url, false, stream_context_create($httpContext)))) {
515 $this->debug('Received response');
516 // get status code
517 if (!isset($http_response_header[0]) || !preg_match('!^HTTP/\d+\.\d+\s+(\d+)!', trim($http_response_header[0]), $match)) {
518 $this->debug('Error: no status code found');
519 // TODO: handle error - no status code
520 } else {
521 $this->requests[$orig]['headers'] = $this->headersToString($http_response_header, false);
522 // check content type
523 if ($this->headerOnlyType($this->requests[$orig]['headers'])) {
524 $this->requests[$orig]['body'] = '';
525 } else {
526 $this->requests[$orig]['body'] = $html;
527 }
528 $this->requests[$orig]['effective_url'] = $req_url;
529 $this->requests[$orig]['status_code'] = $status_code = (int)$match[1];
530 unset($match);
531 // handle redirect
532 if (preg_match('/^Location:(.*?)$/mi', $this->requests[$orig]['headers'], $match)) {
533 $this->requests[$orig]['location'] = trim($match[1]);
534 }
535 if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) {
536 $redirectURL = $this->requests[$orig]['location'];
537 if (!preg_match('!^https?://!i', $redirectURL)) {
538 $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
539 }
540 if ($this->validateURL($redirectURL)) {
541 $this->debug('Redirect detected. Valid URL: '.$redirectURL);
542 // store any cookies
543 $cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']);
544 if (!empty($cookies)) $this->cookieJar->storeCookies($url, $cookies);
545 $this->redirectQueue[$orig] = $redirectURL;
546 } else {
547 $this->debug('Redirect detected. Invalid URL: '.$redirectURL);
548 }
549 } elseif (strpos($this->requests[$orig]['effective_url'], '_escaped_fragment_') === false) {
550 // check for <meta name='fragment' content='!'/>
551 // for AJAX sites, e.g. Blogger with its dynamic views templates.
552 // Based on Google's spec: https://developers.google.com/webmasters/ajax-crawling/docs/specification
553 if (isset($this->requests[$orig]['body'])) {
554 $redirectURL = $this->getUglyURL($this->requests[$orig]['effective_url'], substr($this->requests[$orig]['body'], 0, 4000));
555 if ($redirectURL) {
556 $this->debug('AJAX trigger (meta name="fragment" content="!") found. Queueing '.$redirectURL);
557 $this->redirectQueue[$orig] = $redirectURL;
558 }
559 }
560 }
561 }
562 } else {
563 $this->debug('Error retrieving URL');
564 //print_r($req_url);
565 //print_r($http_response_header);
566 //print_r($html);
567
568 // TODO: handle error - failed to retrieve URL
569 }
570 }
571 }
572 }
573 }
574
/**
 * RollingCurl completion callback: records one finished transfer in
 * $this->requests, keyed by the request's original URL.
 *
 * @param string $response raw curl output; split into headers and body at $info['header_size']
 * @param array  $info     curl_getinfo() data (header_size, url and http_code are read)
 * @param object $request  the request that completed (url_original and method are read)
 * @return void
 */
public function handleCurlResponse($response, $info, $request) {
    $key = $request->url_original;
    $headerSize = $info['header_size'];
    $rawHeaders = substr($response, 0, $headerSize);

    $this->requests[$key]['headers'] = $rawHeaders;
    $this->requests[$key]['body'] = substr($response, $headerSize);
    $this->requests[$key]['method'] = $request->method;
    $this->requests[$key]['effective_url'] = $info['url'];
    $this->requests[$key]['status_code'] = (int)$info['http_code'];

    // remember the redirect target, if the response carries one
    $found = preg_match('/^Location:(.*?)$/mi', $rawHeaders, $location);
    if ($found) {
        $this->requests[$key]['location'] = trim($location[1]);
    }
}
586
/**
 * Flatten a header list into a single newline-separated string.
 *
 * @param array $headers     list of complete header lines, or a name => value map
 *                           (a value may itself be an array of values)
 * @param bool  $associative true when $headers is a name => value map
 * @return string
 */
protected function headersToString(array $headers, $associative=true) {
    if (!$associative) {
        return implode("\n", $headers);
    }
    $lines = array();
    foreach ($headers as $name => $value) {
        $values = is_array($value) ? $value : array($value);
        foreach ($values as $item) {
            $lines[] = "$name: $item";
        }
    }
    // trailing whitespace is stripped from the assembled string
    return rtrim(implode("\n", $lines));
}
602
/**
 * Return the stored response for a URL, fetching it first if necessary.
 *
 * @param string $url      URL to look up (cast to string before use)
 * @param bool   $remove   drop the entry from the in-memory store once it has been read
 * @param bool   $gzdecode decode the body when the headers declare gzip content encoding
 * @return array|false the stored entry from $this->requests, or false when the
 *                     request failed (no body was recorded for the URL)
 */
public function get($url, $remove=false, $gzdecode=true) {
    $url = "$url";
    if (isset($this->requests[$url]) && isset($this->requests[$url]['body'])) {
        $this->debug("URL already fetched - in memory ($url, effective: {$this->requests[$url]['effective_url']})");
        $response = $this->requests[$url];
    } else {
        $this->debug("Fetching URL ($url)");
        $this->fetchAll(array($url));
        if (isset($this->requests[$url]) && isset($this->requests[$url]['body'])) {
            $response = $this->requests[$url];
        } else {
            $this->debug("Request failed");
            $response = false;
        }
    }
    if ($remove && $response) unset($this->requests[$url]);

    // Only attempt to decode a real response: $response is false on failure,
    // and an empty body is not valid gzip data.
    if ($gzdecode && $response
        && isset($response['headers'])
        && $response['body'] !== ''
        && stripos($response['headers'], 'Content-Encoding: gzip') !== false
    ) {
        $decoded = gzdecode($response['body']);
        // keep the raw body if decoding failed (gzdecode yields false/null then)
        if (is_string($decoded)) {
            $response['body'] = $decoded;
        }
    }
    return $response;
}
638
/**
 * Tell whether a parallel-capable HTTP backend is present: either the
 * HttpRequestPool class or the curl_multi_init() function.
 *
 * @return bool
 */
public function parallelSupport() {
    if (class_exists('HttpRequestPool')) {
        return true;
    }
    return function_exists('curl_multi_init');
}
642
/**
 * Check whether the Content-Type found in a header string is listed in
 * $this->headerOnlyTypes, either as a full mime type (e.g. image/jpeg) or
 * as just its top-level type (e.g. image).
 *
 * @param string $headers raw response headers
 * @return bool
 */
private function headerOnlyType($headers) {
    if (!preg_match('!^Content-Type:\s*(([a-z-]+)/([^;\r\n ]+))!im', $headers, $parts)) {
        return false;
    }
    $fullType = strtolower(trim($parts[1]));
    $topLevel = strtolower(trim($parts[2]));
    return in_array($fullType, $this->headerOnlyTypes)
        || in_array($topLevel, $this->headerOnlyTypes);
}
654
/**
 * Guess from the URL's file extension whether the resource may be one of
 * the types listed in $this->headerOnlyClues.
 *
 * @param string $url
 * @return bool false when the URL path has no dot or cannot be parsed
 */
private function possibleUnsupportedType($url) {
    $path = @parse_url($url, PHP_URL_PATH);
    if (!$path || strpos($path, '.') === false) {
        return false;
    }
    $extension = pathinfo($path, PATHINFO_EXTENSION);
    return in_array(strtolower(trim($extension)), $this->headerOnlyClues);
}
663}
664
665// gzdecode from http://www.php.net/manual/en/function.gzdecode.php#82930
if (!function_exists('gzdecode')) {
    /**
     * Fallback gzdecode() for PHP builds that do not provide the function.
     *
     * Walks the GZIP member header (RFC 1952), inflates the DEFLATE body with
     * gzinflate() and verifies the CRC32 / ISIZE trailer.
     *
     * @param string   $data      raw GZIP data
     * @param string   $filename  by reference: set to the stored file name when the
     *                            FNAME flag is present, otherwise to ''
     * @param string   $error     by reference: set to a message for some failures
     * @param int|null $maxlength maximum decoded length handed to gzinflate() (null/0 = no limit)
     * @return string|false|null decoded data; null when the input is not GZIP, uses
     *                           reserved flag bits or has no body; false on any other failure
     */
    function gzdecode($data,&$filename='',&$error='',$maxlength=null)
    {
        $len = strlen($data);
        if ($len < 18 || strcmp(substr($data,0,2),"\x1f\x8b")) {
            $error = "Not in GZIP format.";
            return null; // Not GZIP format (See RFC 1952)
        }
        $method = ord(substr($data,2,1)); // Compression method
        $flags  = ord(substr($data,3,1)); // Flags
        // Parentheses are required: != binds tighter than & in PHP.
        if (($flags & 31) != $flags) {
            $error = "Reserved bits not allowed.";
            return null;
        }
        // Fixed header: ID1 ID2 CM FLG MTIME(4) XFL OS = 10 bytes.
        $headerlen = 10;
        if ($flags & 4) {
            // FEXTRA: 2-byte little-endian length directly after the fixed
            // header, followed by that many bytes of extra data.
            if ($len - $headerlen - 2 < 8) {
                return false; // invalid
            }
            $extralen = unpack("v",substr($data,$headerlen,2));
            $extralen = $extralen[1];
            if ($len - $headerlen - 2 - $extralen < 8) {
                return false; // invalid
            }
            $headerlen += 2 + $extralen;
        }
        $filename = "";
        if ($flags & 8) {
            // FNAME: zero-terminated original file name
            if ($len - $headerlen - 1 < 8) {
                return false; // invalid
            }
            $filenamelen = strpos(substr($data,$headerlen),chr(0));
            if ($filenamelen === false || $len - $headerlen - $filenamelen - 1 < 8) {
                return false; // invalid
            }
            $filename = substr($data,$headerlen,$filenamelen);
            $headerlen += $filenamelen + 1;
        }
        if ($flags & 16) {
            // FCOMMENT: zero-terminated comment, skipped
            if ($len - $headerlen - 1 < 8) {
                return false; // invalid
            }
            $commentlen = strpos(substr($data,$headerlen),chr(0));
            if ($commentlen === false || $len - $headerlen - $commentlen - 1 < 8) {
                return false; // Invalid header format
            }
            $headerlen += $commentlen + 1;
        }
        if ($flags & 2) {
            // FHCRC: low 16 bits of the CRC32 of the header so far
            if ($len - $headerlen - 2 < 8) {
                return false; // invalid
            }
            $calccrc = crc32(substr($data,0,$headerlen)) & 0xffff;
            $headercrc = unpack("v", substr($data,$headerlen,2));
            $headercrc = $headercrc[1];
            if ($headercrc != $calccrc) {
                $error = "Header checksum failed.";
                return false; // Bad header CRC
            }
            $headerlen += 2;
        }
        // GZIP footer: CRC32 and ISIZE, 4 bytes each, little-endian
        $datacrc = unpack("V",substr($data,-8,4));
        $datacrc = sprintf('%u',$datacrc[1] & 0xFFFFFFFF);
        $isize = unpack("V",substr($data,-4));
        $isize = $isize[1];
        // decompression:
        $bodylen = $len-$headerlen-8;
        if ($bodylen < 1) {
            return null;
        }
        $body = substr($data,$headerlen,$bodylen);
        if ($method != 8) {
            // DEFLATE (8) is the only supported compression method
            $error = "Unknown compression method.";
            return false;
        }
        $data = gzinflate($body,(int)$maxlength);
        if ($data === false) {
            $error = "Decompression failed.";
            return false;
        }
        // Verify CRC32 and length against the footer
        $crc   = sprintf("%u",crc32($data));
        $crcOK = $crc == $datacrc;
        $lenOK = $isize == strlen($data);
        if (!$lenOK || !$crcOK) {
            $error = ( $lenOK ? '' : 'Length check FAILED. ') . ( $crcOK ? '' : 'Checksum FAILED.');
            return false;
        }
        return $data;
    }
}
779?> \ No newline at end of file
diff --git a/inc/3rdparty/libraries/humble-http-agent/RollingCurl.php b/inc/3rdparty/libraries/humble-http-agent/RollingCurl.php
new file mode 100644
index 00000000..d24dc690
--- /dev/null
+++ b/inc/3rdparty/libraries/humble-http-agent/RollingCurl.php
@@ -0,0 +1,402 @@
1<?php
2/*
3Authored by Josh Fraser (www.joshfraser.com)
4Released under Apache License 2.0
5
6Maintained by Alexander Makarov, http://rmcreative.ru/
7
8Modified by Keyvan Minoukadeh for the Five Filters project: http://fivefilters.org
9*/
10
/**
 * Plain data holder describing one curl request.
 */
class RollingCurlRequest
{
    /** @var string|bool URL that is actually requested */
    public $url = false;
    /** @var string|bool URL the request is tracked under (used for tracking redirects) */
    public $url_original = false;
    /** @var string HTTP method */
    public $method = 'GET';
    /** @var mixed data for the request body */
    public $post_data = null;
    /** @var array|null extra request headers */
    public $headers = null;
    /** @var array|null extra curl options for this request */
    public $options = null;

    /**
     * @param string $url       also used as the initial original URL
     * @param string $method
     * @param mixed  $post_data
     * @param array|null $headers
     * @param array|null $options
     * @return void
     */
    public function __construct($url, $method = "GET", $post_data = null, $headers = null, $options = null)
    {
        $this->url = $this->url_original = $url;
        $this->method = $method;
        $this->post_data = $post_data;
        $this->headers = $headers;
        $this->options = $options;
    }

    /**
     * Override the URL this request is tracked under.
     *
     * @param string $url
     * @return void
     */
    public function set_original_url($url)
    {
        $this->url_original = $url;
    }

    /**
     * Drop every field when the object is destroyed.
     *
     * @return void
     */
    public function __destruct()
    {
        foreach (array('url', 'url_original', 'method', 'post_data', 'headers', 'options') as $field) {
            unset($this->{$field});
        }
    }
}
53
/**
 * Exception type thrown by RollingCurl (for example when the rolling
 * window size is too small).
 */
class RollingCurlException extends Exception
{
}
59
/**
 * Holds a queue of curl requests and runs them through curl_multi with a
 * bounded number of simultaneous transfers (the "rolling window").
 *
 * Each finished transfer is handed to the callback given to the constructor
 * as ($response, $info, $request).
 *
 * @throws RollingCurlException
 */
class RollingCurl implements Countable
{
    /**
     * @var int
     *
     * Window size is the max number of simultaneous connections allowed.
     *
     * REMEMBER TO RESPECT THE SERVERS:
     * Sending too many requests at one time can easily be perceived
     * as a DOS attack. Increase this window_size if you are making requests
     * to multiple servers or have permission from the receiving server admins.
     */
    private $window_size = 5;

    /**
     * @var float
     *
     * Timeout intended for curl_multi_select. Currently not passed to it
     * (the timeout caused problems on Windows with PHP 5.3.5 and Curl 7.20.0).
     */
    private $timeout = 10;

    /**
     * @var string|array
     *
     * Callback function to be applied to each result.
     */
    private $callback;

    /**
     * @var array
     *
     * Base options used with EVERY request.
     * NOTE(review): CURLOPT_SSL_VERIFYPEER is disabled here, so TLS
     * certificates are not verified. Left unchanged because callers may
     * depend on it; consider enabling verification.
     */
    protected $options = array(
        CURLOPT_SSL_VERIFYPEER => 0,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_CONNECTTIMEOUT => 30,
        CURLOPT_TIMEOUT => 30
    );

    /**
     * @var array base headers sent with every request
     */
    private $headers = array();

    /**
     * @var array the request queue
     */
    private $requests = array();

    /**
     * @var array maps a curl handle key to its index in the request queue
     */
    private $requestMap = array();

    /**
     * @param $callback
     * Callback function to be applied to each result.
     *
     * Can be specified as 'my_callback_function'
     * or array($object, 'my_callback_method').
     *
     * Function should take three parameters: $response, $info, $request.
     * $response is the raw curl output, $info is additional curl info.
     * $request is the original request
     *
     * @return void
     */
    public function __construct($callback = null)
    {
        $this->callback = $callback;
    }

    /**
     * Read access to the non-public properties.
     *
     * @param string $name
     * @return mixed the property value, or null when it is not set
     */
    public function __get($name)
    {
        return (isset($this->{$name})) ? $this->{$name} : null;
    }

    /**
     * Write access to the non-public properties. For "options" and "headers"
     * the new array is combined with the existing one (new keys win).
     *
     * @param string $name
     * @param mixed $value
     * @return bool
     */
    public function __set($name, $value)
    {
        if ($name == "options" || $name == "headers") {
            $this->{$name} = $value + $this->{$name};
        } else {
            $this->{$name} = $value;
        }
        return true;
    }

    /**
     * Count number of requests added (Countable interface)
     *
     * @return int
     */
    #[\ReturnTypeWillChange]
    public function count()
    {
        return count($this->requests);
    }

    /**
     * Add a request to the request queue
     *
     * @param RollingCurlRequest $request
     * @return bool
     */
    public function add($request)
    {
        $this->requests[] = $request;
        return true;
    }

    /**
     * Create new request and add it to the request queue
     *
     * @param string $url
     * @param string $method
     * @param $post_data
     * @param $headers
     * @param $options
     * @return bool
     */
    public function request($url, $method = "GET", $post_data = null, $headers = null, $options = null)
    {
        $this->requests[] = new RollingCurlRequest($url, $method, $post_data, $headers, $options);
        return true;
    }

    /**
     * Queue a GET request
     *
     * @param string $url
     * @param $headers
     * @param $options
     * @return bool
     */
    public function get($url, $headers = null, $options = null)
    {
        return $this->request($url, "GET", null, $headers, $options);
    }

    /**
     * Queue a POST request
     *
     * @param string $url
     * @param $post_data
     * @param $headers
     * @param $options
     * @return bool
     */
    public function post($url, $post_data = null, $headers = null, $options = null)
    {
        return $this->request($url, "POST", $post_data, $headers, $options);
    }

    /**
     * Execute processing
     *
     * @param int $window_size Max number of simultaneous connections
     * @return string|bool
     */
    public function execute($window_size = null)
    {
        // rolling curl window must always be greater than 1
        if (count($this->requests) == 1) {
            return $this->single_curl();
        }
        // start the rolling curl. window_size is the max number of simultaneous connections
        return $this->rolling_curl($window_size);
    }

    /**
     * Performs a single curl request, removing it from the queue.
     *
     * @access private
     * @return string|bool true when a callback is configured, otherwise the curl output
     */
    private function single_curl()
    {
        $ch = curl_init();
        $request = array_shift($this->requests);
        curl_setopt_array($ch, $this->get_options($request));
        $output = curl_exec($ch);
        $info = curl_getinfo($ch);
        // Handles are objects on PHP 8+ and freed automatically; close the
        // resource explicitly on older versions.
        if (is_resource($ch)) {
            curl_close($ch);
        }

        // it's not necessary to set a callback for one-off requests
        if (!$this->callback) {
            return $output;
        }
        if (is_callable($this->callback)) {
            call_user_func($this->callback, $output, $info, $request);
        }
        return true;
    }

    /**
     * Performs multiple curl requests
     *
     * @access private
     * @throws RollingCurlException
     * @param int $window_size Max number of simultaneous connections
     * @return bool
     */
    private function rolling_curl($window_size = null)
    {
        if ($window_size) {
            $this->window_size = $window_size;
        }

        // The window cannot exceed the number of requests. Clamp a local
        // copy so a small batch does not permanently shrink the setting.
        $window = min($this->window_size, count($this->requests));
        if ($window < 2) {
            throw new RollingCurlException("Window size must be greater than 1");
        }

        $master = curl_multi_init();

        // start the first batch of requests
        for ($next = 0; $next < $window; $next++) {
            $this->start_request($master, $next);
        }

        do {
            // set when a queued request is started during this pass
            $started = false;

            while (($execrun = curl_multi_exec($master, $running)) == CURLM_CALL_MULTI_PERFORM) ;
            if ($execrun != CURLM_OK) {
                break;
            }

            // a request was just completed -- find out which one
            while ($done = curl_multi_info_read($master)) {
                $handle = $done['handle'];

                // get the info and content returned on the request
                $info = curl_getinfo($handle);
                $output = curl_multi_getcontent($handle);

                $key = $this->handle_key($handle);
                $request = isset($this->requestMap[$key]) ? $this->requests[$this->requestMap[$key]] : null;
                unset($this->requestMap[$key]);

                // send the return values to the callback function.
                if (is_callable($this->callback)) {
                    call_user_func($this->callback, $output, $info, $request);
                }

                // start a new request (it's important to do this before removing the old one)
                if (isset($this->requests[$next])) {
                    $this->start_request($master, $next);
                    $next++;
                    $started = true;
                }

                // remove the curl handle that just completed
                curl_multi_remove_handle($master, $handle);
                if (is_resource($handle)) {
                    curl_close($handle);
                }
            }

            // Block for data in / output; error handling is done by curl_multi_exec.
            // No timeout is passed, as it causes problems on Windows with PHP 5.3.5 and Curl 7.20.0.
            // A return of -1 means select could not wait; pause briefly to avoid a busy loop.
            if ($running && curl_multi_select($master) === -1) {
                usleep(100);
            }

            // $running may already be 0 although a replacement request was
            // just queued; keep looping so that request is executed too.
        } while ($running || $started);

        curl_multi_close($master);
        return true;
    }

    /**
     * Create a curl handle for the queued request at $index, attach it to
     * the multi handle and record it in the request map.
     *
     * @param mixed $master curl multi handle
     * @param int $index position in the request queue
     * @return void
     */
    private function start_request($master, $index)
    {
        $ch = curl_init();
        curl_setopt_array($ch, $this->get_options($this->requests[$index]));
        curl_multi_add_handle($master, $ch);
        $this->requestMap[$this->handle_key($ch)] = $index;
    }

    /**
     * Build a map key for a curl handle. Handles are resources before
     * PHP 8 and objects (which cannot be cast to string) from PHP 8 on.
     *
     * @param mixed $ch curl handle
     * @return string
     */
    private function handle_key($ch)
    {
        return is_object($ch) ? spl_object_hash($ch) : (string) $ch;
    }

    /**
     * Helper function to set up a new request by setting the appropriate options
     *
     * @access private
     * @param RollingCurlRequest $request
     * @return array curl options for curl_setopt_array()
     */
    private function get_options($request)
    {
        // options for this entire curl object
        $options = $this->__get('options');
        // We're managing redirects in PHP - allows us to intervene and rewrite/block URLs
        // before the next request goes out.
        $options[CURLOPT_FOLLOWLOCATION] = 0;
        $options[CURLOPT_MAXREDIRS] = 0;

        // Append custom headers for this specific request. Both lists are
        // numerically indexed, so they are concatenated rather than unioned
        // (a union would drop request headers whose index already exists).
        $headers = (array) $this->__get('headers');
        if ($request->headers) {
            $headers = array_merge($headers, (array) $request->headers);
        }

        // append custom options for this specific request
        if ($request->options) {
            $options = $request->options + $options;
        }

        // set the request URL
        $options[CURLOPT_URL] = $request->url;

        if ($headers) {
            $options[CURLOPT_HTTPHEADER] = $headers;
        }
        // return response headers
        $options[CURLOPT_HEADER] = 1;

        // send HEAD request?
        if ($request->method == 'HEAD') {
            $options[CURLOPT_NOBODY] = 1;
        }

        // posting data with this request?
        if ($request->method == 'POST' && $request->post_data !== null) {
            $options[CURLOPT_POST] = 1;
            $options[CURLOPT_POSTFIELDS] = $request->post_data;
        }

        return $options;
    }

    /**
     * @return void
     */
    public function __destruct()
    {
        unset($this->window_size, $this->callback, $this->options, $this->headers, $this->requests, $this->requestMap);
    }
}
diff --git a/inc/3rdparty/libraries/humble-http-agent/SimplePie_HumbleHttpAgent.php b/inc/3rdparty/libraries/humble-http-agent/SimplePie_HumbleHttpAgent.php
new file mode 100644
index 00000000..ecd46d5f
--- /dev/null
+++ b/inc/3rdparty/libraries/humble-http-agent/SimplePie_HumbleHttpAgent.php
@@ -0,0 +1,79 @@
1<?php
/**
 * Humble HTTP Agent extension for SimplePie_File
 *
 * This class is designed to extend and override SimplePie_File
 * in order to prevent duplicate HTTP requests being sent out.
 * The idea is to initialise an instance of Humble HTTP Agent
 * and attach it, via set_agent(), to a static class variable of this class.
 * SimplePie will then automatically initialise this class.
 *
 * @date 2011-02-28
 */

class SimplePie_HumbleHttpAgent extends SimplePie_File
{
    /** @var HumbleHttpAgent|null agent shared by all instances; set via set_agent() */
    protected static $agent;
    public $url;
    public $useragent;
    public $success = true;
    public $headers = array();
    public $body;
    public $status_code;
    public $redirects = 0;
    public $error;
    public $method = SIMPLEPIE_FILE_SOURCE_NONE;

    /**
     * Register the HTTP agent used for every fetch made through this class.
     *
     * @param HumbleHttpAgent $agent
     * @return void
     */
    public static function set_agent(HumbleHttpAgent $agent) {
        self::$agent = $agent;
    }

    /**
     * Fetch $url through the shared agent and expose the result through the
     * public properties. On failure, $success is false and $error holds a
     * short message.
     *
     * Only $url and $useragent are used; the remaining parameters are
     * accepted for signature compatibility and ignored.
     *
     * @param string $url
     * @param int $timeout unused
     * @param int $redirects unused
     * @param array|null $headers unused (TODO: allow for HTTP headers)
     * @param string|null $useragent stored on the instance, not sent
     * @param bool $force_fsockopen unused
     */
    public function __construct($url, $timeout = 10, $redirects = 5, $headers = null, $useragent = null, $force_fsockopen = false) {
        if (class_exists('idna_convert'))
        {
            // convert an internationalised host name to its ASCII form
            $idn = new idna_convert();
            $parsed = SimplePie_Misc::parse_url($url);
            $url = SimplePie_Misc::compress_parse_url($parsed['scheme'], $idn->encode($parsed['authority']), $parsed['path'], $parsed['query'], $parsed['fragment']);
        }
        $this->url = $url;
        $this->useragent = $useragent;

        if (!preg_match('/^http(s)?:\/\//i', $url))
        {
            $this->error = 'invalid URL';
            $this->success = false;
            return;
        }

        $this->method = SIMPLEPIE_FILE_SOURCE_REMOTE | SIMPLEPIE_FILE_SOURCE_CURL;

        // Without a registered agent there is nothing to fetch with; report
        // a failure instead of calling a method on null.
        if (self::$agent === null) {
            $this->error = 'no HTTP agent set';
            $this->success = false;
            return;
        }

        $response = self::$agent->get($url);

        if ($response === false || !isset($response['status_code'])) {
            $this->error = 'failed to fetch URL';
            $this->success = false;
            return;
        }

        // The extra lines at the end are there to satisfy SimplePie's HTTP parser.
        // The class expects a full HTTP message, whereas we're giving it only
        // headers - the new lines indicate the start of the body.
        $parser = new SimplePie_HTTP_Parser($response['headers']."\r\n\r\n");
        if ($parser->parse()) {
            $this->headers = $parser->headers;
            $this->body = $response['body'];
            $this->status_code = $parser->status_code;
        } else {
            // unparsable headers: report a failure rather than a "successful" empty result
            $this->error = 'failed to parse HTTP headers';
            $this->success = false;
        }
    }
}
79?> \ No newline at end of file