aboutsummaryrefslogtreecommitdiffhomepage
path: root/inc/3rdparty/libraries/humble-http-agent
diff options
context:
space:
mode:
authorNicolas Lœuillet <nicolas@loeuillet.org>2014-05-29 12:50:28 +0200
committerNicolas Lœuillet <nicolas@loeuillet.org>2014-05-29 12:50:28 +0200
commit87f01ea2e97715ac5df4ef7a6741cc26f3a5cd1b (patch)
tree558818975ac41403e7d55ad07c5b0ac29806e907 /inc/3rdparty/libraries/humble-http-agent
parentab157bbb75ba226917145c9bf906cbf764a85cd0 (diff)
parent0b9bb8cb7868f24137c5d8b85c39cc88ea877411 (diff)
downloadwallabag-87f01ea2e97715ac5df4ef7a6741cc26f3a5cd1b.tar.gz
wallabag-87f01ea2e97715ac5df4ef7a6741cc26f3a5cd1b.tar.zst
wallabag-87f01ea2e97715ac5df4ef7a6741cc26f3a5cd1b.zip
Merge pull request #707 from mariroz/dev
update to 3.2 version of full-text-rss, issue #694
Diffstat (limited to 'inc/3rdparty/libraries/humble-http-agent')
-rw-r--r--inc/3rdparty/libraries/humble-http-agent/CookieJar.php807
-rw-r--r--inc/3rdparty/libraries/humble-http-agent/HumbleHttpAgent.php1589
-rw-r--r--inc/3rdparty/libraries/humble-http-agent/SimplePie_HumbleHttpAgent.php157
3 files changed, 1291 insertions, 1262 deletions
diff --git a/inc/3rdparty/libraries/humble-http-agent/CookieJar.php b/inc/3rdparty/libraries/humble-http-agent/CookieJar.php
index 83e94f14..e4d5f495 100644
--- a/inc/3rdparty/libraries/humble-http-agent/CookieJar.php
+++ b/inc/3rdparty/libraries/humble-http-agent/CookieJar.php
@@ -1,404 +1,403 @@
1<?php 1<?php
2/** 2/**
3 * Cookie Jar 3 * Cookie Jar
4 * 4 *
5 * PHP class for handling cookies, as defined by the Netscape spec: 5 * PHP class for handling cookies, as defined by the Netscape spec:
6 * <http://curl.haxx.se/rfc/cookie_spec.html> 6 * <http://curl.haxx.se/rfc/cookie_spec.html>
7 * 7 *
8 * This class should be used to handle cookies (storing cookies from HTTP response messages, and 8 * This class should be used to handle cookies (storing cookies from HTTP response messages, and
9 * sending out cookies in HTTP request messages). This has been adapted for FiveFilters.org 9 * sending out cookies in HTTP request messages). This has been adapted for FiveFilters.org
10 * from the original version used in HTTP Navigator. See http://www.keyvan.net/code/http-navigator/ 10 * from the original version used in HTTP Navigator. See http://www.keyvan.net/code/http-navigator/
11 * 11 *
12 * This class is mainly based on Cookies.pm <http://search.cpan.org/author/GAAS/libwww-perl-5.65/ 12 * This class is mainly based on Cookies.pm <http://search.cpan.org/author/GAAS/libwww-perl-5.65/
13 * lib/HTTP/Cookies.pm> from the libwww-perl collection <http://www.linpro.no/lwp/>. 13 * lib/HTTP/Cookies.pm> from the libwww-perl collection <http://www.linpro.no/lwp/>.
14 * Unlike Cookies.pm, this class only supports the Netscape cookie spec, not RFC 2965. 14 * Unlike Cookies.pm, this class only supports the Netscape cookie spec, not RFC 2965.
15 * 15 *
16 * @version 0.5 16 * @version 0.5
17 * @date 2011-03-15 17 * @date 2011-03-15
18 * @see http://php.net/HttpRequestPool 18 * @see http://php.net/HttpRequestPool
19 * @author Keyvan Minoukadeh 19 * @author Keyvan Minoukadeh
20 * @copyright 2011 Keyvan Minoukadeh 20 * @copyright 2011 Keyvan Minoukadeh
21 * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3 21 * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
22 */ 22 */
23 23
class CookieJar
{
    /**
     * Cookies - array containing all cookies.
     *
     * <pre>
     * Cookies are stored like this:
     *   [domain][path][name] = array
     * where array is:
     *   0 => value, 1 => secure, 2 => expires
     * </pre>
     * Domain keys are stored in lower case. The property is public so a jar can be
     * inspected or seeded directly; prefer set_cookie()/clear() for modifications.
     *
     * @var array
     */
    public $cookies = array();

    /**
     * When true, debug() echoes progress messages.
     *
     * @var bool
     */
    public $debug = false;

    /**
     * Constructor
     */
    public function __construct()
    {
    }

    /**
     * Echo a debug message (with memory usage) when $this->debug is enabled.
     *
     * @param string $msg message to print
     * @param string $file optional file name, printed only together with $line
     * @param int $line optional line number, printed only together with $file
     * @return void
     */
    protected function debug($msg, $file=null, $line=null)
    {
        if (!$this->debug) return;
        $mem = round(memory_get_usage()/1024, 2);
        $memPeak = round(memory_get_peak_usage()/1024, 2);
        echo '* ',$msg;
        if (isset($file, $line)) echo " ($file line $line)";
        echo ' - mem used: ',$mem," (peak: $memPeak)\n";
        // ob_flush() raises a notice when no output buffer is active, so only call it
        // when there is a buffer to flush.
        if (ob_get_level() > 0) ob_flush();
        flush();
    }

    /**
     * Get matching cookies
     *
     * Builds the value of a "Cookie" request header for the given URL from the cookies
     * held in this jar.
     *
     * @param string $url absolute URL of the request (scheme and host are required; a
     *                    missing path is treated as '/')
     * @return string|false cookies as "name=value; name2=value2" (empty string if none
     *                      match), or false if $url has no scheme or host
     */
    public function getMatchingCookies($url)
    {
        $parts = parse_url($url);
        if (!is_array($parts) || !isset($parts['scheme'], $parts['host'])) {
            return false;
        }
        // RFC 2965 notes:
        //  If multiple cookies satisfy the criteria above, they are ordered in
        //  the Cookie header such that those with more specific Path attributes
        //  precede those with less specific. Ordering with respect to other
        //  attributes (e.g., Domain) is unspecified.
        // host names are case-insensitive; the jar keeps domain keys in lower case
        $domain = strtolower($parts['host']);
        if (strpos($domain, '.') === false) $domain .= '.local';
        // a URL such as http://example.com has no path component: treat it as '/'
        $request_path = isset($parts['path']) ? $parts['path'] : '';
        if ($request_path === '') $request_path = '/';
        $request_secure = (strtolower($parts['scheme']) == 'https');
        $now = time();
        $matched_cookies = array();
        // domain - find matching domains
        $this->debug('Finding matching domains for '.$domain, __FILE__, __LINE__);
        while (strpos($domain, '.') !== false) {
            if (isset($this->cookies[$domain])) {
                $this->debug(' domain match found: '.$domain);
                $cookies = $this->cookies[$domain];
                // paths - find matching paths starting from most specific
                $this->debug(' - Finding matching paths for '.$request_path);
                $paths = array_keys($cookies);
                usort($paths, array($this, '_cmp_length'));
                foreach ($paths as $path) {
                    // continue to next cookie if request path does not path-match cookie path
                    if (!$this->_path_match($request_path, $path)) continue;
                    // loop through cookie names
                    $this->debug(' path match found: '.$path);
                    foreach ($cookies[$path] as $name => $values) {
                        // if this cookie is secure but request isn't, continue to next cookie
                        if ($values[1] && !$request_secure) continue;
                        // if cookie is not a session cookie and has expired, continue to next cookie
                        if (is_int($values[2]) && ($values[2] < $now)) continue;
                        // cookie matches request
                        $this->debug(' cookie match: '.$name.'='.$values[0]);
                        $matched_cookies[] = $name.'='.$values[0];
                    }
                }
            }
            // try the next, less specific, domain (www.a.com -> .a.com -> a.com -> ...)
            $domain = $this->_reduce_domain($domain);
        }
        // return cookies
        return implode('; ', $matched_cookies);
    }

    /**
     * Parse Set-Cookie values and store the acceptable cookies in the jar.
     *
     * @param string $url URL of the request which produced the response
     * @param array $set_cookies array holding 1 or more "Set-Cookie" header values
     *                           (see extractCookies())
     * @return void
     */
    public function storeCookies($url, $set_cookies)
    {
        if (!is_array($set_cookies) || count($set_cookies) == 0) return;
        $url_parts = parse_url($url);
        if (!is_array($url_parts) || !isset($url_parts['host'])) return;
        // host names are case-insensitive; normalise so storage and lookup agree
        $request_host = strtolower($url_parts['host']);
        if (strpos($request_host, '.') === false) $request_host .= '.local';
        $request_path = isset($url_parts['path']) ? $url_parts['path'] : '';
        if ($request_path === '') $request_path = '/';
        //
        // loop through set-cookie headers
        //
        foreach ($set_cookies as $set_cookie) {
            $this->debug('Parsing: '.$set_cookie);
            // temporary cookie store (before adding to jar)
            $tmp_cookie = array();
            $attributes = explode(';', $set_cookie);
            // loop through params
            foreach ($attributes as $x => $attribute) {
                $key_val = explode('=', $attribute, 2);
                if (count($key_val) != 2) {
                    // if the first param isn't a name=value pair, continue to the next set-cookie
                    // header
                    if ($x == 0) continue 2;
                    // check for secure flag
                    if (strtolower(trim($key_val[0])) == 'secure') $tmp_cookie['secure'] = true;
                    // continue to next param
                    continue;
                }
                list($key, $val) = array_map('trim', $key_val);
                // first name=value pair is the cookie name and value
                // the name and value are stored under 'name' and 'value' to avoid conflicts
                // with later parameters.
                if ($x == 0) {
                    $tmp_cookie = array('name'=>$key, 'value'=>$val);
                    continue;
                }
                $key = strtolower($key);
                if (in_array($key, array('expires', 'path', 'domain', 'secure'), true)) {
                    $tmp_cookie[$key] = $val;
                }
            }
            //
            // set cookie
            //
            // check domain
            $cookie_domain = isset($tmp_cookie['domain']) ? strtolower($tmp_cookie['domain']) : null;
            if (isset($cookie_domain) && ($cookie_domain != $request_host) &&
                    ($cookie_domain != ".$request_host")) {
                $domain = $cookie_domain;
                if ((strpos($domain, '.') === false) && ($domain != 'local')) {
                    $this->debug(' - domain "'.$domain.'" has no dot and is not a local domain');
                    continue;
                }
                if (preg_match('/\.[0-9]+$/', $domain)) {
                    $this->debug(' - domain "'.$domain.'" appears to be an ip address');
                    continue;
                }
                if (substr($domain, 0, 1) != '.') $domain = ".$domain";
                if (!$this->_domain_match($request_host, $domain)) {
                    $this->debug(' - request host "'.$request_host.'" does not domain-match "'.$domain.'"');
                    continue;
                }
            } else {
                // if domain is not specified in the set-cookie header, domain will default to
                // the request host
                $domain = $request_host;
            }
            // check path
            if (isset($tmp_cookie['path']) && ($tmp_cookie['path'] != '')) {
                $path = urldecode($tmp_cookie['path']);
                if (!$this->_path_match($request_path, $path)) {
                    $this->debug(' - request path "'.$request_path.'" does not path-match "'.$path.'"');
                    continue;
                }
            } else {
                // default path: the request path up to (not including) the last slash
                $last_slash = strrpos($request_path, '/');
                $path = ($last_slash === false) ? '' : substr($request_path, 0, $last_slash);
                if ($path == '') $path = '/';
            }
            // check if secure
            $secure = isset($tmp_cookie['secure']);
            // check expiry
            // strtotime() returns false for a date it cannot parse: keep such a cookie as a
            // session cookie (null). Any parsed timestamp is passed on; a timestamp <= 0
            // makes set_cookie() remove the cookie.
            $expires = null;
            if (isset($tmp_cookie['expires'])) {
                $timestamp = strtotime($tmp_cookie['expires']);
                if ($timestamp !== false) $expires = $timestamp;
            }
            // set cookie
            $this->set_cookie($domain, $path, $tmp_cookie['name'], $tmp_cookie['value'], $secure, $expires);
        }
    }

    /**
     * Extract Set-Cookie values from raw HTTP response headers.
     *
     * Parsing stops at the first empty line after the first line (end of headers).
     * Folded continuation lines (starting with whitespace) are appended to the
     * preceding Set-Cookie value.
     *
     * @param string $h raw HTTP response headers
     * @return array list of Set-Cookie header values
     */
    public function extractCookies($h)
    {
        $x = 0;
        $headers = array();
        $last_match = false;
        $seen_first_line = false;
        foreach (explode("\n", $h) as $line) {
            $line = rtrim($line);
            if ($seen_first_line) {
                // an empty line indicates the end of headers (CR is optional, as some
                // servers do not use CRLF)
                if (trim($line) === '') {
                    break;
                }

                // check for continuation line...
                // RFC 2616 Section 2.2 "Basic Rules":
                // HTTP/1.1 header field values can be folded onto multiple lines if the
                // continuation line begins with a space or horizontal tab. All linear
                // white space, including folding, has the same semantics as SP. A
                // recipient MAY replace any linear white space with a single SP before
                // interpreting the field value or forwarding the message downstream.
                if ($last_match && preg_match('/^\s+(.*)/', $line, $match)) {
                    // append to previous header value
                    $headers[$x-1] .= ' '.rtrim($match[1]);
                    continue;
                }
            }
            $seen_first_line = true;

            // split header name and value
            if (preg_match('/^Set-Cookie\s*:\s*(.*)/i', $line, $match)) {
                $headers[$x++] = rtrim($match[1]);
                $last_match = true;
            } else {
                $last_match = false;
            }
        }
        return $headers;
    }

    /**
     * Set Cookie
     * @param string $domain (stored in lower case)
     * @param string $path
     * @param string $name cookie name
     * @param string $value cookie value
     * @param bool $secure
     * @param int $expires expiry time (null if session cookie, <= 0 will delete cookie)
     * @return void
     */
    public function set_cookie($domain, $path, $name, $value, $secure=false, $expires=null)
    {
        $domain = strtolower((string) $domain);
        if ($domain === '') return;
        if ((string) $path === '') return;
        if ((string) $name === '') return;
        // check if cookie needs to go
        if (isset($expires) && ($expires <= 0)) {
            unset($this->cookies[$domain][$path][$name]);
            return;
        }
        if ((string) $value === '') return;
        $this->cookies[$domain][$path][$name] = array($value, $secure, $expires);
    }

    /**
     * Clear cookies - [domain [,path [,name]]] - call method with no arguments to clear all cookies.
     * @param string $domain
     * @param string $path
     * @param string $name
     * @return void
     */
    public function clear($domain=null, $path=null, $name=null)
    {
        if (!isset($domain)) {
            $this->cookies = array();
            return;
        }
        // domain keys are stored in lower case
        $domain = strtolower($domain);
        if (!isset($path)) {
            unset($this->cookies[$domain]);
        } elseif (!isset($name)) {
            unset($this->cookies[$domain][$path]);
        } else {
            unset($this->cookies[$domain][$path][$name]);
        }
    }

    /**
     * Compare string length - used for sorting (longest string first)
     * @param string $a
     * @param string $b
     * @return int -1 if $a is longer, 1 if $b is longer, 0 if equal length
     */
    public function _cmp_length($a, $b)
    {
        $la = strlen($a);
        $lb = strlen($b);
        if ($la == $lb) return 0;
        return ($la > $lb) ? -1 : 1;
    }

    /**
     * Reduce domain - strip a leading dot, otherwise strip the first label
     * (www.example.com -> .example.com -> example.com -> .com -> com -> '')
     * @param string $domain
     * @return string reduced domain, or '' when nothing is left to reduce
     */
    public function _reduce_domain($domain)
    {
        if ($domain == '') return '';
        if (substr($domain, 0, 1) == '.') return substr($domain, 1);
        $dot = strpos($domain, '.');
        // no dot left: the domain cannot be reduced any further
        if ($dot === false) return '';
        return substr($domain, $dot);
    }

    /**
     * Path match - check if path1 path-matches path2
     *
     * From RFC 2965:
     * <i>For two strings that represent paths, P1 and P2, P1 path-matches P2
     * if P2 is a prefix of P1 (including the case where P1 and P2 string-
     * compare equal). Thus, the string /tec/waldo path-matches /tec.</i>
     * @param string $path1
     * @param string $path2
     * @return bool
     */
    public function _path_match($path1, $path2)
    {
        return (substr($path1, 0, strlen($path2)) == $path2);
    }

    /**
     * Domain match - check if domain1 domain-matches domain2
     *
     * A few extracts from RFC 2965:
     * - A Set-Cookie2 from request-host y.x.foo.com for Domain=.foo.com
     *   would be rejected, because H is y.x and contains a dot.
     *
     * - A Set-Cookie2 from request-host x.foo.com for Domain=.foo.com
     *   would be accepted.
     *
     * - A Set-Cookie2 with Domain=.com or Domain=.com., will always be
     *   rejected, because there is no embedded dot.
     *
     * - A Set-Cookie2 from request-host example for Domain=.local will
     *   be accepted, because the effective host name for the request-
     *   host is example.local, and example.local domain-matches .local.
     *
     * The first point is deliberately not enforced here: domain1 is reduced
     * repeatedly, so y.x.foo.com domain-matches .foo.com.
     *
     * @param string $domain1
     * @param string $domain2
     * @return bool
     */
    public function _domain_match($domain1, $domain2)
    {
        $domain1 = strtolower($domain1);
        $domain2 = strtolower($domain2);
        while (strpos($domain1, '.') !== false) {
            if ($domain1 == $domain2) return true;
            $domain1 = $this->_reduce_domain($domain1);
        }
        return false;
    }
}
404?> \ No newline at end of file
diff --git a/inc/3rdparty/libraries/humble-http-agent/HumbleHttpAgent.php b/inc/3rdparty/libraries/humble-http-agent/HumbleHttpAgent.php
index e4f1b3b3..963f0c05 100644
--- a/inc/3rdparty/libraries/humble-http-agent/HumbleHttpAgent.php
+++ b/inc/3rdparty/libraries/humble-http-agent/HumbleHttpAgent.php
@@ -1,779 +1,810 @@
1<?php 1<?php
2/** 2/**
3 * Humble HTTP Agent 3 * Humble HTTP Agent
4 * 4 *
5 * This class is designed to take advantage of parallel HTTP requests 5 * This class is designed to take advantage of parallel HTTP requests
6 * offered by PHP's PECL HTTP extension or the curl_multi_* functions. 6 * offered by PHP's PECL HTTP extension or the curl_multi_* functions.
7 * For environments which do not have these options, it reverts to standard sequential 7 * For environments which do not have these options, it reverts to standard sequential
8 * requests (using file_get_contents()) 8 * requests (using file_get_contents())
9 * 9 *
10 * @version 1.1 10 * @version 1.4
11 * @date 2012-08-20 11 * @date 2013-05-10
12 * @see http://php.net/HttpRequestPool 12 * @see http://php.net/HttpRequestPool
13 * @author Keyvan Minoukadeh 13 * @author Keyvan Minoukadeh
14 * @copyright 2011-2012 Keyvan Minoukadeh 14 * @copyright 2011-2013 Keyvan Minoukadeh
15 * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3 15 * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
16 */ 16 */
17 17
18class HumbleHttpAgent 18class HumbleHttpAgent
19{ 19{
20 const METHOD_REQUEST_POOL = 1; 20 const METHOD_REQUEST_POOL = 1;
21 const METHOD_CURL_MULTI = 2; 21 const METHOD_CURL_MULTI = 2;
22 const METHOD_FILE_GET_CONTENTS = 4; 22 const METHOD_FILE_GET_CONTENTS = 4;
23 //const UA_BROWSER = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0.1) Gecko/20100101 Firefox/4.0.1'; 23 //const UA_BROWSER = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0.1) Gecko/20100101 Firefox/4.0.1';
24 const UA_BROWSER = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.92 Safari/535.2'; 24 const UA_BROWSER = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.92 Safari/535.2';
25 const UA_PHP = 'PHP/5.2'; 25 const UA_PHP = 'PHP/5.4';
26 const REF_GOOGLE = 'http://www.google.co.uk/url?sa=t&source=web&cd=1'; 26 const REF_GOOGLE = 'http://www.google.co.uk/url?sa=t&source=web&cd=1';
27 27
28 protected $requests = array(); 28 protected $requests = array();
29 protected $redirectQueue = array(); 29 protected $redirectQueue = array();
30 protected $requestOptions; 30 protected $requestOptions;
31 protected $maxParallelRequests = 5; 31 protected $maxParallelRequests = 5;
32 protected $cache = null; //TODO 32 protected $cache = null; //TODO
33 protected $httpContext; 33 protected $httpContext;
34 protected $minimiseMemoryUse = false; //TODO 34 protected $minimiseMemoryUse = false; //TODO
35 protected $method; 35 protected $method;
36 protected $cookieJar; 36 protected $cookieJar;
37 public $debug = false; 37 public $debug = false;
38 public $debugVerbose = false; 38 public $debugVerbose = false;
39 public $rewriteHashbangFragment = true; // see http://code.google.com/web/ajaxcrawling/docs/specification.html 39 public $rewriteHashbangFragment = true; // see http://code.google.com/web/ajaxcrawling/docs/specification.html
40 public $maxRedirects = 5; 40 public $maxRedirects = 5;
41 public $userAgentMap = array(); 41 public $userAgentMap = array();
42 public $rewriteUrls = array(); 42 public $rewriteUrls = array();
43 public $userAgentDefault; 43 public $userAgentDefault;
44 public $referer; 44 public $referer;
45 //public $userAgent = 'Mozilla/5.0'; 45 //public $userAgent = 'Mozilla/5.0';
46 46
47 // Prevent certain file/mime types 47 // Prevent certain file/mime types
48 // HTTP responses which match these content types will 48 // HTTP responses which match these content types will
49 // be returned without body. 49 // be returned without body.
50 public $headerOnlyTypes = array(); 50 public $headerOnlyTypes = array();
51 // URLs ending with one of these extensions will 51 // URLs ending with one of these extensions will
52 // prompt Humble HTTP Agent to send a HEAD request first 52 // prompt Humble HTTP Agent to send a HEAD request first
53 // to see if returned content type matches $headerOnlyTypes. 53 // to see if returned content type matches $headerOnlyTypes.
54 public $headerOnlyClues = array('pdf','mp3','zip','exe','gif','gzip','gz','jpeg','jpg','mpg','mpeg','png','ppt','mov'); 54 public $headerOnlyClues = array('pdf','mp3','zip','exe','gif','gzip','gz','jpeg','jpg','mpg','mpeg','png','ppt','mov');
55 // AJAX triggers to search for. 55 // AJAX triggers to search for.
56 // for AJAX sites, e.g. Blogger with its dynamic views templates. 56 // for AJAX sites, e.g. Blogger with its dynamic views templates.
57 public $ajaxTriggers = array("<meta name='fragment' content='!'",'<meta name="fragment" content="!"',"<meta content='!' name='fragment'",'<meta content="!" name="fragment"'); 57 public $ajaxTriggers = array("<meta name='fragment' content='!'",'<meta name="fragment" content="!"',"<meta content='!' name='fragment'",'<meta content="!" name="fragment"');
58 58
59 //TODO: set max file size 59 //TODO: set max file size
60 //TODO: normalise headers 60 //TODO: normalise headers
61 61
62 function __construct($requestOptions=null, $method=null) { 62 function __construct($requestOptions=null, $method=null) {
63 $this->userAgentDefault = self::UA_BROWSER; 63 $this->userAgentDefault = self::UA_BROWSER;
64 $this->referer = self::REF_GOOGLE; 64 $this->referer = self::REF_GOOGLE;
65 // set the request method 65 // set the request method
66 if (in_array($method, array(1,2,4))) { 66 if (in_array($method, array(1,2,4))) {
67 $this->method = $method; 67 $this->method = $method;
68 } else { 68 } else {
69 if (class_exists('HttpRequestPool')) { 69 if (class_exists('HttpRequestPool')) {
70 $this->method = self::METHOD_REQUEST_POOL; 70 $this->method = self::METHOD_REQUEST_POOL;
71 } elseif (function_exists('curl_multi_init')) { 71 } elseif (function_exists('curl_multi_init')) {
72 $this->method = self::METHOD_CURL_MULTI; 72 $this->method = self::METHOD_CURL_MULTI;
73 } else { 73 } else {
74 $this->method = self::METHOD_FILE_GET_CONTENTS; 74 $this->method = self::METHOD_FILE_GET_CONTENTS;
75 } 75 }
76 } 76 }
77 if ($this->method == self::METHOD_CURL_MULTI) { 77 if ($this->method == self::METHOD_CURL_MULTI) {
78 require_once(dirname(__FILE__).'/RollingCurl.php'); 78 require_once(dirname(__FILE__).'/RollingCurl.php');
79 } 79 }
80 // create cookie jar 80 // create cookie jar
81 $this->cookieJar = new CookieJar(); 81 $this->cookieJar = new CookieJar();
82 // set request options (redirect must be 0) 82 // set request options (redirect must be 0)
83 $this->requestOptions = array( 83 $this->requestOptions = array(
84 'timeout' => 15, 84 'timeout' => 15,
85 'redirect' => 0 // we handle redirects manually so we can rewrite the new hashbang URLs that are creeping up over the web 85 'connecttimeout' => 15,
86 // TODO: test onprogress? 86 'dns_cache_timeout' => 300,
87 ); 87 'redirect' => 0 // we handle redirects manually so we can rewrite the new hashbang URLs that are creeping up over the web
88 if (is_array($requestOptions)) { 88 // TODO: test onprogress?
89 $this->requestOptions = array_merge($this->requestOptions, $requestOptions); 89 );
90 } 90 if (is_array($requestOptions)) {
91 $this->httpContext = array( 91 $this->requestOptions = array_merge($this->requestOptions, $requestOptions);
92 'http' => array( 92 }
93 'ignore_errors' => true, 93 $this->httpContext = array(
94 'timeout' => $this->requestOptions['timeout'], 94 'http' => array(
95 'max_redirects' => $this->requestOptions['redirect'], 95 'ignore_errors' => true,
96 'header' => "Accept: */*\r\n" 96 'timeout' => $this->requestOptions['timeout'],
97 ) 97 'max_redirects' => $this->requestOptions['redirect'],
98 ); 98 'header' => "Accept: */*\r\n"
99 } 99 )
100 100 );
101 protected function debug($msg) { 101 }
102 if ($this->debug) { 102
103 $mem = round(memory_get_usage()/1024, 2); 103 protected function debug($msg) {
104 $memPeak = round(memory_get_peak_usage()/1024, 2); 104 if ($this->debug) {
105 echo '* ',$msg; 105 $mem = round(memory_get_usage()/1024, 2);
106 if ($this->debugVerbose) echo ' - mem used: ',$mem," (peak: $memPeak)"; 106 $memPeak = round(memory_get_peak_usage()/1024, 2);
107 echo "\n"; 107 echo '* ',$msg;
108 ob_flush(); 108 if ($this->debugVerbose) echo ' - mem used: ',$mem," (peak: $memPeak)";
109 flush(); 109 echo "\n";
110 } 110 ob_flush();
111 } 111 flush();
112 112 }
113 protected function getUserAgent($url, $asArray=false) { 113 }
114 $host = @parse_url($url, PHP_URL_HOST); 114
115 if (strtolower(substr($host, 0, 4)) == 'www.') { 115 protected function getUserAgent($url, $asArray=false) {
116 $host = substr($host, 4); 116 $host = @parse_url($url, PHP_URL_HOST);
117 } 117 if (strtolower(substr($host, 0, 4)) == 'www.') {
118 if ($host) { 118 $host = substr($host, 4);
119 $try = array($host); 119 }
120 $split = explode('.', $host); 120 if ($host) {
121 if (count($split) > 1) { 121 $try = array($host);
122 array_shift($split); 122 $split = explode('.', $host);
123 $try[] = '.'.implode('.', $split); 123 if (count($split) > 1) {
124 } 124 array_shift($split);
125 foreach ($try as $h) { 125 $try[] = '.'.implode('.', $split);
126 if (isset($this->userAgentMap[$h])) { 126 }
127 $ua = $this->userAgentMap[$h]; 127 foreach ($try as $h) {
128 break; 128 if (isset($this->userAgentMap[$h])) {
129 } 129 $ua = $this->userAgentMap[$h];
130 } 130 break;
131 } 131 }
132 if (!isset($ua)) $ua = $this->userAgentDefault; 132 }
133 if ($asArray) { 133 }
134 return array('User-Agent' => $ua); 134 if (!isset($ua)) $ua = $this->userAgentDefault;
135 } else { 135 if ($asArray) {
136 return 'User-Agent: '.$ua; 136 return array('User-Agent' => $ua);
137 } 137 } else {
138 } 138 return 'User-Agent: '.$ua;
139 139 }
140 public function rewriteHashbangFragment($url) { 140 }
141 // return $url if there's no '#!' 141
142 if (strpos($url, '#!') === false) return $url; 142 public function rewriteHashbangFragment($url) {
143 // split $url and rewrite 143 // return $url if there's no '#!'
144 // TODO: is SimplePie_IRI included? 144 if (strpos($url, '#!') === false) return $url;
145 $iri = new SimplePie_IRI($url); 145 // split $url and rewrite
146 $fragment = substr($iri->fragment, 1); // strip '!' 146 // TODO: is SimplePie_IRI included?
147 $iri->fragment = null; 147 $iri = new SimplePie_IRI($url);
148 if (isset($iri->query)) { 148 $fragment = substr($iri->fragment, 1); // strip '!'
149 parse_str($iri->query, $query); 149 $iri->fragment = null;
150 } else { 150 if (isset($iri->query)) {
151 $query = array(); 151 parse_str($iri->query, $query);
152 } 152 } else {
153 $query['_escaped_fragment_'] = (string)$fragment; 153 $query = array();
154 $iri->query = str_replace('%2F', '/', http_build_query($query)); // needed for some sites 154 }
155 return $iri->get_iri(); 155 $query['_escaped_fragment_'] = (string)$fragment;
156 } 156 $iri->query = str_replace('%2F', '/', http_build_query($query)); // needed for some sites
157 157 return $iri->get_iri();
158 public function getUglyURL($url, $html) { 158 }
159 if ($html == '') return false; 159
160 $found = false; 160 public function getRedirectURLfromHTML($url, $html) {
161 foreach ($this->ajaxTriggers as $string) { 161 $redirect_url = $this->getMetaRefreshURL($url, $html);
162 if (stripos($html, $string)) { 162 if (!$redirect_url) {
163 $found = true; 163 $redirect_url = $this->getUglyURL($url, $html);
164 break; 164 }
165 } 165 return $redirect_url;
166 } 166 }
167 if (!$found) return false; 167
168 $iri = new SimplePie_IRI($url); 168 public function getMetaRefreshURL($url, $html) {
169 if (isset($iri->query)) { 169 if ($html == '') return false;
170 parse_str($iri->query, $query); 170 // <meta HTTP-EQUIV="REFRESH" content="0; url=http://www.bernama.com/bernama/v6/newsindex.php?id=943513">
171 } else { 171 if (!preg_match('!<meta http-equiv=["\']?refresh["\']? content=["\']?[0-9];\s*url=["\']?([^"\'>]+)["\']*>!i', $html, $match)) {
172 $query = array(); 172 return false;
173 } 173 }
174 $query['_escaped_fragment_'] = ''; 174 $redirect_url = $match[1];
175 $iri->query = str_replace('%2F', '/', http_build_query($query)); // needed for some sites 175 if (preg_match('!^https?://!i', $redirect_url)) {
176 return $iri->get_iri(); 176 // already absolute
177 } 177 $this->debug('Meta refresh redirect found (http-equiv="refresh"), new URL: '.$redirect_url);
178 178 return $redirect_url;
179 public function removeFragment($url) { 179 }
180 $pos = strpos($url, '#'); 180 // absolutize redirect URL
181 if ($pos === false) { 181 $base = new SimplePie_IRI($url);
182 return $url; 182 // remove '//' in URL path (causes URLs not to resolve properly)
183 } else { 183 if (isset($base->path)) $base->path = preg_replace('!//+!', '/', $base->path);
184 return substr($url, 0, $pos); 184 if ($absolute = SimplePie_IRI::absolutize($base, $redirect_url)) {
185 } 185 $this->debug('Meta refresh redirect found (http-equiv="refresh"), new URL: '.$absolute);
186 } 186 return $absolute;
187 187 }
188 public function rewriteUrls($url) { 188 return false;
189 foreach ($this->rewriteUrls as $find => $action) { 189 }
190 if (strpos($url, $find) !== false) { 190
191 if (is_array($action)) { 191 public function getUglyURL($url, $html) {
192 return strtr($url, $action); 192 if ($html == '') return false;
193 } 193 $found = false;
194 } 194 foreach ($this->ajaxTriggers as $string) {
195 } 195 if (stripos($html, $string)) {
196 return $url; 196 $found = true;
197 } 197 break;
198 198 }
199 public function enableDebug($bool=true) { 199 }
200 $this->debug = (bool)$bool; 200 if (!$found) return false;
201 } 201 $iri = new SimplePie_IRI($url);
202 202 if (isset($iri->query)) {
203 public function minimiseMemoryUse($bool = true) { 203 parse_str($iri->query, $query);
204 $this->minimiseMemoryUse = $bool; 204 } else {
205 } 205 $query = array();
206 206 }
207 public function setMaxParallelRequests($max) { 207 $query['_escaped_fragment_'] = '';
208 $this->maxParallelRequests = $max; 208 $iri->query = str_replace('%2F', '/', http_build_query($query)); // needed for some sites
209 } 209 $ugly_url = $iri->get_iri();
210 210 $this->debug('AJAX trigger (meta name="fragment" content="!") found, new URL: '.$ugly_url);
211 public function validateUrl($url) { 211 return $ugly_url;
212 $url = filter_var($url, FILTER_SANITIZE_URL); 212 }
213 $test = filter_var($url, FILTER_VALIDATE_URL, FILTER_FLAG_SCHEME_REQUIRED); 213
214 // deal with bug http://bugs.php.net/51192 (present in PHP 5.2.13 and PHP 5.3.2) 214 public function removeFragment($url) {
215 if ($test === false) { 215 $pos = strpos($url, '#');
216 $test = filter_var(strtr($url, '-', '_'), FILTER_VALIDATE_URL, FILTER_FLAG_SCHEME_REQUIRED); 216 if ($pos === false) {
217 } 217 return $url;
218 if ($test !== false && $test !== null && preg_match('!^https?://!', $url)) { 218 } else {
219 return $url; 219 return substr($url, 0, $pos);
220 } else { 220 }
221 return false; 221 }
222 } 222
223 } 223 public function rewriteUrls($url) {
224 224 foreach ($this->rewriteUrls as $find => $action) {
225 public function fetchAll(array $urls) { 225 if (strpos($url, $find) !== false) {
226 $this->fetchAllOnce($urls, $isRedirect=false); 226 if (is_array($action)) {
227 $redirects = 0; 227 return strtr($url, $action);
228 while (!empty($this->redirectQueue) && ++$redirects <= $this->maxRedirects) { 228 }
229 $this->debug("Following redirects #$redirects..."); 229 }
230 $this->fetchAllOnce($this->redirectQueue, $isRedirect=true); 230 }
231 } 231 return $url;
232 } 232 }
233 233
234 // fetch all URLs without following redirects 234 public function enableDebug($bool=true) {
235 public function fetchAllOnce(array $urls, $isRedirect=false) { 235 $this->debug = (bool)$bool;
236 if (!$isRedirect) $urls = array_unique($urls); 236 }
237 if (empty($urls)) return; 237
238 238 public function minimiseMemoryUse($bool = true) {
239 ////////////////////////////////////////////////////// 239 $this->minimiseMemoryUse = $bool;
240 // parallel (HttpRequestPool) 240 }
241 if ($this->method == self::METHOD_REQUEST_POOL) { 241
242 $this->debug('Starting parallel fetch (HttpRequestPool)'); 242 public function setMaxParallelRequests($max) {
243 try { 243 $this->maxParallelRequests = $max;
244 while (count($urls) > 0) { 244 }
245 $this->debug('Processing set of '.min($this->maxParallelRequests, count($urls))); 245
246 $subset = array_splice($urls, 0, $this->maxParallelRequests); 246 public function validateUrl($url) {
247 $pool = new HttpRequestPool(); 247 $url = filter_var($url, FILTER_SANITIZE_URL);
248 foreach ($subset as $orig => $url) { 248 $test = filter_var($url, FILTER_VALIDATE_URL, FILTER_FLAG_SCHEME_REQUIRED);
249 if (!$isRedirect) $orig = $url; 249 // deal with bug http://bugs.php.net/51192 (present in PHP 5.2.13 and PHP 5.3.2)
250 unset($this->redirectQueue[$orig]); 250 if ($test === false) {
251 $this->debug("...$url"); 251 $test = filter_var(strtr($url, '-', '_'), FILTER_VALIDATE_URL, FILTER_FLAG_SCHEME_REQUIRED);
252 if (!$isRedirect && isset($this->requests[$url])) { 252 }
253 $this->debug("......in memory"); 253 if ($test !== false && $test !== null && preg_match('!^https?://!', $url)) {
254 /* 254 return $url;
255 } elseif ($this->isCached($url)) { 255 } else {
256 $this->debug("......is cached"); 256 return false;
257 if (!$this->minimiseMemoryUse) { 257 }
258 $this->requests[$url] = $this->getCached($url); 258 }
259 } 259
260 */ 260 public function fetchAll(array $urls) {
261 } else { 261 $this->fetchAllOnce($urls, $isRedirect=false);
262 $this->debug("......adding to pool"); 262 $redirects = 0;
263 $req_url = $this->rewriteUrls($url); 263 while (!empty($this->redirectQueue) && ++$redirects <= $this->maxRedirects) {
264 $req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url; 264 $this->debug("Following redirects #$redirects...");
265 $req_url = $this->removeFragment($req_url); 265 $this->fetchAllOnce($this->redirectQueue, $isRedirect=true);
266 if (!empty($this->headerOnlyTypes) && !isset($this->requests[$orig]['wrongGuess']) && $this->possibleUnsupportedType($req_url)) { 266 }
267 $_meth = HttpRequest::METH_HEAD; 267 }
268 } else { 268
269 $_meth = HttpRequest::METH_GET; 269 // fetch all URLs without following redirects
270 unset($this->requests[$orig]['wrongGuess']); 270 public function fetchAllOnce(array $urls, $isRedirect=false) {
271 } 271 if (!$isRedirect) $urls = array_unique($urls);
272 $httpRequest = new HttpRequest($req_url, $_meth, $this->requestOptions); 272 if (empty($urls)) return;
273 // send cookies, if we have any 273
274 if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) { 274 //////////////////////////////////////////////////////
275 $this->debug("......sending cookies: $cookies"); 275 // parallel (HttpRequestPool)
276 $httpRequest->addHeaders(array('Cookie' => $cookies)); 276 if ($this->method == self::METHOD_REQUEST_POOL) {
277 } 277 $this->debug('Starting parallel fetch (HttpRequestPool)');
278 //$httpRequest->addHeaders(array('User-Agent' => $this->userAgent)); 278 try {
279 $httpRequest->addHeaders($this->getUserAgent($req_url, true)); 279 while (count($urls) > 0) {
280 // add referer for picky sites 280 $this->debug('Processing set of '.min($this->maxParallelRequests, count($urls)));
281 $httpRequest->addheaders(array('Referer' => $this->referer)); 281 $subset = array_splice($urls, 0, $this->maxParallelRequests);
282 $this->requests[$orig] = array('headers'=>null, 'body'=>null, 'httpRequest'=>$httpRequest); 282 $pool = new HttpRequestPool();
283 $this->requests[$orig]['original_url'] = $orig; 283 foreach ($subset as $orig => $url) {
284 $pool->attach($httpRequest); 284 if (!$isRedirect) $orig = $url;
285 } 285 unset($this->redirectQueue[$orig]);
286 } 286 $this->debug("...$url");
287 // did we get anything into the pool? 287 if (!$isRedirect && isset($this->requests[$url])) {
288 if (count($pool) > 0) { 288 $this->debug("......in memory");
289 $this->debug('Sending request...'); 289 /*
290 try { 290 } elseif ($this->isCached($url)) {
291 $pool->send(); 291 $this->debug("......is cached");
292 } catch (HttpRequestPoolException $e) { 292 if (!$this->minimiseMemoryUse) {
293 // do nothing 293 $this->requests[$url] = $this->getCached($url);
294 } 294 }
295 $this->debug('Received responses'); 295 */
296 foreach($subset as $orig => $url) { 296 } else {
297 if (!$isRedirect) $orig = $url; 297 $this->debug("......adding to pool");
298 $request = $this->requests[$orig]['httpRequest']; 298 $req_url = $this->rewriteUrls($url);
299 //$this->requests[$orig]['headers'] = $this->headersToString($request->getResponseHeader()); 299 $req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url;
300 // getResponseHeader() doesn't return status line, so, for consistency... 300 $req_url = $this->removeFragment($req_url);
301 $this->requests[$orig]['headers'] = substr($request->getRawResponseMessage(), 0, $request->getResponseInfo('header_size')); 301 if (!empty($this->headerOnlyTypes) && !isset($this->requests[$orig]['wrongGuess']) && $this->possibleUnsupportedType($req_url)) {
302 // check content type 302 $_meth = HttpRequest::METH_HEAD;
303 // TODO: use getResponseHeader('content-type') or getResponseInfo() 303 } else {
304 if ($this->headerOnlyType($this->requests[$orig]['headers'])) { 304 $_meth = HttpRequest::METH_GET;
305 $this->requests[$orig]['body'] = ''; 305 unset($this->requests[$orig]['wrongGuess']);
306 $_header_only_type = true; 306 }
307 $this->debug('Header only type returned'); 307 $httpRequest = new HttpRequest($req_url, $_meth, $this->requestOptions);
308 } else { 308 // send cookies, if we have any
309 $this->requests[$orig]['body'] = $request->getResponseBody(); 309 if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
310 $_header_only_type = false; 310 $this->debug("......sending cookies: $cookies");
311 } 311 $httpRequest->addHeaders(array('Cookie' => $cookies));
312 $this->requests[$orig]['effective_url'] = $request->getResponseInfo('effective_url'); 312 }
313 $this->requests[$orig]['status_code'] = $status_code = $request->getResponseCode(); 313 //$httpRequest->addHeaders(array('User-Agent' => $this->userAgent));
314 // is redirect? 314 $httpRequest->addHeaders($this->getUserAgent($req_url, true));
315 if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && $request->getResponseHeader('location')) { 315 // add referer for picky sites
316 $redirectURL = $request->getResponseHeader('location'); 316 $httpRequest->addheaders(array('Referer' => $this->referer));
317 if (!preg_match('!^https?://!i', $redirectURL)) { 317 $this->requests[$orig] = array('headers'=>null, 'body'=>null, 'httpRequest'=>$httpRequest);
318 $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url); 318 $this->requests[$orig]['original_url'] = $orig;
319 } 319 $pool->attach($httpRequest);
320 if ($this->validateURL($redirectURL)) { 320 }
321 $this->debug('Redirect detected. Valid URL: '.$redirectURL); 321 }
322 // store any cookies 322 // did we get anything into the pool?
323 $cookies = $request->getResponseHeader('set-cookie'); 323 if (count($pool) > 0) {
324 if ($cookies && !is_array($cookies)) $cookies = array($cookies); 324 $this->debug('Sending request...');
325 if ($cookies) $this->cookieJar->storeCookies($url, $cookies); 325 try {
326 $this->redirectQueue[$orig] = $redirectURL; 326 $pool->send();
327 } else { 327 } catch (HttpRequestPoolException $e) {
328 $this->debug('Redirect detected. Invalid URL: '.$redirectURL); 328 // do nothing
329 } 329 }
330 } elseif (!$_header_only_type && $request->getMethod() === HttpRequest::METH_HEAD) { 330 $this->debug('Received responses');
331 // the response content-type did not match our 'header only' types, 331 foreach($subset as $orig => $url) {
332 // but we'd issues a HEAD request because we assumed it would. So 332 if (!$isRedirect) $orig = $url;
333 // let's queue a proper GET request for this item... 333 $request = $this->requests[$orig]['httpRequest'];
334 $this->debug('Wrong guess at content-type, queing GET request'); 334 //$this->requests[$orig]['headers'] = $this->headersToString($request->getResponseHeader());
335 $this->requests[$orig]['wrongGuess'] = true; 335 // getResponseHeader() doesn't return status line, so, for consistency...
336 $this->redirectQueue[$orig] = $this->requests[$orig]['effective_url']; 336 $this->requests[$orig]['headers'] = substr($request->getRawResponseMessage(), 0, $request->getResponseInfo('header_size'));
337 } elseif (strpos($this->requests[$orig]['effective_url'], '_escaped_fragment_') === false) { 337 // check content type
338 // check for <meta name='fragment' content='!'/> 338 // TODO: use getResponseHeader('content-type') or getResponseInfo()
339 // for AJAX sites, e.g. Blogger with its dynamic views templates. 339 if ($this->headerOnlyType($this->requests[$orig]['headers'])) {
340 // Based on Google's spec: https://developers.google.com/webmasters/ajax-crawling/docs/specification 340 $this->requests[$orig]['body'] = '';
341 if (isset($this->requests[$orig]['body'])) { 341 $_header_only_type = true;
342 $redirectURL = $this->getUglyURL($this->requests[$orig]['effective_url'], substr($this->requests[$orig]['body'], 0, 4000)); 342 $this->debug('Header only type returned');
343 if ($redirectURL) { 343 } else {
344 $this->debug('AJAX trigger (meta name="fragment" content="!") found. Queueing '.$redirectURL); 344 $this->requests[$orig]['body'] = $request->getResponseBody();
345 $this->redirectQueue[$orig] = $redirectURL; 345 $_header_only_type = false;
346 } 346 }
347 } 347 $this->requests[$orig]['effective_url'] = $request->getResponseInfo('effective_url');
348 } 348 $this->requests[$orig]['status_code'] = $status_code = $request->getResponseCode();
349 //die($url.' -multi- '.$request->getResponseInfo('effective_url')); 349 // is redirect?
350 $pool->detach($request); 350 if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && $request->getResponseHeader('location')) {
351 unset($this->requests[$orig]['httpRequest'], $request); 351 $redirectURL = $request->getResponseHeader('location');
352 /* 352 if (!preg_match('!^https?://!i', $redirectURL)) {
353 if ($this->minimiseMemoryUse) { 353 $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
354 if ($this->cache($url)) { 354 }
355 unset($this->requests[$url]); 355 if ($this->validateURL($redirectURL)) {
356 } 356 $this->debug('Redirect detected. Valid URL: '.$redirectURL);
357 } 357 // store any cookies
358 */ 358 $cookies = $request->getResponseHeader('set-cookie');
359 } 359 if ($cookies && !is_array($cookies)) $cookies = array($cookies);
360 } 360 if ($cookies) $this->cookieJar->storeCookies($url, $cookies);
361 } 361 $this->redirectQueue[$orig] = $redirectURL;
362 } catch (HttpException $e) { 362 } else {
363 $this->debug($e); 363 $this->debug('Redirect detected. Invalid URL: '.$redirectURL);
364 return false; 364 }
365 } 365 } elseif (!$_header_only_type && $request->getMethod() === HttpRequest::METH_HEAD) {
366 } 366 // the response content-type did not match our 'header only' types,
367 367 // but we'd issues a HEAD request because we assumed it would. So
368 ////////////////////////////////////////////////////////// 368 // let's queue a proper GET request for this item...
369 // parallel (curl_multi_*) 369 $this->debug('Wrong guess at content-type, queing GET request');
370 elseif ($this->method == self::METHOD_CURL_MULTI) { 370 $this->requests[$orig]['wrongGuess'] = true;
371 $this->debug('Starting parallel fetch (curl_multi_*)'); 371 $this->redirectQueue[$orig] = $this->requests[$orig]['effective_url'];
372 while (count($urls) > 0) { 372 } elseif (strpos($this->requests[$orig]['effective_url'], '_escaped_fragment_') === false) {
373 $this->debug('Processing set of '.min($this->maxParallelRequests, count($urls))); 373 // check for <meta name='fragment' content='!'/>
374 $subset = array_splice($urls, 0, $this->maxParallelRequests); 374 // for AJAX sites, e.g. Blogger with its dynamic views templates.
375 $pool = new RollingCurl(array($this, 'handleCurlResponse')); 375 // Based on Google's spec: https://developers.google.com/webmasters/ajax-crawling/docs/specification
376 $pool->window_size = count($subset); 376 if (isset($this->requests[$orig]['body'])) {
377 377 $redirectURL = $this->getRedirectURLfromHTML($this->requests[$orig]['effective_url'], substr($this->requests[$orig]['body'], 0, 4000));
378 foreach ($subset as $orig => $url) { 378 if ($redirectURL) {
379 if (!$isRedirect) $orig = $url; 379 $this->redirectQueue[$orig] = $redirectURL;
380 unset($this->redirectQueue[$orig]); 380 }
381 $this->debug("...$url"); 381 }
382 if (!$isRedirect && isset($this->requests[$url])) { 382 }
383 $this->debug("......in memory"); 383 //die($url.' -multi- '.$request->getResponseInfo('effective_url'));
384 /* 384 $pool->detach($request);
385 } elseif ($this->isCached($url)) { 385 unset($this->requests[$orig]['httpRequest'], $request);
386 $this->debug("......is cached"); 386 /*
387 if (!$this->minimiseMemoryUse) { 387 if ($this->minimiseMemoryUse) {
388 $this->requests[$url] = $this->getCached($url); 388 if ($this->cache($url)) {
389 } 389 unset($this->requests[$url]);
390 */ 390 }
391 } else { 391 }
392 $this->debug("......adding to pool"); 392 */
393 $req_url = $this->rewriteUrls($url); 393 }
394 $req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url; 394 }
395 $req_url = $this->removeFragment($req_url); 395 }
396 if (!empty($this->headerOnlyTypes) && !isset($this->requests[$orig]['wrongGuess']) && $this->possibleUnsupportedType($req_url)) { 396 } catch (HttpException $e) {
397 $_meth = 'HEAD'; 397 $this->debug($e);
398 } else { 398 return false;
399 $_meth = 'GET'; 399 }
400 unset($this->requests[$orig]['wrongGuess']); 400 }
401 } 401
402 $headers = array(); 402 //////////////////////////////////////////////////////////
403 //$headers[] = 'User-Agent: '.$this->userAgent; 403 // parallel (curl_multi_*)
404 $headers[] = $this->getUserAgent($req_url); 404 elseif ($this->method == self::METHOD_CURL_MULTI) {
405 // add referer for picky sites 405 $this->debug('Starting parallel fetch (curl_multi_*)');
406 $headers[] = 'Referer: '.$this->referer; 406 while (count($urls) > 0) {
407 // send cookies, if we have any 407 $this->debug('Processing set of '.min($this->maxParallelRequests, count($urls)));
408 if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) { 408 $subset = array_splice($urls, 0, $this->maxParallelRequests);
409 $this->debug("......sending cookies: $cookies"); 409 $pool = new RollingCurl(array($this, 'handleCurlResponse'));
410 $headers[] = 'Cookie: '.$cookies; 410 $pool->window_size = count($subset);
411 } 411
412 $httpRequest = new RollingCurlRequest($req_url, $_meth, null, $headers, array( 412 foreach ($subset as $orig => $url) {
413 CURLOPT_CONNECTTIMEOUT => $this->requestOptions['timeout'], 413 if (!$isRedirect) $orig = $url;
414 CURLOPT_TIMEOUT => $this->requestOptions['timeout'] 414 unset($this->redirectQueue[$orig]);
415 )); 415 $this->debug("...$url");
416 $httpRequest->set_original_url($orig); 416 if (!$isRedirect && isset($this->requests[$url])) {
417 $this->requests[$orig] = array('headers'=>null, 'body'=>null, 'httpRequest'=>$httpRequest); 417 $this->debug("......in memory");
418 $this->requests[$orig]['original_url'] = $orig; // TODO: is this needed anymore? 418 /*
419 $pool->add($httpRequest); 419 } elseif ($this->isCached($url)) {
420 } 420 $this->debug("......is cached");
421 } 421 if (!$this->minimiseMemoryUse) {
422 // did we get anything into the pool? 422 $this->requests[$url] = $this->getCached($url);
423 if (count($pool) > 0) { 423 }
424 $this->debug('Sending request...'); 424 */
425 $pool->execute(); // this will call handleCurlResponse() and populate $this->requests[$orig] 425 } else {
426 $this->debug('Received responses'); 426 $this->debug("......adding to pool");
427 foreach($subset as $orig => $url) { 427 $req_url = $this->rewriteUrls($url);
428 if (!$isRedirect) $orig = $url; 428 $req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url;
429 // $this->requests[$orig]['headers'] 429 $req_url = $this->removeFragment($req_url);
430 // $this->requests[$orig]['body'] 430 if (!empty($this->headerOnlyTypes) && !isset($this->requests[$orig]['wrongGuess']) && $this->possibleUnsupportedType($req_url)) {
431 // $this->requests[$orig]['effective_url'] 431 $_meth = 'HEAD';
432 // check content type 432 } else {
433 if ($this->headerOnlyType($this->requests[$orig]['headers'])) { 433 $_meth = 'GET';
434 $this->requests[$orig]['body'] = ''; 434 unset($this->requests[$orig]['wrongGuess']);
435 $_header_only_type = true; 435 }
436 $this->debug('Header only type returned'); 436 $headers = array();
437 } else { 437 //$headers[] = 'User-Agent: '.$this->userAgent;
438 $_header_only_type = false; 438 $headers[] = $this->getUserAgent($req_url);
439 } 439 // add referer for picky sites
440 $status_code = $this->requests[$orig]['status_code']; 440 $headers[] = 'Referer: '.$this->referer;
441 if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) { 441 // send cookies, if we have any
442 $redirectURL = $this->requests[$orig]['location']; 442 if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
443 if (!preg_match('!^https?://!i', $redirectURL)) { 443 $this->debug("......sending cookies: $cookies");
444 $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url); 444 $headers[] = 'Cookie: '.$cookies;
445 } 445 }
446 if ($this->validateURL($redirectURL)) { 446 $httpRequest = new RollingCurlRequest($req_url, $_meth, null, $headers, array(
447 $this->debug('Redirect detected. Valid URL: '.$redirectURL); 447 CURLOPT_CONNECTTIMEOUT => $this->requestOptions['timeout'],
448 // store any cookies 448 CURLOPT_TIMEOUT => $this->requestOptions['timeout']
449 $cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']); 449 ));
450 if (!empty($cookies)) $this->cookieJar->storeCookies($url, $cookies); 450 $httpRequest->set_original_url($orig);
451 $this->redirectQueue[$orig] = $redirectURL; 451 $this->requests[$orig] = array('headers'=>null, 'body'=>null, 'httpRequest'=>$httpRequest);
452 } else { 452 $this->requests[$orig]['original_url'] = $orig; // TODO: is this needed anymore?
453 $this->debug('Redirect detected. Invalid URL: '.$redirectURL); 453 $pool->add($httpRequest);
454 } 454 }
455 } elseif (!$_header_only_type && $this->requests[$orig]['method'] == 'HEAD') { 455 }
456 // the response content-type did not match our 'header only' types, 456 // did we get anything into the pool?
457 // but we'd issues a HEAD request because we assumed it would. So 457 if (count($pool) > 0) {
458 // let's queue a proper GET request for this item... 458 $this->debug('Sending request...');
459 $this->debug('Wrong guess at content-type, queing GET request'); 459 $pool->execute(); // this will call handleCurlResponse() and populate $this->requests[$orig]
460 $this->requests[$orig]['wrongGuess'] = true; 460 $this->debug('Received responses');
461 $this->redirectQueue[$orig] = $this->requests[$orig]['effective_url']; 461 foreach($subset as $orig => $url) {
462 } elseif (strpos($this->requests[$orig]['effective_url'], '_escaped_fragment_') === false) { 462 if (!$isRedirect) $orig = $url;
463 // check for <meta name='fragment' content='!'/> 463 // $this->requests[$orig]['headers']
464 // for AJAX sites, e.g. Blogger with its dynamic views templates. 464 // $this->requests[$orig]['body']
465 // Based on Google's spec: https://developers.google.com/webmasters/ajax-crawling/docs/specification 465 // $this->requests[$orig]['effective_url']
466 if (isset($this->requests[$orig]['body'])) { 466 // check content type
467 $redirectURL = $this->getUglyURL($this->requests[$orig]['effective_url'], substr($this->requests[$orig]['body'], 0, 4000)); 467 if ($this->headerOnlyType($this->requests[$orig]['headers'])) {
468 if ($redirectURL) { 468 $this->requests[$orig]['body'] = '';
469 $this->debug('AJAX trigger (meta name="fragment" content="!") found. Queueing '.$redirectURL); 469 $_header_only_type = true;
470 $this->redirectQueue[$orig] = $redirectURL; 470 $this->debug('Header only type returned');
471 } 471 } else {
472 } 472 $_header_only_type = false;
473 } 473 }
474 // die($url.' -multi- '.$request->getResponseInfo('effective_url')); 474 $status_code = $this->requests[$orig]['status_code'];
475 unset($this->requests[$orig]['httpRequest'], $this->requests[$orig]['method']); 475 if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) {
476 } 476 $redirectURL = $this->requests[$orig]['location'];
477 } 477 if (!preg_match('!^https?://!i', $redirectURL)) {
478 } 478 $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
479 } 479 }
480 480 if ($this->validateURL($redirectURL)) {
481 ////////////////////////////////////////////////////// 481 $this->debug('Redirect detected. Valid URL: '.$redirectURL);
482 // sequential (file_get_contents) 482 // store any cookies
483 else { 483 $cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']);
484 $this->debug('Starting sequential fetch (file_get_contents)'); 484 if (!empty($cookies)) $this->cookieJar->storeCookies($url, $cookies);
485 $this->debug('Processing set of '.count($urls)); 485 $this->redirectQueue[$orig] = $redirectURL;
486 foreach ($urls as $orig => $url) { 486 } else {
487 if (!$isRedirect) $orig = $url; 487 $this->debug('Redirect detected. Invalid URL: '.$redirectURL);
488 unset($this->redirectQueue[$orig]); 488 }
489 $this->debug("...$url"); 489 } elseif (!$_header_only_type && $this->requests[$orig]['method'] == 'HEAD') {
490 if (!$isRedirect && isset($this->requests[$url])) { 490 // the response content-type did not match our 'header only' types,
491 $this->debug("......in memory"); 491 // but we'd issues a HEAD request because we assumed it would. So
492 /* 492 // let's queue a proper GET request for this item...
493 } elseif ($this->isCached($url)) { 493 $this->debug('Wrong guess at content-type, queing GET request');
494 $this->debug("......is cached"); 494 $this->requests[$orig]['wrongGuess'] = true;
495 if (!$this->minimiseMemoryUse) { 495 $this->redirectQueue[$orig] = $this->requests[$orig]['effective_url'];
496 $this->requests[$url] = $this->getCached($url); 496 } elseif (strpos($this->requests[$orig]['effective_url'], '_escaped_fragment_') === false) {
497 } 497 // check for <meta name='fragment' content='!'/>
498 */ 498 // for AJAX sites, e.g. Blogger with its dynamic views templates.
499 } else { 499 // Based on Google's spec: https://developers.google.com/webmasters/ajax-crawling/docs/specification
500 $this->debug("Sending request for $url"); 500 if (isset($this->requests[$orig]['body'])) {
501 $this->requests[$orig]['original_url'] = $orig; 501 $redirectURL = $this->getRedirectURLfromHTML($this->requests[$orig]['effective_url'], substr($this->requests[$orig]['body'], 0, 4000));
502 $req_url = $this->rewriteUrls($url); 502 if ($redirectURL) {
503 $req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url; 503 $this->redirectQueue[$orig] = $redirectURL;
504 $req_url = $this->removeFragment($req_url); 504 }
505 // send cookies, if we have any 505 }
506 $httpContext = $this->httpContext; 506 }
507 $httpContext['http']['header'] .= $this->getUserAgent($req_url)."\r\n"; 507 // die($url.' -multi- '.$request->getResponseInfo('effective_url'));
508 // add referer for picky sites 508 unset($this->requests[$orig]['httpRequest'], $this->requests[$orig]['method']);
509 $httpContext['http']['header'] .= 'Referer: '.$this->referer."\r\n"; 509 }
510 if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) { 510 }
511 $this->debug("......sending cookies: $cookies"); 511 }
512 $httpContext['http']['header'] .= 'Cookie: '.$cookies."\r\n"; 512 }
513 } 513
514 if (false !== ($html = @file_get_contents($req_url, false, stream_context_create($httpContext)))) { 514 //////////////////////////////////////////////////////
515 $this->debug('Received response'); 515 // sequential (file_get_contents)
516 // get status code 516 else {
517 if (!isset($http_response_header[0]) || !preg_match('!^HTTP/\d+\.\d+\s+(\d+)!', trim($http_response_header[0]), $match)) { 517 $this->debug('Starting sequential fetch (file_get_contents)');
518 $this->debug('Error: no status code found'); 518 $this->debug('Processing set of '.count($urls));
519 // TODO: handle error - no status code 519 foreach ($urls as $orig => $url) {
520 } else { 520 if (!$isRedirect) $orig = $url;
521 $this->requests[$orig]['headers'] = $this->headersToString($http_response_header, false); 521 unset($this->redirectQueue[$orig]);
522 // check content type 522 $this->debug("...$url");
523 if ($this->headerOnlyType($this->requests[$orig]['headers'])) { 523 if (!$isRedirect && isset($this->requests[$url])) {
524 $this->requests[$orig]['body'] = ''; 524 $this->debug("......in memory");
525 } else { 525 /*
526 $this->requests[$orig]['body'] = $html; 526 } elseif ($this->isCached($url)) {
527 } 527 $this->debug("......is cached");
528 $this->requests[$orig]['effective_url'] = $req_url; 528 if (!$this->minimiseMemoryUse) {
529 $this->requests[$orig]['status_code'] = $status_code = (int)$match[1]; 529 $this->requests[$url] = $this->getCached($url);
530 unset($match); 530 }
531 // handle redirect 531 */
532 if (preg_match('/^Location:(.*?)$/mi', $this->requests[$orig]['headers'], $match)) { 532 } else {
533 $this->requests[$orig]['location'] = trim($match[1]); 533 $this->debug("Sending request for $url");
534 } 534 $this->requests[$orig]['original_url'] = $orig;
535 if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) { 535 $req_url = $this->rewriteUrls($url);
536 $redirectURL = $this->requests[$orig]['location']; 536 $req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url;
537 if (!preg_match('!^https?://!i', $redirectURL)) { 537 $req_url = $this->removeFragment($req_url);
538 $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url); 538 // send cookies, if we have any
539 } 539 $httpContext = $this->httpContext;
540 if ($this->validateURL($redirectURL)) { 540 $httpContext['http']['header'] .= $this->getUserAgent($req_url)."\r\n";
541 $this->debug('Redirect detected. Valid URL: '.$redirectURL); 541 // add referer for picky sites
542 // store any cookies 542 $httpContext['http']['header'] .= 'Referer: '.$this->referer."\r\n";
543 $cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']); 543 if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
544 if (!empty($cookies)) $this->cookieJar->storeCookies($url, $cookies); 544 $this->debug("......sending cookies: $cookies");
545 $this->redirectQueue[$orig] = $redirectURL; 545 $httpContext['http']['header'] .= 'Cookie: '.$cookies."\r\n";
546 } else { 546 }
547 $this->debug('Redirect detected. Invalid URL: '.$redirectURL); 547 if (false !== ($html = @file_get_contents($req_url, false, stream_context_create($httpContext)))) {
548 } 548 $this->debug('Received response');
549 } elseif (strpos($this->requests[$orig]['effective_url'], '_escaped_fragment_') === false) { 549 // get status code
550 // check for <meta name='fragment' content='!'/> 550 if (!isset($http_response_header[0]) || !preg_match('!^HTTP/\d+\.\d+\s+(\d+)!', trim($http_response_header[0]), $match)) {
551 // for AJAX sites, e.g. Blogger with its dynamic views templates. 551 $this->debug('Error: no status code found');
552 // Based on Google's spec: https://developers.google.com/webmasters/ajax-crawling/docs/specification 552 // TODO: handle error - no status code
553 if (isset($this->requests[$orig]['body'])) { 553 } else {
554 $redirectURL = $this->getUglyURL($this->requests[$orig]['effective_url'], substr($this->requests[$orig]['body'], 0, 4000)); 554 $this->requests[$orig]['headers'] = $this->headersToString($http_response_header, false);
555 if ($redirectURL) { 555 // check content type
556 $this->debug('AJAX trigger (meta name="fragment" content="!") found. Queueing '.$redirectURL); 556 if ($this->headerOnlyType($this->requests[$orig]['headers'])) {
557 $this->redirectQueue[$orig] = $redirectURL; 557 $this->requests[$orig]['body'] = '';
558 } 558 } else {
559 } 559 $this->requests[$orig]['body'] = $html;
560 } 560 }
561 } 561 $this->requests[$orig]['effective_url'] = $req_url;
562 } else { 562 $this->requests[$orig]['status_code'] = $status_code = (int)$match[1];
563 $this->debug('Error retrieving URL'); 563 unset($match);
564 //print_r($req_url); 564 // handle redirect
565 //print_r($http_response_header); 565 if (preg_match('/^Location:(.*?)$/mi', $this->requests[$orig]['headers'], $match)) {
566 //print_r($html); 566 $this->requests[$orig]['location'] = trim($match[1]);
567 567 }
568 // TODO: handle error - failed to retrieve URL 568 if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) {
569 } 569 $redirectURL = $this->requests[$orig]['location'];
570 } 570 if (!preg_match('!^https?://!i', $redirectURL)) {
571 } 571 $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
572 } 572 }
573 } 573 if ($this->validateURL($redirectURL)) {
574 574 $this->debug('Redirect detected. Valid URL: '.$redirectURL);
575 public function handleCurlResponse($response, $info, $request) { 575 // store any cookies
576 $orig = $request->url_original; 576 $cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']);
577 $this->requests[$orig]['headers'] = substr($response, 0, $info['header_size']); 577 if (!empty($cookies)) $this->cookieJar->storeCookies($url, $cookies);
578 $this->requests[$orig]['body'] = substr($response, $info['header_size']); 578 $this->redirectQueue[$orig] = $redirectURL;
579 $this->requests[$orig]['method'] = $request->method; 579 } else {
580 $this->requests[$orig]['effective_url'] = $info['url']; 580 $this->debug('Redirect detected. Invalid URL: '.$redirectURL);
581 $this->requests[$orig]['status_code'] = (int)$info['http_code']; 581 }
582 if (preg_match('/^Location:(.*?)$/mi', $this->requests[$orig]['headers'], $match)) { 582 } elseif (strpos($this->requests[$orig]['effective_url'], '_escaped_fragment_') === false) {
583 $this->requests[$orig]['location'] = trim($match[1]); 583 // check for <meta name='fragment' content='!'/>
584 } 584 // for AJAX sites, e.g. Blogger with its dynamic views templates.
585 } 585 // Based on Google's spec: https://developers.google.com/webmasters/ajax-crawling/docs/specification
586 586 if (isset($this->requests[$orig]['body'])) {
587 protected function headersToString(array $headers, $associative=true) { 587 $redirectURL = $this->getRedirectURLfromHTML($this->requests[$orig]['effective_url'], substr($this->requests[$orig]['body'], 0, 4000));
588 if (!$associative) { 588 if ($redirectURL) {
589 return implode("\n", $headers); 589 $this->redirectQueue[$orig] = $redirectURL;
590 } else { 590 }
591 $str = ''; 591 }
592 foreach ($headers as $key => $val) { 592 }
593 if (is_array($val)) { 593 }
594 foreach ($val as $v) $str .= "$key: $v\n"; 594 } else {
595 } else { 595 $this->debug('Error retrieving URL');
596 $str .= "$key: $val\n"; 596 //print_r($req_url);
597 } 597 //print_r($http_response_header);
598 } 598 //print_r($html);
599 return rtrim($str); 599
600 } 600 // TODO: handle error - failed to retrieve URL
601 } 601 }
602 602 }
603 public function get($url, $remove=false, $gzdecode=true) { 603 }
604 $url = "$url"; 604 }
605 if (isset($this->requests[$url]) && isset($this->requests[$url]['body'])) { 605 }
606 $this->debug("URL already fetched - in memory ($url, effective: {$this->requests[$url]['effective_url']})"); 606
607 $response = $this->requests[$url]; 607 public function handleCurlResponse($response, $info, $request) {
608 /* 608 $orig = $request->url_original;
609 } elseif ($this->isCached($url)) { 609 $this->requests[$orig]['headers'] = substr($response, 0, $info['header_size']);
610 $this->debug("URL already fetched - in disk cache ($url)"); 610 $this->requests[$orig]['body'] = substr($response, $info['header_size']);
611 $response = $this->getCached($url); 611 $this->requests[$orig]['method'] = $request->method;
612 $this->requests[$url] = $response; 612 $this->requests[$orig]['effective_url'] = $info['url'];
613 */ 613 $this->requests[$orig]['status_code'] = (int)$info['http_code'];
614 } else { 614 if (preg_match('/^Location:(.*?)$/mi', $this->requests[$orig]['headers'], $match)) {
615 $this->debug("Fetching URL ($url)"); 615 $this->requests[$orig]['location'] = trim($match[1]);
616 $this->fetchAll(array($url)); 616 }
617 if (isset($this->requests[$url]) && isset($this->requests[$url]['body'])) { 617 }
618 $response = $this->requests[$url]; 618
619 } else { 619 protected function headersToString(array $headers, $associative=true) {
620 $this->debug("Request failed"); 620 if (!$associative) {
621 $response = false; 621 return implode("\n", $headers);
622 } 622 } else {
623 } 623 $str = '';
624 /* 624 foreach ($headers as $key => $val) {
625 if ($this->minimiseMemoryUse && $response) { 625 if (is_array($val)) {
626 $this->cache($url); 626 foreach ($val as $v) $str .= "$key: $v\n";
627 unset($this->requests[$url]); 627 } else {
628 } 628 $str .= "$key: $val\n";
629 */ 629 }
630 if ($remove && $response) unset($this->requests[$url]); 630 }
631 if ($gzdecode && stripos($response['headers'], 'Content-Encoding: gzip')) { 631 return rtrim($str);
632 if ($html = gzdecode($response['body'])) { 632 }
633 $response['body'] = $html; 633 }
634 } 634
635 } 635 public function get($url, $remove=false, $gzdecode=true) {
636 return $response; 636 $url = "$url";
637 } 637 if (isset($this->requests[$url]) && isset($this->requests[$url]['body'])) {
638 638 $this->debug("URL already fetched - in memory ($url, effective: {$this->requests[$url]['effective_url']})");
639 public function parallelSupport() { 639 $response = $this->requests[$url];
640 return class_exists('HttpRequestPool') || function_exists('curl_multi_init'); 640 /*
641 } 641 } elseif ($this->isCached($url)) {
642 642 $this->debug("URL already fetched - in disk cache ($url)");
643 private function headerOnlyType($headers) { 643 $response = $this->getCached($url);
644 if (preg_match('!^Content-Type:\s*(([a-z-]+)/([^;\r\n ]+))!im', $headers, $match)) { 644 $this->requests[$url] = $response;
645 // look for full mime type (e.g. image/jpeg) or just type (e.g. image) 645 */
646 $match[1] = strtolower(trim($match[1])); 646 } else {
647 $match[2] = strtolower(trim($match[2])); 647 $this->debug("Fetching URL ($url)");
648 foreach (array($match[1], $match[2]) as $mime) { 648 $this->fetchAll(array($url));
649 if (in_array($mime, $this->headerOnlyTypes)) return true; 649 if (isset($this->requests[$url]) && isset($this->requests[$url]['body'])) {
650 } 650 $response = $this->requests[$url];
651 } 651 } else {
652 return false; 652 $this->debug("Request failed");
653 } 653 $response = false;
654 654 }
655 private function possibleUnsupportedType($url) { 655 }
656 $path = @parse_url($url, PHP_URL_PATH); 656 /*
657 if ($path && strpos($path, '.') !== false) { 657 if ($this->minimiseMemoryUse && $response) {
658 $ext = strtolower(trim(pathinfo($path, PATHINFO_EXTENSION))); 658 $this->cache($url);
659 return in_array($ext, $this->headerOnlyClues); 659 unset($this->requests[$url]);
660 } 660 }
661 return false; 661 */
662 } 662 if ($remove && $response) unset($this->requests[$url]);
663} 663 if ($gzdecode && stripos($response['headers'], 'Content-Encoding: gzip')) {
664 664 if ($html = gzdecode($response['body'])) {
665// gzdecode from http://www.php.net/manual/en/function.gzdecode.php#82930 665 $response['body'] = $html;
666if (!function_exists('gzdecode')) { 666 }
667 function gzdecode($data,&$filename='',&$error='',$maxlength=null) 667 }
668 { 668 return $response;
669 $len = strlen($data); 669 }
670 if ($len < 18 || strcmp(substr($data,0,2),"\x1f\x8b")) { 670
671 $error = "Not in GZIP format."; 671 public function parallelSupport() {
672 return null; // Not GZIP format (See RFC 1952) 672 return class_exists('HttpRequestPool') || function_exists('curl_multi_init');
673 } 673 }
674 $method = ord(substr($data,2,1)); // Compression method 674
675 $flags = ord(substr($data,3,1)); // Flags 675 private function headerOnlyType($headers) {
676 if ($flags & 31 != $flags) { 676 if (preg_match('!^Content-Type:\s*(([a-z-]+)/([^;\r\n ]+))!im', $headers, $match)) {
677 $error = "Reserved bits not allowed."; 677 // look for full mime type (e.g. image/jpeg) or just type (e.g. image)
678 return null; 678 $match[1] = strtolower(trim($match[1]));
679 } 679 $match[2] = strtolower(trim($match[2]));
680 // NOTE: $mtime may be negative (PHP integer limitations) 680 foreach (array($match[1], $match[2]) as $mime) {
681 $mtime = unpack("V", substr($data,4,4)); 681 if (in_array($mime, $this->headerOnlyTypes)) return true;
682 $mtime = $mtime[1]; 682 }
683 $xfl = substr($data,8,1); 683 }
684 $os = substr($data,8,1); 684 return false;
685 $headerlen = 10; 685 }
686 $extralen = 0; 686
687 $extra = ""; 687 private function possibleUnsupportedType($url) {
688 if ($flags & 4) { 688 $path = @parse_url($url, PHP_URL_PATH);
689 // 2-byte length prefixed EXTRA data in header 689 if ($path && strpos($path, '.') !== false) {
690 if ($len - $headerlen - 2 < 8) { 690 $ext = strtolower(trim(pathinfo($path, PATHINFO_EXTENSION)));
691 return false; // invalid 691 return in_array($ext, $this->headerOnlyClues);
692 } 692 }
693 $extralen = unpack("v",substr($data,8,2)); 693 return false;
694 $extralen = $extralen[1]; 694 }
695 if ($len - $headerlen - 2 - $extralen < 8) { 695}
696 return false; // invalid 696
697 } 697// gzdecode from http://www.php.net/manual/en/function.gzdecode.php#82930
698 $extra = substr($data,10,$extralen); 698if (!function_exists('gzdecode')) {
699 $headerlen += 2 + $extralen; 699 function gzdecode($data,&$filename='',&$error='',$maxlength=null)
700 } 700 {
701 $filenamelen = 0; 701 $len = strlen($data);
702 $filename = ""; 702 if ($len < 18 || strcmp(substr($data,0,2),"\x1f\x8b")) {
703 if ($flags & 8) { 703 $error = "Not in GZIP format.";
704 // C-style string 704 return null; // Not GZIP format (See RFC 1952)
705 if ($len - $headerlen - 1 < 8) { 705 }
706 return false; // invalid 706 $method = ord(substr($data,2,1)); // Compression method
707 } 707 $flags = ord(substr($data,3,1)); // Flags
708 $filenamelen = strpos(substr($data,$headerlen),chr(0)); 708 if ($flags & 31 != $flags) {
709 if ($filenamelen === false || $len - $headerlen - $filenamelen - 1 < 8) { 709 $error = "Reserved bits not allowed.";
710 return false; // invalid 710 return null;
711 } 711 }
712 $filename = substr($data,$headerlen,$filenamelen); 712 // NOTE: $mtime may be negative (PHP integer limitations)
713 $headerlen += $filenamelen + 1; 713 $mtime = unpack("V", substr($data,4,4));
714 } 714 $mtime = $mtime[1];
715 $commentlen = 0; 715 $xfl = substr($data,8,1);
716 $comment = ""; 716 $os = substr($data,8,1);
717 if ($flags & 16) { 717 $headerlen = 10;
718 // C-style string COMMENT data in header 718 $extralen = 0;
719 if ($len - $headerlen - 1 < 8) { 719 $extra = "";
720 return false; // invalid 720 if ($flags & 4) {
721 } 721 // 2-byte length prefixed EXTRA data in header
722 $commentlen = strpos(substr($data,$headerlen),chr(0)); 722 if ($len - $headerlen - 2 < 8) {
723 if ($commentlen === false || $len - $headerlen - $commentlen - 1 < 8) { 723 return false; // invalid
724 return false; // Invalid header format 724 }
725 } 725 $extralen = unpack("v",substr($data,8,2));
726 $comment = substr($data,$headerlen,$commentlen); 726 $extralen = $extralen[1];
727 $headerlen += $commentlen + 1; 727 if ($len - $headerlen - 2 - $extralen < 8) {
728 } 728 return false; // invalid
729 $headercrc = ""; 729 }
730 if ($flags & 2) { 730 $extra = substr($data,10,$extralen);
731 // 2-bytes (lowest order) of CRC32 on header present 731 $headerlen += 2 + $extralen;
732 if ($len - $headerlen - 2 < 8) { 732 }
733 return false; // invalid 733 $filenamelen = 0;
734 } 734 $filename = "";
735 $calccrc = crc32(substr($data,0,$headerlen)) & 0xffff; 735 if ($flags & 8) {
736 $headercrc = unpack("v", substr($data,$headerlen,2)); 736 // C-style string
737 $headercrc = $headercrc[1]; 737 if ($len - $headerlen - 1 < 8) {
738 if ($headercrc != $calccrc) { 738 return false; // invalid
739 $error = "Header checksum failed."; 739 }
740 return false; // Bad header CRC 740 $filenamelen = strpos(substr($data,$headerlen),chr(0));
741 } 741 if ($filenamelen === false || $len - $headerlen - $filenamelen - 1 < 8) {
742 $headerlen += 2; 742 return false; // invalid
743 } 743 }
744 // GZIP FOOTER 744 $filename = substr($data,$headerlen,$filenamelen);
745 $datacrc = unpack("V",substr($data,-8,4)); 745 $headerlen += $filenamelen + 1;
746 $datacrc = sprintf('%u',$datacrc[1] & 0xFFFFFFFF); 746 }
747 $isize = unpack("V",substr($data,-4)); 747 $commentlen = 0;
748 $isize = $isize[1]; 748 $comment = "";
749 // decompression: 749 if ($flags & 16) {
750 $bodylen = $len-$headerlen-8; 750 // C-style string COMMENT data in header
751 if ($bodylen < 1) { 751 if ($len - $headerlen - 1 < 8) {
752 // IMPLEMENTATION BUG! 752 return false; // invalid
753 return null; 753 }
754 } 754 $commentlen = strpos(substr($data,$headerlen),chr(0));
755 $body = substr($data,$headerlen,$bodylen); 755 if ($commentlen === false || $len - $headerlen - $commentlen - 1 < 8) {
756 $data = ""; 756 return false; // Invalid header format
757 if ($bodylen > 0) { 757 }
758 switch ($method) { 758 $comment = substr($data,$headerlen,$commentlen);
759 case 8: 759 $headerlen += $commentlen + 1;
760 // Currently the only supported compression method: 760 }
761 $data = gzinflate($body,$maxlength); 761 $headercrc = "";
762 break; 762 if ($flags & 2) {
763 default: 763 // 2-bytes (lowest order) of CRC32 on header present
764 $error = "Unknown compression method."; 764 if ($len - $headerlen - 2 < 8) {
765 return false; 765 return false; // invalid
766 } 766 }
767 } // zero-byte body content is allowed 767 $calccrc = crc32(substr($data,0,$headerlen)) & 0xffff;
768 // Verifiy CRC32 768 $headercrc = unpack("v", substr($data,$headerlen,2));
769 $crc = sprintf("%u",crc32($data)); 769 $headercrc = $headercrc[1];
770 $crcOK = $crc == $datacrc; 770 if ($headercrc != $calccrc) {
771 $lenOK = $isize == strlen($data); 771 $error = "Header checksum failed.";
772 if (!$lenOK || !$crcOK) { 772 return false; // Bad header CRC
773 $error = ( $lenOK ? '' : 'Length check FAILED. ') . ( $crcOK ? '' : 'Checksum FAILED.'); 773 }
774 return false; 774 $headerlen += 2;
775 } 775 }
776 return $data; 776 // GZIP FOOTER
777 } 777 $datacrc = unpack("V",substr($data,-8,4));
778} 778 $datacrc = sprintf('%u',$datacrc[1] & 0xFFFFFFFF);
779?> \ No newline at end of file 779 $isize = unpack("V",substr($data,-4));
780 $isize = $isize[1];
781 // decompression:
782 $bodylen = $len-$headerlen-8;
783 if ($bodylen < 1) {
784 // IMPLEMENTATION BUG!
785 return null;
786 }
787 $body = substr($data,$headerlen,$bodylen);
788 $data = "";
789 if ($bodylen > 0) {
790 switch ($method) {
791 case 8:
792 // Currently the only supported compression method:
793 $data = gzinflate($body,$maxlength);
794 break;
795 default:
796 $error = "Unknown compression method.";
797 return false;
798 }
799 } // zero-byte body content is allowed
800 // Verifiy CRC32
801 $crc = sprintf("%u",crc32($data));
802 $crcOK = $crc == $datacrc;
803 $lenOK = $isize == strlen($data);
804 if (!$lenOK || !$crcOK) {
805 $error = ( $lenOK ? '' : 'Length check FAILED. ') . ( $crcOK ? '' : 'Checksum FAILED.');
806 return false;
807 }
808 return $data;
809 }
810} \ No newline at end of file
diff --git a/inc/3rdparty/libraries/humble-http-agent/SimplePie_HumbleHttpAgent.php b/inc/3rdparty/libraries/humble-http-agent/SimplePie_HumbleHttpAgent.php
index ecd46d5f..c524a1ee 100644
--- a/inc/3rdparty/libraries/humble-http-agent/SimplePie_HumbleHttpAgent.php
+++ b/inc/3rdparty/libraries/humble-http-agent/SimplePie_HumbleHttpAgent.php
@@ -1,79 +1,78 @@
1<?php 1<?php
2/** 2/**
3 * Humble HTTP Agent extension for SimplePie_File 3 * Humble HTTP Agent extension for SimplePie_File
4 * 4 *
5 * This class is designed to extend and override SimplePie_File 5 * This class is designed to extend and override SimplePie_File
6 * in order to prevent duplicate HTTP requests being sent out. 6 * in order to prevent duplicate HTTP requests being sent out.
7 * The idea is to initialise an instance of Humble HTTP Agent 7 * The idea is to initialise an instance of Humble HTTP Agent
8 * and attach it, to a static class variable, of this class. 8 * and attach it, to a static class variable, of this class.
9 * SimplePie will then automatically initialise this class 9 * SimplePie will then automatically initialise this class
10 * 10 *
11 * @date 2011-02-28 11 * @date 2011-02-28
12 */ 12 */
13 13
14class SimplePie_HumbleHttpAgent extends SimplePie_File 14class SimplePie_HumbleHttpAgent extends SimplePie_File
15{ 15{
16 protected static $agent; 16 protected static $agent;
17 var $url; 17 var $url;
18 var $useragent; 18 var $useragent;
19 var $success = true; 19 var $success = true;
20 var $headers = array(); 20 var $headers = array();
21 var $body; 21 var $body;
22 var $status_code; 22 var $status_code;
23 var $redirects = 0; 23 var $redirects = 0;
24 var $error; 24 var $error;
25 var $method = SIMPLEPIE_FILE_SOURCE_NONE; 25 var $method = SIMPLEPIE_FILE_SOURCE_NONE;
26 26
27 public static function set_agent(HumbleHttpAgent $agent) { 27 public static function set_agent(HumbleHttpAgent $agent) {
28 self::$agent = $agent; 28 self::$agent = $agent;
29 } 29 }
30 30
31 public function __construct($url, $timeout = 10, $redirects = 5, $headers = null, $useragent = null, $force_fsockopen = false) { 31 public function __construct($url, $timeout = 10, $redirects = 5, $headers = null, $useragent = null, $force_fsockopen = false) {
32 if (class_exists('idna_convert')) 32 if (class_exists('idna_convert'))
33 { 33 {
34 $idn = new idna_convert(); 34 $idn = new idna_convert();
35 $parsed = SimplePie_Misc::parse_url($url); 35 $parsed = SimplePie_Misc::parse_url($url);
36 $url = SimplePie_Misc::compress_parse_url($parsed['scheme'], $idn->encode($parsed['authority']), $parsed['path'], $parsed['query'], $parsed['fragment']); 36 $url = SimplePie_Misc::compress_parse_url($parsed['scheme'], $idn->encode($parsed['authority']), $parsed['path'], $parsed['query'], $parsed['fragment']);
37 } 37 }
38 $this->url = $url; 38 $this->url = $url;
39 $this->useragent = $useragent; 39 $this->useragent = $useragent;
40 if (preg_match('/^http(s)?:\/\//i', $url)) 40 if (preg_match('/^http(s)?:\/\//i', $url))
41 { 41 {
42 if (!is_array($headers)) 42 if (!is_array($headers))
43 { 43 {
44 $headers = array(); 44 $headers = array();
45 } 45 }
46 $this->method = SIMPLEPIE_FILE_SOURCE_REMOTE | SIMPLEPIE_FILE_SOURCE_CURL; 46 $this->method = SIMPLEPIE_FILE_SOURCE_REMOTE | SIMPLEPIE_FILE_SOURCE_CURL;
47 $headers2 = array(); 47 $headers2 = array();
48 foreach ($headers as $key => $value) { 48 foreach ($headers as $key => $value) {
49 $headers2[] = "$key: $value"; 49 $headers2[] = "$key: $value";
50 } 50 }
51 //TODO: allow for HTTP headers 51 //TODO: allow for HTTP headers
52 // curl_setopt($fp, CURLOPT_HTTPHEADER, $headers2); 52 // curl_setopt($fp, CURLOPT_HTTPHEADER, $headers2);
53 53
54 $response = self::$agent->get($url); 54 $response = self::$agent->get($url);
55 55
56 if ($response === false || !isset($response['status_code'])) { 56 if ($response === false || !isset($response['status_code'])) {
57 $this->error = 'failed to fetch URL'; 57 $this->error = 'failed to fetch URL';
58 $this->success = false; 58 $this->success = false;
59 } else { 59 } else {
60 // The extra lines at the end are there to satisfy SimplePie's HTTP parser. 60 // The extra lines at the end are there to satisfy SimplePie's HTTP parser.
61 // The class expects a full HTTP message, whereas we're giving it only 61 // The class expects a full HTTP message, whereas we're giving it only
62 // headers - the new lines indicate the start of the body. 62 // headers - the new lines indicate the start of the body.
63 $parser = new SimplePie_HTTP_Parser($response['headers']."\r\n\r\n"); 63 $parser = new SimplePie_HTTP_Parser($response['headers']."\r\n\r\n");
64 if ($parser->parse()) { 64 if ($parser->parse()) {
65 $this->headers = $parser->headers; 65 $this->headers = $parser->headers;
66 //$this->body = $parser->body; 66 //$this->body = $parser->body;
67 $this->body = $response['body']; 67 $this->body = $response['body'];
68 $this->status_code = $parser->status_code; 68 $this->status_code = $parser->status_code;
69 } 69 }
70 } 70 }
71 } 71 }
72 else 72 else
73 { 73 {
74 $this->error = 'invalid URL'; 74 $this->error = 'invalid URL';
75 $this->success = false; 75 $this->success = false;
76 } 76 }
77 } 77 }
78} 78} \ No newline at end of file
79?> \ No newline at end of file