From 42c80841c846610be280218d53fcde06b0f0063b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20L=C5=93uillet?= Date: Fri, 6 Dec 2013 09:45:27 +0100 Subject: [change] we now use Full-Text RSS 3.1, thank you so much @fivefilters --- inc/3rdparty/humble-http-agent/CookieJar.php | 404 ------------ inc/3rdparty/humble-http-agent/HumbleHttpAgent.php | 720 --------------------- inc/3rdparty/humble-http-agent/RollingCurl.php | 402 ------------ .../SimplePie_HumbleHttpAgent.php | 79 --- 4 files changed, 1605 deletions(-) delete mode 100644 inc/3rdparty/humble-http-agent/CookieJar.php delete mode 100644 inc/3rdparty/humble-http-agent/HumbleHttpAgent.php delete mode 100644 inc/3rdparty/humble-http-agent/RollingCurl.php delete mode 100644 inc/3rdparty/humble-http-agent/SimplePie_HumbleHttpAgent.php (limited to 'inc/3rdparty/humble-http-agent') diff --git a/inc/3rdparty/humble-http-agent/CookieJar.php b/inc/3rdparty/humble-http-agent/CookieJar.php deleted file mode 100644 index d91b711e..00000000 --- a/inc/3rdparty/humble-http-agent/CookieJar.php +++ /dev/null @@ -1,404 +0,0 @@ - - * - * This class should be used to handle cookies (storing cookies from HTTP response messages, and - * sending out cookies in HTTP request messages). This has been adapted for FiveFilters.org - * from the original version used in HTTP Navigator. See http://www.keyvan.net/code/http-navigator/ - * - * This class is mainly based on Cookies.pm from the libwww-perl collection . - * Unlike Cookies.pm, this class only supports the Netscape cookie spec, not RFC 2965. - * - * @version 0.5 - * @date 2011-03-15 - * @see http://php.net/HttpRequestPool - * @author Keyvan Minoukadeh - * @copyright 2011 Keyvan Minoukadeh - * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3 - */ - -class CookieJar -{ - /** - * Cookies - array containing all cookies. - * - *
-    * Cookies are stored like this:
-    *   [domain][path][name] = array
-    * where array is:
-    *   0 => value, 1 => secure, 2 => expires
-    * 
- * @var array - * @access private - */ - public $cookies = array(); - public $debug = false; - - /** - * Constructor - */ - function __construct() { - } - - protected function debug($msg, $file=null, $line=null) { - if ($this->debug) { - $mem = round(memory_get_usage()/1024, 2); - $memPeak = round(memory_get_peak_usage()/1024, 2); - echo '* ',$msg; - if (isset($file, $line)) echo " ($file line $line)"; - echo ' - mem used: ',$mem," (peak: $memPeak)\n"; - ob_flush(); - flush(); - } - } - - /** - * Get matching cookies - * - * Only use this method if you cannot use add_cookie_header(), for example, if you want to use - * this cookie jar class without using the request class. - * - * @param array $param associative array containing 'domain', 'path', 'secure' keys - * @return string - * @see add_cookie_header() - */ - public function getMatchingCookies($url) - { - if (($parts = @parse_url($url)) && isset($parts['scheme'], $parts['host'], $parts['path'])) { - $param['domain'] = $parts['host']; - $param['path'] = $parts['path']; - $param['secure'] = (strtolower($parts['scheme']) == 'https'); - unset($parts); - } else { - return false; - } - // RFC 2965 notes: - // If multiple cookies satisfy the criteria above, they are ordered in - // the Cookie header such that those with more specific Path attributes - // precede those with less specific. Ordering with respect to other - // attributes (e.g., Domain) is unspecified. - $domain = $param['domain']; - if (strpos($domain, '.') === false) $domain .= '.local'; - $request_path = $param['path']; - if ($request_path == '') $request_path = '/'; - $request_secure = $param['secure']; - $now = time(); - $matched_cookies = array(); - // domain - find matching domains - $this->debug('Finding matching domains for '.$domain, __FILE__, __LINE__); - while (strpos($domain, '.') !== false) { - if (isset($this->cookies[$domain])) { - $this->debug(' domain match found: '.$domain); - $cookies =& $this->cookies[$domain]; - } else { - $domain = $this->_reduce_domain($domain); - continue; - } - // paths - find matching paths starting from most specific - $this->debug(' - Finding matching paths for '.$request_path); - $paths = array_keys($cookies); - usort($paths, array($this, '_cmp_length')); - foreach ($paths as $path) { - // continue to next cookie if request path does not path-match cookie path - if (!$this->_path_match($request_path, $path)) continue; - // loop through cookie names - $this->debug(' path match found: '.$path); - foreach ($cookies[$path] as $name => $values) { - // if this cookie is secure but request isn't, continue to next cookie - if ($values[1] && !$request_secure) continue; - // if cookie is not a session cookie and has expired, continue to next cookie - if (is_int($values[2]) && ($values[2] < $now)) continue; - // cookie matches request - $this->debug(' cookie match: '.$name.'='.$values[0]); - $matched_cookies[] = $name.'='.$values[0]; - } - } - $domain = $this->_reduce_domain($domain); - } - // return cookies - return implode('; ', $matched_cookies); - } - - /** - * Parse Set-Cookie values. - * - * Only use this method if you cannot use extract_cookies(), for example, if you want to use - * this cookie jar class without using the response class. - * - * @param array $set_cookies array holding 1 or more "Set-Cookie" header values - * @param array $param associative array containing 'host', 'path' keys - * @return void - * @see extract_cookies() - */ - public function storeCookies($url, $set_cookies) - { - if (count($set_cookies) == 0) return; - $param = @parse_url($url); - if (!is_array($param) || !isset($param['host'])) return; - $request_host = $param['host']; - if (strpos($request_host, '.') === false) $request_host .= '.local'; - $request_path = @$param['path']; - if ($request_path == '') $request_path = '/'; - // - // loop through set-cookie headers - // - foreach ($set_cookies as $set_cookie) { - $this->debug('Parsing: '.$set_cookie); - // temporary cookie store (before adding to jar) - $tmp_cookie = array(); - $param = explode(';', $set_cookie); - // loop through params - for ($x=0; $x$key, 'value'=>$val); - continue; - } - $key = strtolower($key); - if (in_array($key, array('expires', 'path', 'domain', 'secure'))) { - $tmp_cookie[$key] = $val; - } - } - // - // set cookie - // - // check domain - if (isset($tmp_cookie['domain']) && ($tmp_cookie['domain'] != $request_host) && - ($tmp_cookie['domain'] != ".$request_host")) { - $domain = $tmp_cookie['domain']; - if ((strpos($domain, '.') === false) && ($domain != 'local')) { - $this->debug(' - domain "'.$domain.'" has no dot and is not a local domain'); - continue; - } - if (preg_match('/\.[0-9]+$/', $domain)) { - $this->debug(' - domain "'.$domain.'" appears to be an ip address'); - continue; - } - if (substr($domain, 0, 1) != '.') $domain = ".$domain"; - if (!$this->_domain_match($request_host, $domain)) { - $this->debug(' - request host "'.$request_host.'" does not domain-match "'.$domain.'"'); - continue; - } - } else { - // if domain is not specified in the set-cookie header, domain will default to - // the request host - $domain = $request_host; - } - // check path - if (isset($tmp_cookie['path']) && ($tmp_cookie['path'] != '')) { - $path = urldecode($tmp_cookie['path']); - if (!$this->_path_match($request_path, $path)) { - $this->debug(' - request path "'.$request_path.'" does not path-match "'.$path.'"'); - continue; - } - } else { - $path = $request_path; - $path = substr($path, 0, strrpos($path, '/')); - if ($path == '') $path = '/'; - } - // check if secure - $secure = (isset($tmp_cookie['secure'])) ? true : false; - // check expiry - if (isset($tmp_cookie['expires'])) { - if (($expires = strtotime($tmp_cookie['expires'])) < 0) { - $expires = null; - } - } else { - $expires = null; - } - // set cookie - $this->set_cookie($domain, $path, $tmp_cookie['name'], $tmp_cookie['value'], $secure, $expires); - } - } - - // return array of set-cookie values extracted from HTTP response headers (string $h) - public function extractCookies($h) { - $x = 0; - $lines = 0; - $headers = array(); - $last_match = false; - $h = explode("\n", $h); - foreach ($h as $line) { - $line = rtrim($line); - $lines++; - - $trimmed_line = trim($line); - if (isset($line_last)) { - // check if we have \r\n\r\n (indicating the end of headers) - // some servers will not use CRLF (\r\n), so we make CR (\r) optional. - // if (preg_match('/\015?\012\015?\012/', $line_last.$line)) { - // break; - // } - // As an alternative, we can check if the current trimmed line is empty - if ($trimmed_line == '') { - break; - } - - // check for continuation line... - // RFC 2616 Section 2.2 "Basic Rules": - // HTTP/1.1 header field values can be folded onto multiple lines if the - // continuation line begins with a space or horizontal tab. All linear - // white space, including folding, has the same semantics as SP. A - // recipient MAY replace any linear white space with a single SP before - // interpreting the field value or forwarding the message downstream. - if ($last_match && preg_match('/^\s+(.*)/', $line, $match)) { - // append to previous header value - $headers[$x-1] .= ' '.rtrim($match[1]); - continue; - } - } - $line_last = $line; - - // split header name and value - if (preg_match('/^Set-Cookie\s*:\s*(.*)/i', $line, $match)) { - $headers[$x++] = rtrim($match[1]); - $last_match = true; - } else { - $last_match = false; - } - } - return $headers; - } - - /** - * Set Cookie - * @param string $domain - * @param string $path - * @param string $name cookie name - * @param string $value cookie value - * @param bool $secure - * @param int $expires expiry time (null if session cookie, <= 0 will delete cookie) - * @return void - */ - function set_cookie($domain, $path, $name, $value, $secure=false, $expires=null) - { - if ($domain == '') return; - if ($path == '') return; - if ($name == '') return; - // check if cookie needs to go - if (isset($expires) && ($expires <= 0)) { - if (isset($this->cookies[$domain][$path][$name])) unset($this->cookies[$domain][$path][$name]); - return; - } - if ($value == '') return; - $this->cookies[$domain][$path][$name] = array($value, $secure, $expires); - return; - } - - /** - * Clear cookies - [domain [,path [,name]]] - call method with no arguments to clear all cookies. - * @param string $domain - * @param string $path - * @param string $name - * @return void - */ - function clear($domain=null, $path=null, $name=null) - { - if (!isset($domain)) { - $this->cookies = array(); - } elseif (!isset($path)) { - if (isset($this->cookies[$domain])) unset($this->cookies[$domain]); - } elseif (!isset($name)) { - if (isset($this->cookies[$domain][$path])) unset($this->cookies[$domain][$path]); - } elseif (isset($name)) { - if (isset($this->cookies[$domain][$path][$name])) unset($this->cookies[$domain][$path][$name]); - } - } - - /** - * Compare string length - used for sorting - * @access private - * @return int - */ - function _cmp_length($a, $b) - { - $la = strlen($a); $lb = strlen($b); - if ($la == $lb) return 0; - return ($la > $lb) ? -1 : 1; - } - - /** - * Reduce domain - * @param string $domain - * @return string - * @access private - */ - function _reduce_domain($domain) - { - if ($domain == '') return ''; - if (substr($domain, 0, 1) == '.') return substr($domain, 1); - return substr($domain, strpos($domain, '.')); - } - - /** - * Path match - check if path1 path-matches path2 - * - * From RFC 2965: - * For two strings that represent paths, P1 and P2, P1 path-matches P2 - * if P2 is a prefix of P1 (including the case where P1 and P2 string- - * compare equal). Thus, the string /tec/waldo path-matches /tec. - * @param string $path1 - * @param string $path2 - * @return bool - * @access private - */ - function _path_match($path1, $path2) - { - return (substr($path1, 0, strlen($path2)) == $path2); - } - - /** - * Domain match - check if domain1 domain-matches domain2 - * - * A few extracts from RFC 2965: - * - A Set-Cookie2 from request-host y.x.foo.com for Domain=.foo.com - * would be rejected, because H is y.x and contains a dot. - * - * - A Set-Cookie2 from request-host x.foo.com for Domain=.foo.com - * would be accepted. - * - * - A Set-Cookie2 with Domain=.com or Domain=.com., will always be - * rejected, because there is no embedded dot. - * - * - A Set-Cookie2 from request-host example for Domain=.local will - * be accepted, because the effective host name for the request- - * host is example.local, and example.local domain-matches .local. - * - * I'm ignoring the first point for now (must check to see how other browsers handle - * this rule for Set-Cookie headers) - * - * @param string $domain1 - * @param string $domain2 - * @return bool - * @access private - */ - function _domain_match($domain1, $domain2) - { - $domain1 = strtolower($domain1); - $domain2 = strtolower($domain2); - while (strpos($domain1, '.') !== false) { - if ($domain1 == $domain2) return true; - $domain1 = $this->_reduce_domain($domain1); - continue; - } - return false; - } -} -?> \ No newline at end of file diff --git a/inc/3rdparty/humble-http-agent/HumbleHttpAgent.php b/inc/3rdparty/humble-http-agent/HumbleHttpAgent.php deleted file mode 100644 index 7e5834ab..00000000 --- a/inc/3rdparty/humble-http-agent/HumbleHttpAgent.php +++ /dev/null @@ -1,720 +0,0 @@ -userAgentDefault = self::UA_BROWSER; - $this->referer = self::REF_GOOGLE; - // set the request method - if (in_array($method, array(1,2,4))) { - $this->method = $method; - } else { - if (class_exists('HttpRequestPool')) { - $this->method = self::METHOD_REQUEST_POOL; - } elseif (function_exists('curl_multi_init')) { - $this->method = self::METHOD_CURL_MULTI; - } else { - $this->method = self::METHOD_FILE_GET_CONTENTS; - } - } - if ($this->method == self::METHOD_CURL_MULTI) { - require_once(dirname(__FILE__).'/RollingCurl.php'); - } - // create cookie jar - $this->cookieJar = new CookieJar(); - // set request options (redirect must be 0) - $this->requestOptions = array( - 'timeout' => 15, - 'redirect' => 0 // we handle redirects manually so we can rewrite the new hashbang URLs that are creeping up over the web - // TODO: test onprogress? - ); - if (is_array($requestOptions)) { - $this->requestOptions = array_merge($this->requestOptions, $requestOptions); - } - $this->httpContext = array( - 'http' => array( - 'ignore_errors' => true, - 'timeout' => $this->requestOptions['timeout'], - 'max_redirects' => $this->requestOptions['redirect'], - 'header' => "Accept: */*\r\n" - ) - ); - } - - protected function debug($msg) { - if ($this->debug) { - $mem = round(memory_get_usage()/1024, 2); - $memPeak = round(memory_get_peak_usage()/1024, 2); - echo '* ',$msg; - echo ' - mem used: ',$mem," (peak: $memPeak)\n"; - ob_flush(); - flush(); - } - } - - protected function getUserAgent($url, $asArray=false) { - $host = @parse_url($url, PHP_URL_HOST); - if (strtolower(substr($host, 0, 4)) == 'www.') { - $host = substr($host, 4); - } - if ($host) { - $try = array($host); - $split = explode('.', $host); - if (count($split) > 1) { - array_shift($split); - $try[] = '.'.implode('.', $split); - } - foreach ($try as $h) { - if (isset($this->userAgentMap[$h])) { - $ua = $this->userAgentMap[$h]; - break; - } - } - } - if (!isset($ua)) $ua = $this->userAgentDefault; - if ($asArray) { - return array('User-Agent' => $ua); - } else { - return 'User-Agent: '.$ua; - } - } - - public function rewriteHashbangFragment($url) { - // return $url if there's no '#!' - if (strpos($url, '#!') === false) return $url; - // split $url and rewrite - // TODO: is SimplePie_IRI included? - $iri = new SimplePie_IRI($url); - $fragment = substr($iri->fragment, 1); // strip '!' - $iri->fragment = null; - if (isset($iri->query)) { - parse_str($iri->query, $query); - } else { - $query = array(); - } - $query['_escaped_fragment_'] = (string)$fragment; - $iri->query = str_replace('%2F', '/', http_build_query($query)); // needed for some sites - return $iri->get_iri(); - } - - public function removeFragment($url) { - $pos = strpos($url, '#'); - if ($pos === false) { - return $url; - } else { - return substr($url, 0, $pos); - } - } - - public function rewriteUrls($url) { - foreach ($this->rewriteUrls as $find => $action) { - if (strpos($url, $find) !== false) { - if (is_array($action)) { - return strtr($url, $action); - } - } - } - return $url; - } - - public function enableDebug($bool=true) { - $this->debug = (bool)$bool; - } - - public function minimiseMemoryUse($bool = true) { - $this->minimiseMemoryUse = $bool; - } - - public function setMaxParallelRequests($max) { - $this->maxParallelRequests = $max; - } - - public function validateUrl($url) { - $url = filter_var($url, FILTER_SANITIZE_URL); - $test = filter_var($url, FILTER_VALIDATE_URL, FILTER_FLAG_SCHEME_REQUIRED); - // deal with bug http://bugs.php.net/51192 (present in PHP 5.2.13 and PHP 5.3.2) - if ($test === false) { - $test = filter_var(strtr($url, '-', '_'), FILTER_VALIDATE_URL, FILTER_FLAG_SCHEME_REQUIRED); - } - if ($test !== false && $test !== null && preg_match('!^https?://!', $url)) { - return $url; - } else { - return false; - } - } - - public function fetchAll(array $urls) { - $this->fetchAllOnce($urls, $isRedirect=false); - $redirects = 0; - while (!empty($this->redirectQueue) && ++$redirects <= $this->maxRedirects) { - $this->debug("Following redirects #$redirects..."); - $this->fetchAllOnce($this->redirectQueue, $isRedirect=true); - } - } - - // fetch all URLs without following redirects - public function fetchAllOnce(array $urls, $isRedirect=false) { - if (!$isRedirect) $urls = array_unique($urls); - if (empty($urls)) return; - - ////////////////////////////////////////////////////// - // parallel (HttpRequestPool) - if ($this->method == self::METHOD_REQUEST_POOL) { - $this->debug('Starting parallel fetch (HttpRequestPool)'); - try { - while (count($urls) > 0) { - $this->debug('Processing set of '.min($this->maxParallelRequests, count($urls))); - $subset = array_splice($urls, 0, $this->maxParallelRequests); - $pool = new HttpRequestPool(); - foreach ($subset as $orig => $url) { - if (!$isRedirect) $orig = $url; - unset($this->redirectQueue[$orig]); - $this->debug("...$url"); - if (!$isRedirect && isset($this->requests[$url])) { - $this->debug("......in memory"); - /* - } elseif ($this->isCached($url)) { - $this->debug("......is cached"); - if (!$this->minimiseMemoryUse) { - $this->requests[$url] = $this->getCached($url); - } - */ - } else { - $this->debug("......adding to pool"); - $req_url = $this->rewriteUrls($url); - $req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url; - $req_url = $this->removeFragment($req_url); - if (!empty($this->headerOnlyTypes) && !isset($this->requests[$orig]['wrongGuess']) && $this->possibleUnsupportedType($req_url)) { - $_meth = HttpRequest::METH_HEAD; - } else { - $_meth = HttpRequest::METH_GET; - unset($this->requests[$orig]['wrongGuess']); - } - $httpRequest = new HttpRequest($req_url, $_meth, $this->requestOptions); - // send cookies, if we have any - if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) { - $this->debug("......sending cookies: $cookies"); - $httpRequest->addHeaders(array('Cookie' => $cookies)); - } - //$httpRequest->addHeaders(array('User-Agent' => $this->userAgent)); - $httpRequest->addHeaders($this->getUserAgent($req_url, true)); - // add referer for picky sites - $httpRequest->addheaders(array('Referer' => $this->referer)); - $this->requests[$orig] = array('headers'=>null, 'body'=>null, 'httpRequest'=>$httpRequest); - $this->requests[$orig]['original_url'] = $orig; - $pool->attach($httpRequest); - } - } - // did we get anything into the pool? - if (count($pool) > 0) { - $this->debug('Sending request...'); - try { - $pool->send(); - } catch (HttpRequestPoolException $e) { - // do nothing - } - $this->debug('Received responses'); - foreach($subset as $orig => $url) { - if (!$isRedirect) $orig = $url; - $request = $this->requests[$orig]['httpRequest']; - //$this->requests[$orig]['headers'] = $this->headersToString($request->getResponseHeader()); - // getResponseHeader() doesn't return status line, so, for consistency... - $this->requests[$orig]['headers'] = substr($request->getRawResponseMessage(), 0, $request->getResponseInfo('header_size')); - // check content type - // TODO: use getResponseHeader('content-type') or getResponseInfo() - if ($this->headerOnlyType($this->requests[$orig]['headers'])) { - $this->requests[$orig]['body'] = ''; - $_header_only_type = true; - $this->debug('Header only type returned'); - } else { - $this->requests[$orig]['body'] = $request->getResponseBody(); - $_header_only_type = false; - } - $this->requests[$orig]['effective_url'] = $request->getResponseInfo('effective_url'); - $this->requests[$orig]['status_code'] = $status_code = $request->getResponseCode(); - // is redirect? - if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && $request->getResponseHeader('location')) { - $redirectURL = $request->getResponseHeader('location'); - if (!preg_match('!^https?://!i', $redirectURL)) { - $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url); - } - if ($this->validateURL($redirectURL)) { - $this->debug('Redirect detected. Valid URL: '.$redirectURL); - // store any cookies - $cookies = $request->getResponseHeader('set-cookie'); - if ($cookies && !is_array($cookies)) $cookies = array($cookies); - if ($cookies) $this->cookieJar->storeCookies($url, $cookies); - $this->redirectQueue[$orig] = $redirectURL; - } else { - $this->debug('Redirect detected. Invalid URL: '.$redirectURL); - } - } elseif (!$_header_only_type && $request->getMethod() === HttpRequest::METH_HEAD) { - // the response content-type did not match our 'header only' types, - // but we'd issues a HEAD request because we assumed it would. So - // let's queue a proper GET request for this item... - $this->debug('Wrong guess at content-type, queing GET request'); - $this->requests[$orig]['wrongGuess'] = true; - $this->redirectQueue[$orig] = $this->requests[$orig]['effective_url']; - } - //die($url.' -multi- '.$request->getResponseInfo('effective_url')); - $pool->detach($request); - unset($this->requests[$orig]['httpRequest'], $request); - /* - if ($this->minimiseMemoryUse) { - if ($this->cache($url)) { - unset($this->requests[$url]); - } - } - */ - } - } - } - } catch (HttpException $e) { - $this->debug($e); - return false; - } - } - - ////////////////////////////////////////////////////////// - // parallel (curl_multi_*) - elseif ($this->method == self::METHOD_CURL_MULTI) { - $this->debug('Starting parallel fetch (curl_multi_*)'); - while (count($urls) > 0) { - $this->debug('Processing set of '.min($this->maxParallelRequests, count($urls))); - $subset = array_splice($urls, 0, $this->maxParallelRequests); - $pool = new RollingCurl(array($this, 'handleCurlResponse')); - $pool->window_size = count($subset); - - foreach ($subset as $orig => $url) { - if (!$isRedirect) $orig = $url; - unset($this->redirectQueue[$orig]); - $this->debug("...$url"); - if (!$isRedirect && isset($this->requests[$url])) { - $this->debug("......in memory"); - /* - } elseif ($this->isCached($url)) { - $this->debug("......is cached"); - if (!$this->minimiseMemoryUse) { - $this->requests[$url] = $this->getCached($url); - } - */ - } else { - $this->debug("......adding to pool"); - $req_url = $this->rewriteUrls($url); - $req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url; - $req_url = $this->removeFragment($req_url); - if (!empty($this->headerOnlyTypes) && !isset($this->requests[$orig]['wrongGuess']) && $this->possibleUnsupportedType($req_url)) { - $_meth = 'HEAD'; - } else { - $_meth = 'GET'; - unset($this->requests[$orig]['wrongGuess']); - } - $headers = array(); - //$headers[] = 'User-Agent: '.$this->userAgent; - $headers[] = $this->getUserAgent($req_url); - // add referer for picky sites - $headers[] = 'Referer: '.$this->referer; - // send cookies, if we have any - if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) { - $this->debug("......sending cookies: $cookies"); - $headers[] = 'Cookie: '.$cookies; - } - $httpRequest = new RollingCurlRequest($req_url, $_meth, null, $headers, array( - CURLOPT_CONNECTTIMEOUT => $this->requestOptions['timeout'], - CURLOPT_TIMEOUT => $this->requestOptions['timeout'] - )); - $httpRequest->set_original_url($orig); - $this->requests[$orig] = array('headers'=>null, 'body'=>null, 'httpRequest'=>$httpRequest); - $this->requests[$orig]['original_url'] = $orig; // TODO: is this needed anymore? - $pool->add($httpRequest); - } - } - // did we get anything into the pool? - if (count($pool) > 0) { - $this->debug('Sending request...'); - $pool->execute(); // this will call handleCurlResponse() and populate $this->requests[$orig] - $this->debug('Received responses'); - foreach($subset as $orig => $url) { - if (!$isRedirect) $orig = $url; - // $this->requests[$orig]['headers'] - // $this->requests[$orig]['body'] - // $this->requests[$orig]['effective_url'] - // check content type - if ($this->headerOnlyType($this->requests[$orig]['headers'])) { - $this->requests[$orig]['body'] = ''; - $_header_only_type = true; - $this->debug('Header only type returned'); - } else { - $_header_only_type = false; - } - $status_code = $this->requests[$orig]['status_code']; - if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) { - $redirectURL = $this->requests[$orig]['location']; - if (!preg_match('!^https?://!i', $redirectURL)) { - $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url); - } - if ($this->validateURL($redirectURL)) { - $this->debug('Redirect detected. Valid URL: '.$redirectURL); - // store any cookies - $cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']); - if (!empty($cookies)) $this->cookieJar->storeCookies($url, $cookies); - $this->redirectQueue[$orig] = $redirectURL; - } else { - $this->debug('Redirect detected. Invalid URL: '.$redirectURL); - } - } elseif (!$_header_only_type && $this->requests[$orig]['method'] == 'HEAD') { - // the response content-type did not match our 'header only' types, - // but we'd issues a HEAD request because we assumed it would. So - // let's queue a proper GET request for this item... - $this->debug('Wrong guess at content-type, queing GET request'); - $this->requests[$orig]['wrongGuess'] = true; - $this->redirectQueue[$orig] = $this->requests[$orig]['effective_url']; - } - // die($url.' -multi- '.$request->getResponseInfo('effective_url')); - unset($this->requests[$orig]['httpRequest'], $this->requests[$orig]['method']); - } - } - } - } - - ////////////////////////////////////////////////////// - // sequential (file_get_contents) - else { - $this->debug('Starting sequential fetch (file_get_contents)'); - $this->debug('Processing set of '.count($urls)); - foreach ($urls as $orig => $url) { - if (!$isRedirect) $orig = $url; - unset($this->redirectQueue[$orig]); - $this->debug("...$url"); - if (!$isRedirect && isset($this->requests[$url])) { - $this->debug("......in memory"); - /* - } elseif ($this->isCached($url)) { - $this->debug("......is cached"); - if (!$this->minimiseMemoryUse) { - $this->requests[$url] = $this->getCached($url); - } - */ - } else { - $this->debug("Sending request for $url"); - $this->requests[$orig]['original_url'] = $orig; - $req_url = $this->rewriteUrls($url); - $req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url; - $req_url = $this->removeFragment($req_url); - // send cookies, if we have any - $httpContext = $this->httpContext; - $httpContext['http']['header'] .= $this->getUserAgent($req_url)."\r\n"; - // add referer for picky sites - $httpContext['http']['header'] .= 'Referer: '.$this->referer."\r\n"; - if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) { - $this->debug("......sending cookies: $cookies"); - $httpContext['http']['header'] .= 'Cookie: '.$cookies."\r\n"; - } - if (false !== ($html = @file_get_contents($req_url, false, stream_context_create($httpContext)))) { - $this->debug('Received response'); - // get status code - if (!isset($http_response_header[0]) || !preg_match('!^HTTP/\d+\.\d+\s+(\d+)!', trim($http_response_header[0]), $match)) { - $this->debug('Error: no status code found'); - // TODO: handle error - no status code - } else { - $this->requests[$orig]['headers'] = $this->headersToString($http_response_header, false); - // check content type - if ($this->headerOnlyType($this->requests[$orig]['headers'])) { - $this->requests[$orig]['body'] = ''; - } else { - $this->requests[$orig]['body'] = $html; - } - $this->requests[$orig]['effective_url'] = $req_url; - $this->requests[$orig]['status_code'] = $status_code = (int)$match[1]; - unset($match); - // handle redirect - if (preg_match('/^Location:(.*?)$/m', $this->requests[$orig]['headers'], $match)) { - $this->requests[$orig]['location'] = trim($match[1]); - } - if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) { - $redirectURL = $this->requests[$orig]['location']; - if (!preg_match('!^https?://!i', $redirectURL)) { - $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url); - } - if ($this->validateURL($redirectURL)) { - $this->debug('Redirect detected. Valid URL: '.$redirectURL); - // store any cookies - $cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']); - if (!empty($cookies)) $this->cookieJar->storeCookies($url, $cookies); - $this->redirectQueue[$orig] = $redirectURL; - } else { - $this->debug('Redirect detected. Invalid URL: '.$redirectURL); - } - } - } - } else { - $this->debug('Error retrieving URL'); - //print_r($req_url); - //print_r($http_response_header); - //print_r($html); - - // TODO: handle error - failed to retrieve URL - } - } - } - } - } - - public function handleCurlResponse($response, $info, $request) { - $orig = $request->url_original; - $this->requests[$orig]['headers'] = substr($response, 0, $info['header_size']); - $this->requests[$orig]['body'] = substr($response, $info['header_size']); - $this->requests[$orig]['method'] = $request->method; - $this->requests[$orig]['effective_url'] = $info['url']; - $this->requests[$orig]['status_code'] = (int)$info['http_code']; - if (preg_match('/^Location:(.*?)$/m', $this->requests[$orig]['headers'], $match)) { - $this->requests[$orig]['location'] = trim($match[1]); - } - } - - protected function headersToString(array $headers, $associative=true) { - if (!$associative) { - return implode("\n", $headers); - } else { - $str = ''; - foreach ($headers as $key => $val) { - if (is_array($val)) { - foreach ($val as $v) $str .= "$key: $v\n"; - } else { - $str .= "$key: $val\n"; - } - } - return rtrim($str); - } - } - - public function get($url, $remove=false, $gzdecode=true) { - $url = "$url"; - if (isset($this->requests[$url]) && isset($this->requests[$url]['body'])) { - $this->debug("URL already fetched - in memory ($url, effective: {$this->requests[$url]['effective_url']})"); - $response = $this->requests[$url]; - /* - } elseif ($this->isCached($url)) { - $this->debug("URL already fetched - in disk cache ($url)"); - $response = $this->getCached($url); - $this->requests[$url] = $response; - */ - } else { - $this->debug("Fetching URL ($url)"); - $this->fetchAll(array($url)); - if (isset($this->requests[$url]) && isset($this->requests[$url]['body'])) { - $response = $this->requests[$url]; - } else { - $this->debug("Request failed"); - $response = false; - } - } - /* - if ($this->minimiseMemoryUse && $response) { - $this->cache($url); - unset($this->requests[$url]); - } - */ - if ($remove && $response) unset($this->requests[$url]); - if ($gzdecode && stripos($response['headers'], 'Content-Encoding: gzip')) { - if ($html = gzdecode($response['body'])) { - $response['body'] = $html; - } - } - return $response; - } - - public function parallelSupport() { - return class_exists('HttpRequestPool') || function_exists('curl_multi_init'); - } - - private function headerOnlyType($headers) { - if (preg_match('!^Content-Type:\s*(([a-z-]+)/([^;\r\n ]+))!im', $headers, $match)) { - // look for full mime type (e.g. image/jpeg) or just type (e.g. image) - $match[1] = strtolower(trim($match[1])); - $match[2] = strtolower(trim($match[2])); - foreach (array($match[1], $match[2]) as $mime) { - if (in_array($mime, $this->headerOnlyTypes)) return true; - } - } - return false; - } - - private function possibleUnsupportedType($url) { - $path = @parse_url($url, PHP_URL_PATH); - if ($path && strpos($path, '.') !== false) { - $ext = strtolower(trim(pathinfo($path, PATHINFO_EXTENSION))); - return in_array($ext, $this->headerOnlyClues); - } - return false; - } -} - -// gzdecode from http://www.php.net/manual/en/function.gzdecode.php#82930 -if (!function_exists('gzdecode')) { - function gzdecode($data,&$filename='',&$error='',$maxlength=null) - { - $len = strlen($data); - if ($len < 18 || strcmp(substr($data,0,2),"\x1f\x8b")) { - $error = "Not in GZIP format."; - return null; // Not GZIP format (See RFC 1952) - } - $method = ord(substr($data,2,1)); // Compression method - $flags = ord(substr($data,3,1)); // Flags - if ($flags & 31 != $flags) { - $error = "Reserved bits not allowed."; - return null; - } - // NOTE: $mtime may be negative (PHP integer limitations) - $mtime = unpack("V", substr($data,4,4)); - $mtime = $mtime[1]; - $xfl = substr($data,8,1); - $os = substr($data,8,1); - $headerlen = 10; - $extralen = 0; - $extra = ""; - if ($flags & 4) { - // 2-byte length prefixed EXTRA data in header - if ($len - $headerlen - 2 < 8) { - return false; // invalid - } - $extralen = unpack("v",substr($data,8,2)); - $extralen = $extralen[1]; - if ($len - $headerlen - 2 - $extralen < 8) { - return false; // invalid - } - $extra = substr($data,10,$extralen); - $headerlen += 2 + $extralen; - } - $filenamelen = 0; - $filename = ""; - if ($flags & 8) { - // C-style string - if ($len - $headerlen - 1 < 8) { - return false; // invalid - } - $filenamelen = strpos(substr($data,$headerlen),chr(0)); - if ($filenamelen === false || $len - $headerlen - $filenamelen - 1 < 8) { - return false; // invalid - } - $filename = substr($data,$headerlen,$filenamelen); - $headerlen += $filenamelen + 1; - } - $commentlen = 0; - $comment = ""; - if ($flags & 16) { - // C-style string COMMENT data in header - if ($len - $headerlen - 1 < 8) { - return false; // invalid - } - $commentlen = strpos(substr($data,$headerlen),chr(0)); - if ($commentlen === false || $len - $headerlen - $commentlen - 1 < 8) { - return false; // Invalid header format - } - $comment = substr($data,$headerlen,$commentlen); - $headerlen += $commentlen + 1; - } - $headercrc = ""; - if ($flags & 2) { - // 2-bytes (lowest order) of CRC32 on header present - if ($len - $headerlen - 2 < 8) { - return false; // invalid - } - $calccrc = crc32(substr($data,0,$headerlen)) & 0xffff; - $headercrc = unpack("v", substr($data,$headerlen,2)); - $headercrc = $headercrc[1]; - if ($headercrc != $calccrc) { - $error = "Header checksum failed."; - return false; // Bad header CRC - } - $headerlen += 2; - } - // GZIP FOOTER - $datacrc = unpack("V",substr($data,-8,4)); - $datacrc = sprintf('%u',$datacrc[1] & 0xFFFFFFFF); - $isize = unpack("V",substr($data,-4)); - $isize = $isize[1]; - // decompression: - $bodylen = $len-$headerlen-8; - if ($bodylen < 1) { - // IMPLEMENTATION BUG! - return null; - } - $body = substr($data,$headerlen,$bodylen); - $data = ""; - if ($bodylen > 0) { - switch ($method) { - case 8: - // Currently the only supported compression method: - $data = gzinflate($body,$maxlength); - break; - default: - $error = "Unknown compression method."; - return false; - } - } // zero-byte body content is allowed - // Verifiy CRC32 - $crc = sprintf("%u",crc32($data)); - $crcOK = $crc == $datacrc; - $lenOK = $isize == strlen($data); - if (!$lenOK || !$crcOK) { - $error = ( $lenOK ? '' : 'Length check FAILED. ') . ( $crcOK ? '' : 'Checksum FAILED.'); - return false; - } - return $data; - } -} -?> \ No newline at end of file diff --git a/inc/3rdparty/humble-http-agent/RollingCurl.php b/inc/3rdparty/humble-http-agent/RollingCurl.php deleted file mode 100644 index fdd021af..00000000 --- a/inc/3rdparty/humble-http-agent/RollingCurl.php +++ /dev/null @@ -1,402 +0,0 @@ -url = $url; - $this->url_original = $url; - $this->method = $method; - $this->post_data = $post_data; - $this->headers = $headers; - $this->options = $options; - } - - /** - * @param string $url - * @return void - */ - public function set_original_url($url) { - $this->url_original = $url; - } - /** - * @return void - */ - public function __destruct() { - unset($this->url, $this->url_original, $this->method, $this->post_data, $this->headers, $this->options); - } -} - -/** - * RollingCurl custom exception - */ -class RollingCurlException extends Exception { -} - -/** - * Class that holds a rolling queue of curl requests. - * - * @throws RollingCurlException - */ -class RollingCurl implements Countable { - /** - * @var int - * - * Window size is the max number of simultaneous connections allowed. - * - * REMEMBER TO RESPECT THE SERVERS: - * Sending too many requests at one time can easily be perceived - * as a DOS attack. Increase this window_size if you are making requests - * to multiple servers or have permission from the receving server admins. - */ - private $window_size = 5; - - /** - * @var float - * - * Timeout is the timeout used for curl_multi_select. - */ - private $timeout = 10; - - /** - * @var string|array - * - * Callback function to be applied to each result. - */ - private $callback; - - /** - * @var array - * - * Set your base options that you want to be used with EVERY request. - */ - protected $options = array( - CURLOPT_SSL_VERIFYPEER => 0, - CURLOPT_RETURNTRANSFER => 1, - CURLOPT_CONNECTTIMEOUT => 30, - CURLOPT_TIMEOUT => 30 - ); - - /** - * @var array - */ - private $headers = array(); - - /** - * @var Request[] - * - * The request queue - */ - private $requests = array(); - - /** - * @var RequestMap[] - * - * Maps handles to request indexes - */ - private $requestMap = array(); - - /** - * @param $callback - * Callback function to be applied to each result. - * - * Can be specified as 'my_callback_function' - * or array($object, 'my_callback_method'). - * - * Function should take three parameters: $response, $info, $request. - * $response is response body, $info is additional curl info. - * $request is the original request - * - * @return void - */ - function __construct($callback = null) { - $this->callback = $callback; - } - - /** - * @param string $name - * @return mixed - */ - public function __get($name) { - return (isset($this->{$name})) ? $this->{$name} : null; - } - - /** - * @param string $name - * @param mixed $value - * @return bool - */ - public function __set($name, $value) { - // append the base options & headers - if ($name == "options" || $name == "headers") { - $this->{$name} = $value + $this->{$name}; - } else { - $this->{$name} = $value; - } - return true; - } - - /** - * Count number of requests added (Countable interface) - * - * @return int - */ - public function count() { - return count($this->requests); - } - - /** - * Add a request to the request queue - * - * @param Request $request - * @return bool - */ - public function add($request) { - $this->requests[] = $request; - return true; - } - - /** - * Create new Request and add it to the request queue - * - * @param string $url - * @param string $method - * @param $post_data - * @param $headers - * @param $options - * @return bool - */ - public function request($url, $method = "GET", $post_data = null, $headers = null, $options = null) { - $this->requests[] = new RollingCurlRequest($url, $method, $post_data, $headers, $options); - return true; - } - - /** - * Perform GET request - * - * @param string $url - * @param $headers - * @param $options - * @return bool - */ - public function get($url, $headers = null, $options = null) { - return $this->request($url, "GET", null, $headers, $options); - } - - /** - * Perform POST request - * - * @param string $url - * @param $post_data - * @param $headers - * @param $options - * @return bool - */ - public function post($url, $post_data = null, $headers = null, $options = null) { - return $this->request($url, "POST", $post_data, $headers, $options); - } - - /** - * Execute processing - * - * @param int $window_size Max number of simultaneous connections - * @return string|bool - */ - public function execute($window_size = null) { - // rolling curl window must always be greater than 1 - if (sizeof($this->requests) == 1) { - return $this->single_curl(); - } else { - // start the rolling curl. window_size is the max number of simultaneous connections - return $this->rolling_curl($window_size); - } - } - - /** - * Performs a single curl request - * - * @access private - * @return string - */ - private function single_curl() { - $ch = curl_init(); - $request = array_shift($this->requests); - $options = $this->get_options($request); - curl_setopt_array($ch, $options); - $output = curl_exec($ch); - $info = curl_getinfo($ch); - - // it's not neccesary to set a callback for one-off requests - if ($this->callback) { - $callback = $this->callback; - if (is_callable($this->callback)) { - call_user_func($callback, $output, $info, $request); - } - } - else - return $output; - return true; - } - - /** - * Performs multiple curl requests - * - * @access private - * @throws RollingCurlException - * @param int $window_size Max number of simultaneous connections - * @return bool - */ - private function rolling_curl($window_size = null) { - if ($window_size) - $this->window_size = $window_size; - - // make sure the rolling window isn't greater than the # of urls - if (sizeof($this->requests) < $this->window_size) - $this->window_size = sizeof($this->requests); - - if ($this->window_size < 2) { - throw new RollingCurlException("Window size must be greater than 1"); - } - - $master = curl_multi_init(); - - // start the first batch of requests - for ($i = 0; $i < $this->window_size; $i++) { - $ch = curl_init(); - - $options = $this->get_options($this->requests[$i]); - - curl_setopt_array($ch, $options); - curl_multi_add_handle($master, $ch); - - // Add to our request Maps - $key = (string) $ch; - $this->requestMap[$key] = $i; - } - - do { - while (($execrun = curl_multi_exec($master, $running)) == CURLM_CALL_MULTI_PERFORM) ; - if ($execrun != CURLM_OK) - break; - // a request was just completed -- find out which one - while ($done = curl_multi_info_read($master)) { - - // get the info and content returned on the request - $info = curl_getinfo($done['handle']); - $output = curl_multi_getcontent($done['handle']); - - // send the return values to the callback function. - $callback = $this->callback; - if (is_callable($callback)) { - $key = (string) $done['handle']; - $request = $this->requests[$this->requestMap[$key]]; - unset($this->requestMap[$key]); - call_user_func($callback, $output, $info, $request); - } - - // start a new request (it's important to do this before removing the old one) - if ($i < sizeof($this->requests) && isset($this->requests[$i]) && $i < count($this->requests)) { - $ch = curl_init(); - $options = $this->get_options($this->requests[$i]); - curl_setopt_array($ch, $options); - curl_multi_add_handle($master, $ch); - - // Add to our request Maps - $key = (string) $ch; - $this->requestMap[$key] = $i; - $i++; - } - - // remove the curl handle that just completed - curl_multi_remove_handle($master, $done['handle']); - - } - - // Block for data in / output; error handling is done by curl_multi_exec - //if ($running) curl_multi_select($master, $this->timeout); - // removing timeout as it causes problems on Windows with PHP 5.3.5 and Curl 7.20.0 - if ($running) curl_multi_select($master); - - } while ($running); - curl_multi_close($master); - return true; - } - - - /** - * Helper function to set up a new request by setting the appropriate options - * - * @access private - * @param Request $request - * @return array - */ - private function get_options($request) { - // options for this entire curl object - $options = $this->__get('options'); - // We're managing reirects in PHP - allows us to intervene and rewrite/block URLs - // before the next request goes out. - $options[CURLOPT_FOLLOWLOCATION] = 0; - $options[CURLOPT_MAXREDIRS] = 0; - //if (ini_get('safe_mode') == 'Off' || !ini_get('safe_mode')) { - // $options[CURLOPT_FOLLOWLOCATION] = 1; - // $options[CURLOPT_MAXREDIRS] = 5; - //} - $headers = $this->__get('headers'); - // append custom headers for this specific request - if ($request->headers) { - $headers = $headers + $request->headers; - } - - // append custom options for this specific request - if ($request->options) { - $options = $request->options + $options; - } - - // set the request URL - $options[CURLOPT_URL] = $request->url; - - if ($headers) { - $options[CURLOPT_HTTPHEADER] = $headers; - } - // return response headers - $options[CURLOPT_HEADER] = 1; - - // send HEAD request? - if ($request->method == 'HEAD') { - $options[CURLOPT_NOBODY] = 1; - } - - return $options; - } - - /** - * @return void - */ - public function __destruct() { - unset($this->window_size, $this->callback, $this->options, $this->headers, $this->requests); - } -} \ No newline at end of file diff --git a/inc/3rdparty/humble-http-agent/SimplePie_HumbleHttpAgent.php b/inc/3rdparty/humble-http-agent/SimplePie_HumbleHttpAgent.php deleted file mode 100644 index ce76a929..00000000 --- a/inc/3rdparty/humble-http-agent/SimplePie_HumbleHttpAgent.php +++ /dev/null @@ -1,79 +0,0 @@ -encode($parsed['authority']), $parsed['path'], $parsed['query'], $parsed['fragment']); - } - $this->url = $url; - $this->useragent = $useragent; - if (preg_match('/^http(s)?:\/\//i', $url)) - { - if (!is_array($headers)) - { - $headers = array(); - } - $this->method = SIMPLEPIE_FILE_SOURCE_REMOTE | SIMPLEPIE_FILE_SOURCE_CURL; - $headers2 = array(); - foreach ($headers as $key => $value) { - $headers2[] = "$key: $value"; - } - //TODO: allow for HTTP headers - // curl_setopt($fp, CURLOPT_HTTPHEADER, $headers2); - - $response = self::$agent->get($url); - - if ($response === false || !isset($response['status_code'])) { - $this->error = 'failed to fetch URL'; - $this->success = false; - } else { - // The extra lines at the end are there to satisfy SimplePie's HTTP parser. - // The class expects a full HTTP message, whereas we're giving it only - // headers - the new lines indicate the start of the body. - $parser = new SimplePie_HTTP_Parser($response['headers']."\r\n\r\n"); - if ($parser->parse()) { - $this->headers = $parser->headers; - //$this->body = $parser->body; - $this->body = $response['body']; - $this->status_code = $parser->status_code; - } - } - } - else - { - $this->error = 'invalid URL'; - $this->success = false; - } - } -} -?> \ No newline at end of file -- cgit v1.2.3