]> git.immae.eu Git - github/wallabag/wallabag.git/blobdiff - inc/3rdparty/libraries/humble-http-agent/HumbleHttpAgent.php
[change] we now use Full-Text RSS 3.1, thank you so much @fivefilters
[github/wallabag/wallabag.git] / inc / 3rdparty / libraries / humble-http-agent / HumbleHttpAgent.php
similarity index 84%
rename from inc/3rdparty/humble-http-agent/HumbleHttpAgent.php
rename to inc/3rdparty/libraries/humble-http-agent/HumbleHttpAgent.php
index 7e5834aba9b0938441d8f0cd7b6a1cd61cb23620..e4f1b3b3834c989cb22925fc17e14b4e6b70891a 100644 (file)
-<?php
-/**
- * Humble HTTP Agent
- * 
- * This class is designed to take advantage of parallel HTTP requests
- * offered by PHP's PECL HTTP extension or the curl_multi_* functions. 
- * For environments which do not have these options, it reverts to standard sequential 
- * requests (using file_get_contents())
- * 
- * @version 1.0
- * @date 2012-02-09
- * @see http://php.net/HttpRequestPool
- * @author Keyvan Minoukadeh
- * @copyright 2011-2012 Keyvan Minoukadeh
- * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
- */
-
-class HumbleHttpAgent
-{
-       const METHOD_REQUEST_POOL = 1;
-       const METHOD_CURL_MULTI = 2;
-       const METHOD_FILE_GET_CONTENTS = 4;
-       //const UA_BROWSER = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0.1) Gecko/20100101 Firefox/4.0.1';
-       const UA_BROWSER = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.92 Safari/535.2';
-       const UA_PHP = 'PHP/5.2';
-       const REF_GOOGLE = 'http://www.google.co.uk/url?sa=t&source=web&cd=1';
-       
-       protected $requests = array();
-       protected $redirectQueue = array();
-       protected $requestOptions;
-       protected $maxParallelRequests = 5;
-       protected $cache = null; //TODO
-       protected $httpContext;
-       protected $minimiseMemoryUse = false; //TODO
-       protected $debug = false;
-       protected $method;
-       protected $cookieJar;
-       public $rewriteHashbangFragment = true; // see http://code.google.com/web/ajaxcrawling/docs/specification.html
-       public $maxRedirects = 5;
-       public $userAgentMap = array();
-       public $rewriteUrls = array();
-       public $userAgentDefault;
-       public $referer;
-       //public $userAgent = 'Mozilla/5.0';
-       
-       // Prevent certain file/mime types
-       // HTTP responses which match these content types will
-       // be returned without body.
-       public $headerOnlyTypes = array();
-       // URLs ending with one of these extensions will
-       // prompt Humble HTTP Agent to send a HEAD request first
-       // to see if returned content type matches $headerOnlyTypes.
-       public $headerOnlyClues = array('pdf','mp3','zip','exe','gif','gzip','gz','jpeg','jpg','mpg','mpeg','png','ppt','mov'); 
-       
-       //TODO: set max file size
-       //TODO: normalise headers
-       
-       /**
-        * Set up the agent: default user agent and referer, transport method,
-        * cookie jar, request options and the stream context options used by
-        * the sequential (file_get_contents) transport.
-        *
-        * @param array|null $requestOptions options merged over the defaults
-        *                                   ('timeout' => 15, 'redirect' => 0)
-        * @param int|null   $method         one of the METHOD_* constants; any
-        *                                   other value triggers auto-detection
-        */
-       function __construct($requestOptions=null, $method=null) {
-               $this->userAgentDefault = self::UA_BROWSER;
-               $this->referer = self::REF_GOOGLE;
-               // honour an explicit transport choice, otherwise pick the best one available
-               $knownMethods = array(self::METHOD_REQUEST_POOL, self::METHOD_CURL_MULTI, self::METHOD_FILE_GET_CONTENTS);
-               if (in_array($method, $knownMethods)) {
-                       $this->method = $method;
-               } elseif (class_exists('HttpRequestPool')) {
-                       $this->method = self::METHOD_REQUEST_POOL;
-               } elseif (function_exists('curl_multi_init')) {
-                       $this->method = self::METHOD_CURL_MULTI;
-               } else {
-                       $this->method = self::METHOD_FILE_GET_CONTENTS;
-               }
-               // the curl_multi transport needs the RollingCurl helper shipped next to this file
-               if ($this->method == self::METHOD_CURL_MULTI) {
-                       require_once(dirname(__FILE__).'/RollingCurl.php');
-               }
-               // cookies are kept here between requests
-               $this->cookieJar = new CookieJar();
-               // defaults; redirects are handled manually (redirect = 0) so that
-               // hashbang URLs met along the way can be rewritten
-               $options = array(
-                       'timeout' => 15,
-                       'redirect' => 0
-               );
-               if (is_array($requestOptions)) {
-                       $options = array_merge($options, $requestOptions);
-               }
-               $this->requestOptions = $options;
-               // stream context options for the file_get_contents transport
-               $http = array();
-               $http['ignore_errors'] = true;
-               $http['timeout'] = $options['timeout'];
-               $http['max_redirects'] = $options['redirect'];
-               $http['header'] = "Accept: */*\r\n";
-               $this->httpContext = array('http' => $http);
-       }
-       
-       /**
-        * Print a debug message followed by current and peak memory use.
-        *
-        * Does nothing unless the debug flag is set. Memory figures are
-        * memory_get_usage()/1024 and memory_get_peak_usage()/1024, rounded
-        * to two decimals.
-        *
-        * @param mixed $msg message to print (anything echo can output)
-        */
-       protected function debug($msg) {
-               if (!$this->debug) {
-                       return;
-               }
-               $mem = round(memory_get_usage()/1024, 2);
-               $memPeak = round(memory_get_peak_usage()/1024, 2);
-               echo '* ',$msg;
-               echo ' - mem used: ',$mem," (peak: $memPeak)\n";
-               // ob_flush() raises a notice when no output buffer is active,
-               // so only call it when there is a buffer to flush
-               if (ob_get_level() > 0) {
-                       ob_flush();
-               }
-               flush();
-       }
-       
-       /**
-        * Pick the User-Agent to send for a URL.
-        *
-        * The URL's host (with a leading 'www.' removed) is looked up in
-        * $userAgentMap, first as-is and then as '.' followed by the host
-        * minus its first label. When neither key exists, $userAgentDefault
-        * is used.
-        *
-        * @param string $url     URL about to be requested
-        * @param bool   $asArray true to get array('User-Agent' => ...),
-        *                        false to get a 'User-Agent: ...' header line
-        * @return array|string
-        */
-       protected function getUserAgent($url, $asArray=false) {
-               $agent = $this->userAgentDefault;
-               $hostname = @parse_url($url, PHP_URL_HOST);
-               if (strtolower(substr($hostname, 0, 4)) == 'www.') {
-                       $hostname = substr($hostname, 4);
-               }
-               if ($hostname) {
-                       // exact host first, then the parent domain written with a leading dot
-                       $candidates = array($hostname);
-                       $labels = explode('.', $hostname);
-                       if (count($labels) > 1) {
-                               $candidates[] = '.'.implode('.', array_slice($labels, 1));
-                       }
-                       foreach ($candidates as $candidate) {
-                               if (isset($this->userAgentMap[$candidate])) {
-                                       $agent = $this->userAgentMap[$candidate];
-                                       break;
-                               }
-                       }
-               }
-               return $asArray ? array('User-Agent' => $agent) : 'User-Agent: '.$agent;
-       }
-       
-       /**
-        * Rewrite a hashbang ('#!') URL into its '_escaped_fragment_' form.
-        *
-        * URLs without '#!' are returned unchanged. Otherwise the fragment
-        * (minus its first character) is moved into an '_escaped_fragment_'
-        * query parameter, keeping any existing query parameters.
-        *
-        * @see http://code.google.com/web/ajaxcrawling/docs/specification.html
-        * @param string $url
-        * @return string rewritten URL
-        */
-       public function rewriteHashbangFragment($url) {
-               // return $url if there's no '#!'
-               if (strpos($url, '#!') === false) return $url;
-               // split $url and rewrite
-               // TODO: is SimplePie_IRI included?
-               $iri = new SimplePie_IRI($url);
-               $fragment = substr($iri->fragment, 1); // strip '!'
-               $iri->fragment = null;
-               if (isset($iri->query)) {
-                       parse_str($iri->query, $query);
-               } else {
-                       $query = array();
-               }
-               $query['_escaped_fragment_'] = (string)$fragment;
-               // pass '&' explicitly: by default http_build_query() uses the
-               // arg_separator.output ini setting, which may be '&amp;' and
-               // would then end up in the URL we request
-               $built = http_build_query($query, '', '&');
-               $iri->query = str_replace('%2F', '/', $built); // needed for some sites
-               return $iri->get_iri();
-       }
-       
-       /**
-        * Cut a URL off at its first '#'.
-        *
-        * @param string $url
-        * @return string everything before the first '#', or $url unchanged
-        *                when it contains no '#'
-        */
-       public function removeFragment($url) {
-               $hashAt = strpos($url, '#');
-               if ($hashAt !== false) {
-                       return substr($url, 0, $hashAt);
-               }
-               return $url;
-       }
-       
-       /**
-        * Apply the first matching rule from $this->rewriteUrls to a URL.
-        *
-        * Each rule maps a substring to look for in the URL to an array of
-        * strtr() replacement pairs. Rules whose value is not an array are
-        * skipped. Only the first applicable rule is applied.
-        *
-        * @param string $url
-        * @return string rewritten URL, or $url unchanged when no rule applies
-        */
-       public function rewriteUrls($url) {
-               foreach ($this->rewriteUrls as $needle => $replacements) {
-                       if (strpos($url, $needle) === false || !is_array($replacements)) {
-                               continue;
-                       }
-                       return strtr($url, $replacements);
-               }
-               return $url;
-       }
-       
-       /**
-        * Switch debug output on or off.
-        *
-        * @param mixed $bool truthy to enable, falsy to disable (stored as a boolean)
-        */
-       public function enableDebug($bool=true) {
-               $this->debug = $bool ? true : false;
-       }
-       
-       /**
-        * Set the "minimise memory use" flag.
-        *
-        * The value is stored as a boolean, the same way enableDebug() stores
-        * its flag. NOTE(review): in the visible part of this file the flag is
-        * only read by commented-out code - confirm whether anything uses it.
-        *
-        * @param mixed $bool truthy to enable, falsy to disable
-        */
-       public function minimiseMemoryUse($bool = true) {
-               $this->minimiseMemoryUse = (bool)$bool;
-       }
-       
-       /**
-        * Set how many URLs are requested per parallel batch.
-        *
-        * fetchAllOnce() takes batches off the URL list with
-        * array_splice($urls, 0, $this->maxParallelRequests) inside a
-        * while (count($urls) > 0) loop, so a batch size below 1 would never
-        * empty the list. The value is therefore stored as an integer of at
-        * least 1.
-        *
-        * @param int $max requested batch size; values below 1 are raised to 1
-        */
-       public function setMaxParallelRequests($max) {
-               $this->maxParallelRequests = max(1, (int)$max);
-       }
-       
-       /**
-        * Sanitise a URL and check that it is a valid http(s) URL.
-        *
-        * @param string $url
-        * @return string|false the sanitised URL, or false when it is not a
-        *                      valid URL starting with http:// or https://
-        */
-       public function validateUrl($url) {
-               $url = filter_var($url, FILTER_SANITIZE_URL);
-               // FILTER_VALIDATE_URL always requires a scheme. The explicit
-               // FILTER_FLAG_SCHEME_REQUIRED flag was removed in PHP 8.0, so
-               // it is not passed here.
-               $test = filter_var($url, FILTER_VALIDATE_URL);
-               // deal with bug http://bugs.php.net/51192 (present in PHP 5.2.13 and PHP 5.3.2):
-               // retry with hyphens swapped for underscores
-               if ($test === false) {
-                       $test = filter_var(strtr($url, '-', '_'), FILTER_VALIDATE_URL);
-               }
-               if ($test !== false && $test !== null && preg_match('!^https?://!', $url)) {
-                       return $url;
-               }
-               return false;
-       }
-       
-       /**
-        * Fetch all given URLs, then keep re-fetching whatever is left in the
-        * redirect queue, for at most $this->maxRedirects further rounds.
-        *
-        * @param array $urls URLs to fetch
-        */
-       public function fetchAll(array $urls) {
-               // initial pass over the URLs as given
-               $this->fetchAllOnce($urls, false);
-               // each further round processes the entries queued by the previous one
-               for ($round = 1; !empty($this->redirectQueue) && $round <= $this->maxRedirects; $round++) {
-                       $this->debug("Following redirects #$round...");
-                       $this->fetchAllOnce($this->redirectQueue, true);
-               }
-       }
-       
-       // fetch all URLs without following redirects
-       public function fetchAllOnce(array $urls, $isRedirect=false) {
-               if (!$isRedirect) $urls = array_unique($urls);
-               if (empty($urls)) return;
-               
-               //////////////////////////////////////////////////////
-               // parallel (HttpRequestPool)
-               if ($this->method == self::METHOD_REQUEST_POOL) {
-                       $this->debug('Starting parallel fetch (HttpRequestPool)');
-                       try {
-                               while (count($urls) > 0) {
-                                       $this->debug('Processing set of '.min($this->maxParallelRequests, count($urls)));
-                                       $subset = array_splice($urls, 0, $this->maxParallelRequests);
-                                       $pool = new HttpRequestPool();
-                                       foreach ($subset as $orig => $url) {
-                                               if (!$isRedirect) $orig = $url;
-                                               unset($this->redirectQueue[$orig]);
-                                               $this->debug("...$url");
-                                               if (!$isRedirect && isset($this->requests[$url])) {
-                                                       $this->debug("......in memory");
-                                               /*
-                                               } elseif ($this->isCached($url)) {
-                                                       $this->debug("......is cached");
-                                                       if (!$this->minimiseMemoryUse) {
-                                                               $this->requests[$url] = $this->getCached($url);
-                                                       }
-                                               */
-                                               } else {
-                                                       $this->debug("......adding to pool");
-                                                       $req_url = $this->rewriteUrls($url);
-                                                       $req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url;
-                                                       $req_url = $this->removeFragment($req_url);
-                                                       if (!empty($this->headerOnlyTypes) && !isset($this->requests[$orig]['wrongGuess']) && $this->possibleUnsupportedType($req_url)) {
-                                                               $_meth = HttpRequest::METH_HEAD;
-                                                       } else {
-                                                               $_meth = HttpRequest::METH_GET;
-                                                               unset($this->requests[$orig]['wrongGuess']);
-                                                       }
-                                                       $httpRequest = new HttpRequest($req_url, $_meth, $this->requestOptions);
-                                                       // send cookies, if we have any
-                                                       if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
-                                                               $this->debug("......sending cookies: $cookies");
-                                                               $httpRequest->addHeaders(array('Cookie' => $cookies));
-                                                       }
-                                                       //$httpRequest->addHeaders(array('User-Agent' => $this->userAgent));
-                                                       $httpRequest->addHeaders($this->getUserAgent($req_url, true));
-                                                       // add referer for picky sites
-                                                       $httpRequest->addheaders(array('Referer' => $this->referer));
-                                                       $this->requests[$orig] = array('headers'=>null, 'body'=>null, 'httpRequest'=>$httpRequest);
-                                                       $this->requests[$orig]['original_url'] = $orig;
-                                                       $pool->attach($httpRequest);
-                                               }
-                                       }
-                                       // did we get anything into the pool?
-                                       if (count($pool) > 0) {
-                                               $this->debug('Sending request...');
-                                               try {
-                                                       $pool->send();
-                                               } catch (HttpRequestPoolException $e) {
-                                                       // do nothing
-                                               }
-                                               $this->debug('Received responses');
-                                               foreach($subset as $orig => $url) {
-                                                       if (!$isRedirect) $orig = $url;
-                                                       $request = $this->requests[$orig]['httpRequest'];
-                                                       //$this->requests[$orig]['headers'] = $this->headersToString($request->getResponseHeader());
-                                                       // getResponseHeader() doesn't return status line, so, for consistency...
-                                                       $this->requests[$orig]['headers'] = substr($request->getRawResponseMessage(), 0, $request->getResponseInfo('header_size'));
-                                                       // check content type
-                                                       // TODO: use getResponseHeader('content-type') or getResponseInfo()
-                                                       if ($this->headerOnlyType($this->requests[$orig]['headers'])) {
-                                                               $this->requests[$orig]['body'] = '';
-                                                               $_header_only_type = true;
-                                                               $this->debug('Header only type returned');
-                                                       } else {
-                                                               $this->requests[$orig]['body'] = $request->getResponseBody();
-                                                               $_header_only_type = false;
-                                                       }
-                                                       $this->requests[$orig]['effective_url'] = $request->getResponseInfo('effective_url');
-                                                       $this->requests[$orig]['status_code'] = $status_code = $request->getResponseCode();
-                                                       // is redirect?
-                                                       if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && $request->getResponseHeader('location')) {
-                                                               $redirectURL = $request->getResponseHeader('location');
-                                                               if (!preg_match('!^https?://!i', $redirectURL)) {
-                                                                       $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
-                                                               }
-                                                               if ($this->validateURL($redirectURL)) {
-                                                                       $this->debug('Redirect detected. Valid URL: '.$redirectURL);
-                                                                       // store any cookies
-                                                                       $cookies = $request->getResponseHeader('set-cookie');
-                                                                       if ($cookies && !is_array($cookies)) $cookies = array($cookies);
-                                                                       if ($cookies) $this->cookieJar->storeCookies($url, $cookies);
-                                                                       $this->redirectQueue[$orig] = $redirectURL;
-                                                               } else {
-                                                                       $this->debug('Redirect detected. Invalid URL: '.$redirectURL);
-                                                               }
-                                                       } elseif (!$_header_only_type && $request->getMethod() === HttpRequest::METH_HEAD) {
-                                                               // the response content-type did not match our 'header only' types, 
-                                                               // but we'd issues a HEAD request because we assumed it would. So
-                                                               // let's queue a proper GET request for this item...
-                                                               $this->debug('Wrong guess at content-type, queing GET request');
-                                                               $this->requests[$orig]['wrongGuess'] = true;
-                                                               $this->redirectQueue[$orig] = $this->requests[$orig]['effective_url'];
-                                                       }
-                                                       //die($url.' -multi- '.$request->getResponseInfo('effective_url'));
-                                                       $pool->detach($request);
-                                                       unset($this->requests[$orig]['httpRequest'], $request);
-                                                       /*
-                                                       if ($this->minimiseMemoryUse) {
-                                                               if ($this->cache($url)) {
-                                                                       unset($this->requests[$url]);
-                                                               }
-                                                       }
-                                                       */
-                                               }
-                                       }
-                               }
-                       } catch (HttpException $e) {
-                               $this->debug($e);
-                               return false;
-                       }
-               }
-               
-               //////////////////////////////////////////////////////////
-               // parallel (curl_multi_*)
-               elseif ($this->method == self::METHOD_CURL_MULTI) {
-                       $this->debug('Starting parallel fetch (curl_multi_*)');
-                       while (count($urls) > 0) {
-                               $this->debug('Processing set of '.min($this->maxParallelRequests, count($urls)));
-                               $subset = array_splice($urls, 0, $this->maxParallelRequests);
-                               $pool = new RollingCurl(array($this, 'handleCurlResponse'));
-                               $pool->window_size = count($subset);            
-                               
-                               foreach ($subset as $orig => $url) {
-                                       if (!$isRedirect) $orig = $url;
-                                       unset($this->redirectQueue[$orig]);
-                                       $this->debug("...$url");
-                                       if (!$isRedirect && isset($this->requests[$url])) {
-                                               $this->debug("......in memory");
-                                       /*
-                                       } elseif ($this->isCached($url)) {
-                                               $this->debug("......is cached");
-                                               if (!$this->minimiseMemoryUse) {
-                                                       $this->requests[$url] = $this->getCached($url);
-                                               }
-                                       */
-                                       } else {
-                                               $this->debug("......adding to pool");
-                                               $req_url = $this->rewriteUrls($url);
-                                               $req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url;
-                                               $req_url = $this->removeFragment($req_url);
-                                               if (!empty($this->headerOnlyTypes) && !isset($this->requests[$orig]['wrongGuess']) && $this->possibleUnsupportedType($req_url)) {
-                                                       $_meth = 'HEAD';
-                                               } else {
-                                                       $_meth = 'GET';
-                                                       unset($this->requests[$orig]['wrongGuess']);
-                                               }                                               
-                                               $headers = array();
-                                               //$headers[] = 'User-Agent: '.$this->userAgent;
-                                               $headers[] = $this->getUserAgent($req_url);
-                                               // add referer for picky sites
-                                               $headers[] = 'Referer: '.$this->referer;
-                                               // send cookies, if we have any
-                                               if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
-                                                       $this->debug("......sending cookies: $cookies");
-                                                       $headers[] = 'Cookie: '.$cookies;
-                                               }
-                                               $httpRequest = new RollingCurlRequest($req_url, $_meth, null, $headers, array(
-                                                       CURLOPT_CONNECTTIMEOUT => $this->requestOptions['timeout'],
-                                                       CURLOPT_TIMEOUT => $this->requestOptions['timeout']
-                                                       ));
-                                               $httpRequest->set_original_url($orig);
-                                               $this->requests[$orig] = array('headers'=>null, 'body'=>null, 'httpRequest'=>$httpRequest);
-                                               $this->requests[$orig]['original_url'] = $orig; // TODO: is this needed anymore?
-                                               $pool->add($httpRequest);
-                                       }
-                               }
-                               // did we get anything into the pool?
-                               if (count($pool) > 0) {
-                                       $this->debug('Sending request...');
-                                       $pool->execute(); // this will call handleCurlResponse() and populate $this->requests[$orig]
-                                       $this->debug('Received responses');
-                                       foreach($subset as $orig => $url) {
-                                               if (!$isRedirect) $orig = $url;
-                                               // $this->requests[$orig]['headers']
-                                               // $this->requests[$orig]['body']
-                                               // $this->requests[$orig]['effective_url']
-                                               // check content type
-                                               if ($this->headerOnlyType($this->requests[$orig]['headers'])) {
-                                                       $this->requests[$orig]['body'] = '';
-                                                       $_header_only_type = true;
-                                                       $this->debug('Header only type returned');
-                                               } else {
-                                                       $_header_only_type = false;
-                                               }
-                                               $status_code = $this->requests[$orig]['status_code'];
-                                               if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) {
-                                                       $redirectURL = $this->requests[$orig]['location'];
-                                                       if (!preg_match('!^https?://!i', $redirectURL)) {
-                                                               $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
-                                                       }
-                                                       if ($this->validateURL($redirectURL)) {
-                                                               $this->debug('Redirect detected. Valid URL: '.$redirectURL);
-                                                               // store any cookies
-                                                               $cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']);
-                                                               if (!empty($cookies)) $this->cookieJar->storeCookies($url, $cookies);                                                   
-                                                               $this->redirectQueue[$orig] = $redirectURL;
-                                                       } else {
-                                                               $this->debug('Redirect detected. Invalid URL: '.$redirectURL);
-                                                       }
-                                               } elseif (!$_header_only_type && $this->requests[$orig]['method'] == 'HEAD') {
-                                                               // the response content-type did not match our 'header only' types, 
-                                                               // but we'd issues a HEAD request because we assumed it would. So
-                                                               // let's queue a proper GET request for this item...
-                                                               $this->debug('Wrong guess at content-type, queing GET request');
-                                                               $this->requests[$orig]['wrongGuess'] = true;
-                                                               $this->redirectQueue[$orig] = $this->requests[$orig]['effective_url'];
-                                               }
-                                               // die($url.' -multi- '.$request->getResponseInfo('effective_url'));
-                                               unset($this->requests[$orig]['httpRequest'], $this->requests[$orig]['method']);
-                                       }
-                               }
-                       }
-               }
-
-               //////////////////////////////////////////////////////
-               // sequential (file_get_contents)
-               else {
-                       $this->debug('Starting sequential fetch (file_get_contents)');
-                       $this->debug('Processing set of '.count($urls));
-                       foreach ($urls as $orig => $url) {
-                               if (!$isRedirect) $orig = $url;
-                               unset($this->redirectQueue[$orig]);
-                               $this->debug("...$url");
-                               if (!$isRedirect && isset($this->requests[$url])) {
-                                       $this->debug("......in memory");
-                               /*
-                               } elseif ($this->isCached($url)) {
-                                       $this->debug("......is cached");
-                                       if (!$this->minimiseMemoryUse) {
-                                               $this->requests[$url] = $this->getCached($url);
-                                       }
-                               */
-                               } else {
-                                       $this->debug("Sending request for $url");
-                                       $this->requests[$orig]['original_url'] = $orig;
-                                       $req_url = $this->rewriteUrls($url);
-                                       $req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url;
-                                       $req_url = $this->removeFragment($req_url);
-                                       // send cookies, if we have any
-                                       $httpContext = $this->httpContext;
-                                       $httpContext['http']['header'] .= $this->getUserAgent($req_url)."\r\n";
-                                       // add referer for picky sites
-                                       $httpContext['http']['header'] .= 'Referer: '.$this->referer."\r\n";
-                                       if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
-                                               $this->debug("......sending cookies: $cookies");
-                                               $httpContext['http']['header'] .= 'Cookie: '.$cookies."\r\n";
-                                       }
-                                       if (false !== ($html = @file_get_contents($req_url, false, stream_context_create($httpContext)))) {
-                                               $this->debug('Received response');
-                                               // get status code
-                                               if (!isset($http_response_header[0]) || !preg_match('!^HTTP/\d+\.\d+\s+(\d+)!', trim($http_response_header[0]), $match)) {
-                                                       $this->debug('Error: no status code found');
-                                                       // TODO: handle error - no status code
-                                               } else {
-                                                       $this->requests[$orig]['headers'] = $this->headersToString($http_response_header, false);
-                                                       // check content type
-                                                       if ($this->headerOnlyType($this->requests[$orig]['headers'])) {
-                                                               $this->requests[$orig]['body'] = '';
-                                                       } else {
-                                                               $this->requests[$orig]['body'] = $html;
-                                                       }
-                                                       $this->requests[$orig]['effective_url'] = $req_url;
-                                                       $this->requests[$orig]['status_code'] = $status_code = (int)$match[1];
-                                                       unset($match);
-                                                       // handle redirect
-                                                       if (preg_match('/^Location:(.*?)$/m', $this->requests[$orig]['headers'], $match)) {
-                                                               $this->requests[$orig]['location'] =  trim($match[1]);
-                                                       }
-                                                       if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) {
-                                                               $redirectURL = $this->requests[$orig]['location'];
-                                                               if (!preg_match('!^https?://!i', $redirectURL)) {
-                                                                       $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
-                                                               }
-                                                               if ($this->validateURL($redirectURL)) {
-                                                                       $this->debug('Redirect detected. Valid URL: '.$redirectURL);
-                                                                       // store any cookies
-                                                                       $cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']);
-                                                                       if (!empty($cookies)) $this->cookieJar->storeCookies($url, $cookies);
-                                                                       $this->redirectQueue[$orig] = $redirectURL;
-                                                               } else {
-                                                                       $this->debug('Redirect detected. Invalid URL: '.$redirectURL);
-                                                               }
-                                                       }
-                                               }
-                                       } else {
-                                               $this->debug('Error retrieving URL');
-                                               //print_r($req_url);
-                                               //print_r($http_response_header);
-                                               //print_r($html);
-                                               
-                                               // TODO: handle error - failed to retrieve URL
-                                       }
-                               }
-                       }
-               }
-       }
-       
-       /**
-        * Store a finished curl transfer in $this->requests, keyed by the
-        * request's original URL.
-        *
-        * @param string $response raw response: header block immediately followed by the body
-        * @param array  $info     transfer info; 'header_size', 'url' and 'http_code' are read
-        * @param object $request  request object; 'url_original' and 'method' are read
-        */
-       public function handleCurlResponse($response, $info, $request) {
-               $headerSize = $info['header_size'];
-               // work on the entry by reference so keys set earlier are preserved
-               $entry =& $this->requests[$request->url_original];
-               $entry['headers'] = substr($response, 0, $headerSize);
-               $entry['body'] = substr($response, $headerSize);
-               $entry['method'] = $request->method;
-               $entry['effective_url'] = $info['url'];
-               $entry['status_code'] = (int)$info['http_code'];
-               // remember the redirect target, if the response carries one
-               if (preg_match('/^Location:(.*?)$/m', $entry['headers'], $found)) {
-                       $entry['location'] =  trim($found[1]);
-               }
-               unset($entry);
-       }
-       
-       /**
-        * Flatten a list of headers into a single newline-separated string.
-        *
-        * @param array $headers     either a plain list of complete header lines
-        *                           or a name => value(s) map
-        * @param bool  $associative true when $headers is a name => value(s) map
-        * @return string
-        */
-       protected function headersToString(array $headers, $associative=true) {
-               // a plain list already holds finished lines
-               if (!$associative) return implode("\n", $headers);
-               $lines = array();
-               foreach ($headers as $name => $value) {
-                       // a header that occurs several times arrives as an array of values
-                       $values = is_array($value) ? $value : array($value);
-                       foreach ($values as $single) {
-                               $lines[] = "$name: $single";
-                       }
-               }
-               return rtrim(implode("\n", $lines));
-       }
-       
-       /**
-        * Return the response for a URL, fetching it first if it is not
-        * already held in memory.
-        *
-        * @param string $url      URL to retrieve (cast to string)
-        * @param bool   $remove   drop the in-memory copy after a successful lookup
-        * @param bool   $gzdecode decode the body when the headers announce gzip encoding
-        * @return array|false response array (body, headers, ...) or false when the request failed
-        */
-       public function get($url, $remove=false, $gzdecode=true) {
-               $url = "$url";
-               // TODO: disk cache lookup (isCached()/getCached()) is not implemented yet
-               if (isset($this->requests[$url]) && isset($this->requests[$url]['body'])) {
-                       $this->debug("URL already fetched - in memory ($url, effective: {$this->requests[$url]['effective_url']})");
-               } else {
-                       $this->debug("Fetching URL ($url)");
-                       $this->fetchAll(array($url));
-                       if (!isset($this->requests[$url]) || !isset($this->requests[$url]['body'])) {
-                               $this->debug("Request failed");
-                               // bail out here: there are no headers/body to post-process
-                               return false;
-                       }
-               }
-               $response = $this->requests[$url];
-               if ($remove) unset($this->requests[$url]);
-               // compare against false explicitly; only inspect headers that are actually present
-               if ($gzdecode && isset($response['headers']) && stripos($response['headers'], 'Content-Encoding: gzip') !== false) {
-                       if ($html = gzdecode($response['body'])) {
-                               $response['body'] = $html;
-                       }
-               }
-               return $response;
-       }
-       
-       /**
-        * Report whether parallel requests are possible, i.e. whether the
-        * HttpRequestPool class or the curl_multi_init() function exists.
-        *
-        * @return bool
-        */
-       public function parallelSupport() {
-               if (class_exists('HttpRequestPool')) {
-                       return true;
-               }
-               return function_exists('curl_multi_init');
-       }
-       
-       /**
-        * Check whether the Content-Type found in a header block is listed
-        * in $this->headerOnlyTypes, either as a full MIME type
-        * (e.g. image/jpeg) or as a bare top-level type (e.g. image).
-        *
-        * @param string $headers raw response headers
-        * @return bool false when no Content-Type header is found or nothing matches
-        */
-       private function headerOnlyType($headers) {
-               if (!preg_match('!^Content-Type:\s*(([a-z-]+)/([^;\r\n ]+))!im', $headers, $parts)) {
-                       return false;
-               }
-               $fullMime = strtolower(trim($parts[1]));
-               $topLevel = strtolower(trim($parts[2]));
-               return in_array($fullMime, $this->headerOnlyTypes) || in_array($topLevel, $this->headerOnlyTypes);
-       }
-       
-       /**
-        * Guess from the file extension in a URL's path whether the resource
-        * may be of an unwanted type (extension listed in $this->headerOnlyClues).
-        *
-        * @param string $url
-        * @return bool false when the path is empty or contains no dot
-        */
-       private function possibleUnsupportedType($url) {
-               $urlPath = @parse_url($url, PHP_URL_PATH);
-               // no usable path, or nothing that could be an extension
-               if (!$urlPath || strpos($urlPath, '.') === false) {
-                       return false;
-               }
-               $extension = pathinfo($urlPath, PATHINFO_EXTENSION);
-               return in_array(strtolower(trim($extension)), $this->headerOnlyClues);
-       }
-}
-
-// gzdecode fallback for PHP builds that do not provide the function natively.
-// Based on http://www.php.net/manual/en/function.gzdecode.php#82930
-if (!function_exists('gzdecode')) {
-       /**
-        * Decode a single gzip member (RFC 1952).
-        *
-        * @param string   $data      gzip data: header, deflate body and 8-byte footer
-        * @param string   $filename  receives the stored file name when the FNAME flag is set
-        * @param string   $error     receives a message for some failure cases
-        * @param int|null $maxlength passed to gzinflate() as the maximum decoded length
-        * @return string|false|null  decoded data; null or false on failure
-        */
-       function gzdecode($data,&$filename='',&$error='',$maxlength=null)
-       {
-               $len = strlen($data);
-               if ($len < 18 || strcmp(substr($data,0,2),"\x1f\x8b")) {
-                       $error = "Not in GZIP format.";
-                       return null;  // Not GZIP format (See RFC 1952)
-               }
-               $method = ord(substr($data,2,1));  // Compression method
-               $flags  = ord(substr($data,3,1));  // Flags
-               // Only the five low flag bits are defined. The parentheses are
-               // required: != binds tighter than & in PHP.
-               if (($flags & 31) != $flags) {
-                       $error = "Reserved bits not allowed.";
-                       return null;
-               }
-               // Bytes 4-9 (MTIME, XFL, OS) are not needed for decoding.
-               $headerlen = 10;
-               if ($flags & 4) {
-                       // FEXTRA: 2-byte little-endian length directly after the
-                       // fixed 10-byte header, followed by that many bytes of data
-                       if ($len - $headerlen - 2 < 8) {
-                               return false;  // invalid
-                       }
-                       $extralen = unpack("v",substr($data,$headerlen,2));
-                       $extralen = $extralen[1];
-                       if ($len - $headerlen - 2 - $extralen < 8) {
-                               return false;  // invalid
-                       }
-                       $headerlen += 2 + $extralen;
-               }
-               $filename = "";
-               if ($flags & 8) {
-                       // FNAME: zero-terminated string
-                       if ($len - $headerlen - 1 < 8) {
-                               return false; // invalid
-                       }
-                       $filenamelen = strpos(substr($data,$headerlen),chr(0));
-                       if ($filenamelen === false || $len - $headerlen - $filenamelen - 1 < 8) {
-                               return false; // invalid
-                       }
-                       $filename = substr($data,$headerlen,$filenamelen);
-                       $headerlen += $filenamelen + 1;
-               }
-               if ($flags & 16) {
-                       // FCOMMENT: zero-terminated string, skipped
-                       if ($len - $headerlen - 1 < 8) {
-                               return false;    // invalid
-                       }
-                       $commentlen = strpos(substr($data,$headerlen),chr(0));
-                       if ($commentlen === false || $len - $headerlen - $commentlen - 1 < 8) {
-                               return false;    // Invalid header format
-                       }
-                       $headerlen += $commentlen + 1;
-               }
-               if ($flags & 2) {
-                       // FHCRC: the two low-order bytes of the CRC32 of the header so far
-                       if ($len - $headerlen - 2 < 8) {
-                               return false;    // invalid
-                       }
-                       $calccrc = crc32(substr($data,0,$headerlen)) & 0xffff;
-                       $headercrc = unpack("v", substr($data,$headerlen,2));
-                       $headercrc = $headercrc[1];
-                       if ($headercrc != $calccrc) {
-                               $error = "Header checksum failed.";
-                               return false;    // Bad header CRC
-                       }
-                       $headerlen += 2;
-               }
-               // GZIP FOOTER: CRC32 and length of the uncompressed data
-               $datacrc = unpack("V",substr($data,-8,4));
-               $datacrc = sprintf('%u',$datacrc[1] & 0xFFFFFFFF);
-               $isize = unpack("V",substr($data,-4));
-               $isize = $isize[1];
-               // decompression:
-               $bodylen = $len-$headerlen-8;
-               if ($bodylen < 1) {
-                       return null;
-               }
-               if ($method != 8) {
-                       // deflate (8) is the only supported compression method
-                       $error = "Unknown compression method.";
-                       return false;
-               }
-               $data = gzinflate(substr($data,$headerlen,$bodylen),(int)$maxlength);
-               if ($data === false) {
-                       $error = "Inflate failed.";
-                       return false;
-               }
-               // Verify CRC32 and length against the footer
-               $crc   = sprintf("%u",crc32($data));
-               $crcOK = $crc == $datacrc;
-               $lenOK = $isize == strlen($data);
-               if (!$lenOK || !$crcOK) {
-                       $error = ( $lenOK ? '' : 'Length check FAILED. ') . ( $crcOK ? '' : 'Checksum FAILED.');
-                       return false;
-               }
-               return $data;
-       }
-}
+<?php\r
+/**\r
+ * Humble HTTP Agent\r
+ * \r
+ * This class is designed to take advantage of parallel HTTP requests\r
+ * offered by PHP's PECL HTTP extension or the curl_multi_* functions. \r
+ * For environments which do not have these options, it reverts to standard sequential \r
+ * requests (using file_get_contents())\r
+ * \r
+ * @version 1.1\r
+ * @date 2012-08-20\r
+ * @see http://php.net/HttpRequestPool\r
+ * @author Keyvan Minoukadeh\r
+ * @copyright 2011-2012 Keyvan Minoukadeh\r
+ * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3\r
+ */\r
+\r
+class HumbleHttpAgent\r
+{\r
+       const METHOD_REQUEST_POOL = 1;\r
+       const METHOD_CURL_MULTI = 2;\r
+       const METHOD_FILE_GET_CONTENTS = 4;\r
+       //const UA_BROWSER = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0.1) Gecko/20100101 Firefox/4.0.1';\r
+       const UA_BROWSER = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.92 Safari/535.2';\r
+       const UA_PHP = 'PHP/5.2';\r
+       const REF_GOOGLE = 'http://www.google.co.uk/url?sa=t&source=web&cd=1';\r
+       \r
+       protected $requests = array();\r
+       protected $redirectQueue = array();\r
+       protected $requestOptions;\r
+       protected $maxParallelRequests = 5;\r
+       protected $cache = null; //TODO\r
+       protected $httpContext;\r
+       protected $minimiseMemoryUse = false; //TODO\r
+       protected $method;\r
+       protected $cookieJar;\r
+       public $debug = false;\r
+       public $debugVerbose = false;\r
+       public $rewriteHashbangFragment = true; // see http://code.google.com/web/ajaxcrawling/docs/specification.html\r
+       public $maxRedirects = 5;\r
+       public $userAgentMap = array();\r
+       public $rewriteUrls = array();\r
+       public $userAgentDefault;\r
+       public $referer;\r
+       //public $userAgent = 'Mozilla/5.0';\r
+       \r
+       // Prevent certain file/mime types\r
+       // HTTP responses which match these content types will\r
+       // be returned without body.\r
+       public $headerOnlyTypes = array();\r
+       // URLs ending with one of these extensions will\r
+       // prompt Humble HTTP Agent to send a HEAD request first\r
+       // to see if returned content type matches $headerOnlyTypes.\r
+       public $headerOnlyClues = array('pdf','mp3','zip','exe','gif','gzip','gz','jpeg','jpg','mpg','mpeg','png','ppt','mov');\r
+       // AJAX triggers to search for.\r
+       // for AJAX sites, e.g. Blogger with its dynamic views templates.\r
+       public $ajaxTriggers = array("<meta name='fragment' content='!'",'<meta name="fragment" content="!"',"<meta content='!' name='fragment'",'<meta content="!" name="fragment"');\r
+       \r
+       //TODO: set max file size\r
+       //TODO: normalise headers\r
+       \r
+       /**\r
+        * Set defaults, choose the transport and prepare request options.\r
+        *\r
+        * @param array|null $requestOptions options merged over the defaults\r
+        *                                   ('timeout' => 15, 'redirect' => 0)\r
+        * @param int|null   $method         one of the METHOD_* constants; any other\r
+        *                                   value triggers auto-detection\r
+        */\r
+       function __construct($requestOptions=null, $method=null) {\r
+               $this->userAgentDefault = self::UA_BROWSER;\r
+               $this->referer = self::REF_GOOGLE;\r
+               // pick the transport: explicit choice first, then best available\r
+               $knownMethods = array(self::METHOD_REQUEST_POOL, self::METHOD_CURL_MULTI, self::METHOD_FILE_GET_CONTENTS);\r
+               if (in_array($method, $knownMethods)) {\r
+                       $this->method = $method;\r
+               } elseif (class_exists('HttpRequestPool')) {\r
+                       $this->method = self::METHOD_REQUEST_POOL;\r
+               } elseif (function_exists('curl_multi_init')) {\r
+                       $this->method = self::METHOD_CURL_MULTI;\r
+               } else {\r
+                       $this->method = self::METHOD_FILE_GET_CONTENTS;\r
+               }\r
+               // the curl_multi transport is implemented by RollingCurl\r
+               if ($this->method == self::METHOD_CURL_MULTI) {\r
+                       require_once(dirname(__FILE__).'/RollingCurl.php');\r
+               }\r
+               $this->cookieJar = new CookieJar();\r
+               // 'redirect' must stay 0: redirects are followed manually so that\r
+               // hashbang URLs can be rewritten on every hop\r
+               $defaults = array(\r
+                       'timeout' => 15,\r
+                       'redirect' => 0\r
+               );\r
+               $this->requestOptions = is_array($requestOptions) ? array_merge($defaults, $requestOptions) : $defaults;\r
+               // stream context template used by the file_get_contents() transport\r
+               $http = array(\r
+                       'ignore_errors' => true,\r
+                       'timeout' => $this->requestOptions['timeout'],\r
+                       'max_redirects' => $this->requestOptions['redirect'],\r
+                       'header' => "Accept: */*\r\n"\r
+               );\r
+               $this->httpContext = array('http' => $http);\r
+       }\r
+       \r
+       /**\r
+        * Print a debug line (prefixed with '* ') when debugging is enabled.\r
+        * With $debugVerbose set, current and peak memory use in KB are appended.\r
+        *\r
+        * @param string $msg message to print\r
+        */\r
+       protected function debug($msg) {\r
+               if (!$this->debug) return;\r
+               echo '* ',$msg;\r
+               if ($this->debugVerbose) {\r
+                       // only gather memory figures when they are actually printed\r
+                       $mem = round(memory_get_usage()/1024, 2);\r
+                       $memPeak = round(memory_get_peak_usage()/1024, 2);\r
+                       echo ' - mem used: ',$mem," (peak: $memPeak)";\r
+               }\r
+               echo "\n";\r
+               // ob_flush() raises a notice when no output buffer is active\r
+               if (ob_get_level() > 0) ob_flush();\r
+               flush();\r
+       }\r
+       \r
+       /**\r
+        * Build the User-Agent header for a URL.\r
+        *\r
+        * $this->userAgentMap is consulted for the host (leading 'www.' removed)\r
+        * and then for the parent domain prefixed with a dot; when neither is\r
+        * present $this->userAgentDefault is used.\r
+        *\r
+        * @param string $url\r
+        * @param bool   $asArray return array('User-Agent' => ...) instead of a header line\r
+        * @return string|array\r
+        */\r
+       protected function getUserAgent($url, $asArray=false) {\r
+               $host = @parse_url($url, PHP_URL_HOST);\r
+               if (strtolower(substr($host, 0, 4)) == 'www.') {\r
+                       $host = substr($host, 4);\r
+               }\r
+               $agent = $this->userAgentDefault;\r
+               if ($host) {\r
+                       // candidates: exact host, then '.parent.domain'\r
+                       $candidates = array($host);\r
+                       $labels = explode('.', $host);\r
+                       if (count($labels) > 1) {\r
+                               array_shift($labels);\r
+                               $candidates[] = '.'.implode('.', $labels);\r
+                       }\r
+                       foreach ($candidates as $candidate) {\r
+                               if (isset($this->userAgentMap[$candidate])) {\r
+                                       $agent = $this->userAgentMap[$candidate];\r
+                                       break;\r
+                               }\r
+                       }\r
+               }\r
+               return $asArray ? array('User-Agent' => $agent) : 'User-Agent: '.$agent;\r
+       }\r
+       \r
+       /**\r
+        * Rewrite a hashbang ('#!') URL by moving the fragment into an\r
+        * '_escaped_fragment_' query parameter. URLs without '#!' are\r
+        * returned unchanged.\r
+        *\r
+        * @param string $url\r
+        * @return string\r
+        */\r
+       public function rewriteHashbangFragment($url) {\r
+               if (strpos($url, '#!') === false) {\r
+                       // nothing to rewrite\r
+                       return $url;\r
+               }\r
+               // TODO: is SimplePie_IRI included?\r
+               $iri = new SimplePie_IRI($url);\r
+               // drop the leading '!' of the fragment\r
+               $escaped = substr($iri->fragment, 1);\r
+               $iri->fragment = null;\r
+               $params = array();\r
+               if (isset($iri->query)) {\r
+                       parse_str($iri->query, $params);\r
+               }\r
+               $params['_escaped_fragment_'] = (string)$escaped;\r
+               // needed for some sites: keep slashes unencoded\r
+               $iri->query = str_replace('%2F', '/', http_build_query($params));\r
+               return $iri->get_iri();\r
+       }\r
+       \r
+       /**\r
+        * Return the URL with an empty '_escaped_fragment_' query parameter\r
+        * added when the HTML contains one of $this->ajaxTriggers.\r
+        *\r
+        * @param string $url  URL the HTML was fetched from\r
+        * @param string $html response body to search\r
+        * @return string|false rewritten URL, or false when $html is empty or\r
+        *                      contains no trigger\r
+        */\r
+       public function getUglyURL($url, $html) {\r
+               if ($html == '') return false;\r
+               $found = false;\r
+               foreach ($this->ajaxTriggers as $string) {\r
+                       // compare with false: a trigger at offset 0 is still a match\r
+                       if (stripos($html, $string) !== false) {\r
+                               $found = true;\r
+                               break;\r
+                       }\r
+               }\r
+               if (!$found) return false;\r
+               $iri = new SimplePie_IRI($url);\r
+               if (isset($iri->query)) {\r
+                       parse_str($iri->query, $query);\r
+               } else {\r
+                       $query = array();\r
+               }\r
+               $query['_escaped_fragment_'] = '';\r
+               $iri->query = str_replace('%2F', '/', http_build_query($query)); // needed for some sites\r
+               return $iri->get_iri();\r
+       }\r
+       \r
+       /**\r
+        * Strip the fragment (the first '#' and everything after it) from a URL.\r
+        *\r
+        * @param string $url\r
+        * @return string the URL unchanged when it contains no '#'\r
+        */\r
+       public function removeFragment($url) {\r
+               $hashAt = strpos($url, '#');\r
+               return ($hashAt === false) ? $url : substr($url, 0, $hashAt);\r
+       }\r
+       \r
+       /**\r
+        * Apply the first matching entry of $this->rewriteUrls to a URL.\r
+        *\r
+        * Each entry maps a search string to an action. The first entry whose\r
+        * search string occurs in the URL and whose action is an array is\r
+        * applied with strtr(); other entries are ignored.\r
+        *\r
+        * @param string $url\r
+        * @return string rewritten URL, or $url when no entry applies\r
+        */\r
+       public function rewriteUrls($url) {\r
+               foreach ($this->rewriteUrls as $needle => $replacements) {\r
+                       if (strpos($url, $needle) === false || !is_array($replacements)) {\r
+                               continue;\r
+                       }\r
+                       return strtr($url, $replacements);\r
+               }\r
+               return $url;\r
+       }\r
+       \r
+       /**\r
+        * Switch debug output on or off.\r
+        *\r
+        * @param mixed $bool any value; stored as a strict boolean\r
+        */\r
+       public function enableDebug($bool=true) {\r
+               $this->debug = $bool ? true : false;\r
+       }\r
+       \r
+       /**\r
+        * Store the memory-saving flag.\r
+        * NOTE(review): the property is marked TODO and the visible code that\r
+        * would read it is commented out - confirm whether it has any effect.\r
+        *\r
+        * @param bool $bool stored as given, without a cast\r
+        */\r
+       public function minimiseMemoryUse($bool = true) {\r
+               $this->minimiseMemoryUse = $bool;\r
+       }\r
+       \r
+       /**\r
+        * Set how many URLs are processed per batch by the parallel transports.\r
+        *\r
+        * @param int $max stored as given, without validation\r
+        */\r
+       public function setMaxParallelRequests($max) {\r
+               $this->maxParallelRequests = $max;\r
+       }\r
+       \r
+       /**\r
+        * Sanitise a URL and check that it is a valid http(s) URL.\r
+        *\r
+        * @param string $url\r
+        * @return string|false the sanitised URL, or false when it is not a\r
+        *                      valid URL starting with http:// or https://\r
+        */\r
+       public function validateUrl($url) {\r
+               $url = filter_var($url, FILTER_SANITIZE_URL);\r
+               // FILTER_FLAG_SCHEME_REQUIRED is not passed: FILTER_VALIDATE_URL always\r
+               // requires a scheme, and the constant no longer exists in PHP 8\r
+               $test = filter_var($url, FILTER_VALIDATE_URL);\r
+               // deal with bug http://bugs.php.net/51192 (present in PHP 5.2.13 and PHP 5.3.2)\r
+               if ($test === false) {\r
+                       $test = filter_var(strtr($url, '-', '_'), FILTER_VALIDATE_URL);\r
+               }\r
+               if ($test !== false && $test !== null && preg_match('!^https?://!', $url)) {\r
+                       return $url;\r
+               }\r
+               return false;\r
+       }\r
+       \r
+       /**\r
+        * Fetch a set of URLs and then follow queued redirects, for at most\r
+        * $this->maxRedirects rounds.\r
+        *\r
+        * @param array $urls URLs to fetch\r
+        */\r
+       public function fetchAll(array $urls) {\r
+               $this->fetchAllOnce($urls, false);\r
+               // every round re-requests whatever the previous round queued\r
+               for ($round = 1; !empty($this->redirectQueue) && $round <= $this->maxRedirects; $round++) {\r
+                       $this->debug("Following redirects #$round...");\r
+                       $this->fetchAllOnce($this->redirectQueue, true);\r
+               }\r
+       }\r
+       \r
+       // fetch all URLs without following redirects\r
+       public function fetchAllOnce(array $urls, $isRedirect=false) {\r
+               if (!$isRedirect) $urls = array_unique($urls);\r
+               if (empty($urls)) return;\r
+               \r
+               //////////////////////////////////////////////////////\r
+               // parallel (HttpRequestPool)\r
+               if ($this->method == self::METHOD_REQUEST_POOL) {\r
+                       $this->debug('Starting parallel fetch (HttpRequestPool)');\r
+                       try {\r
+                               while (count($urls) > 0) {\r
+                                       $this->debug('Processing set of '.min($this->maxParallelRequests, count($urls)));\r
+                                       $subset = array_splice($urls, 0, $this->maxParallelRequests);\r
+                                       $pool = new HttpRequestPool();\r
+                                       foreach ($subset as $orig => $url) {\r
+                                               if (!$isRedirect) $orig = $url;\r
+                                               unset($this->redirectQueue[$orig]);\r
+                                               $this->debug("...$url");\r
+                                               if (!$isRedirect && isset($this->requests[$url])) {\r
+                                                       $this->debug("......in memory");\r
+                                               /*\r
+                                               } elseif ($this->isCached($url)) {\r
+                                                       $this->debug("......is cached");\r
+                                                       if (!$this->minimiseMemoryUse) {\r
+                                                               $this->requests[$url] = $this->getCached($url);\r
+                                                       }\r
+                                               */\r
+                                               } else {\r
+                                                       $this->debug("......adding to pool");\r
+                                                       $req_url = $this->rewriteUrls($url);\r
+                                                       $req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url;\r
+                                                       $req_url = $this->removeFragment($req_url);\r
+                                                       if (!empty($this->headerOnlyTypes) && !isset($this->requests[$orig]['wrongGuess']) && $this->possibleUnsupportedType($req_url)) {\r
+                                                               $_meth = HttpRequest::METH_HEAD;\r
+                                                       } else {\r
+                                                               $_meth = HttpRequest::METH_GET;\r
+                                                               unset($this->requests[$orig]['wrongGuess']);\r
+                                                       }\r
+                                                       $httpRequest = new HttpRequest($req_url, $_meth, $this->requestOptions);\r
+                                                       // send cookies, if we have any\r
+                                                       if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {\r
+                                                               $this->debug("......sending cookies: $cookies");\r
+                                                               $httpRequest->addHeaders(array('Cookie' => $cookies));\r
+                                                       }\r
+                                                       //$httpRequest->addHeaders(array('User-Agent' => $this->userAgent));\r
+                                                       $httpRequest->addHeaders($this->getUserAgent($req_url, true));\r
+                                                       // add referer for picky sites\r
+                                                       $httpRequest->addheaders(array('Referer' => $this->referer));\r
+                                                       $this->requests[$orig] = array('headers'=>null, 'body'=>null, 'httpRequest'=>$httpRequest);\r
+                                                       $this->requests[$orig]['original_url'] = $orig;\r
+                                                       $pool->attach($httpRequest);\r
+                                               }\r
+                                       }\r
+                                       // did we get anything into the pool?\r
+                                       if (count($pool) > 0) {\r
+                                               $this->debug('Sending request...');\r
+                                               try {\r
+                                                       $pool->send();\r
+                                               } catch (HttpRequestPoolException $e) {\r
+                                                       // do nothing\r
+                                               }\r
+                                               $this->debug('Received responses');\r
+                                               foreach($subset as $orig => $url) {\r
+                                                       if (!$isRedirect) $orig = $url;\r
+                                                       $request = $this->requests[$orig]['httpRequest'];\r
+                                                       //$this->requests[$orig]['headers'] = $this->headersToString($request->getResponseHeader());\r
+                                                       // getResponseHeader() doesn't return status line, so, for consistency...\r
+                                                       $this->requests[$orig]['headers'] = substr($request->getRawResponseMessage(), 0, $request->getResponseInfo('header_size'));\r
+                                                       // check content type\r
+                                                       // TODO: use getResponseHeader('content-type') or getResponseInfo()\r
+                                                       if ($this->headerOnlyType($this->requests[$orig]['headers'])) {\r
+                                                               $this->requests[$orig]['body'] = '';\r
+                                                               $_header_only_type = true;\r
+                                                               $this->debug('Header only type returned');\r
+                                                       } else {\r
+                                                               $this->requests[$orig]['body'] = $request->getResponseBody();\r
+                                                               $_header_only_type = false;\r
+                                                       }\r
+                                                       $this->requests[$orig]['effective_url'] = $request->getResponseInfo('effective_url');\r
+                                                       $this->requests[$orig]['status_code'] = $status_code = $request->getResponseCode();\r
+                                                       // is redirect?\r
+                                                       if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && $request->getResponseHeader('location')) {\r
+                                                               $redirectURL = $request->getResponseHeader('location');\r
+                                                               if (!preg_match('!^https?://!i', $redirectURL)) {\r
+                                                                       $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);\r
+                                                               }\r
+                                                               if ($this->validateURL($redirectURL)) {\r
+                                                                       $this->debug('Redirect detected. Valid URL: '.$redirectURL);\r
+                                                                       // store any cookies\r
+                                                                       $cookies = $request->getResponseHeader('set-cookie');\r
+                                                                       if ($cookies && !is_array($cookies)) $cookies = array($cookies);\r
+                                                                       if ($cookies) $this->cookieJar->storeCookies($url, $cookies);\r
+                                                                       $this->redirectQueue[$orig] = $redirectURL;\r
+                                                               } else {\r
+                                                                       $this->debug('Redirect detected. Invalid URL: '.$redirectURL);\r
+                                                               }\r
+                                                       } elseif (!$_header_only_type && $request->getMethod() === HttpRequest::METH_HEAD) {\r
+                                                               // the response content-type did not match our 'header only' types, \r
+                                                               // but we'd issues a HEAD request because we assumed it would. So\r
+                                                               // let's queue a proper GET request for this item...\r
+                                                               $this->debug('Wrong guess at content-type, queing GET request');\r
+                                                               $this->requests[$orig]['wrongGuess'] = true;\r
+                                                               $this->redirectQueue[$orig] = $this->requests[$orig]['effective_url'];\r
+                                                       } elseif (strpos($this->requests[$orig]['effective_url'], '_escaped_fragment_') === false) {\r
+                                                               // check for <meta name='fragment' content='!'/>\r
+                                                               // for AJAX sites, e.g. Blogger with its dynamic views templates.\r
+                                                               // Based on Google's spec: https://developers.google.com/webmasters/ajax-crawling/docs/specification\r
+                                                               if (isset($this->requests[$orig]['body'])) {\r
+                                                                       $redirectURL = $this->getUglyURL($this->requests[$orig]['effective_url'], substr($this->requests[$orig]['body'], 0, 4000));\r
+                                                                       if ($redirectURL) {\r
+                                                                               $this->debug('AJAX trigger (meta name="fragment" content="!") found. Queueing '.$redirectURL);\r
+                                                                               $this->redirectQueue[$orig] = $redirectURL;\r
+                                                                       }\r
+                                                               }\r
+                                                       }\r
+                                                       //die($url.' -multi- '.$request->getResponseInfo('effective_url'));\r
+                                                       $pool->detach($request);\r
+                                                       unset($this->requests[$orig]['httpRequest'], $request);\r
+                                                       /*\r
+                                                       if ($this->minimiseMemoryUse) {\r
+                                                               if ($this->cache($url)) {\r
+                                                                       unset($this->requests[$url]);\r
+                                                               }\r
+                                                       }\r
+                                                       */\r
+                                               }\r
+                                       }\r
+                               }\r
+                       } catch (HttpException $e) {\r
+                               $this->debug($e);\r
+                               return false;\r
+                       }\r
+               }\r
+               \r
+               //////////////////////////////////////////////////////////\r
+               // parallel (curl_multi_*)\r
+               elseif ($this->method == self::METHOD_CURL_MULTI) {\r
+                       $this->debug('Starting parallel fetch (curl_multi_*)');\r
+                       while (count($urls) > 0) {\r
+                               $this->debug('Processing set of '.min($this->maxParallelRequests, count($urls)));\r
+                               $subset = array_splice($urls, 0, $this->maxParallelRequests);\r
+                               $pool = new RollingCurl(array($this, 'handleCurlResponse'));\r
+                               $pool->window_size = count($subset);            \r
+                               \r
+                               foreach ($subset as $orig => $url) {\r
+                                       if (!$isRedirect) $orig = $url;\r
+                                       unset($this->redirectQueue[$orig]);\r
+                                       $this->debug("...$url");\r
+                                       if (!$isRedirect && isset($this->requests[$url])) {\r
+                                               $this->debug("......in memory");\r
+                                       /*\r
+                                       } elseif ($this->isCached($url)) {\r
+                                               $this->debug("......is cached");\r
+                                               if (!$this->minimiseMemoryUse) {\r
+                                                       $this->requests[$url] = $this->getCached($url);\r
+                                               }\r
+                                       */\r
+                                       } else {\r
+                                               $this->debug("......adding to pool");\r
+                                               $req_url = $this->rewriteUrls($url);\r
+                                               $req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url;\r
+                                               $req_url = $this->removeFragment($req_url);\r
+                                               if (!empty($this->headerOnlyTypes) && !isset($this->requests[$orig]['wrongGuess']) && $this->possibleUnsupportedType($req_url)) {\r
+                                                       $_meth = 'HEAD';\r
+                                               } else {\r
+                                                       $_meth = 'GET';\r
+                                                       unset($this->requests[$orig]['wrongGuess']);\r
+                                               }                                               \r
+                                               $headers = array();\r
+                                               //$headers[] = 'User-Agent: '.$this->userAgent;\r
+                                               $headers[] = $this->getUserAgent($req_url);\r
+                                               // add referer for picky sites\r
+                                               $headers[] = 'Referer: '.$this->referer;\r
+                                               // send cookies, if we have any\r
+                                               if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {\r
+                                                       $this->debug("......sending cookies: $cookies");\r
+                                                       $headers[] = 'Cookie: '.$cookies;\r
+                                               }\r
+                                               $httpRequest = new RollingCurlRequest($req_url, $_meth, null, $headers, array(\r
+                                                       CURLOPT_CONNECTTIMEOUT => $this->requestOptions['timeout'],\r
+                                                       CURLOPT_TIMEOUT => $this->requestOptions['timeout']\r
+                                                       ));\r
+                                               $httpRequest->set_original_url($orig);\r
+                                               $this->requests[$orig] = array('headers'=>null, 'body'=>null, 'httpRequest'=>$httpRequest);\r
+                                               $this->requests[$orig]['original_url'] = $orig; // TODO: is this needed anymore?\r
+                                               $pool->add($httpRequest);\r
+                                       }\r
+                               }\r
+                               // did we get anything into the pool?\r
+                               if (count($pool) > 0) {\r
+                                       $this->debug('Sending request...');\r
+                                       $pool->execute(); // this will call handleCurlResponse() and populate $this->requests[$orig]\r
+                                       $this->debug('Received responses');\r
+                                       foreach($subset as $orig => $url) {\r
+                                               if (!$isRedirect) $orig = $url;\r
+                                               // $this->requests[$orig]['headers']\r
+                                               // $this->requests[$orig]['body']\r
+                                               // $this->requests[$orig]['effective_url']\r
+                                               // check content type\r
+                                               if ($this->headerOnlyType($this->requests[$orig]['headers'])) {\r
+                                                       $this->requests[$orig]['body'] = '';\r
+                                                       $_header_only_type = true;\r
+                                                       $this->debug('Header only type returned');\r
+                                               } else {\r
+                                                       $_header_only_type = false;\r
+                                               }\r
+                                               $status_code = $this->requests[$orig]['status_code'];\r
+                                               if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) {\r
+                                                       $redirectURL = $this->requests[$orig]['location'];\r
+                                                       if (!preg_match('!^https?://!i', $redirectURL)) {\r
+                                                               $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);\r
+                                                       }\r
+                                                       if ($this->validateURL($redirectURL)) {\r
+                                                               $this->debug('Redirect detected. Valid URL: '.$redirectURL);\r
+                                                               // store any cookies\r
+                                                               $cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']);\r
+                                                               if (!empty($cookies)) $this->cookieJar->storeCookies($url, $cookies);                                                   \r
+                                                               $this->redirectQueue[$orig] = $redirectURL;\r
+                                                       } else {\r
+                                                               $this->debug('Redirect detected. Invalid URL: '.$redirectURL);\r
+                                                       }\r
+                                               } elseif (!$_header_only_type && $this->requests[$orig]['method'] == 'HEAD') {\r
+                                                       // the response content-type did not match our 'header only' types, \r
+                                                       // but we'd issues a HEAD request because we assumed it would. So\r
+                                                       // let's queue a proper GET request for this item...\r
+                                                       $this->debug('Wrong guess at content-type, queing GET request');\r
+                                                       $this->requests[$orig]['wrongGuess'] = true;\r
+                                                       $this->redirectQueue[$orig] = $this->requests[$orig]['effective_url'];\r
+                                               } elseif (strpos($this->requests[$orig]['effective_url'], '_escaped_fragment_') === false) {\r
+                                                       // check for <meta name='fragment' content='!'/>\r
+                                                       // for AJAX sites, e.g. Blogger with its dynamic views templates.\r
+                                                       // Based on Google's spec: https://developers.google.com/webmasters/ajax-crawling/docs/specification\r
+                                                       if (isset($this->requests[$orig]['body'])) {\r
+                                                               $redirectURL = $this->getUglyURL($this->requests[$orig]['effective_url'], substr($this->requests[$orig]['body'], 0, 4000));\r
+                                                               if ($redirectURL) {\r
+                                                                       $this->debug('AJAX trigger (meta name="fragment" content="!") found. Queueing '.$redirectURL);\r
+                                                                       $this->redirectQueue[$orig] = $redirectURL;\r
+                                                               }\r
+                                                       }\r
+                                               }\r
+                                               // die($url.' -multi- '.$request->getResponseInfo('effective_url'));\r
+                                               unset($this->requests[$orig]['httpRequest'], $this->requests[$orig]['method']);\r
+                                       }\r
+                               }\r
+                       }\r
+               }\r
+\r
+               //////////////////////////////////////////////////////\r
+               // sequential (file_get_contents)\r
+               else {\r
+                       $this->debug('Starting sequential fetch (file_get_contents)');\r
+                       $this->debug('Processing set of '.count($urls));\r
+                       foreach ($urls as $orig => $url) {\r
+                               if (!$isRedirect) $orig = $url;\r
+                               unset($this->redirectQueue[$orig]);\r
+                               $this->debug("...$url");\r
+                               if (!$isRedirect && isset($this->requests[$url])) {\r
+                                       $this->debug("......in memory");\r
+                               /*\r
+                               } elseif ($this->isCached($url)) {\r
+                                       $this->debug("......is cached");\r
+                                       if (!$this->minimiseMemoryUse) {\r
+                                               $this->requests[$url] = $this->getCached($url);\r
+                                       }\r
+                               */\r
+                               } else {\r
+                                       $this->debug("Sending request for $url");\r
+                                       $this->requests[$orig]['original_url'] = $orig;\r
+                                       $req_url = $this->rewriteUrls($url);\r
+                                       $req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url;\r
+                                       $req_url = $this->removeFragment($req_url);\r
+                                       // send cookies, if we have any\r
+                                       $httpContext = $this->httpContext;\r
+                                       $httpContext['http']['header'] .= $this->getUserAgent($req_url)."\r\n";\r
+                                       // add referer for picky sites\r
+                                       $httpContext['http']['header'] .= 'Referer: '.$this->referer."\r\n";\r
+                                       if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {\r
+                                               $this->debug("......sending cookies: $cookies");\r
+                                               $httpContext['http']['header'] .= 'Cookie: '.$cookies."\r\n";\r
+                                       }\r
+                                       if (false !== ($html = @file_get_contents($req_url, false, stream_context_create($httpContext)))) {\r
+                                               $this->debug('Received response');\r
+                                               // get status code\r
+                                               if (!isset($http_response_header[0]) || !preg_match('!^HTTP/\d+\.\d+\s+(\d+)!', trim($http_response_header[0]), $match)) {\r
+                                                       $this->debug('Error: no status code found');\r
+                                                       // TODO: handle error - no status code\r
+                                               } else {\r
+                                                       $this->requests[$orig]['headers'] = $this->headersToString($http_response_header, false);\r
+                                                       // check content type\r
+                                                       if ($this->headerOnlyType($this->requests[$orig]['headers'])) {\r
+                                                               $this->requests[$orig]['body'] = '';\r
+                                                       } else {\r
+                                                               $this->requests[$orig]['body'] = $html;\r
+                                                       }\r
+                                                       $this->requests[$orig]['effective_url'] = $req_url;\r
+                                                       $this->requests[$orig]['status_code'] = $status_code = (int)$match[1];\r
+                                                       unset($match);\r
+                                                       // handle redirect\r
+                                                       if (preg_match('/^Location:(.*?)$/mi', $this->requests[$orig]['headers'], $match)) {\r
+                                                               $this->requests[$orig]['location'] =  trim($match[1]);\r
+                                                       }\r
+                                                       if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) {\r
+                                                               $redirectURL = $this->requests[$orig]['location'];\r
+                                                               if (!preg_match('!^https?://!i', $redirectURL)) {\r
+                                                                       $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);\r
+                                                               }\r
+                                                               if ($this->validateURL($redirectURL)) {\r
+                                                                       $this->debug('Redirect detected. Valid URL: '.$redirectURL);\r
+                                                                       // store any cookies\r
+                                                                       $cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']);\r
+                                                                       if (!empty($cookies)) $this->cookieJar->storeCookies($url, $cookies);\r
+                                                                       $this->redirectQueue[$orig] = $redirectURL;\r
+                                                               } else {\r
+                                                                       $this->debug('Redirect detected. Invalid URL: '.$redirectURL);\r
+                                                               }\r
+                                                       } elseif (strpos($this->requests[$orig]['effective_url'], '_escaped_fragment_') === false) {\r
+                                                               // check for <meta name='fragment' content='!'/>\r
+                                                               // for AJAX sites, e.g. Blogger with its dynamic views templates.\r
+                                                               // Based on Google's spec: https://developers.google.com/webmasters/ajax-crawling/docs/specification\r
+                                                               if (isset($this->requests[$orig]['body'])) {\r
+                                                                       $redirectURL = $this->getUglyURL($this->requests[$orig]['effective_url'], substr($this->requests[$orig]['body'], 0, 4000));\r
+                                                                       if ($redirectURL) {\r
+                                                                               $this->debug('AJAX trigger (meta name="fragment" content="!") found. Queueing '.$redirectURL);\r
+                                                                               $this->redirectQueue[$orig] = $redirectURL;\r
+                                                                       }\r
+                                                               }\r
+                                                       }\r
+                                               }\r
+                                       } else {\r
+                                               $this->debug('Error retrieving URL');\r
+                                               //print_r($req_url);\r
+                                               //print_r($http_response_header);\r
+                                               //print_r($html);\r
+                                               \r
+                                               // TODO: handle error - failed to retrieve URL\r
+                                       }\r
+                               }\r
+                       }\r
+               }\r
+       }\r
+       \r
+       /**\r
+        * Callback invoked for each completed curl_multi request.\r
+        *\r
+        * Splits the raw response into headers and body using the header size\r
+        * reported by curl, and records the result under the request's original URL.\r
+        *\r
+        * @param string $response raw response (headers immediately followed by body)\r
+        * @param array $info transfer info; 'header_size', 'url' and 'http_code' are read\r
+        * @param object $request request object; 'url_original' and 'method' are read\r
+        * @return void\r
+        */\r
+       public function handleCurlResponse($response, $info, $request) {\r
+               $key = $request->url_original;\r
+               $headerLength = $info['header_size'];\r
+               $rawHeaders = substr($response, 0, $headerLength);\r
+               $this->requests[$key]['headers'] = $rawHeaders;\r
+               $this->requests[$key]['body'] = substr($response, $headerLength);\r
+               $this->requests[$key]['method'] = $request->method;\r
+               $this->requests[$key]['effective_url'] = $info['url'];\r
+               $this->requests[$key]['status_code'] = (int)$info['http_code'];\r
+               // remember the redirect target (if any) so the caller can queue it\r
+               $found = array();\r
+               if (preg_match('/^Location:(.*?)$/mi', $rawHeaders, $found)) {\r
+                       $this->requests[$key]['location'] = trim($found[1]);\r
+               }\r
+       }\r
+       \r
+       /**\r
+        * Flatten a list or map of headers into a single newline-separated string.\r
+        *\r
+        * @param array $headers either a plain list of header lines, or a map of\r
+        *                       header name => value (or name => list of values)\r
+        * @param bool $associative true when $headers is a name => value map\r
+        * @return string\r
+        */\r
+       protected function headersToString(array $headers, $associative=true) {\r
+               if (!$associative) {\r
+                       return implode("\n", $headers);\r
+               }\r
+               $lines = array();\r
+               foreach ($headers as $name => $value) {\r
+                       // a header that occurs several times is stored as a list of values\r
+                       $values = is_array($value) ? $value : array($value);\r
+                       foreach ($values as $item) {\r
+                               $lines[] = "$name: $item";\r
+                       }\r
+               }\r
+               return rtrim(implode("\n", $lines));\r
+       }\r
+       \r
+       /**\r
+        * Return the response stored for a URL, fetching it first when it is not in memory.\r
+        *\r
+        * @param string $url URL to look up; cast to string because it is used as the array key\r
+        * @param bool $remove when true, drop the in-memory copy once it has been read\r
+        * @param bool $gzdecode when true, inflate the body if the headers announce gzip encoding\r
+        * @return array|false the stored response array, or false when the request failed\r
+        */\r
+       public function get($url, $remove=false, $gzdecode=true) {\r
+               $url = "$url";\r
+               if (isset($this->requests[$url]) && isset($this->requests[$url]['body'])) {\r
+                       $this->debug("URL already fetched - in memory ($url, effective: {$this->requests[$url]['effective_url']})");\r
+                       $response = $this->requests[$url];\r
+               /*\r
+               } elseif ($this->isCached($url)) {\r
+                       $this->debug("URL already fetched - in disk cache ($url)");\r
+                       $response = $this->getCached($url);\r
+                       $this->requests[$url] = $response;\r
+               */\r
+               } else {\r
+                       $this->debug("Fetching URL ($url)");\r
+                       $this->fetchAll(array($url));\r
+                       if (isset($this->requests[$url]) && isset($this->requests[$url]['body'])) {\r
+                               $response = $this->requests[$url];\r
+                       } else {\r
+                               $this->debug("Request failed");\r
+                               $response = false;\r
+                       }\r
+               }\r
+               /*\r
+               if ($this->minimiseMemoryUse && $response) {\r
+                       $this->cache($url);\r
+                       unset($this->requests[$url]);\r
+               }\r
+               */\r
+               if ($remove && $response) unset($this->requests[$url]);\r
+               // Only attempt to inflate a real response: a failed request yields false,\r
+               // and indexing into false raises a warning on recent PHP versions.\r
+               if ($gzdecode && is_array($response)\r
+                       && isset($response['headers'], $response['body'])\r
+                       && is_string($response['headers'])\r
+                       && is_string($response['body']) && $response['body'] !== ''\r
+                       // compare against false explicitly: a match at offset 0 is still a match\r
+                       && stripos($response['headers'], 'Content-Encoding: gzip') !== false) {\r
+                       $decoded = gzdecode($response['body']);\r
+                       // keep the original body when decoding fails (non-string result)\r
+                       if (is_string($decoded)) {\r
+                               $response['body'] = $decoded;\r
+                       }\r
+               }\r
+               return $response;\r
+       }\r
+       \r
+       /**\r
+        * Report whether a parallel fetch mechanism is available in this environment.\r
+        *\r
+        * @return bool true if the HttpRequestPool class or curl_multi_init() exists\r
+        */\r
+       public function parallelSupport() {\r
+               if (class_exists('HttpRequestPool')) {\r
+                       return true;\r
+               }\r
+               return function_exists('curl_multi_init');\r
+       }\r
+       \r
+       /**\r
+        * Check whether the Content-Type found in a header string is listed in\r
+        * $this->headerOnlyTypes, either as a full mime type (e.g. image/jpeg)\r
+        * or as the bare top-level type (e.g. image).\r
+        *\r
+        * @param string $headers raw response headers\r
+        * @return bool\r
+        */\r
+       private function headerOnlyType($headers) {\r
+               $parts = array();\r
+               if (!preg_match('!^Content-Type:\s*(([a-z-]+)/([^;\r\n ]+))!im', $headers, $parts)) {\r
+                       return false;\r
+               }\r
+               $fullMime = strtolower(trim($parts[1]));\r
+               $topLevel = strtolower(trim($parts[2]));\r
+               return in_array($fullMime, $this->headerOnlyTypes)\r
+                       || in_array($topLevel, $this->headerOnlyTypes);\r
+       }\r
+       \r
+       /**\r
+        * Guess from the file extension in the URL path whether the resource is\r
+        * likely to be one of the types listed in $this->headerOnlyClues.\r
+        *\r
+        * @param string $url URL to inspect\r
+        * @return bool true when the lower-cased path extension is in $this->headerOnlyClues\r
+        */\r
+       private function possibleUnsupportedType($url) {\r
+               $urlPath = @parse_url($url, PHP_URL_PATH);\r
+               // no path, or no dot in it: nothing to base a guess on\r
+               if (!$urlPath || strpos($urlPath, '.') === false) {\r
+                       return false;\r
+               }\r
+               $extension = pathinfo($urlPath, PATHINFO_EXTENSION);\r
+               $extension = strtolower(trim($extension));\r
+               return in_array($extension, $this->headerOnlyClues);\r
+       }\r
+}\r
+\r
+// gzdecode from http://www.php.net/manual/en/function.gzdecode.php#82930\r
+if (!function_exists('gzdecode')) {\r
+       function gzdecode($data,&$filename='',&$error='',$maxlength=null) \r
+       {\r
+               $len = strlen($data);\r
+               if ($len < 18 || strcmp(substr($data,0,2),"\x1f\x8b")) {\r
+                       $error = "Not in GZIP format.";\r
+                       return null;  // Not GZIP format (See RFC 1952)\r
+               }\r
+               $method = ord(substr($data,2,1));  // Compression method\r
+               $flags  = ord(substr($data,3,1));  // Flags\r
+               if ($flags & 31 != $flags) {\r
+                       $error = "Reserved bits not allowed.";\r
+                       return null;\r
+               }\r
+               // NOTE: $mtime may be negative (PHP integer limitations)\r
+               $mtime = unpack("V", substr($data,4,4));\r
+               $mtime = $mtime[1];\r
+               $xfl   = substr($data,8,1);\r
+               $os    = substr($data,8,1);\r
+               $headerlen = 10;\r
+               $extralen  = 0;\r
+               $extra     = "";\r
+               if ($flags & 4) {\r
+                       // 2-byte length prefixed EXTRA data in header\r
+                       if ($len - $headerlen - 2 < 8) {\r
+                               return false;  // invalid\r
+                       }\r
+                       $extralen = unpack("v",substr($data,8,2));\r
+                       $extralen = $extralen[1];\r
+                       if ($len - $headerlen - 2 - $extralen < 8) {\r
+                               return false;  // invalid\r
+                       }\r
+                       $extra = substr($data,10,$extralen);\r
+                       $headerlen += 2 + $extralen;\r
+               }\r
+               $filenamelen = 0;\r
+               $filename = "";\r
+               if ($flags & 8) {\r
+                       // C-style string\r
+                       if ($len - $headerlen - 1 < 8) {\r
+                               return false; // invalid\r
+                       }\r
+                       $filenamelen = strpos(substr($data,$headerlen),chr(0));\r
+                       if ($filenamelen === false || $len - $headerlen - $filenamelen - 1 < 8) {\r
+                               return false; // invalid\r
+                       }\r
+                       $filename = substr($data,$headerlen,$filenamelen);\r
+                       $headerlen += $filenamelen + 1;\r
+               }\r
+               $commentlen = 0;\r
+               $comment = "";\r
+               if ($flags & 16) {\r
+                       // C-style string COMMENT data in header\r
+                       if ($len - $headerlen - 1 < 8) {\r
+                               return false;    // invalid\r
+                       }\r
+                       $commentlen = strpos(substr($data,$headerlen),chr(0));\r
+                       if ($commentlen === false || $len - $headerlen - $commentlen - 1 < 8) {\r
+                               return false;    // Invalid header format\r
+                       }\r
+                       $comment = substr($data,$headerlen,$commentlen);\r
+                       $headerlen += $commentlen + 1;\r
+               }\r
+               $headercrc = "";\r
+               if ($flags & 2) {\r
+                       // 2-bytes (lowest order) of CRC32 on header present\r
+                       if ($len - $headerlen - 2 < 8) {\r
+                               return false;    // invalid\r
+                       }\r
+                       $calccrc = crc32(substr($data,0,$headerlen)) & 0xffff;\r
+                       $headercrc = unpack("v", substr($data,$headerlen,2));\r
+                       $headercrc = $headercrc[1];\r
+                       if ($headercrc != $calccrc) {\r
+                               $error = "Header checksum failed.";\r
+                               return false;    // Bad header CRC\r
+                       }\r
+                       $headerlen += 2;\r
+               }\r
+               // GZIP FOOTER\r
+               $datacrc = unpack("V",substr($data,-8,4));\r
+               $datacrc = sprintf('%u',$datacrc[1] & 0xFFFFFFFF);\r
+               $isize = unpack("V",substr($data,-4));\r
+               $isize = $isize[1];\r
+               // decompression:\r
+               $bodylen = $len-$headerlen-8;\r
+               if ($bodylen < 1) {\r
+                       // IMPLEMENTATION BUG!\r
+                       return null;\r
+               }\r
+               $body = substr($data,$headerlen,$bodylen);\r
+               $data = "";\r
+               if ($bodylen > 0) {\r
+                       switch ($method) {\r
+                       case 8:\r
+                               // Currently the only supported compression method:\r
+                               $data = gzinflate($body,$maxlength);\r
+                               break;\r
+                       default:\r
+                               $error = "Unknown compression method.";\r
+                               return false;\r
+                       }\r
+               }  // zero-byte body content is allowed\r
+               // Verifiy CRC32\r
+               $crc   = sprintf("%u",crc32($data));\r
+               $crcOK = $crc == $datacrc;\r
+               $lenOK = $isize == strlen($data);\r
+               if (!$lenOK || !$crcOK) {\r
+                       $error = ( $lenOK ? '' : 'Length check FAILED. ') . ( $crcOK ? '' : 'Checksum FAILED.');\r
+                       return false;\r
+               }\r
+               return $data;\r
+       }\r
+}\r
 ?>
\ No newline at end of file