Merge pull request #707 from mariroz/dev

update to 3.2 version of full-text-rss, issue #694
author: Nicolas Lœuillet <nicolas@loeuillet.org> 2014-05-29 12:50:28 +0200
committer: Nicolas Lœuillet <nicolas@loeuillet.org> 2014-05-29 12:50:28 +0200
commit: 87f01ea2e97715ac5df4ef7a6741cc26f3a5cd1b (patch)
tree: 558818975ac41403e7d55ad07c5b0ac29806e907 /inc
parent: ab157bbb75ba226917145c9bf906cbf764a85cd0 (diff)
parent: 0b9bb8cb7868f24137c5d8b85c39cc88ea877411 (diff)
download: wallabag-87f01ea2e97715ac5df4ef7a6741cc26f3a5cd1b.tar.gz
wallabag-87f01ea2e97715ac5df4ef7a6741cc26f3a5cd1b.tar.zst
wallabag-87f01ea2e97715ac5df4ef7a6741cc26f3a5cd1b.zip
22 files changed, 4849 insertions, 4190 deletions
diff --git a/inc/3rdparty/config.php b/inc/3rdparty/config.php
index e618117b..ec680d86 100755
--- a/inc/3rdparty/config.php
+++ b/inc/3rdparty/config.php
@@ -19,7 +19,7 @@ if (!isset($options)) $options = new stdClass();
 // Enable service
 // ----------------------
 // Set this to false if you want to disable the service.
-// If set to false, no feed is produced and users will 
+// If set to false, no feed is produced and users will
 // be told that the service is disabled.
 $options->enabled = true;
@@ -43,10 +43,64 @@ $options->default_entries = 5;
 // ----------------------
 // The maximum number of feed items to process when no access key is supplied.
 // This limits the user-supplied &max=x value. For example, if the user
-// asks for 20 items to be processed (&max=20), if max_entries is set to 
+// asks for 20 items to be processed (&max=20), if max_entries is set to
 // 10, only 10 will be processed.
 $options->max_entries = 10;
+// Full content
+// ----------------------
+// By default Full-Text RSS includes the extracted content in the output.
+// You can exclude this from the output by passing '&content=0' in the querystring.
+//
+// Possible values...
+// Always include: true
+// Never include: false
+// Include unless user overrides (&content=0): 'user' (default)
+//
+// Note: currently this does not disable full content extraction. It simply omits it
+// from the output.
+$options->content = 'user';
+// Excerpts
+// ----------------------
+// By default Full-Text RSS does not include excerpts in the output.
+// You can enable this by passing '&summary=1' in the querystring.
+// This will include a plain text excerpt from the extracted content.
+//
+// Possible values...
+// Always include: true (recommended for new users)
+// Never include: false
+// Don't include unless user overrides (&summary=1): 'user' (default)
+//
+// Important: if both content and excerpts are requested, the excerpt will be
+// placed in the description element and the full content inside content:encoded.
+// If excerpts are not requested, the full content will go inside the description element.
+//
+// Why are we not returning both excerpts and content by default?
+// Mainly for backward compatibility.
+// Excerpts should appear in the feed item's description element. Previous versions
+// of Full-Text RSS did not return excerpts, so the description element was always
+// used for the full content (as recommended by the RSS advisory). When returning both,
+// we need somewhere else to place the content (content:encoded).
+// Having both enabled should not create any problems for news readers, but it may create
+// problems for developers upgrading from one of our earlier versions who may now find
+// their applications are returning excerpts instead of the full content they were
+// expecting. To avoid such surprises for users who are upgrading Full-Text RSS,
+// excerpts must be explicitly requested in the querystring by default.
+//
+// Why not use a different element name for excerpts?
+// According to the RSS advisory:
+// "Publishers who employ summaries should store the summary in description and
+// the full content in content:encoded, ordering description first within the item.
+// On items with no summary, the full content should be stored in description."
+// See: http://www.rssboard.org/rss-profile#namespace-elements-content-encoded
+//
+// For more consistent element naming, we recommend new users set this option to true.
+// The full content can still be excluded via the querystring, but the element names
+// will not change: when $options->summary = true, the description element will always
+// be reserved for the excerpt and content:encoded always for full content.
+$options->summary = 'user';
 // Rewrite relative URLs
 // ----------------------
 // With this enabled relative URLs found in the extracted content
@@ -67,7 +121,7 @@ $options->exclude_items_on_fail = 'user';
 // Enable multi-page support
 // -------------------------
 // If enabled, we will try to follow next page links on multi-page articles.
-// Currently this only happens for sites where next_page_link has been defined 
+// Currently this only happens for sites where next_page_link has been defined
 // in a site config file.
 $options->multipage = true;
@@ -125,10 +179,10 @@ $options->detect_language = 1;
 // Registration key
 // ---------------
-// The registration key is optional. It is not required to use Full-Text RSS, 
+// The registration key is optional. It is not required to use Full-Text RSS,
-// and does not affect the normal operation of Full-Text RSS. It is currently 
+// and does not affect the normal operation of Full-Text RSS. It is currently
-// only used on admin pages which help you update site patterns with the 
+// only used on admin pages which help you update site patterns with the
-// latest version offered by FiveFilters.org. For these admin-related 
+// latest version offered by FiveFilters.org. For these admin-related
 // tasks to complete, we will require a valid registration key.
 // If you would like one, you can purchase the latest version of Full-Text RSS
 // at http://fivefilters.org/content-only/
@@ -144,12 +198,12 @@ $options->registration_key = '';
 // ----------------------
 // Certain pages/actions, e.g. updating site patterns with our online tool, will require admin credentials.
 // To use these pages, enter a password here and you'll be prompted for it when you try to access those pages.
-// If no password or username is set, pages requiring admin privelages will be inaccessible. 
+// If no password or username is set, pages requiring admin privileges will be inaccessible.
 // The default username is 'admin'.
 // If overriding with an environment variable, separate username and password with a colon, e.g.:
 // ftr_admin_credentials: admin:my-secret-password
 // Example: $options->admin_credentials = array('username'=>'admin', 'password'=>'my-secret-password');
-$options->admin_credentials = array('username'=>'admin', 'password'=>'admin');
+$options->admin_credentials = array('username'=>'admin', 'password'=>'');
 // URLs to allow
 // ----------------------
@@ -178,12 +232,12 @@ $options->key_required = false;
 // ----------------------
 // By default, when processing feeds, we assume item titles in the feed
 // have not been truncated. So after processing web pages, the extracted titles
-// are not used in the generated feed. If you prefer to have extracted titles in 
+// are not used in the generated feed. If you prefer to have extracted titles in
-// the feed you can either set this to false, in which case we will always favour 
+// the feed you can either set this to false, in which case we will always favour
-// extracted titles. Alternatively, if set to 'user' (default) we'll use the 
+// extracted titles. Alternatively, if set to 'user' (default) we'll use the
 // extracted title if you pass '&use_extracted_title' in the querystring.
 // Possible values:
-// * Favour feed titles: true 
+// * Favour feed titles: true
 // * Favour extracted titles: false
 // * Favour feed titles with user override: 'user' (default)
 // Note: this has no effect when the input URL is to a web page - in these cases
@@ -192,17 +246,17 @@ $options->favour_feed_titles = 'user';
 // Access keys (password protected access)
 // ------------------------------------
-// NOTE: You do not need an API key from fivefilters.org to run your own 
+// NOTE: You do not need an API key from fivefilters.org to run your own
 // copy of the code. This is here if you'd like to restrict access to
 // _your_ copy.
 // Keys let you group users - those with a key and those without - and
 // restrict access to the service for those without a key.
 // If you want everyone to access the service in the same way, you can
 // leave the array below empty and ignore the access key options further down.
-// The options further down let you control how the service should behave 
+// The options further down let you control how the service should behave
 // in each mode.
-// Note: Explicitly including the index number (1 and 2 in the examples below) 
+// Note: Explicitly including the index number (1 and 2 in the examples below)
-// is highly recommended (when generating feeds, we encode the key and 
+// is highly recommended (when generating feeds, we encode the key and
 // refer to it by index number and hash).
 $options->api_keys = array();
 // Example:
@@ -232,13 +286,13 @@ $options->max_entries_with_key = 10;
 // filter the resulting HTML for XSS attacks, making it redundant for
 // Full-Text RSS to do the same. Similarly with frameworks/CMS which display
 // feed content - the content should be treated like any other user-submitted content.
-// 
+//
 // If you are writing an application yourself which is processing feeds generated by
 // Full-Text RSS, you can either filter the HTML yourself to remove potential XSS attacks
 // or enable this option. This might be useful if you are processing our generated
 // feeds with JavaScript on the client side - although there's client side xss
 // filtering available too, e.g. https://code.google.com/p/google-caja/wiki/JsHtmlSanitizer
-// 
+//
 // If enabled, we'll pass retrieved HTML content through htmLawed with
 // safe flag on and style attributes denied, see
 // http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed/htmLawed_README.htm#s3.6
@@ -253,8 +307,8 @@ $options->xss_filter = 'user';
 // Allowed parsers
 // ----------------------
 // Full-Text RSS attempts to use PHP's libxml extension to process HTML.
-// While fast, on some sites it may not always produce good results. 
+// While fast, on some sites it may not always produce good results.
-// For these sites, you can specify an alternative HTML parser: 
+// For these sites, you can specify an alternative HTML parser:
 // parser: html5lib
 // The html5lib parser is bundled with Full-Text RSS.
 // see http://code.google.com/p/html5lib/
@@ -273,7 +327,7 @@ $options->cors = false;
 // Use APC user cache?
 // ----------------------
-// If enabled we will store site config files (when requested 
+// If enabled we will store site config files (when requested
 // for the first time) in APC's user cache. Keys are prefixed with 'sc.'
 // This improves performance by reducing disk access.
 // Note: this has no effect if APC is unavailable on your server.
@@ -346,7 +400,7 @@ $options->rewrite_url = array(
 // Valid actions:
 // * 'exclude' - exclude this item from the result
 // * 'link' - create HTML link to the item
-$options->content_type_exc = array( 
+$options->content_type_exc = array(
                                                           'application/pdf' => array('action'=>'link', 'name'=>'PDF'),
                                                           'image' => array('action'=>'link', 'name'=>'Image'),
                                                           'audio' => array('action'=>'link', 'name'=>'Audio'),
@@ -375,13 +429,13 @@ $options->cache_cleanup = 100;
 /// DO NOT CHANGE ANYTHING BELOW THIS ///////////
 /////////////////////////////////////////////////
-if (!defined('_FF_FTR_VERSION')) define('_FF_FTR_VERSION', '3.1');
+if (!defined('_FF_FTR_VERSION')) define('_FF_FTR_VERSION', '3.2');
 if (basename(__FILE__) == 'config.php') {
        if (file_exists(dirname(__FILE__).'/custom_config.php')) {
                require_once dirname(__FILE__).'/custom_config.php';
        }
-        
        // check for environment variables - often used on cloud platforms
        // environment variables should be prefixed with 'ftr_', e.g.
        // ftr_max_entries: 1
diff --git a/inc/3rdparty/libraries/content-extractor/ContentExtractor.php b/inc/3rdparty/libraries/content-extractor/ContentExtractor.php
index ddd33bb5..21e693e7 100644
--- a/inc/3rdparty/libraries/content-extractor/ContentExtractor.php
+++ b/inc/3rdparty/libraries/content-extractor/ContentExtractor.php
@@ -1,728 +1,727 @@
-<?php
+<?php
-/**
+/**
- * Content Extractor
+ * Content Extractor
- * 
+ * 
- * Uses patterns specified in site config files and auto detection (hNews/PHP Readability) 
+ * Uses patterns specified in site config files and auto detection (hNews/PHP Readability) 
- * to extract content from HTML files.
+ * to extract content from HTML files.
- * 
+ * 
- * @version 1.0
+ * @version 1.0
- * @date 2013-02-05
+ * @date 2013-02-05
- * @author Keyvan Minoukadeh
+ * @author Keyvan Minoukadeh
- * @copyright 2013 Keyvan Minoukadeh
+ * @copyright 2013 Keyvan Minoukadeh
- * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
+ * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
- */
+ */
-class ContentExtractor
+class ContentExtractor
-{
+{
-        protected static $tidy_config = array(
+        protected static $tidy_config = array(
-                                 'clean' => true,
+                                 'clean' => true,
-                                 'output-xhtml' => true,
+                                 'output-xhtml' => true,
-                                 'logical-emphasis' => true,
+                                 'logical-emphasis' => true,
-                                 'show-body-only' => false,
+                                 'show-body-only' => false,
-                                 'new-blocklevel-tags' => 'article, aside, footer, header, hgroup, menu, nav, section, details, datagrid',
+                                 'new-blocklevel-tags' => 'article, aside, footer, header, hgroup, menu, nav, section, details, datagrid',
-                                 'new-inline-tags' => 'mark, time, meter, progress, data',
+                                 'new-inline-tags' => 'mark, time, meter, progress, data',
-                                 'wrap' => 0,
+                                 'wrap' => 0,
-                                 'drop-empty-paras' => true,
+                                 'drop-empty-paras' => true,
-                                 'drop-proprietary-attributes' => false,
+                                 'drop-proprietary-attributes' => false,
-                                 'enclose-text' => true,
+                                 'enclose-text' => true,
-                                 'enclose-block-text' => true,
+                                 'enclose-block-text' => true,
-                                 'merge-divs' => true,
+                                 'merge-divs' => true,
-                                 'merge-spans' => true,
+                                 'merge-spans' => true,
-                                 'char-encoding' => 'utf8',
+                                 'char-encoding' => 'utf8',
-                                 'hide-comments' => true
+                                 'hide-comments' => true
-                                 );
+                                 );
-        protected $html;
+        protected $html;
-        protected $config;
+        protected $config;
-        protected $title;
+        protected $title;
-        protected $author = array();
+        protected $author = array();
-        protected $language;
+        protected $language;
-        protected $date;
+        protected $date;
-        protected $body;
+        protected $body;
-        protected $success = false;
+        protected $success = false;
-        protected $nextPageUrl;
+        protected $nextPageUrl;
-        public $allowedParsers = array('libxml', 'html5lib');
+        public $allowedParsers = array('libxml', 'html5lib');
-        public $fingerprints = array();
+        public $fingerprints = array();
-        public $readability;
+        public $readability;
-        public $debug = false;
+        public $debug = false;
-        public $debugVerbose = false;
+        public $debugVerbose = false;
-        function __construct($path, $fallback=null) {
+        function __construct($path, $fallback=null) {
-                SiteConfig::set_config_path($path, $fallback);  
+                SiteConfig::set_config_path($path, $fallback);  
-        }
+        }
-        
+        
-        protected function debug($msg) {
+        protected function debug($msg) {
-                if ($this->debug) {
+                if ($this->debug) {
-                        $mem = round(memory_get_usage()/1024, 2);
+                        $mem = round(memory_get_usage()/1024, 2);
-                        $memPeak = round(memory_get_peak_usage()/1024, 2);
+                        $memPeak = round(memory_get_peak_usage()/1024, 2);
-                        echo '* ',$msg;
+                        echo '* ',$msg;
-                        if ($this->debugVerbose) echo ' - mem used: ',$mem," (peak: $memPeak)";
+                        if ($this->debugVerbose) echo ' - mem used: ',$mem," (peak: $memPeak)";
-                        echo "\n";
+                        echo "\n";
-                        ob_flush();
+                        ob_flush();
-                        flush();
+                        flush();
-                }
+                }
-        }
+        }
-        
+        
-        public function reset() {
+        public function reset() {
-                $this->html = null;
+                $this->html = null;
-                $this->readability = null;
+                $this->readability = null;
-                $this->config = null;
+                $this->config = null;
-                $this->title = null;
+                $this->title = null;
-                $this->body = null;
+                $this->body = null;
-                $this->author = array();
+                $this->author = array();
-                $this->language = null;
+                $this->language = null;
-                $this->date = null;
+                $this->date = null;
-                $this->nextPageUrl = null;
+                $this->nextPageUrl = null;
-                $this->success = false;
+                $this->success = false;
-        }
+        }
-        public function findHostUsingFingerprints($html) {
+        public function findHostUsingFingerprints($html) {
-                $this->debug('Checking fingerprints...');
+                $this->debug('Checking fingerprints...');
-                $head = substr($html, 0, 8000);
+                $head = substr($html, 0, 8000);
-                foreach ($this->fingerprints as $_fp => $_fphost) {
+                foreach ($this->fingerprints as $_fp => $_fphost) {
-                        $lookin = 'html';
+                        $lookin = 'html';
-                        if (is_array($_fphost)) {
+                        if (is_array($_fphost)) {
-                                if (isset($_fphost['head']) && $_fphost['head']) {
+                                if (isset($_fphost['head']) && $_fphost['head']) {
-                                        $lookin = 'head';
+                                        $lookin = 'head';
-                                }
+                                }
-                                $_fphost = $_fphost['hostname'];
+                                $_fphost = $_fphost['hostname'];
-                        }
+                        }
-                        if (strpos($$lookin, $_fp) !== false) {
+                        if (strpos($$lookin, $_fp) !== false) {
-                                $this->debug("Found match: $_fphost");
+                                $this->debug("Found match: $_fphost");
-                                return $_fphost;
+                                return $_fphost;
-                        }
+                        }
-                }
+                }
-                $this->debug('No fingerprint matches');
+                $this->debug('No fingerprint matches');
-                return false;
+                return false;
-        }
+        }
-        
+        
-        // returns SiteConfig instance (joined in order: exact match, wildcard, fingerprint, global, default)
+        // returns SiteConfig instance (joined in order: exact match, wildcard, fingerprint, global, default)
-        public function buildSiteConfig($url, $html='', $add_to_cache=true) {
+        public function buildSiteConfig($url, $html='', $add_to_cache=true) {
-                // extract host name
+                // extract host name
-                $host = @parse_url($url, PHP_URL_HOST);
+                $host = @parse_url($url, PHP_URL_HOST);
-                $host = strtolower($host);
+                $host = strtolower($host);
-                if (substr($host, 0, 4) == 'www.') $host = substr($host, 4);
+                if (substr($host, 0, 4) == 'www.') $host = substr($host, 4);
-                // is merged version already cached?
+                // is merged version already cached?
-                if (SiteConfig::is_cached("$host.merged")) {
+                if (SiteConfig::is_cached("$host.merged")) {
-                        $this->debug("Returning cached and merged site config for $host");
+                        $this->debug("Returning cached and merged site config for $host");
-                        return SiteConfig::build("$host.merged");
+                        return SiteConfig::build("$host.merged");
-                }
+                }
-                // let's build from site_config/custom/ and standard/
+                // let's build from site_config/custom/ and standard/
-                $config = SiteConfig::build($host);
+                $config = SiteConfig::build($host);
-                if ($add_to_cache && $config && !SiteConfig::is_cached("$host")) {
+                if ($add_to_cache && $config && !SiteConfig::is_cached("$host")) {
-                        SiteConfig::add_to_cache($host, $config);
+                        SiteConfig::add_to_cache($host, $config);
-                }
+                }
-                // if no match, use defaults
+                // if no match, use defaults
-                if (!$config) $config = new SiteConfig();
+                if (!$config) $config = new SiteConfig();
-                // load fingerprint config?
+                // load fingerprint config?
-                if ($config->autodetect_on_failure()) {
+                if ($config->autodetect_on_failure()) {
-                        // check HTML for fingerprints
+                        // check HTML for fingerprints
-                        if (!empty($this->fingerprints) && ($_fphost = $this->findHostUsingFingerprints($html))) {
+                        if (!empty($this->fingerprints) && ($_fphost = $this->findHostUsingFingerprints($html))) {
-                                if ($config_fingerprint = SiteConfig::build($_fphost)) {
+                                if ($config_fingerprint = SiteConfig::build($_fphost)) {
-                                        $this->debug("Appending site config settings from $_fphost (fingerprint match)");
+                                        $this->debug("Appending site config settings from $_fphost (fingerprint match)");
-                                        $config->append($config_fingerprint);
+                                        $config->append($config_fingerprint);
-                                        if ($add_to_cache && !SiteConfig::is_cached($_fphost)) {
+                                        if ($add_to_cache && !SiteConfig::is_cached($_fphost)) {
-                                                //$config_fingerprint->cache_in_apc = true;
+                                                //$config_fingerprint->cache_in_apc = true;
-                                                SiteConfig::add_to_cache($_fphost, $config_fingerprint);
+                                                SiteConfig::add_to_cache($_fphost, $config_fingerprint);
-                                        }
+                                        }
-                                }
+                                }
-                        }
+                        }
-                }
+                }
-                // load global config?
+                // load global config?
-                if ($config->autodetect_on_failure()) {
+                if ($config->autodetect_on_failure()) {
-                        if ($config_global = SiteConfig::build('global', true)) {
+                        if ($config_global = SiteConfig::build('global', true)) {
-                                $this->debug('Appending site config settings from global.txt');
+                                $this->debug('Appending site config settings from global.txt');
-                                $config->append($config_global);
+                                $config->append($config_global);
-                                if ($add_to_cache && !SiteConfig::is_cached('global')) {
+                                if ($add_to_cache && !SiteConfig::is_cached('global')) {
-                                        //$config_global->cache_in_apc = true;
+                                        //$config_global->cache_in_apc = true;
-                                        SiteConfig::add_to_cache('global', $config_global);
+                                        SiteConfig::add_to_cache('global', $config_global);
-                                }
+                                }
-                        }
+                        }
-                }
+                }
-                // store copy of merged config
+                // store copy of merged config
-                if ($add_to_cache) {
+                if ($add_to_cache) {
-                        // do not store in APC if wildcard match
+                        // do not store in APC if wildcard match
-                        $use_apc = ($host == $config->cache_key);
+                        $use_apc = ($host == $config->cache_key);
-                        $config->cache_key = null;
+                        $config->cache_key = null;
-                        SiteConfig::add_to_cache("$host.merged", $config, $use_apc);
+                        SiteConfig::add_to_cache("$host.merged", $config, $use_apc);
-                }
+                }
-                return $config;
+                return $config;
-        }
+        }
-        
+        
-        // returns true on success, false on failure
+        // returns true on success, false on failure
-        // $smart_tidy indicates that if tidy is used and no results are produced, we will
+        // $smart_tidy indicates that if tidy is used and no results are produced, we will
-        // try again without it. Tidy helps us deal with PHP's patchy HTML parsing most of the time
+        // try again without it. Tidy helps us deal with PHP's patchy HTML parsing most of the time
-        // but it has problems of its own which we try to avoid with this option.
+        // but it has problems of its own which we try to avoid with this option.
-        public function process($html, $url, $smart_tidy=true) {
+        public function process($html, $url, $smart_tidy=true) {
-                $this->reset();
+                $this->reset();
-                $this->config = $this->buildSiteConfig($url, $html);
+                $this->config = $this->buildSiteConfig($url, $html);
-                
+                
-                // do string replacements
+                // do string replacements
-                if (!empty($this->config->find_string)) {
+                if (!empty($this->config->find_string)) {
-                        if (count($this->config->find_string) == count($this->config->replace_string)) {
+                        if (count($this->config->find_string) == count($this->config->replace_string)) {
-                                $html = str_replace($this->config->find_string, $this->config->replace_string, $html, $_count);
+                                $html = str_replace($this->config->find_string, $this->config->replace_string, $html, $_count);
-                                $this->debug("Strings replaced: $_count (find_string and/or replace_string)");
+                                $this->debug("Strings replaced: $_count (find_string and/or replace_string)");
-                        } else {
+                        } else {
-                                $this->debug('Skipped string replacement - incorrect number of find-replace strings in site config');
+                                $this->debug('Skipped string replacement - incorrect number of find-replace strings in site config');
-                        }
+                        }
-                        unset($_count);
+                        unset($_count);
-                }
+                }
-                
+                
-                // use tidy (if it exists)?
+                // use tidy (if it exists)?
-                // This fixes problems with some sites which would otherwise
+                // This fixes problems with some sites which would otherwise
-                // trouble DOMDocument's HTML parsing. (Although sometimes it
+                // trouble DOMDocument's HTML parsing. (Although sometimes it
-                // makes matters worse, which is why you can override it in site config files.)
+                // makes matters worse, which is why you can override it in site config files.)
-                $tidied = false;
+                $tidied = false;
-                if ($this->config->tidy() && function_exists('tidy_parse_string') && $smart_tidy) {
+                if ($this->config->tidy() && function_exists('tidy_parse_string') && $smart_tidy) {
-                        $this->debug('Using Tidy');
+                        $this->debug('Using Tidy');
-                        $tidy = tidy_parse_string($html, self::$tidy_config, 'UTF8');
+                        $tidy = tidy_parse_string($html, self::$tidy_config, 'UTF8');
-                        if (tidy_clean_repair($tidy)) {
+                        if (tidy_clean_repair($tidy)) {
-                                $original_html = $html;
+                                $original_html = $html;
-                                $tidied = true;
+                                $tidied = true;
-                                $html = $tidy->value;
+                                $html = $tidy->value;
-                        }
+                        }
-                        unset($tidy);
+                        unset($tidy);
-                }
+                }
-                
+                
-                // load and parse html
+                // load and parse html
-                $_parser = $this->config->parser();
+                $_parser = $this->config->parser();
-                if (!in_array($_parser, $this->allowedParsers)) {
+                if (!in_array($_parser, $this->allowedParsers)) {
-                        $this->debug("HTML parser $_parser not listed, using libxml instead");
+                        $this->debug("HTML parser $_parser not listed, using libxml instead");
-                        $_parser = 'libxml';
+                        $_parser = 'libxml';
-                }
+                }
-                $this->debug("Attempting to parse HTML with $_parser");
+                $this->debug("Attempting to parse HTML with $_parser");
-                $this->readability = new Readability($html, $url, $_parser);
+                $this->readability = new Readability($html, $url, $_parser);
-                
+                
-                // we use xpath to find elements in the given HTML document
+                // we use xpath to find elements in the given HTML document
-                // see http://en.wikipedia.org/wiki/XPath_1.0
+                // see http://en.wikipedia.org/wiki/XPath_1.0
-                $xpath = new DOMXPath($this->readability->dom);
+                $xpath = new DOMXPath($this->readability->dom);
-                // try to get next page link
+                // try to get next page link
-                foreach ($this->config->next_page_link as $pattern) {
+                foreach ($this->config->next_page_link as $pattern) {
-                        $elems = @$xpath->evaluate($pattern, $this->readability->dom);
+                        $elems = @$xpath->evaluate($pattern, $this->readability->dom);
-                        if (is_string($elems)) {
+                        if (is_string($elems)) {
-                                $this->nextPageUrl = trim($elems);
+                                $this->nextPageUrl = trim($elems);
-                                break;
+                                break;
-                        } elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
+                        } elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
-                                foreach ($elems as $item) {
+                                foreach ($elems as $item) {
-                                        if ($item instanceof DOMElement && $item->hasAttribute('href')) {
+                                        if ($item instanceof DOMElement && $item->hasAttribute('href')) {
-                                                $this->nextPageUrl = $item->getAttribute('href');
+                                                $this->nextPageUrl = $item->getAttribute('href');
-                                                break 2;
+                                                break 2;
-                                        } elseif ($item instanceof DOMAttr && $item->value) {
+                                        } elseif ($item instanceof DOMAttr && $item->value) {
-                                                $this->nextPageUrl = $item->value;
+                                                $this->nextPageUrl = $item->value;
-                                                break 2;
+                                                break 2;
-                                        }
+                                        }
-                                }
+                                }
-                        }
+                        }
-                }
+                }
-                
+                
-                // try to get title
+                // try to get title
-                foreach ($this->config->title as $pattern) {
+                foreach ($this->config->title as $pattern) {
-                        // $this->debug("Trying $pattern");
+                        // $this->debug("Trying $pattern");
-                        $elems = @$xpath->evaluate($pattern, $this->readability->dom);
+                        $elems = @$xpath->evaluate($pattern, $this->readability->dom);
-                        if (is_string($elems)) {
+                        if (is_string($elems)) {
-                                $this->title = trim($elems);
+                                $this->title = trim($elems);
-                                $this->debug('Title expression evaluated as string: '.$this->title);
+                                $this->debug('Title expression evaluated as string: '.$this->title);
-                                $this->debug("...XPath match: $pattern");
+                                $this->debug("...XPath match: $pattern");
-                                break;
+                                break;
-                        } elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
+                        } elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
-                                $this->title = $elems->item(0)->textContent;
+                                $this->title = $elems->item(0)->textContent;
-                                $this->debug('Title matched: '.$this->title);
+                                $this->debug('Title matched: '.$this->title);
-                                $this->debug("...XPath match: $pattern");
+                                $this->debug("...XPath match: $pattern");
-                                // remove title from document
+                                // remove title from document
-                                try {
+                                try {
-                                        $elems->item(0)->parentNode->removeChild($elems->item(0));
+                                        @$elems->item(0)->parentNode->removeChild($elems->item(0));
-                                } catch (DOMException $e) {
+                                } catch (DOMException $e) {
-                                        // do nothing
+                                        // do nothing
-                                }
+                                }
-                                break;
+                                break;
-                        }
+                        }
-                }
+                }
-                
+                
-                // try to get author (if it hasn't already been set)
+                // try to get author (if it hasn't already been set)
-                if (empty($this->author)) {
+                if (empty($this->author)) {
-                        foreach ($this->config->author as $pattern) {
+                        foreach ($this->config->author as $pattern) {
-                                $elems = @$xpath->evaluate($pattern, $this->readability->dom);
+                                $elems = @$xpath->evaluate($pattern, $this->readability->dom);
-                                if (is_string($elems)) {
+                                if (is_string($elems)) {
-                                        if (trim($elems) != '') {
+                                        if (trim($elems) != '') {
-                                                $this->author[] = trim($elems);
+                                                $this->author[] = trim($elems);
-                                                $this->debug('Author expression evaluated as string: '.trim($elems));
+                                                $this->debug('Author expression evaluated as string: '.trim($elems));
-                                                $this->debug("...XPath match: $pattern");
+                                                $this->debug("...XPath match: $pattern");
-                                                break;
+                                                break;
-                                        }
+                                        }
-                                } elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
+                                } elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
-                                        foreach ($elems as $elem) {
+                                        foreach ($elems as $elem) {
-                                                if (!isset($elem->parentNode)) continue;
+                                                if (!isset($elem->parentNode)) continue;
-                                                $this->author[] = trim($elem->textContent);
+                                                $this->author[] = trim($elem->textContent);
-                                                $this->debug('Author matched: '.trim($elem->textContent));
+                                                $this->debug('Author matched: '.trim($elem->textContent));
-                                        }
+                                        }
-                                        if (!empty($this->author)) {
+                                        if (!empty($this->author)) {
-                                                $this->debug("...XPath match: $pattern");
+                                                $this->debug("...XPath match: $pattern");
-                                                break;
+                                                break;
-                                        }
+                                        }
-                                }
+                                }
-                        }
+                        }
-                }
+                }
-                
+                
-                // try to get language
+                // try to get language
-                $_lang_xpath = array('//html[@lang]/@lang', '//meta[@name="DC.language"]/@content');
+                $_lang_xpath = array('//html[@lang]/@lang', '//meta[@name="DC.language"]/@content');
-                foreach ($_lang_xpath as $pattern) {
+                foreach ($_lang_xpath as $pattern) {
-                        $elems = @$xpath->evaluate($pattern, $this->readability->dom);
+                        $elems = @$xpath->evaluate($pattern, $this->readability->dom);
-                        if (is_string($elems)) {
+                        if (is_string($elems)) {
-                                if (trim($elems) != '') {
+                                if (trim($elems) != '') {
-                                        $this->language = trim($elems);
+                                        $this->language = trim($elems);
-                                        $this->debug('Language matched: '.$this->language);
+                                        $this->debug('Language matched: '.$this->language);
-                                        break;
+                                        break;
-                                }
+                                }
-                        } elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
+                        } elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
-                                foreach ($elems as $elem) {
+                                foreach ($elems as $elem) {
-                                        if (!isset($elem->parentNode)) continue;
+                                        if (!isset($elem->parentNode)) continue;
-                                        $this->language = trim($elem->textContent);
+                                        $this->language = trim($elem->textContent);
-                                        $this->debug('Language matched: '.$this->language);                                     
+                                        $this->debug('Language matched: '.$this->language);                                     
-                                }
+                                }
-                                if ($this->language) break;
+                                if ($this->language) break;
-                        }
+                        }
-                }
+                }
-                
+                
-                // try to get date
+                // try to get date
-                foreach ($this->config->date as $pattern) {
+                foreach ($this->config->date as $pattern) {
-                        $elems = @$xpath->evaluate($pattern, $this->readability->dom);
+                        $elems = @$xpath->evaluate($pattern, $this->readability->dom);
-                        if (is_string($elems)) {
+                        if (is_string($elems)) {
-                                $this->date = strtotime(trim($elems, "; \t\n\r\0\x0B"));                                
+                                $this->date = strtotime(trim($elems, "; \t\n\r\0\x0B"));                                
-                        } elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
+                        } elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
-                                $this->date = $elems->item(0)->textContent;
+                                $this->date = $elems->item(0)->textContent;
-                                $this->date = strtotime(trim($this->date, "; \t\n\r\0\x0B"));
+                                $this->date = strtotime(trim($this->date, "; \t\n\r\0\x0B"));
-                                // remove date from document
+                                // remove date from document
-                                // $elems->item(0)->parentNode->removeChild($elems->item(0));
+                                // $elems->item(0)->parentNode->removeChild($elems->item(0));
-                        }
+                        }
-                        if (!$this->date) {
+                        if (!$this->date) {
-                                $this->date = null;
+                                $this->date = null;
-                        } else {
+                        } else {
-                                $this->debug('Date matched: '.date('Y-m-d H:i:s', $this->date));
+                                $this->debug('Date matched: '.date('Y-m-d H:i:s', $this->date));
-                                $this->debug("...XPath match: $pattern");
+                                $this->debug("...XPath match: $pattern");
-                                break;
+                                break;
-                        }
+                        }
-                }
+                }
-                // strip elements (using xpath expressions)
+                // strip elements (using xpath expressions)
-                foreach ($this->config->strip as $pattern) {
+                foreach ($this->config->strip as $pattern) {
-                        $elems = @$xpath->query($pattern, $this->readability->dom);
+                        $elems = @$xpath->query($pattern, $this->readability->dom);
-                        // check for matches
+                        // check for matches
-                        if ($elems && $elems->length > 0) {
+                        if ($elems && $elems->length > 0) {
-                                $this->debug('Stripping '.$elems->length.' elements (strip)');
+                                $this->debug('Stripping '.$elems->length.' elements (strip)');
-                                for ($i=$elems->length-1; $i >= 0; $i--) {
+                                for ($i=$elems->length-1; $i >= 0; $i--) {
-                                        $elems->item($i)->parentNode->removeChild($elems->item($i));
+                                        $elems->item($i)->parentNode->removeChild($elems->item($i));
-                                }
+                                }
-                        }
+                        }
-                }
+                }
-                
+                
-                // strip elements (using id and class attribute values)
+                // strip elements (using id and class attribute values)
-                foreach ($this->config->strip_id_or_class as $string) {
+                foreach ($this->config->strip_id_or_class as $string) {
-                        $string = strtr($string, array("'"=>'', '"'=>''));
+                        $string = strtr($string, array("'"=>'', '"'=>''));
-                        $elems = @$xpath->query("//*[contains(@class, '$string') or contains(@id, '$string')]", $this->readability->dom);
+                        $elems = @$xpath->query("//*[contains(@class, '$string') or contains(@id, '$string')]", $this->readability->dom);
-                        // check for matches
+                        // check for matches
-                        if ($elems && $elems->length > 0) {
+                        if ($elems && $elems->length > 0) {
-                                $this->debug('Stripping '.$elems->length.' elements (strip_id_or_class)');
+                                $this->debug('Stripping '.$elems->length.' elements (strip_id_or_class)');
-                                for ($i=$elems->length-1; $i >= 0; $i--) {
+                                for ($i=$elems->length-1; $i >= 0; $i--) {
-                                        $elems->item($i)->parentNode->removeChild($elems->item($i));
+                                        $elems->item($i)->parentNode->removeChild($elems->item($i));
-                                }
+                                }
-                        }
+                        }
-                }
+                }
-                
+                
-                // strip images (using src attribute values)
+                // strip images (using src attribute values)
-                foreach ($this->config->strip_image_src as $string) {
+                foreach ($this->config->strip_image_src as $string) {
-                        $string = strtr($string, array("'"=>'', '"'=>''));
+                        $string = strtr($string, array("'"=>'', '"'=>''));
-                        $elems = @$xpath->query("//img[contains(@src, '$string')]", $this->readability->dom);
+                        $elems = @$xpath->query("//img[contains(@src, '$string')]", $this->readability->dom);
-                        // check for matches
+                        // check for matches
-                        if ($elems && $elems->length > 0) {
+                        if ($elems && $elems->length > 0) {
-                                $this->debug('Stripping '.$elems->length.' image elements');
+                                $this->debug('Stripping '.$elems->length.' image elements');
-                                for ($i=$elems->length-1; $i >= 0; $i--) {
+                                for ($i=$elems->length-1; $i >= 0; $i--) {
-                                        $elems->item($i)->parentNode->removeChild($elems->item($i));
+                                        $elems->item($i)->parentNode->removeChild($elems->item($i));
-                                }
+                                }
-                        }
+                        }
-                }
+                }
-                // strip elements using Readability.com and Instapaper.com ignore class names
+                // strip elements using Readability.com and Instapaper.com ignore class names
-                // .entry-unrelated and .instapaper_ignore
+                // .entry-unrelated and .instapaper_ignore
-                // See https://www.readability.com/publishers/guidelines/#view-plainGuidelines
+                // See https://www.readability.com/publishers/guidelines/#view-plainGuidelines
-                // and http://blog.instapaper.com/post/730281947
+                // and http://blog.instapaper.com/post/730281947
-                $elems = @$xpath->query("//*[contains(concat(' ',normalize-space(@class),' '),' entry-unrelated ') or contains(concat(' ',normalize-space(@class),' '),' instapaper_ignore ')]", $this->readability->dom);
+                $elems = @$xpath->query("//*[contains(concat(' ',normalize-space(@class),' '),' entry-unrelated ') or contains(concat(' ',normalize-space(@class),' '),' instapaper_ignore ')]", $this->readability->dom);
-                // check for matches
+                // check for matches
-                if ($elems && $elems->length > 0) {
+                if ($elems && $elems->length > 0) {
-                        $this->debug('Stripping '.$elems->length.' .entry-unrelated,.instapaper_ignore elements');
+                        $this->debug('Stripping '.$elems->length.' .entry-unrelated,.instapaper_ignore elements');
-                        for ($i=$elems->length-1; $i >= 0; $i--) {
+                        for ($i=$elems->length-1; $i >= 0; $i--) {
-                                $elems->item($i)->parentNode->removeChild($elems->item($i));
+                                $elems->item($i)->parentNode->removeChild($elems->item($i));
-                        }
+                        }
-                }
+                }
-                
+                
-                // strip elements that contain style="display: none;"
+                // strip elements that contain style="display: none;"
-                $elems = @$xpath->query("//*[contains(@style,'display:none')]", $this->readability->dom);
+                $elems = @$xpath->query("//*[contains(@style,'display:none')]", $this->readability->dom);
-                // check for matches
+                // check for matches
-                if ($elems && $elems->length > 0) {
+                if ($elems && $elems->length > 0) {
-                        $this->debug('Stripping '.$elems->length.' elements with inline display:none style');
+                        $this->debug('Stripping '.$elems->length.' elements with inline display:none style');
-                        for ($i=$elems->length-1; $i >= 0; $i--) {
+                        for ($i=$elems->length-1; $i >= 0; $i--) {
-                                $elems->item($i)->parentNode->removeChild($elems->item($i));
+                                $elems->item($i)->parentNode->removeChild($elems->item($i));
-                        }
+                        }
-                }
+                }
-                
+                
-                // try to get body
+                // try to get body
-                foreach ($this->config->body as $pattern) {
+                foreach ($this->config->body as $pattern) {
-                        $elems = @$xpath->query($pattern, $this->readability->dom);
+                        $elems = @$xpath->query($pattern, $this->readability->dom);
-                        // check for matches
+                        // check for matches
-                        if ($elems && $elems->length > 0) {
+                        if ($elems && $elems->length > 0) {
-                                $this->debug('Body matched');
+                                $this->debug('Body matched');
-                                $this->debug("...XPath match: $pattern");
+                                $this->debug("...XPath match: $pattern");
-                                if ($elems->length == 1) {                              
+                                if ($elems->length == 1) {                              
-                                        $this->body = $elems->item(0);
+                                        $this->body = $elems->item(0);
-                                        // prune (clean up elements that may not be content)
+                                        // prune (clean up elements that may not be content)
-                                        if ($this->config->prune()) {
+                                        if ($this->config->prune()) {
-                                                $this->debug('...pruning content');
+                                                $this->debug('...pruning content');
-                                                $this->readability->prepArticle($this->body);
+                                                $this->readability->prepArticle($this->body);
-                                        }
+                                        }
-                                        break;
+                                        break;
-                                } else {
+                                } else {
-                                        $this->body = $this->readability->dom->createElement('div');
+                                        $this->body = $this->readability->dom->createElement('div');
-                                        $this->debug($elems->length.' body elems found');
+                                        $this->debug($elems->length.' body elems found');
-                                        foreach ($elems as $elem) {
+                                        foreach ($elems as $elem) {
-                                                if (!isset($elem->parentNode)) continue;
+                                                if (!isset($elem->parentNode)) continue;
-                                                $isDescendant = false;
+                                                $isDescendant = false;
-                                                foreach ($this->body->childNodes as $parent) {
+                                                foreach ($this->body->childNodes as $parent) {
-                                                        if ($this->isDescendant($parent, $elem)) {
+                                                        if ($this->isDescendant($parent, $elem)) {
-                                                                $isDescendant = true;
+                                                                $isDescendant = true;
-                                                                break;
+                                                                break;
-                                                        }
+                                                        }
-                                                }
+                                                }
-                                                if ($isDescendant) {
+                                                if ($isDescendant) {
-                                                        $this->debug('...element is child of another body element, skipping.');
+                                                        $this->debug('...element is child of another body element, skipping.');
-                                                } else {
+                                                } else {
-                                                        // prune (clean up elements that may not be content)
+                                                        // prune (clean up elements that may not be content)
-                                                        if ($this->config->prune()) {
+                                                        if ($this->config->prune()) {
-                                                                $this->debug('Pruning content');
+                                                                $this->debug('Pruning content');
-                                                                $this->readability->prepArticle($elem);
+                                                                $this->readability->prepArticle($elem);
-                                                        }
+                                                        }
-                                                        $this->debug('...element added to body');
+                                                        $this->debug('...element added to body');
-                                                        $this->body->appendChild($elem);
+                                                        $this->body->appendChild($elem);
-                                                }
+                                                }
-                                        }
+                                        }
-                                        if ($this->body->hasChildNodes()) break;
+                                        if ($this->body->hasChildNodes()) break;
-                                }
+                                }
-                        }
+                        }
-                }               
+                }               
-                
+                
-                // auto detect?
+                // auto detect?
-                $detect_title = $detect_body = $detect_author = $detect_date = false;
+                $detect_title = $detect_body = $detect_author = $detect_date = false;
-                // detect title?
+                // detect title?
-                if (!isset($this->title)) {
+                if (!isset($this->title)) {
-                        if (empty($this->config->title) || $this->config->autodetect_on_failure()) {
+                        if (empty($this->config->title) || $this->config->autodetect_on_failure()) {
-                                $detect_title = true;
+                                $detect_title = true;
-                        }
+                        }
-                }
+                }
-                // detect body?
+                // detect body?
-                if (!isset($this->body)) {
+                if (!isset($this->body)) {
-                        if (empty($this->config->body) || $this->config->autodetect_on_failure()) {
+                        if (empty($this->config->body) || $this->config->autodetect_on_failure()) {
-                                $detect_body = true;
+                                $detect_body = true;
-                        }
+                        }
-                }
+                }
-                // detect author?
+                // detect author?
-                if (empty($this->author)) {
+                if (empty($this->author)) {
-                        if (empty($this->config->author) || $this->config->autodetect_on_failure()) {
+                        if (empty($this->config->author) || $this->config->autodetect_on_failure()) {
-                                $detect_author = true;
+                                $detect_author = true;
-                        }
+                        }
-                }
+                }
-                // detect date?
+                // detect date?
-                if (!isset($this->date)) {
+                if (!isset($this->date)) {
-                        if (empty($this->config->date) || $this->config->autodetect_on_failure()) {
+                        if (empty($this->config->date) || $this->config->autodetect_on_failure()) {
-                                $detect_date = true;
+                                $detect_date = true;
-                        }
+                        }
-                }
+                }
-                // check for hNews
+                // check for hNews
-                if ($detect_title || $detect_body) {
+                if ($detect_title || $detect_body) {
-                        // check for hentry
+                        // check for hentry
-                        $elems = @$xpath->query("//*[contains(concat(' ',normalize-space(@class),' '),' hentry ')]", $this->readability->dom);
+                        $elems = @$xpath->query("//*[contains(concat(' ',normalize-space(@class),' '),' hentry ')]", $this->readability->dom);
-                        if ($elems && $elems->length > 0) {
+                        if ($elems && $elems->length > 0) {
-                                $this->debug('hNews: found hentry');
+                                $this->debug('hNews: found hentry');
-                                $hentry = $elems->item(0);
+                                $hentry = $elems->item(0);
-                                
+                                
-                                if ($detect_title) {
+                                if ($detect_title) {
-                                        // check for entry-title
+                                        // check for entry-title
-                                        $elems = @$xpath->query(".//*[contains(concat(' ',normalize-space(@class),' '),' entry-title ')]", $hentry);
+                                        $elems = @$xpath->query(".//*[contains(concat(' ',normalize-space(@class),' '),' entry-title ')]", $hentry);
-                                        if ($elems && $elems->length > 0) {
+                                        if ($elems && $elems->length > 0) {
-                                                $this->title = $elems->item(0)->textContent;
+                                                $this->title = $elems->item(0)->textContent;
-                                                $this->debug('hNews: found entry-title: '.$this->title);
+                                                $this->debug('hNews: found entry-title: '.$this->title);
-                                                // remove title from document
+                                                // remove title from document
-                                                $elems->item(0)->parentNode->removeChild($elems->item(0));
+                                                $elems->item(0)->parentNode->removeChild($elems->item(0));
-                                                $detect_title = false;
+                                                $detect_title = false;
-                                        }
+                                        }
-                                }
+                                }
-                                
+                                
-                                if ($detect_date) {
+                                if ($detect_date) {
-                                        // check for time element with pubdate attribute
+                                        // check for time element with pubdate attribute
-                                        $elems = @$xpath->query(".//time[@pubdate] | .//abbr[contains(concat(' ',normalize-space(@class),' '),' published ')]", $hentry);
+                                        $elems = @$xpath->query(".//time[@pubdate] | .//abbr[contains(concat(' ',normalize-space(@class),' '),' published ')]", $hentry);
-                                        if ($elems && $elems->length > 0) {
+                                        if ($elems && $elems->length > 0) {
-                                                $this->date = strtotime(trim($elems->item(0)->textContent));
+                                                $this->date = strtotime(trim($elems->item(0)->textContent));
-                                                // remove date from document
+                                                // remove date from document
-                                                //$elems->item(0)->parentNode->removeChild($elems->item(0));
+                                                //$elems->item(0)->parentNode->removeChild($elems->item(0));
-                                                if ($this->date) {
+                                                if ($this->date) {
-                                                        $this->debug('hNews: found publication date: '.date('Y-m-d H:i:s', $this->date));
+                                                        $this->debug('hNews: found publication date: '.date('Y-m-d H:i:s', $this->date));
-                                                        $detect_date = false;
+                                                        $detect_date = false;
-                                                } else {
+                                                } else {
-                                                        $this->date = null;
+                                                        $this->date = null;
-                                                }
+                                                }
-                                        }
+                                        }
-                                }
+                                }
-                                if ($detect_author) {
+                                if ($detect_author) {
-                                        // check for time element with pubdate attribute
+                                        // check for vcard (hCard) element also marked as author or byline
-                                        $elems = @$xpath->query(".//*[contains(concat(' ',normalize-space(@class),' '),' vcard ') and (contains(concat(' ',normalize-space(@class),' '),' author ') or contains(concat(' ',normalize-space(@class),' '),' byline '))]", $hentry);
+                                        $elems = @$xpath->query(".//*[contains(concat(' ',normalize-space(@class),' '),' vcard ') and (contains(concat(' ',normalize-space(@class),' '),' author ') or contains(concat(' ',normalize-space(@class),' '),' byline '))]", $hentry);
-                                        if ($elems && $elems->length > 0) {
+                                        if ($elems && $elems->length > 0) {
-                                                $author = $elems->item(0);
+                                                $author = $elems->item(0);
-                                                $fn = @$xpath->query(".//*[contains(concat(' ',normalize-space(@class),' '),' fn ')]", $author);
+                                                $fn = @$xpath->query(".//*[contains(concat(' ',normalize-space(@class),' '),' fn ')]", $author);
-                                                if ($fn && $fn->length > 0) {
+                                                if ($fn && $fn->length > 0) {
-                                                        foreach ($fn as $_fn) {
+                                                        foreach ($fn as $_fn) {
-                                                                if (trim($_fn->textContent) != '') {
+                                                                if (trim($_fn->textContent) != '') {
-                                                                        $this->author[] = trim($_fn->textContent);
+                                                                        $this->author[] = trim($_fn->textContent);
-                                                                        $this->debug('hNews: found author: '.trim($_fn->textContent));
+                                                                        $this->debug('hNews: found author: '.trim($_fn->textContent));
-                                                                }
+                                                                }
-                                                        }
+                                                        }
-                                                } else {
+                                                } else {
-                                                        if (trim($author->textContent) != '') {
+                                                        if (trim($author->textContent) != '') {
-                                                                $this->author[] = trim($author->textContent);
+                                                                $this->author[] = trim($author->textContent);
-                                                                $this->debug('hNews: found author: '.trim($author->textContent));
+                                                                $this->debug('hNews: found author: '.trim($author->textContent));
-                                                        }
+                                                        }
-                                                }
+                                                }
-                                                $detect_author = empty($this->author);
+                                                $detect_author = empty($this->author);
-                                        }
+                                        }
-                                }
+                                }
-                                
+                                
-                                // check for entry-content.
+                                // check for entry-content.
-                                // according to hAtom spec, if there are multiple elements marked entry-content,
+                                // according to hAtom spec, if there are multiple elements marked entry-content,
-                                // we include all of these in the order they appear - see http://microformats.org/wiki/hatom#Entry_Content
+                                // we include all of these in the order they appear - see http://microformats.org/wiki/hatom#Entry_Content
-                                if ($detect_body) {
+                                if ($detect_body) {
-                                        $elems = @$xpath->query(".//*[contains(concat(' ',normalize-space(@class),' '),' entry-content ')]", $hentry);
+                                        $elems = @$xpath->query(".//*[contains(concat(' ',normalize-space(@class),' '),' entry-content ')]", $hentry);
-                                        if ($elems && $elems->length > 0) {
+                                        if ($elems && $elems->length > 0) {
-                                                $this->debug('hNews: found entry-content');
+                                                $this->debug('hNews: found entry-content');
-                                                if ($elems->length == 1) {
+                                                if ($elems->length == 1) {
-                                                        // what if it's empty? (some sites misuse hNews - place their content outside an empty entry-content element)
+                                                        // what if it's empty? (some sites misuse hNews - place their content outside an empty entry-content element)
-                                                        $e = $elems->item(0);
+                                                        $e = $elems->item(0);
-                                                        if (($e->tagName == 'img') || (trim($e->textContent) != '')) {
+                                                        if (($e->tagName == 'img') || (trim($e->textContent) != '')) {
-                                                                $this->body = $elems->item(0);
+                                                                $this->body = $elems->item(0);
-                                                                // prune (clean up elements that may not be content)
+                                                                // prune (clean up elements that may not be content)
-                                                                if ($this->config->prune()) {
+                                                                if ($this->config->prune()) {
-                                                                        $this->debug('Pruning content');
+                                                                        $this->debug('Pruning content');
-                                                                        $this->readability->prepArticle($this->body);
+                                                                        $this->readability->prepArticle($this->body);
-                                                                }
+                                                                }
-                                                                $detect_body = false;
+                                                                $detect_body = false;
-                                                        } else {
+                                                        } else {
-                                                                $this->debug('hNews: skipping entry-content - appears not to contain content');
+                                                                $this->debug('hNews: skipping entry-content - appears not to contain content');
-                                                        }
+                                                        }
-                                                        unset($e);
+                                                        unset($e);
-                                                } else {
+                                                } else {
-                                                        $this->body = $this->readability->dom->createElement('div');
+                                                        $this->body = $this->readability->dom->createElement('div');
-                                                        $this->debug($elems->length.' entry-content elems found');
+                                                        $this->debug($elems->length.' entry-content elems found');
-                                                        foreach ($elems as $elem) {
+                                                        foreach ($elems as $elem) {
-                                                                if (!isset($elem->parentNode)) continue;
+                                                                if (!isset($elem->parentNode)) continue;
-                                                                $isDescendant = false;
+                                                                $isDescendant = false;
-                                                                foreach ($this->body->childNodes as $parent) {
+                                                                foreach ($this->body->childNodes as $parent) {
-                                                                        if ($this->isDescendant($parent, $elem)) {
+                                                                        if ($this->isDescendant($parent, $elem)) {
-                                                                                $isDescendant = true;
+                                                                                $isDescendant = true;
-                                                                                break;
+                                                                                break;
-                                                                        }
+                                                                        }
-                                                                }
+                                                                }
-                                                                if ($isDescendant) {
+                                                                if ($isDescendant) {
-                                                                        $this->debug('Element is child of another body element, skipping.');
+                                                                        $this->debug('Element is child of another body element, skipping.');
-                                                                } else {
+                                                                } else {
-                                                                        // prune (clean up elements that may not be content)
+                                                                        // prune (clean up elements that may not be content)
-                                                                        if ($this->config->prune()) {
+                                                                        if ($this->config->prune()) {
-                                                                                $this->debug('Pruning content');
+                                                                                $this->debug('Pruning content');
-                                                                                $this->readability->prepArticle($elem);
+                                                                                $this->readability->prepArticle($elem);
-                                                                        }                                                               
+                                                                        }                                                               
-                                                                        $this->debug('Element added to body');                                                                  
+                                                                        $this->debug('Element added to body');                                                                  
-                                                                        $this->body->appendChild($elem);
+                                                                        $this->body->appendChild($elem);
-                                                                }
+                                                                }
-                                                        }
+                                                        }
-                                                        $detect_body = false;
+                                                        $detect_body = false;
-                                                }
+                                                }
-                                        }
+                                        }
-                                }
+                                }
-                        }
+                        }
-                }
+                }
-                // check for elements marked with instapaper_title
+                // check for elements marked with instapaper_title
-                if ($detect_title) {
+                if ($detect_title) {
-                        // check for instapaper_title
+                        // check for instapaper_title
-                        $elems = @$xpath->query("//*[contains(concat(' ',normalize-space(@class),' '),' instapaper_title ')]", $this->readability->dom);
+                        $elems = @$xpath->query("//*[contains(concat(' ',normalize-space(@class),' '),' instapaper_title ')]", $this->readability->dom);
-                        if ($elems && $elems->length > 0) {
+                        if ($elems && $elems->length > 0) {
-                                $this->title = $elems->item(0)->textContent;
+                                $this->title = $elems->item(0)->textContent;
-                                $this->debug('Title found (.instapaper_title): '.$this->title);
+                                $this->debug('Title found (.instapaper_title): '.$this->title);
-                                // remove title from document
+                                // remove title from document
-                                $elems->item(0)->parentNode->removeChild($elems->item(0));
+                                $elems->item(0)->parentNode->removeChild($elems->item(0));
-                                $detect_title = false;
+                                $detect_title = false;
-                        }
+                        }
-                }
+                }
-                // check for elements marked with instapaper_body
+                // check for elements marked with instapaper_body
-                if ($detect_body) {
+                if ($detect_body) {
-                        $elems = @$xpath->query("//*[contains(concat(' ',normalize-space(@class),' '),' instapaper_body ')]", $this->readability->dom);
+                        $elems = @$xpath->query("//*[contains(concat(' ',normalize-space(@class),' '),' instapaper_body ')]", $this->readability->dom);
-                        if ($elems && $elems->length > 0) {
+                        if ($elems && $elems->length > 0) {
-                                $this->debug('body found (.instapaper_body)');
+                                $this->debug('body found (.instapaper_body)');
-                                $this->body = $elems->item(0);
+                                $this->body = $elems->item(0);
-                                // prune (clean up elements that may not be content)
+                                // prune (clean up elements that may not be content)
-                                if ($this->config->prune()) {
+                                if ($this->config->prune()) {
-                                        $this->debug('Pruning content');
+                                        $this->debug('Pruning content');
-                                        $this->readability->prepArticle($this->body);
+                                        $this->readability->prepArticle($this->body);
-                                }
+                                }
-                                $detect_body = false;
+                                $detect_body = false;
-                        }
+                        }
-                }
+                }
-                
+                
-                // Find author in rel="author" marked element
+                // Find author in rel="author" marked element
-                // We only use this if there's exactly one.
+                // We only use this if there's exactly one.
-                // If there's more than one, it could indicate more than
+                // If there's more than one, it could indicate more than
-                // one author, but it could also indicate that we're processing
+                // one author, but it could also indicate that we're processing
-                // a page listing different articles with different authors.
+                // a page listing different articles with different authors.
-                if ($detect_author) {
+                if ($detect_author) {
-                        $elems = @$xpath->query("//a[contains(concat(' ',normalize-space(@rel),' '),' author ')]", $this->readability->dom);
+                        $elems = @$xpath->query("//a[contains(concat(' ',normalize-space(@rel),' '),' author ')]", $this->readability->dom);
-                        if ($elems && $elems->length == 1) {
+                        if ($elems && $elems->length == 1) {
-                                $author = trim($elems->item(0)->textContent);
+                                $author = trim($elems->item(0)->textContent);
-                                if ($author != '') {
+                                if ($author != '') {
-                                        $this->debug("Author found (rel=\"author\"): $author");
+                                        $this->debug("Author found (rel=\"author\"): $author");
-                                        $this->author[] = $author;
+                                        $this->author[] = $author;
-                                        $detect_author = false;
+                                        $detect_author = false;
-                                }
+                                }
-                        }
+                        }
-                }
+                }
-                // Find date in pubdate marked time element
+                // Find date in pubdate marked time element
-                // For the same reason given above, we only use this
+                // For the same reason given above, we only use this
-                // if there's exactly one element.
+                // if there's exactly one element.
-                if ($detect_date) {
+                if ($detect_date) {
-                        $elems = @$xpath->query("//time[@pubdate]", $this->readability->dom);
+                        $elems = @$xpath->query("//time[@pubdate]", $this->readability->dom);
-                        if ($elems && $elems->length == 1) {
+                        if ($elems && $elems->length == 1) {
-                                $this->date = strtotime(trim($elems->item(0)->textContent));
+                                $this->date = strtotime(trim($elems->item(0)->textContent));
-                                // remove date from document
+                                // remove date from document
-                                //$elems->item(0)->parentNode->removeChild($elems->item(0));
+                                //$elems->item(0)->parentNode->removeChild($elems->item(0));
-                                if ($this->date) {
+                                if ($this->date) {
-                                        $this->debug('Date found (pubdate marked time element): '.date('Y-m-d H:i:s', $this->date));
+                                        $this->debug('Date found (pubdate marked time element): '.date('Y-m-d H:i:s', $this->date));
-                                        $detect_date = false;
+                                        $detect_date = false;
-                                } else {
+                                } else {
-                                        $this->date = null;
+                                        $this->date = null;
-                                }
+                                }
-                        }
+                        }
-                }
+                }
-                // still missing title or body, so we detect using Readability
+                // still missing title or body, so we detect using Readability
-                if ($detect_title || $detect_body) {
+                if ($detect_title || $detect_body) {
-                        $this->debug('Using Readability');
+                        $this->debug('Using Readability');
-                        // clone body if we're only using Readability for title (otherwise it may interfere with body element)
+                        // clone body if we're only using Readability for title (otherwise it may interfere with body element)
-                        if (isset($this->body)) $this->body = $this->body->cloneNode(true);
+                        if (isset($this->body)) $this->body = $this->body->cloneNode(true);
-                        $success = $this->readability->init();
+                        $success = $this->readability->init();
-                }
+                }
-                if ($detect_title) {
+                if ($detect_title) {
-                        $this->debug('Detecting title');
+                        $this->debug('Detecting title');
-                        $this->title = $this->readability->getTitle()->textContent;
+                        $this->title = $this->readability->getTitle()->textContent;
-                }
+                }
-                if ($detect_body && $success) {
+                if ($detect_body && $success) {
-                        $this->debug('Detecting body');
+                        $this->debug('Detecting body');
-                        $this->body = $this->readability->getContent();
+                        $this->body = $this->readability->getContent();
-                        if ($this->body->childNodes->length == 1 && $this->body->firstChild->nodeType === XML_ELEMENT_NODE) {
+                        if ($this->body->childNodes->length == 1 && $this->body->firstChild->nodeType === XML_ELEMENT_NODE) {
-                                $this->body = $this->body->firstChild;
+                                $this->body = $this->body->firstChild;
-                        }
+                        }
-                        // prune (clean up elements that may not be content)
+                        // prune (clean up elements that may not be content)
-                        if ($this->config->prune()) {
+                        if ($this->config->prune()) {
-                                $this->debug('Pruning content');
+                                $this->debug('Pruning content');
-                                $this->readability->prepArticle($this->body);
+                                $this->readability->prepArticle($this->body);
-                        }
+                        }
-                }
+                }
-                if (isset($this->body)) {
+                if (isset($this->body)) {
-                        // remove scripts
+                        // remove scripts
-                        $this->readability->removeScripts($this->body);
+                        $this->readability->removeScripts($this->body);
-                        // remove any h1-h6 elements that appear as first thing in the body
+                        // remove any h1-h6 elements that appear as first thing in the body
-                        // and which match our title
+                        // and which match our title
-                        if (isset($this->title) && ($this->title != '')) {
+                        if (isset($this->title) && ($this->title != '')) {
-                                $firstChild = $this->body->firstChild;
+                                $firstChild = $this->body->firstChild;
-                                while ($firstChild->nodeType && ($firstChild->nodeType !== XML_ELEMENT_NODE)) {
+                                while ($firstChild->nodeType && ($firstChild->nodeType !== XML_ELEMENT_NODE)) {
-                                        $firstChild = $firstChild->nextSibling;
+                                        $firstChild = $firstChild->nextSibling;
-                                }
+                                }
-                                if (($firstChild->nodeType === XML_ELEMENT_NODE)
+                                if (($firstChild->nodeType === XML_ELEMENT_NODE)
-                                        && in_array(strtolower($firstChild->tagName), array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'))
+                                        && in_array(strtolower($firstChild->tagName), array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'))
-                                        && (strtolower(trim($firstChild->textContent)) == strtolower(trim($this->title)))) {
+                                        && (strtolower(trim($firstChild->textContent)) == strtolower(trim($this->title)))) {
-                                                $this->body->removeChild($firstChild);
+                                                $this->body->removeChild($firstChild);
-                                }
+                                }
-                        }
+                        }
-                        // prevent self-closing iframes
+                        // prevent self-closing iframes
-                        $elems = $this->body->getElementsByTagName('iframe');
+                        $elems = $this->body->getElementsByTagName('iframe');
-                        for ($i = $elems->length-1; $i >= 0; $i--) {
+                        for ($i = $elems->length-1; $i >= 0; $i--) {
-                                $e = $elems->item($i);
+                                $e = $elems->item($i);
-                                if (!$e->hasChildNodes()) {
+                                if (!$e->hasChildNodes()) {
-                                        $e->appendChild($this->body->ownerDocument->createTextNode('[embedded content]'));
+                                        $e->appendChild($this->body->ownerDocument->createTextNode('[embedded content]'));
-                                }
+                                }
-                        }
+                        }
-                        // remove image lazy loading - WordPress plugin http://wordpress.org/extend/plugins/lazy-load/
+                        // remove image lazy loading - WordPress plugin http://wordpress.org/extend/plugins/lazy-load/
-                        // the plugin replaces the src attribute to point to a 1x1 gif and puts the original src
+                        // the plugin replaces the src attribute to point to a 1x1 gif and puts the original src
-                        // inside the data-lazy-src attribute. It also places the original image inside a noscript element 
+                        // inside the data-lazy-src attribute. It also places the original image inside a noscript element 
-                        // next to the amended one.
+                        // next to the amended one.
-                        $elems = @$xpath->query("//img[@data-lazy-src]", $this->body);
+                        $elems = @$xpath->query("//img[@data-lazy-src]", $this->body);
-                        for ($i = $elems->length-1; $i >= 0; $i--) {
+                        for ($i = $elems->length-1; $i >= 0; $i--) {
-                                $e = $elems->item($i);
+                                $e = $elems->item($i);
-                                // let's see if we can grab image from noscript
+                                // let's see if we can grab image from noscript
-                                if ($e->nextSibling !== null && $e->nextSibling->nodeName === 'noscript') {
+                                if ($e->nextSibling !== null && $e->nextSibling->nodeName === 'noscript') {
-                                        $_new_elem = $e->ownerDocument->createDocumentFragment();
+                                        $_new_elem = $e->ownerDocument->createDocumentFragment();
-                                        @$_new_elem->appendXML($e->nextSibling->innerHTML);
+                                        @$_new_elem->appendXML($e->nextSibling->innerHTML);
-                                        $e->nextSibling->parentNode->replaceChild($_new_elem, $e->nextSibling);
+                                        $e->nextSibling->parentNode->replaceChild($_new_elem, $e->nextSibling);
-                                        $e->parentNode->removeChild($e);
+                                        $e->parentNode->removeChild($e);
-                                } else {
+                                } else {
-                                        // Use data-lazy-src as src value
+                                        // Use data-lazy-src as src value
-                                        $e->setAttribute('src', $e->getAttribute('data-lazy-src'));
+                                        $e->setAttribute('src', $e->getAttribute('data-lazy-src'));
-                                        $e->removeAttribute('data-lazy-src');
+                                        $e->removeAttribute('data-lazy-src');
-                                }
+                                }
-                        }
+                        }
-                
+                
-                        $this->success = true;
+                        $this->success = true;
-                }
+                }
-                
+                
-                // if we've had no success and we've used tidy, there's a chance
+                // if we've had no success and we've used tidy, there's a chance
-                // that tidy has messed up. So let's try again without tidy...
+                // that tidy has messed up. So let's try again without tidy...
-                if (!$this->success && $tidied && $smart_tidy) {
+                if (!$this->success && $tidied && $smart_tidy) {
-                        $this->debug('Trying again without tidy');
+                        $this->debug('Trying again without tidy');
-                        $this->process($original_html, $url, false);
+                        $this->process($original_html, $url, false);
-                }
+                }
-                return $this->success;
+                return $this->success;
-        }
+        }
-        
+        
-        private function isDescendant(DOMElement $parent, DOMElement $child) {
+        private function isDescendant(DOMElement $parent, DOMElement $child) {
-                $node = $child->parentNode;
+                $node = $child->parentNode;
-                while ($node != null) {
+                while ($node != null) {
-                        if ($node->isSameNode($parent)) return true;
+                        if ($node->isSameNode($parent)) return true;
-                        $node = $node->parentNode;
+                        $node = $node->parentNode;
-                }
+                }
-                return false;
+                return false;
-        }
+        }
-        public function getContent() {
+        public function getContent() {
-                return $this->body;
+                return $this->body;
-        }
+        }
-        
+        
-        public function getTitle() {
+        public function getTitle() {
-                return $this->title;
+                return $this->title;
-        }
+        }
-        
+        
-        public function getAuthors() {
+        public function getAuthors() {
-                return $this->author;
+                return $this->author;
-        }
+        }
-        
+        
-        public function getLanguage() {
+        public function getLanguage() {
-                return $this->language;
+                return $this->language;
-        }
+        }
-        
+        
-        public function getDate() {
+        public function getDate() {
-                return $this->date;
+                return $this->date;
-        }
+        }
-        
+        
-        public function getSiteConfig() {
+        public function getSiteConfig() {
-                return $this->config;
+                return $this->config;
-        }
+        }
-        
+        
-        public function getNextPageUrl() {
+        public function getNextPageUrl() {
-                return $this->nextPageUrl;
+                return $this->nextPageUrl;
-        }
+        }
-}
+}
+\ No newline at end of file
-?>
-\ No newline at end of file
diff --git a/inc/3rdparty/libraries/content-extractor/SiteConfig.php b/inc/3rdparty/libraries/content-extractor/SiteConfig.php
index c5e300d7..1f6a7603 100644
--- a/inc/3rdparty/libraries/content-extractor/SiteConfig.php
+++ b/inc/3rdparty/libraries/content-extractor/SiteConfig.php
@@ -1,338 +1,343 @@
-<?php
+<?php
-/**
+/**
- * Site Config
+ * Site Config
- * 
+ * 
- * Each instance of this class should hold extraction patterns and other directives
+ * Each instance of this class should hold extraction patterns and other directives
- * for a website. See ContentExtractor class to see how it's used.
+ * for a website. See ContentExtractor class to see how it's used.
- * 
+ * 
- * @version 0.7
+ * @version 0.8
- * @date 2012-08-27
+ * @date 2013-04-16
- * @author Keyvan Minoukadeh
+ * @author Keyvan Minoukadeh
- * @copyright 2012 Keyvan Minoukadeh
+ * @copyright 2013 Keyvan Minoukadeh
- * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
+ * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
- */
+ */
-class SiteConfig
+class SiteConfig
-{
+{
-        // Use first matching element as title (0 or more xpath expressions)
+        // Use first matching element as title (0 or more xpath expressions)
-        public $title = array();
+        public $title = array();
-        
+        
-        // Use first matching element as body (0 or more xpath expressions)
+        // Use first matching element as body (0 or more xpath expressions)
-        public $body = array();
+        public $body = array();
-        
+        
-        // Use first matching element as author (0 or more xpath expressions)
+        // Use first matching element as author (0 or more xpath expressions)
-        public $author = array();
+        public $author = array();
-        
+        
-        // Use first matching element as date (0 or more xpath expressions)
+        // Use first matching element as date (0 or more xpath expressions)
-        public $date = array();
+        public $date = array();
-        
+        
-        // Strip elements matching these xpath expressions (0 or more)
+        // Strip elements matching these xpath expressions (0 or more)
-        public $strip = array();
+        public $strip = array();
-        
+        
-        // Strip elements which contain these strings (0 or more) in the id or class attribute 
+        // Strip elements which contain these strings (0 or more) in the id or class attribute 
-        public $strip_id_or_class = array();
+        public $strip_id_or_class = array();
-        
+        
-        // Strip images which contain these strings (0 or more) in the src attribute 
+        // Strip images which contain these strings (0 or more) in the src attribute 
-        public $strip_image_src = array();
+        public $strip_image_src = array();
-        
+        
-        // Additional HTTP headers to send
+        // Additional HTTP headers to send
-        // NOT YET USED
+        // NOT YET USED
-        public $http_header = array();
+        public $http_header = array();
-        
+        
-        // Process HTML with tidy before creating DOM (bool or null if undeclared)
+        // Process HTML with tidy before creating DOM (bool or null if undeclared)
-        public $tidy = null;
+        public $tidy = null;
-        
+        
-        protected $default_tidy = true; // used if undeclared
+        protected $default_tidy = true; // used if undeclared
-        
+        
-        // Autodetect title/body if xpath expressions fail to produce results.
+        // Autodetect title/body if xpath expressions fail to produce results.
-        // Note that this applies to title and body separately, ie. 
+        // Note that this applies to title and body separately, ie. 
-        //   * if we get a body match but no title match, this option will determine whether we autodetect title 
+        //   * if we get a body match but no title match, this option will determine whether we autodetect title 
-        //   * if neither match, this determines whether we autodetect title and body.
+        //   * if neither match, this determines whether we autodetect title and body.
-        // Also note that this only applies when there is at least one xpath expression in title or body, ie.
+        // Also note that this only applies when there is at least one xpath expression in title or body, ie.
-        //   * if title and body are both empty (no xpath expressions), this option has no effect (both title and body will be auto-detected)
+        //   * if title and body are both empty (no xpath expressions), this option has no effect (both title and body will be auto-detected)
-        //   * if there's an xpath expression for title and none for body, body will be auto-detected and this option will determine whether we auto-detect title if the xpath expression for it fails to produce results.
+        //   * if there's an xpath expression for title and none for body, body will be auto-detected and this option will determine whether we auto-detect title if the xpath expression for it fails to produce results.
-        // Usage scenario: you want to extract something specific from a set of URLs, e.g. a table, and if the table is not found, you want to ignore the entry completely. Auto-detection is unlikely to succeed here, so you construct your patterns and set this option to false. Another scenario may be a site where auto-detection has proven to fail (or worse, picked up the wrong content).
+        // Usage scenario: you want to extract something specific from a set of URLs, e.g. a table, and if the table is not found, you want to ignore the entry completely. Auto-detection is unlikely to succeed here, so you construct your patterns and set this option to false. Another scenario may be a site where auto-detection has proven to fail (or worse, picked up the wrong content).
-        // bool or null if undeclared
+        // bool or null if undeclared
-        public $autodetect_on_failure = null;
+        public $autodetect_on_failure = null;
-        protected $default_autodetect_on_failure = true; // used if undeclared
+        protected $default_autodetect_on_failure = true; // used if undeclared
-        
+        
-        // Clean up content block - attempt to remove elements that appear to be superfluous
+        // Clean up content block - attempt to remove elements that appear to be superfluous
-        // bool or null if undeclared
+        // bool or null if undeclared
-        public $prune = null;
+        public $prune = null;
-        protected $default_prune = true; // used if undeclared
+        protected $default_prune = true; // used if undeclared
-        
+        
-        // Test URL - if present, can be used to test the config above
+        // Test URL - if present, can be used to test the config above
-        public $test_url = array();
+        public $test_url = array();
-        
+        
-        // Single-page link - should identify a link element or URL pointing to the page holding the entire article
+        // Single-page link - should identify a link element or URL pointing to the page holding the entire article
-        // This is useful for sites which split their articles across multiple pages. Links to such pages tend to 
+        // This is useful for sites which split their articles across multiple pages. Links to such pages tend to 
-        // display the first page with links to the other pages at the bottom. Often there is also a link to a page
+        // display the first page with links to the other pages at the bottom. Often there is also a link to a page
-        // which displays the entire article on one page (e.g. 'print view').
+        // which displays the entire article on one page (e.g. 'print view').
-        // This should be an XPath expression identifying the link to that page. If present and we find a match,
+        // This should be an XPath expression identifying the link to that page. If present and we find a match,
-        // we will retrieve that page and the rest of the options in this config will be applied to the new page.
+        // we will retrieve that page and the rest of the options in this config will be applied to the new page.
-        public $single_page_link = array();
+        public $single_page_link = array();
-        
+        
-        public $next_page_link = array();
+        public $next_page_link = array();
-        
+        
-        // Single-page link in feed? - same as above, but patterns applied to item description HTML taken from feed
+        // Single-page link in feed? - same as above, but patterns applied to item description HTML taken from feed
-        public $single_page_link_in_feed = array();
+        public $single_page_link_in_feed = array();
-        
+        
-        // Which parser to use for turning raw HTML into a DOMDocument (either 'libxml' or 'html5lib')
+        // Which parser to use for turning raw HTML into a DOMDocument (either 'libxml' or 'html5lib')
-        // string or null if undeclared
+        // string or null if undeclared
-        public $parser = null;
+        public $parser = null;
-        protected $default_parser = 'libxml'; // used if undeclared
+        protected $default_parser = 'libxml'; // used if undeclared
-        
+        
-        // Strings to search for in HTML before processing begins (used with $replace_string)
+        // Strings to search for in HTML before processing begins (used with $replace_string)
-        public $find_string = array();
+        public $find_string = array();
-        // Strings to replace those found in $find_string before HTML processing begins
+        // Strings to replace those found in $find_string before HTML processing begins
-        public $replace_string = array();
+        public $replace_string = array();
-        
+        
-        // the options below cannot be set in the config files which this class represents
+        // the options below cannot be set in the config files which this class represents
-        
+        
-        //public $cache_in_apc = false; // used to decide if we should cache in apc or not
+        //public $cache_in_apc = false; // used to decide if we should cache in apc or not
-        public $cache_key = null;
+        public $cache_key = null;
-        public static $debug = false;
+        public static $debug = false;
-        protected static $apc = false;
+        protected static $apc = false;
-        protected static $config_path;
+        protected static $config_path;
-        protected static $config_path_fallback;
+        protected static $config_path_fallback;
-        protected static $config_cache = array();
+        protected static $config_cache = array();
-        const HOSTNAME_REGEX = '/^(([a-zA-Z0-9-]*[a-zA-Z0-9])\.)*([A-Za-z0-9-]*[A-Za-z0-9])$/';
+        const HOSTNAME_REGEX = '/^(([a-zA-Z0-9-]*[a-zA-Z0-9])\.)*([A-Za-z0-9-]*[A-Za-z0-9])$/';
-        
+        
-        protected static function debug($msg) {
+        protected static function debug($msg) {
-                if (self::$debug) {
+                if (self::$debug) {
-                        //$mem = round(memory_get_usage()/1024, 2);
+                        //$mem = round(memory_get_usage()/1024, 2);
-                        //$memPeak = round(memory_get_peak_usage()/1024, 2);
+                        //$memPeak = round(memory_get_peak_usage()/1024, 2);
-                        echo '* ',$msg;
+                        echo '* ',$msg;
-                        //echo ' - mem used: ',$mem," (peak: $memPeak)\n";
+                        //echo ' - mem used: ',$mem," (peak: $memPeak)\n";
-                        echo "\n";
+                        echo "\n";
-                        ob_flush();
+                        ob_flush();
-                        flush();
+                        flush();
-                }
+                }
-        }
+        }
-        
+        
-        // enable APC caching of certain site config files?
+        // enable APC caching of certain site config files?
-        // If enabled the following site config files will be 
+        // If enabled the following site config files will be 
-        // cached in APC cache (when requested for first time):
+        // cached in APC cache (when requested for first time):
-        // * anything in site_config/custom/ and its corresponding file in site_config/standard/
+        // * anything in site_config/custom/ and its corresponding file in site_config/standard/
-        // * the site config files associated with HTML fingerprints
+        // * the site config files associated with HTML fingerprints
-        // * the global site config file
+        // * the global site config file
-        // returns true if enabled, false otherwise
+        // returns true if enabled, false otherwise
-        public static function use_apc($apc=true) {
+        public static function use_apc($apc=true) {
-                if (!function_exists('apc_add')) {
+                if (!function_exists('apc_add')) {
-                        if ($apc) self::debug('APC will not be used (function apc_add does not exist)');
+                        if ($apc) self::debug('APC will not be used (function apc_add does not exist)');
-                        return false;
+                        return false;
-                }
+                }
-                self::$apc = $apc;
+                self::$apc = $apc;
-                return $apc;
+                return $apc;
-        }
+        }
-        
+        
-        // return bool or null
+        // return bool or null
-        public function tidy($use_default=true) {
+        public function tidy($use_default=true) {
-                if ($use_default) return (isset($this->tidy)) ? $this->tidy : $this->default_tidy;
+                if ($use_default) return (isset($this->tidy)) ? $this->tidy : $this->default_tidy;
-                return $this->tidy;
+                return $this->tidy;
-        }
+        }
-        
+        
-        // return bool or null
+        // return bool or null
-        public function prune($use_default=true) {
+        public function prune($use_default=true) {
-                if ($use_default) return (isset($this->prune)) ? $this->prune : $this->default_prune;
+                if ($use_default) return (isset($this->prune)) ? $this->prune : $this->default_prune;
-                return $this->prune;
+                return $this->prune;
-        }
+        }
-        
+        
-        // return string or null
+        // return string or null
-        public function parser($use_default=true) {
+        public function parser($use_default=true) {
-                if ($use_default) return (isset($this->parser)) ? $this->parser : $this->default_parser;
+                if ($use_default) return (isset($this->parser)) ? $this->parser : $this->default_parser;
-                return $this->parser;
+                return $this->parser;
-        }
+        }
-        // return bool or null
+        // return bool or null
-        public function autodetect_on_failure($use_default=true) {
+        public function autodetect_on_failure($use_default=true) {
-                if ($use_default) return (isset($this->autodetect_on_failure)) ? $this->autodetect_on_failure : $this->default_autodetect_on_failure;
+                if ($use_default) return (isset($this->autodetect_on_failure)) ? $this->autodetect_on_failure : $this->default_autodetect_on_failure;
-                return $this->autodetect_on_failure;
+                return $this->autodetect_on_failure;
-        }
+        }
-        
+        
-        public static function set_config_path($path, $fallback=null) {
+        public static function set_config_path($path, $fallback=null) {
-                self::$config_path = $path;
+                self::$config_path = $path;
-                self::$config_path_fallback = $fallback;
+                self::$config_path_fallback = $fallback;
-        }
+        }
-        
+        
-        public static function add_to_cache($key, SiteConfig $config, $use_apc=true) {
+        public static function add_to_cache($key, SiteConfig $config, $use_apc=true) {
-                $key = strtolower($key);
+                $key = strtolower($key);
-                if (substr($key, 0, 4) == 'www.') $key = substr($key, 4);
+                if (substr($key, 0, 4) == 'www.') $key = substr($key, 4);
-                if ($config->cache_key) $key = $config->cache_key;
+                if ($config->cache_key) $key = $config->cache_key;
-                self::$config_cache[$key] = $config;
+                self::$config_cache[$key] = $config;
-                if (self::$apc && $use_apc) {
+                if (self::$apc && $use_apc) {
-                        self::debug("Adding site config to APC cache with key sc.$key");
+                        self::debug("Adding site config to APC cache with key sc.$key");
-                        apc_add("sc.$key", $config);
+                        apc_add("sc.$key", $config);
-                }
+                }
-                self::debug("Cached site config with key $key");
+                self::debug("Cached site config with key $key");
-        }
+        }
-        
+        
-        public static function is_cached($key) {
+        public static function is_cached($key) {
-                $key = strtolower($key);
+                $key = strtolower($key);
-                if (substr($key, 0, 4) == 'www.') $key = substr($key, 4);
+                if (substr($key, 0, 4) == 'www.') $key = substr($key, 4);
-                if (array_key_exists($key, self::$config_cache)) {
+                if (array_key_exists($key, self::$config_cache)) {
-                        return true;
+                        return true;
-                } elseif (self::$apc && (bool)apc_fetch("sc.$key")) {
+                } elseif (self::$apc && (bool)apc_fetch("sc.$key")) {
-                        return true;
+                        return true;
-                }
+                }
-                return false;
+                return false;
-        }
+        }
-        
+        
-        public function append(SiteConfig $newconfig) {
+        public function append(SiteConfig $newconfig) {
-                // check for commands where we accept multiple statements (no test_url)
+                // check for commands where we accept multiple statements (no test_url)
-                foreach (array('title', 'body', 'author', 'date', 'strip', 'strip_id_or_class', 'strip_image_src', 'single_page_link', 'single_page_link_in_feed', 'next_page_link', 'http_header', 'find_string', 'replace_string') as $var) {
+                foreach (array('title', 'body', 'author', 'date', 'strip', 'strip_id_or_class', 'strip_image_src', 'single_page_link', 'single_page_link_in_feed', 'next_page_link', 'http_header') as $var) {
-                        // append array elements for this config variable from $newconfig to this config
+                        // append array elements for this config variable from $newconfig to this config
-                        //$this->$var = $this->$var + $newconfig->$var;
+                        //$this->$var = $this->$var + $newconfig->$var;
-                        $this->$var = array_unique(array_merge($this->$var, $newconfig->$var));
+                        $this->$var = array_unique(array_merge($this->$var, $newconfig->$var));
-                }
+                }
-                // check for single statement commands
+                // check for single statement commands
-                // we do not overwrite existing non null values
+                // we do not overwrite existing non null values
-                foreach (array('tidy', 'prune', 'parser', 'autodetect_on_failure') as $var) {
+                foreach (array('tidy', 'prune', 'parser', 'autodetect_on_failure') as $var) {
-                        if ($this->$var === null) $this->$var = $newconfig->$var;
+                        if ($this->$var === null) $this->$var = $newconfig->$var;
-                }
+                }
-        }
+                // treat find_string and replace_string separately (don't apply array_unique) (thanks fabrizio!)
-        
+                foreach (array('find_string', 'replace_string') as $var) {
-        // returns SiteConfig instance if an appropriate one is found, false otherwise
+                        // append array elements for this config variable from $newconfig to this config
-        // if $exact_host_match is true, we will not look for wildcard config matches
+                        //$this->$var = $this->$var + $newconfig->$var;
-        // by default if host is 'test.example.org' we will look for and load '.example.org.txt' if it exists
+                        $this->$var = array_merge($this->$var, $newconfig->$var);
-        public static function build($host, $exact_host_match=false) {
+                }
-                $host = strtolower($host);
+        }
-                if (substr($host, 0, 4) == 'www.') $host = substr($host, 4);
+        
-                if (!$host || (strlen($host) > 200) || !preg_match(self::HOSTNAME_REGEX, ltrim($host, '.'))) return false;
+        // returns SiteConfig instance if an appropriate one is found, false otherwise
-                // check for site configuration
+        // if $exact_host_match is true, we will not look for wildcard config matches
-                $try = array($host);
+        // by default if host is 'test.example.org' we will look for and load '.example.org.txt' if it exists
-                // should we look for wildcard matches 
+        public static function build($host, $exact_host_match=false) {
-                if (!$exact_host_match) {
+                $host = strtolower($host);
-                        $split = explode('.', $host);
+                if (substr($host, 0, 4) == 'www.') $host = substr($host, 4);
-                        if (count($split) > 1) {
+                if (!$host || (strlen($host) > 200) || !preg_match(self::HOSTNAME_REGEX, ltrim($host, '.'))) return false;
-                                array_shift($split);
+                // check for site configuration
-                                $try[] = '.'.implode('.', $split);
+                $try = array($host);
-                        }
+                // should we look for wildcard matches 
-                }
+                if (!$exact_host_match) {
-                
+                        $split = explode('.', $host);
-                // look for site config file in primary folder
+                        if (count($split) > 1) {
-                self::debug(". looking for site config for $host in primary folder");
+                                array_shift($split);
-                foreach ($try as $h) {
+                                $try[] = '.'.implode('.', $split);
-                        if (array_key_exists($h, self::$config_cache)) {
+                        }
-                                self::debug("... site config for $h already loaded in this request");
+                }
-                                return self::$config_cache[$h];
+                
-                        } elseif (self::$apc && ($sconfig = apc_fetch("sc.$h"))) {
+                // look for site config file in primary folder
-                                self::debug("... site config for $h in APC cache");
+                self::debug(". looking for site config for $host in primary folder");
-                                return $sconfig;
+                foreach ($try as $h) {
-                        } elseif (file_exists(self::$config_path."/$h.txt")) {
+                        if (array_key_exists($h, self::$config_cache)) {
-                                self::debug("... found site config ($h.txt)");
+                                self::debug("... site config for $h already loaded in this request");
-                                $file_primary = self::$config_path."/$h.txt";
+                                return self::$config_cache[$h];
-                                $matched_name = $h;
+                        } elseif (self::$apc && ($sconfig = apc_fetch("sc.$h"))) {
-                                break;
+                                self::debug("... site config for $h in APC cache");
-                        }
+                                return $sconfig;
-                }
+                        } elseif (file_exists(self::$config_path."/$h.txt")) {
-                
+                                self::debug("... found site config ($h.txt)");
-                // if we found site config, process it
+                                $file_primary = self::$config_path."/$h.txt";
-                if (isset($file_primary)) {
+                                $matched_name = $h;
-                        $config_lines = file($file_primary, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
+                                break;
-                        if (!$config_lines || !is_array($config_lines)) return false;
+                        }
-                        $config = self::build_from_array($config_lines);
+                }
-                        // if APC caching is available and enabled, mark this for cache
+                
-                        //$config->cache_in_apc = true;
+                // if we found site config, process it
-                        $config->cache_key = $matched_name;
+                if (isset($file_primary)) {
-                        
+                        $config_lines = file($file_primary, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
-                        // if autodetec on failure is off (on by default) we do not need to look
+                        if (!$config_lines || !is_array($config_lines)) return false;
-                        // in secondary folder
+                        $config = self::build_from_array($config_lines);
-                        if (!$config->autodetect_on_failure()) {
+                        // if APC caching is available and enabled, mark this for cache
-                                self::debug('... autodetect on failure is disabled (no other site config files will be loaded)');
+                        //$config->cache_in_apc = true;
-                                return $config;
+                        $config->cache_key = $matched_name;
-                        }
+                        
-                }
+                        // if autodetect on failure is off (on by default) we do not need to look
-                
+                        // in secondary folder
-                // look for site config file in secondary folder
+                        if (!$config->autodetect_on_failure()) {
-                if (isset(self::$config_path_fallback)) {
+                                self::debug('... autodetect on failure is disabled (no other site config files will be loaded)');
-                        self::debug(". looking for site config for $host in secondary folder");
+                                return $config;
-                        foreach ($try as $h) {
+                        }
-                                if (file_exists(self::$config_path_fallback."/$h.txt")) {
+                }
-                                        self::debug("... found site config in secondary folder ($h.txt)");
+                
-                                        $file_secondary = self::$config_path_fallback."/$h.txt";
+                // look for site config file in secondary folder
-                                        $matched_name = $h;
+                if (isset(self::$config_path_fallback)) {
-                                        break;
+                        self::debug(". looking for site config for $host in secondary folder");
-                                }
+                        foreach ($try as $h) {
-                        }
+                                if (file_exists(self::$config_path_fallback."/$h.txt")) {
-                        if (!isset($file_secondary)) {
+                                        self::debug("... found site config in secondary folder ($h.txt)");
-                                self::debug("... no site config match in secondary folder");
+                                        $file_secondary = self::$config_path_fallback."/$h.txt";
-                        }
+                                        $matched_name = $h;
-                }
+                                        break;
-                
+                                }
-                // return false if no config file found
+                        }
-                if (!isset($file_primary) && !isset($file_secondary)) {
+                        if (!isset($file_secondary)) {
-                        self::debug("... no site config match for $host");
+                                self::debug("... no site config match in secondary folder");
-                        return false;
+                        }
-                }
+                }
-                
+                
-                // return primary config if secondary not found
+                // return false if no config file found
-                if (!isset($file_secondary) && isset($config)) {
+                if (!isset($file_primary) && !isset($file_secondary)) {
-                        return $config;
+                        self::debug("... no site config match for $host");
-                }
+                        return false;
-                
+                }
-                // process secondary config file
+                
-                $config_lines = file($file_secondary, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
+                // return primary config if secondary not found
-                if (!$config_lines || !is_array($config_lines)) {
+                if (!isset($file_secondary) && isset($config)) {
-                        // failed to process secondary
+                        return $config;
-                        if (isset($config)) {
+                }
-                                // return primary config
+                
-                                return $config;
+                // process secondary config file
-                        } else {
+                $config_lines = file($file_secondary, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
-                                return false;
+                if (!$config_lines || !is_array($config_lines)) {
-                        }
+                        // failed to process secondary
-                }
+                        if (isset($config)) {
-                
+                                // return primary config
-                // merge with primary and return
+                                return $config;
-                if (isset($config)) {
+                        } else {
-                        self::debug('. merging config files');
+                                return false;
-                        $config->append(self::build_from_array($config_lines));
+                        }
-                        return $config;
+                }
-                } else {
+                
-                        // return just secondary
+                // merge with primary and return
-                        $config = self::build_from_array($config_lines);
+                if (isset($config)) {
-                        // if APC caching is available and enabled, mark this for cache
+                        self::debug('. merging config files');
-                        //$config->cache_in_apc = true;
+                        $config->append(self::build_from_array($config_lines));
-                        $config->cache_key = $matched_name;
+                        return $config;
-                        return $config;
+                } else {
-                }
+                        // return just secondary
-        }
+                        $config = self::build_from_array($config_lines);
-        
+                        // if APC caching is available and enabled, mark this for cache
-        public static function build_from_array(array $lines) {
+                        //$config->cache_in_apc = true;
-                $config = new SiteConfig();
+                        $config->cache_key = $matched_name;
-                foreach ($lines as $line) {
+                        return $config;
-                        $line = trim($line);
+                }
-                        
+        }
-                        // skip comments, empty lines
+        
-                        if ($line == '' || $line[0] == '#') continue;
+        public static function build_from_array(array $lines) {
-                        
+                $config = new SiteConfig();
-                        // get command
+                foreach ($lines as $line) {
-                        $command = explode(':', $line, 2);
+                        $line = trim($line);
-                        // if there's no colon ':', skip this line
+                        
-                        if (count($command) != 2) continue;
+                        // skip comments, empty lines
-                        $val = trim($command[1]);
+                        if ($line == '' || $line[0] == '#') continue;
-                        $command = trim($command[0]);
+                        
-                        if ($command == '' || $val == '') continue;
+                        // get command
-                        
+                        $command = explode(':', $line, 2);
-                        // check for commands where we accept multiple statements
+                        // if there's no colon ':', skip this line
-                        if (in_array($command, array('title', 'body', 'author', 'date', 'strip', 'strip_id_or_class', 'strip_image_src', 'single_page_link', 'single_page_link_in_feed', 'next_page_link', 'http_header', 'test_url', 'find_string', 'replace_string'))) {
+                        if (count($command) != 2) continue;
-                                array_push($config->$command, $val);
+                        $val = trim($command[1]);
-                        // check for single statement commands that evaluate to true or false
+                        $command = trim($command[0]);
-                        } elseif (in_array($command, array('tidy', 'prune', 'autodetect_on_failure'))) {
+                        if ($command == '' || $val == '') continue;
-                                $config->$command = ($val == 'yes');
+                        
-                        // check for single statement commands stored as strings
+                        // check for commands where we accept multiple statements
-                        } elseif (in_array($command, array('parser'))) {
+                        if (in_array($command, array('title', 'body', 'author', 'date', 'strip', 'strip_id_or_class', 'strip_image_src', 'single_page_link', 'single_page_link_in_feed', 'next_page_link', 'http_header', 'test_url', 'find_string', 'replace_string'))) {
-                                $config->$command = $val;
+                                array_push($config->$command, $val);
-                        // check for replace_string(find): replace
+                        // check for single statement commands that evaluate to true or false
-                        } elseif ((substr($command, -1) == ')') && preg_match('!^([a-z0-9_]+)\((.*?)\)$!i', $command, $match)) {
+                        } elseif (in_array($command, array('tidy', 'prune', 'autodetect_on_failure'))) {
-                                if (in_array($match[1], array('replace_string'))) {
+                                $config->$command = ($val == 'yes');
-                                        $command = $match[1];
+                        // check for single statement commands stored as strings
-                                        array_push($config->find_string, $match[2]);
+                        } elseif (in_array($command, array('parser'))) {
-                                        array_push($config->$command, $val);
+                                $config->$command = $val;
-                                }
+                        // check for replace_string(find): replace
-                        }
+                        } elseif ((substr($command, -1) == ')') && preg_match('!^([a-z0-9_]+)\((.*?)\)$!i', $command, $match)) {
-                }
+                                if (in_array($match[1], array('replace_string'))) {
-                return $config;
+                                        $command = $match[1];
-        }
+                                        array_push($config->find_string, $match[2]);
-}
+                                        array_push($config->$command, $val);
-?>
-\ No newline at end of file
+                                }
+                        }
+                }
+                return $config;
+        }
+}
+\ No newline at end of file
diff --git a/inc/3rdparty/libraries/feedwriter/FeedItem.php b/inc/3rdparty/libraries/feedwriter/FeedItem.php
index 54a56f22..40786598 100644..100755
--- a/inc/3rdparty/libraries/feedwriter/FeedItem.php
+++ b/inc/3rdparty/libraries/feedwriter/FeedItem.php
@@ -1,7 +1,7 @@
 <?php
 /**
 * Univarsel Feed Writer
- * 
+ *
 * FeedItem class - Used as feed element in FeedWriter class
 *
 * @package         UnivarselFeedWriter
@@ -12,20 +12,20 @@
 {
    private $elements = array();    //Collection of feed elements
    private $version;
-    
    /**
-    * Constructor 
+    * Constructor
-    * 
+    *
-    * @param    contant     (RSS1/RSS2/ATOM) RSS2 is default. 
+    * @param    contant     (RSS1/RSS2/ATOM) RSS2 is default.
-    */ 
+    */
    function __construct($version = RSS2)
-    {    
+    {
        $this->version = $version;
    }
    /**
    * Set element (overwrites existing elements with $elementName)
-    * 
+    *
    * @access   public
    * @param    srting  The tag name of an element
    * @param    srting  The content of tag
@@ -38,11 +38,11 @@
            unset($this->elements[$elementName]);
        }
        $this->addElement($elementName, $content, $attributes);
-    }    
+    }
-    
    /**
    * Add an element to elements array
-    * 
+    *
    * @access   public
    * @param    srting  The tag name of an element
    * @param    srting  The content of tag
@@ -61,11 +61,11 @@
        $this->elements[$elementName][$i]['content']    = $content;
        $this->elements[$elementName][$i]['attributes'] = $attributes;
    }
-    
    /**
-    * Set multiple feed elements from an array. 
+    * Set multiple feed elements from an array.
    * Elements which have attributes cannot be added by this method
-    * 
+    *
    * @access   public
    * @param    array   array of elements in 'tagName' => 'tagContent' format.
    * @return   void
@@ -73,15 +73,15 @@
    public function addElementArray($elementArray)
    {
        if(! is_array($elementArray)) return;
-        foreach ($elementArray as $elementName => $content) 
+        foreach ($elementArray as $elementName => $content)
        {
            $this->addElement($elementName, $content);
        }
    }
-    
    /**
    * Return the collection of elements in this feed item
-    * 
+    *
    * @access   public
    * @return   array
    */
@@ -89,68 +89,74 @@
    {
        return $this->elements;
    }
-    
    // Wrapper functions ------------------------------------------------------
-    
    /**
    * Set the 'dscription' element of feed item
-    * 
+    *
    * @access   public
    * @param    string  The content of 'description' element
    * @return   void
    */
-    public function setDescription($description) 
+    public function setDescription($description)
    {
-        $this->setElement('description', $description);
+        $tag = ($this->version == ATOM)? 'summary' : 'description';
+        $this->setElement($tag, $description);
    }
-    
    /**
    * @desc     Set the 'title' element of feed item
    * @access   public
    * @param    string  The content of 'title' element
    * @return   void
    */
-    public function setTitle($title) 
+    public function setTitle($title)
    {
-        $this->setElement('title', $title);      
+        $this->setElement('title', $title);
    }
-    
    /**
    * Set the 'date' element of feed item
-    * 
+    *
    * @access   public
    * @param    string  The content of 'date' element
    * @return   void
    */
-    public function setDate($date) 
+    public function setDate($date)
    {
        if(! is_numeric($date))
        {
            $date = strtotime($date);
        }
-      
-        if($this->version == RSS2) 
+        if($this->version == ATOM)
+        {
+                $tag    = 'updated';
+                $value  = date(DATE_ATOM, $date);
+        }
+        elseif($this->version == RSS2)
        {
-            $tag    = 'pubDate';
+                $tag    = 'pubDate';
-            $value  = date(DATE_RSS, $date);
+                $value  = date(DATE_RSS, $date);
        }
-        else                                
+        else
        {
-            $tag    = 'dc:date';
+                $tag    = 'dc:date';
-            $value  = date("Y-m-d", $date);
+                $value  = date("Y-m-d", $date);
        }
-        
-        $this->setElement($tag, $value);    
+        $this->setElement($tag, $value);
    }
-    
    /**
    * Set the 'link' element of feed item
-    * 
+    *
    * @access   public
    * @param    string  The content of 'link' element
    * @return   void
    */
-    public function setLink($link) 
+    public function setLink($link)
    {
        if($this->version == RSS2 || $this->version == RSS1)
        {
@@ -161,27 +167,27 @@
        {
            $this->setElement('link','',array('href'=>$link));
            $this->setElement('id', FeedWriter::uuid($link,'urn:uuid:'));
-        } 
+        }
-        
    }
    /**
    * Set the 'source' element of feed item
-    * 
+    *
    * @access   public
    * @param    string  The content of 'source' element
    * @return   void
    */
-    public function setSource($link) 
+    public function setSource($link)
    {
        $attributes = array('url'=>$link);
        $this->setElement('source', "wallabag",$attributes);
    }
-    
    /**
    * Set the 'encloser' element of feed item
    * For RSS 2.0 only
-    * 
+    *
    * @access   public
    * @param    string  The url attribute of encloser tag
    * @param    string  The length attribute of encloser tag
@@ -193,6 +199,6 @@
        $attributes = array('url'=>$url, 'length'=>$length, 'type'=>$type);
        $this->setElement('enclosure','',$attributes);
    }
-    
 } // end of class FeedItem
 ?>
 \ No newline at end of file
diff --git a/inc/3rdparty/libraries/feedwriter/FeedWriter.php b/inc/3rdparty/libraries/feedwriter/FeedWriter.php
index d708e99b..77755690 100755
--- a/inc/3rdparty/libraries/feedwriter/FeedWriter.php
+++ b/inc/3rdparty/libraries/feedwriter/FeedWriter.php
@@ -97,15 +97,12 @@ define('JSONP', 3, true);
              header('X-content-type-options: nosniff');
          } elseif ($this->version == JSON) {
              header('Content-type: application/json; charset=UTF-8');
+              $this->json = new stdClass();
          } elseif ($this->version == JSONP) {
              header('Content-type: application/javascript; charset=UTF-8');
+              $this->json = new stdClass();
          }
        }
-      
-        if ($this->version == JSON || $this->version == JSONP) {
-          $this->json = new stdClass();
-        }
-      
        $this->printHead();
        $this->printChannels();
@@ -116,6 +113,11 @@ define('JSONP', 3, true);
        }
    }
+    public function &getItems()
+    {
+        return $this->items;
+    }
    /**
    * Create a new FeedItem.
    *
@@ -199,7 +201,8 @@ define('JSONP', 3, true);
    */
    public function setDescription($description)
    {
-        $this->setChannelElement('description', $description);
+        $tag = ($this->version == ATOM)? 'subtitle' : 'description'; // ATOM feeds use 'subtitle' for this channel element
+        $this->setChannelElement($tag, $description);
    }
    /**
@@ -244,7 +247,7 @@ define('JSONP', 3, true);
        {
            $out  = '<?xml version="1.0" encoding="utf-8"?>'."\n";
            if ($this->xsl) $out .= '<?xml-stylesheet type="text/xsl" href="'.htmlspecialchars($this->xsl).'"?>' . PHP_EOL;
-            $out .= '<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:media="http://search.yahoo.com/mrss/">' . PHP_EOL;
+            $out .= '<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:media="http://search.yahoo.com/mrss/">' . PHP_EOL;
            echo $out;
        }
        elseif ($this->version == JSON || $this->version == JSONP)
diff --git a/inc/3rdparty/libraries/html5/TreeBuilder.php b/inc/3rdparty/libraries/html5/TreeBuilder.php
index 2f5244f9..c4a48b21 100644
--- a/inc/3rdparty/libraries/html5/TreeBuilder.php
+++ b/inc/3rdparty/libraries/html5/TreeBuilder.php
@@ -134,6 +134,7 @@ class HTML5_TreeBuilder {
    // Namespaces for foreign content
    const NS_HTML   = null; // to prevent DOM from requiring NS on everything
+    const NS_XHTML  = 'http://www.w3.org/1999/xhtml';
    const NS_MATHML = 'http://www.w3.org/1998/Math/MathML';
    const NS_SVG    = 'http://www.w3.org/2000/svg';
    const NS_XLINK  = 'http://www.w3.org/1999/xlink';
@@ -3157,11 +3158,19 @@ class HTML5_TreeBuilder {
        }
    private function insertElement($token, $append = true) {
-        $el = $this->dom->createElementNS(self::NS_HTML, $token['name']);
+        //$el = $this->dom->createElementNS(self::NS_HTML, $token['name']);
+        $namespaceURI = strpos($token['name'], ':') ? self::NS_XHTML : self::NS_HTML;
+        $el = $this->dom->createElementNS($namespaceURI, $token['name']);
        if (!empty($token['attr'])) {
            foreach($token['attr'] as $attr) {
-                if(!$el->hasAttribute($attr['name'])) {
+                                // mike@macgirvin.com 2011-11-17, check attribute name for
+                                // validity (ignoring extenders and combiners) as illegal chars in names
+                                // causes everything to abort
+                                $valid = preg_match('/^[a-zA-Z\_\:]([\-a-zA-Z0-9\_\:\.]+$)/',$attr['name']);
+                if($attr['name'] && (!$el->hasAttribute($attr['name'])) && ($valid)) {
                    $el->setAttribute($attr['name'], $attr['value']);
                }
            }
diff --git a/inc/3rdparty/libraries/humble-http-agent/CookieJar.php b/inc/3rdparty/libraries/humble-http-agent/CookieJar.php
index 83e94f14..e4d5f495 100644
--- a/inc/3rdparty/libraries/humble-http-agent/CookieJar.php
+++ b/inc/3rdparty/libraries/humble-http-agent/CookieJar.php
@@ -1,404 +1,403 @@
-<?php
+<?php
-/**
+/**
- * Cookie Jar
+ * Cookie Jar
- * 
+ * 
- * PHP class for handling cookies, as defined by the Netscape spec: 
+ * PHP class for handling cookies, as defined by the Netscape spec: 
- * <http://curl.haxx.se/rfc/cookie_spec.html>
+ * <http://curl.haxx.se/rfc/cookie_spec.html>
- *
+ *
- * This class should be used to handle cookies (storing cookies from HTTP response messages, and
+ * This class should be used to handle cookies (storing cookies from HTTP response messages, and
- * sending out cookies in HTTP request messages). This has been adapted for FiveFilters.org 
+ * sending out cookies in HTTP request messages). This has been adapted for FiveFilters.org 
- * from the original version used in HTTP Navigator. See http://www.keyvan.net/code/http-navigator/
+ * from the original version used in HTTP Navigator. See http://www.keyvan.net/code/http-navigator/
- * 
+ * 
- * This class is mainly based on Cookies.pm <http://search.cpan.org/author/GAAS/libwww-perl-5.65/
+ * This class is mainly based on Cookies.pm <http://search.cpan.org/author/GAAS/libwww-perl-5.65/
- * lib/HTTP/Cookies.pm> from the libwww-perl collection <http://www.linpro.no/lwp/>.
+ * lib/HTTP/Cookies.pm> from the libwww-perl collection <http://www.linpro.no/lwp/>.
- * Unlike Cookies.pm, this class only supports the Netscape cookie spec, not RFC 2965.
+ * Unlike Cookies.pm, this class only supports the Netscape cookie spec, not RFC 2965.
- * 
+ * 
- * @version 0.5
+ * @version 0.5
- * @date 2011-03-15
+ * @date 2011-03-15
- * @see http://php.net/HttpRequestPool
+ * @see http://php.net/HttpRequestPool
- * @author Keyvan Minoukadeh
+ * @author Keyvan Minoukadeh
- * @copyright 2011 Keyvan Minoukadeh
+ * @copyright 2011 Keyvan Minoukadeh
- * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
+ * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
- */
+ */
-class CookieJar
+class CookieJar
-{
+{
-    /**
+    /**
-    * Cookies - array containing all cookies.
+    * Cookies - array containing all cookies.
-    *
+    *
-    * <pre>
+    * <pre>
-    * Cookies are stored like this:
+    * Cookies are stored like this:
-    *   [domain][path][name] = array
+    *   [domain][path][name] = array
-    * where array is:
+    * where array is:
-    *   0 => value, 1 => secure, 2 => expires
+    *   0 => value, 1 => secure, 2 => expires
-    * </pre>
+    * </pre>
-    * @var array
+    * @var array
-    * @access private
+    * @access private
-    */
+    */
-    public $cookies = array();
+    public $cookies = array();
-        public $debug = false;
+        public $debug = false;
-    /**
+    /**
-    * Constructor
+    * Constructor
-    */
+    */
-    function __construct() {
+    function __construct() {
-    }
+    }
-        protected function debug($msg, $file=null, $line=null) {
+        protected function debug($msg, $file=null, $line=null) {
-                if ($this->debug) {
+                if ($this->debug) {
-                        $mem = round(memory_get_usage()/1024, 2);
+                        $mem = round(memory_get_usage()/1024, 2);
-                        $memPeak = round(memory_get_peak_usage()/1024, 2);
+                        $memPeak = round(memory_get_peak_usage()/1024, 2);
-                        echo '* ',$msg;
+                        echo '* ',$msg;
-                        if (isset($file, $line)) echo " ($file line $line)";
+                        if (isset($file, $line)) echo " ($file line $line)";
-                        echo ' - mem used: ',$mem," (peak: $memPeak)\n";        
+                        echo ' - mem used: ',$mem," (peak: $memPeak)\n";        
-                        ob_flush();
+                        ob_flush();
-                        flush();
+                        flush();
-                }
+                }
-        }       
+        }       
-        
+        
-    /**
+    /**
-    * Get matching cookies
+    * Get matching cookies
-    *
+    *
-    * Only use this method if you cannot use add_cookie_header(), for example, if you want to use
+    * Only use this method if you cannot use add_cookie_header(), for example, if you want to use
-    * this cookie jar class without using the request class.
+    * this cookie jar class without using the request class.
-    *
+    *
-    * @param array $param associative array containing 'domain', 'path', 'secure' keys
+    * @param array $param associative array containing 'domain', 'path', 'secure' keys
-    * @return string
+    * @return string
-    * @see add_cookie_header()
+    * @see add_cookie_header()
-    */
+    */
-    public function getMatchingCookies($url)
+    public function getMatchingCookies($url)
-    {
+    {
-                if (($parts = @parse_url($url)) && isset($parts['scheme'], $parts['host'], $parts['path'])) {
+                if (($parts = @parse_url($url)) && isset($parts['scheme'], $parts['host'], $parts['path'])) {
-                        $param['domain'] = $parts['host'];
+                        $param['domain'] = $parts['host'];
-                        $param['path'] = $parts['path'];
+                        $param['path'] = $parts['path'];
-                        $param['secure'] = (strtolower($parts['scheme']) == 'https');
+                        $param['secure'] = (strtolower($parts['scheme']) == 'https');
-                        unset($parts);
+                        unset($parts);
-                } else {
+                } else {
-                        return false;
+                        return false;
-                }
+                }
-        // RFC 2965 notes:
+        // RFC 2965 notes:
-        //  If multiple cookies satisfy the criteria above, they are ordered in
+        //  If multiple cookies satisfy the criteria above, they are ordered in
-        //  the Cookie header such that those with more specific Path attributes
+        //  the Cookie header such that those with more specific Path attributes
-        //  precede those with less specific.  Ordering with respect to other
+        //  precede those with less specific.  Ordering with respect to other
-        //  attributes (e.g., Domain) is unspecified.
+        //  attributes (e.g., Domain) is unspecified.
-        $domain = $param['domain'];
+        $domain = $param['domain'];
-        if (strpos($domain, '.') === false) $domain .= '.local';
+        if (strpos($domain, '.') === false) $domain .= '.local';
-        $request_path = $param['path'];
+        $request_path = $param['path'];
-        if ($request_path == '') $request_path = '/';
+        if ($request_path == '') $request_path = '/';
-        $request_secure = $param['secure'];
+        $request_secure = $param['secure'];
-        $now = time();
+        $now = time();
-        $matched_cookies = array();
+        $matched_cookies = array();
-        // domain - find matching domains
+        // domain - find matching domains
-        $this->debug('Finding matching domains for '.$domain, __FILE__, __LINE__);
+        $this->debug('Finding matching domains for '.$domain, __FILE__, __LINE__);
-        while (strpos($domain, '.') !== false) {
+        while (strpos($domain, '.') !== false) {
-            if (isset($this->cookies[$domain])) {
+            if (isset($this->cookies[$domain])) {
-                $this->debug(' domain match found: '.$domain);
+                $this->debug(' domain match found: '.$domain);
-                $cookies =& $this->cookies[$domain];
+                $cookies =& $this->cookies[$domain];
-            } else {
+            } else {
-                $domain = $this->_reduce_domain($domain);
+                $domain = $this->_reduce_domain($domain);
-                continue;
+                continue;
-            }
+            }
-            // paths - find matching paths starting from most specific
+            // paths - find matching paths starting from most specific
-            $this->debug('  - Finding matching paths for '.$request_path);
+            $this->debug('  - Finding matching paths for '.$request_path);
-            $paths = array_keys($cookies);
+            $paths = array_keys($cookies);
-            usort($paths, array($this, '_cmp_length'));
+            usort($paths, array($this, '_cmp_length'));
-            foreach ($paths as $path) {
+            foreach ($paths as $path) {
-                // continue to next cookie if request path does not path-match cookie path
+                // continue to next cookie if request path does not path-match cookie path
-                if (!$this->_path_match($request_path, $path)) continue;
+                if (!$this->_path_match($request_path, $path)) continue;
-                // loop through cookie names
+                // loop through cookie names
-                $this->debug('     path match found: '.$path);
+                $this->debug('     path match found: '.$path);
-                foreach ($cookies[$path] as $name => $values) {
+                foreach ($cookies[$path] as $name => $values) {
-                    // if this cookie is secure but request isn't, continue to next cookie
+                    // if this cookie is secure but request isn't, continue to next cookie
-                    if ($values[1] && !$request_secure) continue;
+                    if ($values[1] && !$request_secure) continue;
-                    // if cookie is not a session cookie and has expired, continue to next cookie
+                    // if cookie is not a session cookie and has expired, continue to next cookie
-                    if (is_int($values[2]) && ($values[2] < $now)) continue;
+                    if (is_int($values[2]) && ($values[2] < $now)) continue;
-                    // cookie matches request
+                    // cookie matches request
-                    $this->debug('      cookie match: '.$name.'='.$values[0]);
+                    $this->debug('      cookie match: '.$name.'='.$values[0]);
-                    $matched_cookies[] = $name.'='.$values[0];
+                    $matched_cookies[] = $name.'='.$values[0];
-                }
+                }
-            }
+            }
-            $domain = $this->_reduce_domain($domain);
+            $domain = $this->_reduce_domain($domain);
-        }
+        }
-        // return cookies
+        // return cookies
-        return implode('; ', $matched_cookies);
+        return implode('; ', $matched_cookies);
-    }
+    }
-    /**
+    /**
-    * Parse Set-Cookie values.
+    * Parse Set-Cookie values.
-    *
+    *
-    * Only use this method if you cannot use extract_cookies(), for example, if you want to use
+    * Only use this method if you cannot use extract_cookies(), for example, if you want to use
-    * this cookie jar class without using the response class.
+    * this cookie jar class without using the response class.
-    *
+    *
-    * @param array $set_cookies array holding 1 or more "Set-Cookie" header values
+    * @param array $set_cookies array holding 1 or more "Set-Cookie" header values
-    * @param array $param associative array containing 'host', 'path' keys
+    * @param array $param associative array containing 'host', 'path' keys
-    * @return void
+    * @return void
-    * @see extract_cookies()
+    * @see extract_cookies()
-    */
+    */
-    public function storeCookies($url, $set_cookies)
+    public function storeCookies($url, $set_cookies)
-    {
+    {
-        if (count($set_cookies) == 0) return;
+        if (count($set_cookies) == 0) return;
-                $param = @parse_url($url);
+                $param = @parse_url($url);
-                if (!is_array($param) || !isset($param['host'])) return;
+                if (!is_array($param) || !isset($param['host'])) return;
-        $request_host = $param['host'];
+        $request_host = $param['host'];
-        if (strpos($request_host, '.') === false) $request_host .= '.local';
+        if (strpos($request_host, '.') === false) $request_host .= '.local';
-        $request_path = @$param['path'];
+        $request_path = @$param['path'];
-        if ($request_path == '') $request_path = '/';
+        if ($request_path == '') $request_path = '/';
-        //
+        //
-        // loop through set-cookie headers
+        // loop through set-cookie headers
-        //
+        //
-        foreach ($set_cookies as $set_cookie) {
+        foreach ($set_cookies as $set_cookie) {
-            $this->debug('Parsing: '.$set_cookie);
+            $this->debug('Parsing: '.$set_cookie);
-            // temporary cookie store (before adding to jar)
+            // temporary cookie store (before adding to jar)
-            $tmp_cookie = array();
+            $tmp_cookie = array();
-            $param = explode(';', $set_cookie);
+            $param = explode(';', $set_cookie);
-            // loop through params
+            // loop through params
-            for ($x=0; $x<count($param); $x++) {
+            for ($x=0; $x<count($param); $x++) {
-                $key_val = explode('=', $param[$x], 2);
+                $key_val = explode('=', $param[$x], 2);
-                if (count($key_val) != 2) {
+                if (count($key_val) != 2) {
-                    // if the first param isn't a name=value pair, continue to the next set-cookie
+                    // if the first param isn't a name=value pair, continue to the next set-cookie
-                    // header
+                    // header
-                    if ($x == 0) continue 2;
+                    if ($x == 0) continue 2;
-                    // check for secure flag
+                    // check for secure flag
-                    if (strtolower(trim($key_val[0])) == 'secure') $tmp_cookie['secure'] = true;
+                    if (strtolower(trim($key_val[0])) == 'secure') $tmp_cookie['secure'] = true;
-                    // continue to next param
+                    // continue to next param
-                    continue;
+                    continue;
-                }
+                }
-                list($key, $val) = array_map('trim', $key_val);
+                list($key, $val) = array_map('trim', $key_val);
-                // first name=value pair is the cookie name and value
+                // first name=value pair is the cookie name and value
-                // the name and value are stored under 'name' and 'value' to avoid conflicts
+                // the name and value are stored under 'name' and 'value' to avoid conflicts
-                // with later parameters.
+                // with later parameters.
-                if ($x == 0) {
+                if ($x == 0) {
-                    $tmp_cookie = array('name'=>$key, 'value'=>$val);
+                    $tmp_cookie = array('name'=>$key, 'value'=>$val);
-                    continue;
+                    continue;
-                }
+                }
-                $key = strtolower($key);
+                $key = strtolower($key);
-                if (in_array($key, array('expires', 'path', 'domain', 'secure'))) {
+                if (in_array($key, array('expires', 'path', 'domain', 'secure'))) {
-                    $tmp_cookie[$key] = $val;
+                    $tmp_cookie[$key] = $val;
-                }
+                }
-            }
+            }
-            //
+            //
-            // set cookie
+            // set cookie
-            //
+            //
-            // check domain
+            // check domain
-            if (isset($tmp_cookie['domain']) && ($tmp_cookie['domain'] != $request_host) &&
+            if (isset($tmp_cookie['domain']) && ($tmp_cookie['domain'] != $request_host) &&
-                    ($tmp_cookie['domain'] != ".$request_host")) {
+                    ($tmp_cookie['domain'] != ".$request_host")) {
-                $domain = $tmp_cookie['domain'];
+                $domain = $tmp_cookie['domain'];
-                if ((strpos($domain, '.') === false) && ($domain != 'local')) {
+                if ((strpos($domain, '.') === false) && ($domain != 'local')) {
-                    $this->debug(' - domain "'.$domain.'" has no dot and is not a local domain');
+                    $this->debug(' - domain "'.$domain.'" has no dot and is not a local domain');
-                    continue;
+                    continue;
-                }
+                }
-                if (preg_match('/\.[0-9]+$/', $domain)) {
+                if (preg_match('/\.[0-9]+$/', $domain)) {
-                    $this->debug(' - domain "'.$domain.'" appears to be an ip address');
+                    $this->debug(' - domain "'.$domain.'" appears to be an ip address');
-                    continue;
+                    continue;
-                }
+                }
-                if (substr($domain, 0, 1) != '.') $domain = ".$domain";
+                if (substr($domain, 0, 1) != '.') $domain = ".$domain";
-                if (!$this->_domain_match($request_host, $domain)) {
+                if (!$this->_domain_match($request_host, $domain)) {
-                    $this->debug(' - request host "'.$request_host.'" does not domain-match "'.$domain.'"');
+                    $this->debug(' - request host "'.$request_host.'" does not domain-match "'.$domain.'"');
-                    continue;
+                    continue;
-                }
+                }
-            } else {
+            } else {
-                // if domain is not specified in the set-cookie header, domain will default to
+                // if domain is not specified in the set-cookie header, domain will default to
-                // the request host
+                // the request host
-                $domain = $request_host;
+                $domain = $request_host;
-            }
+            }
-            // check path
+            // check path
-            if (isset($tmp_cookie['path']) && ($tmp_cookie['path'] != '')) {
+            if (isset($tmp_cookie['path']) && ($tmp_cookie['path'] != '')) {
-                $path = urldecode($tmp_cookie['path']);
+                $path = urldecode($tmp_cookie['path']);
-                if (!$this->_path_match($request_path, $path)) {
+                if (!$this->_path_match($request_path, $path)) {
-                    $this->debug(' - request path "'.$request_path.'" does not path-match "'.$path.'"');
+                    $this->debug(' - request path "'.$request_path.'" does not path-match "'.$path.'"');
-                    continue;
+                    continue;
-                }
+                }
-            } else {
+            } else {
-                $path = $request_path;
+                $path = $request_path;
-                $path = substr($path, 0, strrpos($path, '/'));
+                $path = substr($path, 0, strrpos($path, '/'));
-                if ($path == '') $path = '/';
+                if ($path == '') $path = '/';
-            }
+            }
-            // check if secure
+            // check if secure
-            $secure = (isset($tmp_cookie['secure'])) ? true : false;
+            $secure = (isset($tmp_cookie['secure'])) ? true : false;
-            // check expiry
+            // check expiry
-            if (isset($tmp_cookie['expires'])) {
+            if (isset($tmp_cookie['expires'])) {
-                if (($expires = strtotime($tmp_cookie['expires'])) < 0) {
+                if (($expires = strtotime($tmp_cookie['expires'])) < 0) {
-                    $expires = null;
+                    $expires = null;
-                }
+                }
-            } else {
+            } else {
-                $expires = null;
+                $expires = null;
-            }
+            }
-            // set cookie
+            // set cookie
-            $this->set_cookie($domain, $path, $tmp_cookie['name'], $tmp_cookie['value'], $secure, $expires);
+            $this->set_cookie($domain, $path, $tmp_cookie['name'], $tmp_cookie['value'], $secure, $expires);
-        }
+        }
-    }
+    }
-        
+        
-        // return array of set-cookie values extracted from HTTP response headers (string $h)
+        // return array of set-cookie values extracted from HTTP response headers (string $h)
-        public function extractCookies($h) {
+        public function extractCookies($h) {
-        $x = 0;
+        $x = 0;
-        $lines = 0;
+        $lines = 0;
-        $headers = array();
+        $headers = array();
-        $last_match = false;
+        $last_match = false;
-                $h = explode("\n", $h);
+                $h = explode("\n", $h);
-        foreach ($h as $line) {
+        foreach ($h as $line) {
-                        $line = rtrim($line);
+                        $line = rtrim($line);
-            $lines++;
+            $lines++;
-            $trimmed_line = trim($line);
+            $trimmed_line = trim($line);
-            if (isset($line_last)) {
+            if (isset($line_last)) {
-                // check if we have \r\n\r\n (indicating the end of headers)
+                // check if we have \r\n\r\n (indicating the end of headers)
-                // some servers will not use CRLF (\r\n), so we make CR (\r) optional.
+                // some servers will not use CRLF (\r\n), so we make CR (\r) optional.
-                // if (preg_match('/\015?\012\015?\012/', $line_last.$line)) {
+                // if (preg_match('/\015?\012\015?\012/', $line_last.$line)) {
-                //     break;
+                //     break;
-                // }
+                // }
-                // As an alternative, we can check if the current trimmed line is empty
+                // As an alternative, we can check if the current trimmed line is empty
-                if ($trimmed_line == '') {
+                if ($trimmed_line == '') {
-                    break;
+                    break;
-                }
+                }
-                // check for continuation line...
+                // check for continuation line...
-                // RFC 2616 Section 2.2 "Basic Rules":
+                // RFC 2616 Section 2.2 "Basic Rules":
-                // HTTP/1.1 header field values can be folded onto multiple lines if the
+                // HTTP/1.1 header field values can be folded onto multiple lines if the
-                // continuation line begins with a space or horizontal tab. All linear
+                // continuation line begins with a space or horizontal tab. All linear
-                // white space, including folding, has the same semantics as SP. A
+                // white space, including folding, has the same semantics as SP. A
-                // recipient MAY replace any linear white space with a single SP before
+                // recipient MAY replace any linear white space with a single SP before
-                // interpreting the field value or forwarding the message downstream.
+                // interpreting the field value or forwarding the message downstream.
-                if ($last_match && preg_match('/^\s+(.*)/', $line, $match)) {
+                if ($last_match && preg_match('/^\s+(.*)/', $line, $match)) {
-                    // append to previous header value
+                    // append to previous header value
-                    $headers[$x-1] .= ' '.rtrim($match[1]);
+                    $headers[$x-1] .= ' '.rtrim($match[1]);
-                    continue;
+                    continue;
-                }
+                }
-            }
+            }
-            $line_last = $line;
+            $line_last = $line;
-            // split header name and value
+            // split header name and value
-            if (preg_match('/^Set-Cookie\s*:\s*(.*)/i', $line, $match)) {
+            if (preg_match('/^Set-Cookie\s*:\s*(.*)/i', $line, $match)) {
-                $headers[$x++] = rtrim($match[1]);
+                $headers[$x++] = rtrim($match[1]);
-                $last_match = true;
+                $last_match = true;
-            } else {
+            } else {
-                $last_match = false;
+                $last_match = false;
-            }
+            }
-        }
+        }
-        return $headers;
+        return $headers;
-        }
+        }
-    /**
+    /**
-    * Set Cookie
+    * Set Cookie
-    * @param string $domain
+    * @param string $domain
-    * @param string $path
+    * @param string $path
-    * @param string $name cookie name
+    * @param string $name cookie name
-    * @param string $value cookie value
+    * @param string $value cookie value
-    * @param bool $secure
+    * @param bool $secure
-    * @param int $expires expiry time (null if session cookie, <= 0 will delete cookie)
+    * @param int $expires expiry time (null if session cookie, <= 0 will delete cookie)
-    * @return void
+    * @return void
-    */
+    */
-    function set_cookie($domain, $path, $name, $value, $secure=false, $expires=null)
+    function set_cookie($domain, $path, $name, $value, $secure=false, $expires=null)
-    {
+    {
-        if ($domain == '') return;
+        if ($domain == '') return;
-        if ($path == '') return;
+        if ($path == '') return;
-        if ($name == '') return;
+        if ($name == '') return;
-        // check if cookie needs to go
+        // check if cookie needs to go
-        if (isset($expires) && ($expires <= 0)) {
+        if (isset($expires) && ($expires <= 0)) {
-            if (isset($this->cookies[$domain][$path][$name])) unset($this->cookies[$domain][$path][$name]);
+            if (isset($this->cookies[$domain][$path][$name])) unset($this->cookies[$domain][$path][$name]);
-            return;
+            return;
-        }
+        }
-        if ($value == '') return;
+        if ($value == '') return;
-        $this->cookies[$domain][$path][$name] = array($value, $secure, $expires);
+        $this->cookies[$domain][$path][$name] = array($value, $secure, $expires);
-        return;
+        return;
-    }
+    }
-    /**
+    /**
-    * Clear cookies - [domain [,path [,name]]] - call method with no arguments to clear all cookies.
+    * Clear cookies - [domain [,path [,name]]] - call method with no arguments to clear all cookies.
-    * @param string $domain
+    * @param string $domain
-    * @param string $path
+    * @param string $path
-    * @param string $name
+    * @param string $name
-    * @return void
+    * @return void
-    */
+    */
-    function clear($domain=null, $path=null, $name=null)
+    function clear($domain=null, $path=null, $name=null)
-    {
+    {
-        if (!isset($domain)) {
+        if (!isset($domain)) {
-            $this->cookies = array();
+            $this->cookies = array();
-        } elseif (!isset($path)) {
+        } elseif (!isset($path)) {
-            if (isset($this->cookies[$domain])) unset($this->cookies[$domain]);
+            if (isset($this->cookies[$domain])) unset($this->cookies[$domain]);
-        } elseif (!isset($name)) {
+        } elseif (!isset($name)) {
-            if (isset($this->cookies[$domain][$path])) unset($this->cookies[$domain][$path]);
+            if (isset($this->cookies[$domain][$path])) unset($this->cookies[$domain][$path]);
-        } elseif (isset($name)) {
+        } elseif (isset($name)) {
-            if (isset($this->cookies[$domain][$path][$name])) unset($this->cookies[$domain][$path][$name]);
+            if (isset($this->cookies[$domain][$path][$name])) unset($this->cookies[$domain][$path][$name]);
-        }
+        }
-    }
+    }
-    /**
+    /**
-    * Compare string length - used for sorting
+    * Compare string length - used for sorting
-    * @access private
+    * @access private
-    * @return int
+    * @return int
-    */
+    */
-    function _cmp_length($a, $b)
+    function _cmp_length($a, $b)
-    {
+    {
-        $la = strlen($a); $lb = strlen($b);
+        $la = strlen($a); $lb = strlen($b);
-        if ($la == $lb) return 0;
+        if ($la == $lb) return 0;
-        return ($la > $lb) ? -1 : 1;
+        return ($la > $lb) ? -1 : 1;
-    }
+    }
-    /**
+    /**
-    * Reduce domain
+    * Reduce domain
-    * @param string $domain
+    * @param string $domain
-    * @return string
+    * @return string
-    * @access private
+    * @access private
-    */
+    */
-    function _reduce_domain($domain)
+    function _reduce_domain($domain)
-    {
+    {
-        if ($domain == '') return '';
+        if ($domain == '') return '';
-        if (substr($domain, 0, 1) == '.') return substr($domain, 1);
+        if (substr($domain, 0, 1) == '.') return substr($domain, 1);
-        return substr($domain, strpos($domain, '.'));
+        return substr($domain, strpos($domain, '.'));
-    }
+    }
-    /**
+    /**
-    * Path match - check if path1 path-matches path2
+    * Path match - check if path1 path-matches path2
-    *
+    *
-    * From RFC 2965: 
+    * From RFC 2965: 
-    *   <i>For two strings that represent paths, P1 and P2, P1 path-matches P2
+    *   <i>For two strings that represent paths, P1 and P2, P1 path-matches P2
-    *   if P2 is a prefix of P1 (including the case where P1 and P2 string-
+    *   if P2 is a prefix of P1 (including the case where P1 and P2 string-
-    *   compare equal).  Thus, the string /tec/waldo path-matches /tec.</i>
+    *   compare equal).  Thus, the string /tec/waldo path-matches /tec.</i>
-    * @param string $path1
+    * @param string $path1
-    * @param string $path2
+    * @param string $path2
-    * @return bool
+    * @return bool
-    * @access private
+    * @access private
-    */
+    */
-    function _path_match($path1, $path2)
+    function _path_match($path1, $path2)
-    {
+    {
-        return (substr($path1, 0, strlen($path2)) == $path2);
+        return (substr($path1, 0, strlen($path2)) == $path2);
-    }
+    }
-    /**
+    /**
-    * Domain match - check if domain1 domain-matches domain2
+    * Domain match - check if domain1 domain-matches domain2
-    *
+    *
-    * A few extracts from RFC 2965: 
+    * A few extracts from RFC 2965: 
-    *  -  A Set-Cookie2 from request-host y.x.foo.com for Domain=.foo.com
+    *  -  A Set-Cookie2 from request-host y.x.foo.com for Domain=.foo.com
-    *     would be rejected, because H is y.x and contains a dot.
+    *     would be rejected, because H is y.x and contains a dot.
-    *
+    *
-    *  -  A Set-Cookie2 from request-host x.foo.com for Domain=.foo.com
+    *  -  A Set-Cookie2 from request-host x.foo.com for Domain=.foo.com
-    *     would be accepted.
+    *     would be accepted.
-    *
+    *
-    *  -  A Set-Cookie2 with Domain=.com or Domain=.com., will always be
+    *  -  A Set-Cookie2 with Domain=.com or Domain=.com., will always be
-    *     rejected, because there is no embedded dot.
+    *     rejected, because there is no embedded dot.
-    *
+    *
-    *  -  A Set-Cookie2 from request-host example for Domain=.local will
+    *  -  A Set-Cookie2 from request-host example for Domain=.local will
-    *     be accepted, because the effective host name for the request-
+    *     be accepted, because the effective host name for the request-
-    *     host is example.local, and example.local domain-matches .local.
+    *     host is example.local, and example.local domain-matches .local.
-    *
+    *
-    * I'm ignoring the first point for now (must check to see how other browsers handle
+    * I'm ignoring the first point for now (must check to see how other browsers handle
-    * this rule for Set-Cookie headers)
+    * this rule for Set-Cookie headers)
-    *
+    *
-    * @param string $domain1
+    * @param string $domain1
-    * @param string $domain2
+    * @param string $domain2
-    * @return bool
+    * @return bool
-    * @access private
+    * @access private
-    */
+    */
-    function _domain_match($domain1, $domain2)
+    function _domain_match($domain1, $domain2)
-    {
+    {
-        $domain1 = strtolower($domain1);
+        $domain1 = strtolower($domain1);
-        $domain2 = strtolower($domain2);
+        $domain2 = strtolower($domain2);
-        while (strpos($domain1, '.') !== false) {
+        while (strpos($domain1, '.') !== false) {
-            if ($domain1 == $domain2) return true;
+            if ($domain1 == $domain2) return true;
-            $domain1 = $this->_reduce_domain($domain1);
+            $domain1 = $this->_reduce_domain($domain1);
-            continue;
+            continue;
-        }
+        }
-        return false;
+        return false;
-    }
+    }
-}
+}
+\ No newline at end of file
-?>
-\ No newline at end of file
diff --git a/inc/3rdparty/libraries/humble-http-agent/HumbleHttpAgent.php b/inc/3rdparty/libraries/humble-http-agent/HumbleHttpAgent.php
index e4f1b3b3..963f0c05 100644
--- a/inc/3rdparty/libraries/humble-http-agent/HumbleHttpAgent.php
+++ b/inc/3rdparty/libraries/humble-http-agent/HumbleHttpAgent.php
@@ -1,779 +1,810 @@
-<?php
+<?php
-/**
+/**
- * Humble HTTP Agent
+ * Humble HTTP Agent
- * 
+ * 
- * This class is designed to take advantage of parallel HTTP requests
+ * This class is designed to take advantage of parallel HTTP requests
- * offered by PHP's PECL HTTP extension or the curl_multi_* functions. 
+ * offered by PHP's PECL HTTP extension or the curl_multi_* functions. 
- * For environments which do not have these options, it reverts to standard sequential 
+ * For environments which do not have these options, it reverts to standard sequential 
- * requests (using file_get_contents())
+ * requests (using file_get_contents())
- * 
+ * 
- * @version 1.1
+ * @version 1.4
- * @date 2012-08-20
+ * @date 2013-05-10
- * @see http://php.net/HttpRequestPool
+ * @see http://php.net/HttpRequestPool
- * @author Keyvan Minoukadeh
+ * @author Keyvan Minoukadeh
- * @copyright 2011-2012 Keyvan Minoukadeh
+ * @copyright 2011-2013 Keyvan Minoukadeh
- * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
+ * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
- */
+ */
-class HumbleHttpAgent
+class HumbleHttpAgent
-{
+{
-        const METHOD_REQUEST_POOL = 1;
+        const METHOD_REQUEST_POOL = 1;
-        const METHOD_CURL_MULTI = 2;
+        const METHOD_CURL_MULTI = 2;
-        const METHOD_FILE_GET_CONTENTS = 4;
+        const METHOD_FILE_GET_CONTENTS = 4;
-        //const UA_BROWSER = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0.1) Gecko/20100101 Firefox/4.0.1';
+        //const UA_BROWSER = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0.1) Gecko/20100101 Firefox/4.0.1';
-        const UA_BROWSER = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.92 Safari/535.2';
+        const UA_BROWSER = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.92 Safari/535.2';
-        const UA_PHP = 'PHP/5.2';
+        const UA_PHP = 'PHP/5.4';
-        const REF_GOOGLE = 'http://www.google.co.uk/url?sa=t&source=web&cd=1';
+        const REF_GOOGLE = 'http://www.google.co.uk/url?sa=t&source=web&cd=1';
-        
+        
-        protected $requests = array();
+        protected $requests = array();
-        protected $redirectQueue = array();
+        protected $redirectQueue = array();
-        protected $requestOptions;
+        protected $requestOptions;
-        protected $maxParallelRequests = 5;
+        protected $maxParallelRequests = 5;
-        protected $cache = null; //TODO
+        protected $cache = null; //TODO
-        protected $httpContext;
+        protected $httpContext;
-        protected $minimiseMemoryUse = false; //TODO
+        protected $minimiseMemoryUse = false; //TODO
-        protected $method;
+        protected $method;
-        protected $cookieJar;
+        protected $cookieJar;
-        public $debug = false;
+        public $debug = false;
-        public $debugVerbose = false;
+        public $debugVerbose = false;
-        public $rewriteHashbangFragment = true; // see http://code.google.com/web/ajaxcrawling/docs/specification.html
+        public $rewriteHashbangFragment = true; // see http://code.google.com/web/ajaxcrawling/docs/specification.html
-        public $maxRedirects = 5;
+        public $maxRedirects = 5;
-        public $userAgentMap = array();
+        public $userAgentMap = array();
-        public $rewriteUrls = array();
+        public $rewriteUrls = array();
-        public $userAgentDefault;
+        public $userAgentDefault;
-        public $referer;
+        public $referer;
-        //public $userAgent = 'Mozilla/5.0';
+        //public $userAgent = 'Mozilla/5.0';
-        
+        
-        // Prevent certain file/mime types
+        // Prevent certain file/mime types
-        // HTTP responses which match these content types will
+        // HTTP responses which match these content types will
-        // be returned without body.
+        // be returned without body.
-        public $headerOnlyTypes = array();
+        public $headerOnlyTypes = array();
-        // URLs ending with one of these extensions will
+        // URLs ending with one of these extensions will
-        // prompt Humble HTTP Agent to send a HEAD request first
+        // prompt Humble HTTP Agent to send a HEAD request first
-        // to see if returned content type matches $headerOnlyTypes.
+        // to see if returned content type matches $headerOnlyTypes.
-        public $headerOnlyClues = array('pdf','mp3','zip','exe','gif','gzip','gz','jpeg','jpg','mpg','mpeg','png','ppt','mov');
+        public $headerOnlyClues = array('pdf','mp3','zip','exe','gif','gzip','gz','jpeg','jpg','mpg','mpeg','png','ppt','mov');
-        // AJAX triggers to search for.
+        // AJAX triggers to search for.
-        // for AJAX sites, e.g. Blogger with its dynamic views templates.
+        // for AJAX sites, e.g. Blogger with its dynamic views templates.
-        public $ajaxTriggers = array("<meta name='fragment' content='!'",'<meta name="fragment" content="!"',"<meta content='!' name='fragment'",'<meta content="!" name="fragment"');
+        public $ajaxTriggers = array("<meta name='fragment' content='!'",'<meta name="fragment" content="!"',"<meta content='!' name='fragment'",'<meta content="!" name="fragment"');
-        
+        
-        //TODO: set max file size
+        //TODO: set max file size
-        //TODO: normalise headers
+        //TODO: normalise headers
-        
+        
-        function __construct($requestOptions=null, $method=null) {
+        function __construct($requestOptions=null, $method=null) {
-                $this->userAgentDefault = self::UA_BROWSER;
+                $this->userAgentDefault = self::UA_BROWSER;
-                $this->referer = self::REF_GOOGLE;
+                $this->referer = self::REF_GOOGLE;
-                // set the request method
+                // set the request method
-                if (in_array($method, array(1,2,4))) {
+                if (in_array($method, array(1,2,4))) {
-                        $this->method = $method;
+                        $this->method = $method;
-                } else {
+                } else {
-                        if (class_exists('HttpRequestPool')) {
+                        if (class_exists('HttpRequestPool')) {
-                                $this->method = self::METHOD_REQUEST_POOL;
+                                $this->method = self::METHOD_REQUEST_POOL;
-                        } elseif (function_exists('curl_multi_init')) {
+                        } elseif (function_exists('curl_multi_init')) {
-                                $this->method = self::METHOD_CURL_MULTI;
+                                $this->method = self::METHOD_CURL_MULTI;
-                        } else {
+                        } else {
-                                $this->method = self::METHOD_FILE_GET_CONTENTS;
+                                $this->method = self::METHOD_FILE_GET_CONTENTS;
-                        }
+                        }
-                }
+                }
-                if ($this->method == self::METHOD_CURL_MULTI) {
+                if ($this->method == self::METHOD_CURL_MULTI) {
-                        require_once(dirname(__FILE__).'/RollingCurl.php');
+                        require_once(dirname(__FILE__).'/RollingCurl.php');
-                }
+                }
-                // create cookie jar
+                // create cookie jar
-                $this->cookieJar = new CookieJar();
+                $this->cookieJar = new CookieJar();
-                // set request options (redirect must be 0)
+                // set request options (redirect must be 0)
-                $this->requestOptions = array(
+                $this->requestOptions = array(
-                        'timeout' => 15,
+                        'timeout' => 15,
-                        'redirect' => 0 // we handle redirects manually so we can rewrite the new hashbang URLs that are creeping up over the web
+                        'connecttimeout' => 15,
-                        // TODO: test onprogress?
+                        'dns_cache_timeout' => 300,
-                );
+                        'redirect' => 0 // we handle redirects manually so we can rewrite the new hashbang URLs that are creeping up over the web
-                if (is_array($requestOptions)) {
+                        // TODO: test onprogress?
-                        $this->requestOptions = array_merge($this->requestOptions, $requestOptions);
+                );
-                }
+                if (is_array($requestOptions)) {
-                $this->httpContext = array(
+                        $this->requestOptions = array_merge($this->requestOptions, $requestOptions);
-                        'http' => array(
+                }
-                                'ignore_errors' => true,
+                $this->httpContext = array(
-                                'timeout' => $this->requestOptions['timeout'],
+                        'http' => array(
-                                'max_redirects' => $this->requestOptions['redirect'],
+                                'ignore_errors' => true,
-                                'header' => "Accept: */*\r\n"
+                                'timeout' => $this->requestOptions['timeout'],
-                                )
+                                'max_redirects' => $this->requestOptions['redirect'],
-                        );
+                                'header' => "Accept: */*\r\n"
-        }
+                                )
-        
+                        );
-        protected function debug($msg) {
+        }
-                if ($this->debug) {
+        
-                        $mem = round(memory_get_usage()/1024, 2);
+        protected function debug($msg) {
-                        $memPeak = round(memory_get_peak_usage()/1024, 2);
+                if ($this->debug) {
-                        echo '* ',$msg;
+                        $mem = round(memory_get_usage()/1024, 2);
-                        if ($this->debugVerbose) echo ' - mem used: ',$mem," (peak: $memPeak)";
+                        $memPeak = round(memory_get_peak_usage()/1024, 2);
-                        echo "\n";
+                        echo '* ',$msg;
-                        ob_flush();
+                        if ($this->debugVerbose) echo ' - mem used: ',$mem," (peak: $memPeak)";
-                        flush();
+                        echo "\n";
-                }
+                        ob_flush();
-        }
+                        flush();
-        
+                }
-        protected function getUserAgent($url, $asArray=false) {
+        }
-                $host = @parse_url($url, PHP_URL_HOST);
+        
-                if (strtolower(substr($host, 0, 4)) == 'www.') {
+        protected function getUserAgent($url, $asArray=false) {
-                        $host = substr($host, 4);
+                $host = @parse_url($url, PHP_URL_HOST);
-                }
+                if (strtolower(substr($host, 0, 4)) == 'www.') {
-                if ($host) {
+                        $host = substr($host, 4);
-                        $try = array($host);
+                }
-                        $split = explode('.', $host);
+                if ($host) {
-                        if (count($split) > 1) {
+                        $try = array($host);
-                                array_shift($split);
+                        $split = explode('.', $host);
-                                $try[] = '.'.implode('.', $split);
+                        if (count($split) > 1) {
-                        }
+                                array_shift($split);
-                        foreach ($try as $h) {
+                                $try[] = '.'.implode('.', $split);
-                                if (isset($this->userAgentMap[$h])) {
+                        }
-                                        $ua = $this->userAgentMap[$h];
+                        foreach ($try as $h) {
-                                        break;
+                                if (isset($this->userAgentMap[$h])) {
-                                }
+                                        $ua = $this->userAgentMap[$h];
-                        }
+                                        break;
-                }
+                                }
-                if (!isset($ua)) $ua = $this->userAgentDefault;
+                        }
-                if ($asArray) {
+                }
-                        return array('User-Agent' => $ua);
+                if (!isset($ua)) $ua = $this->userAgentDefault;
-                } else {
+                if ($asArray) {
-                        return 'User-Agent: '.$ua;
+                        return array('User-Agent' => $ua);
-                }
+                } else {
-        }
+                        return 'User-Agent: '.$ua;
-        
+                }
-        public function rewriteHashbangFragment($url) {
+        }
-                // return $url if there's no '#!'
+        
-                if (strpos($url, '#!') === false) return $url;
+        public function rewriteHashbangFragment($url) {
-                // split $url and rewrite
+                // return $url if there's no '#!'
-                // TODO: is SimplePie_IRI included?
+                if (strpos($url, '#!') === false) return $url;
-                $iri = new SimplePie_IRI($url);
+                // split $url and rewrite
-                $fragment = substr($iri->fragment, 1); // strip '!'
+                // TODO: is SimplePie_IRI included?
-                $iri->fragment = null;
+                $iri = new SimplePie_IRI($url);
-                if (isset($iri->query)) {
+                $fragment = substr($iri->fragment, 1); // strip '!'
-                        parse_str($iri->query, $query);
+                $iri->fragment = null;
-                } else {
+                if (isset($iri->query)) {
-                        $query = array();
+                        parse_str($iri->query, $query);
-                }
+                } else {
-                $query['_escaped_fragment_'] = (string)$fragment;
+                        $query = array();
-                $iri->query = str_replace('%2F', '/', http_build_query($query)); // needed for some sites
+                }
-                return $iri->get_iri();
+                $query['_escaped_fragment_'] = (string)$fragment;
-        }
+                $iri->query = str_replace('%2F', '/', http_build_query($query)); // needed for some sites
-        
+                return $iri->get_iri();
-        public function getUglyURL($url, $html) {
+        }
-                if ($html == '') return false;
+        
-                $found = false;
+        public function getRedirectURLfromHTML($url, $html) {
-                foreach ($this->ajaxTriggers as $string) {
+                $redirect_url = $this->getMetaRefreshURL($url, $html);
-                        if (stripos($html, $string)) {
+                if (!$redirect_url) {
-                                $found = true;
+                        $redirect_url = $this->getUglyURL($url, $html);
-                                break;
+                }
-                        }
+                return $redirect_url;
-                }
+        }
-                if (!$found) return false;
+        
-                $iri = new SimplePie_IRI($url);
+        public function getMetaRefreshURL($url, $html) {
-                if (isset($iri->query)) {
+                if ($html == '') return false;
-                        parse_str($iri->query, $query);
+                // <meta HTTP-EQUIV="REFRESH" content="0; url=http://www.bernama.com/bernama/v6/newsindex.php?id=943513">
-                } else {
+                if (!preg_match('!<meta http-equiv=["\']?refresh["\']? content=["\']?[0-9];\s*url=["\']?([^"\'>]+)["\']*>!i', $html, $match)) {
-                        $query = array();
+                        return false;
-                }
+                }
-                $query['_escaped_fragment_'] = '';
+                $redirect_url = $match[1];
-                $iri->query = str_replace('%2F', '/', http_build_query($query)); // needed for some sites
+                if (preg_match('!^https?://!i', $redirect_url)) {
-                return $iri->get_iri();
+                        // already absolute
-        }
+                        $this->debug('Meta refresh redirect found (http-equiv="refresh"), new URL: '.$redirect_url);
-        
+                        return $redirect_url;
-        public function removeFragment($url) {
+                }
-                $pos = strpos($url, '#');
+                // absolutize redirect URL
-                if ($pos === false) {
+                $base = new SimplePie_IRI($url);
-                        return $url;
+                // remove '//' in URL path (causes URLs not to resolve properly)
-                } else {
+                if (isset($base->path)) $base->path = preg_replace('!//+!', '/', $base->path);
-                        return substr($url, 0, $pos);
+                if ($absolute = SimplePie_IRI::absolutize($base, $redirect_url)) {
-                }
+                        $this->debug('Meta refresh redirect found (http-equiv="refresh"), new URL: '.$absolute);
-        }
+                        return $absolute;
-        
+                }
-        public function rewriteUrls($url) {
+                return false;
-                foreach ($this->rewriteUrls as $find => $action) {
+        }       
-                        if (strpos($url, $find) !== false) {
+        
-                                if (is_array($action)) {
+        public function getUglyURL($url, $html) {
-                                        return strtr($url, $action);
+                if ($html == '') return false;
-                                }
+                $found = false;
-                        }
+                foreach ($this->ajaxTriggers as $string) {
-                }
+                        if (stripos($html, $string)) {
-                return $url;
+                                $found = true;
-        }
+                                break;
-        
+                        }
-        public function enableDebug($bool=true) {
+                }
-                $this->debug = (bool)$bool;
+                if (!$found) return false;
-        }
+                $iri = new SimplePie_IRI($url);
-        
+                if (isset($iri->query)) {
-        public function minimiseMemoryUse($bool = true) {
+                        parse_str($iri->query, $query);
-                $this->minimiseMemoryUse = $bool;
+                } else {
-        }
+                        $query = array();
-        
+                }
-        public function setMaxParallelRequests($max) {
+                $query['_escaped_fragment_'] = '';
-                $this->maxParallelRequests = $max;
+                $iri->query = str_replace('%2F', '/', http_build_query($query)); // needed for some sites
-        }
+                $ugly_url = $iri->get_iri();
-        
+                $this->debug('AJAX trigger (meta name="fragment" content="!") found, new URL: '.$ugly_url);
-        public function validateUrl($url) {
+                return $ugly_url;
-                $url = filter_var($url, FILTER_SANITIZE_URL);
+        }
-                $test = filter_var($url, FILTER_VALIDATE_URL, FILTER_FLAG_SCHEME_REQUIRED);
+        
-                // deal with bug http://bugs.php.net/51192 (present in PHP 5.2.13 and PHP 5.3.2)
+        public function removeFragment($url) {
-                if ($test === false) {
+                $pos = strpos($url, '#');
-                        $test = filter_var(strtr($url, '-', '_'), FILTER_VALIDATE_URL, FILTER_FLAG_SCHEME_REQUIRED);
+                if ($pos === false) {
-                }
+                        return $url;
-                if ($test !== false && $test !== null && preg_match('!^https?://!', $url)) {
+                } else {
-                        return $url;
+                        return substr($url, 0, $pos);
-                } else {
+                }
-                        return false;
+        }
-                }
+        
-        }
+        public function rewriteUrls($url) {
-        
+                foreach ($this->rewriteUrls as $find => $action) {
-        public function fetchAll(array $urls) {
+                        if (strpos($url, $find) !== false) {
-                $this->fetchAllOnce($urls, $isRedirect=false);
+                                if (is_array($action)) {
-                $redirects = 0;
+                                        return strtr($url, $action);
-                while (!empty($this->redirectQueue) && ++$redirects <= $this->maxRedirects) {
+                                }
-                        $this->debug("Following redirects #$redirects...");
+                        }
-                        $this->fetchAllOnce($this->redirectQueue, $isRedirect=true);
+                }
-                }
+                return $url;
-        }
+        }
-        
+        
-        // fetch all URLs without following redirects
+        public function enableDebug($bool=true) {
-        public function fetchAllOnce(array $urls, $isRedirect=false) {
+                $this->debug = (bool)$bool;
-                if (!$isRedirect) $urls = array_unique($urls);
+        }
-                if (empty($urls)) return;
+        
-                
+        public function minimiseMemoryUse($bool = true) {
-                //////////////////////////////////////////////////////
+                $this->minimiseMemoryUse = $bool;
-                // parallel (HttpRequestPool)
+        }
-                if ($this->method == self::METHOD_REQUEST_POOL) {
+        
-                        $this->debug('Starting parallel fetch (HttpRequestPool)');
+        public function setMaxParallelRequests($max) {
-                        try {
+                $this->maxParallelRequests = $max;
-                                while (count($urls) > 0) {
+        }
-                                        $this->debug('Processing set of '.min($this->maxParallelRequests, count($urls)));
+        
-                                        $subset = array_splice($urls, 0, $this->maxParallelRequests);
+        public function validateUrl($url) {
-                                        $pool = new HttpRequestPool();
+                $url = filter_var($url, FILTER_SANITIZE_URL);
-                                        foreach ($subset as $orig => $url) {
+                $test = filter_var($url, FILTER_VALIDATE_URL, FILTER_FLAG_SCHEME_REQUIRED);
-                                                if (!$isRedirect) $orig = $url;
+                // deal with bug http://bugs.php.net/51192 (present in PHP 5.2.13 and PHP 5.3.2)
-                                                unset($this->redirectQueue[$orig]);
+                if ($test === false) {
-                                                $this->debug("...$url");
+                        $test = filter_var(strtr($url, '-', '_'), FILTER_VALIDATE_URL, FILTER_FLAG_SCHEME_REQUIRED);
-                                                if (!$isRedirect && isset($this->requests[$url])) {
+                }
-                                                        $this->debug("......in memory");
+                if ($test !== false && $test !== null && preg_match('!^https?://!', $url)) {
-                                                /*
+                        return $url;
-                                                } elseif ($this->isCached($url)) {
+                } else {
-                                                        $this->debug("......is cached");
+                        return false;
-                                                        if (!$this->minimiseMemoryUse) {
+                }
-                                                                $this->requests[$url] = $this->getCached($url);
+        }
-                                                        }
+        
-                                                */
+        public function fetchAll(array $urls) {
-                                                } else {
+                $this->fetchAllOnce($urls, $isRedirect=false);
-                                                        $this->debug("......adding to pool");
+                $redirects = 0;
-                                                        $req_url = $this->rewriteUrls($url);
+                while (!empty($this->redirectQueue) && ++$redirects <= $this->maxRedirects) {
-                                                        $req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url;
+                        $this->debug("Following redirects #$redirects...");
-                                                        $req_url = $this->removeFragment($req_url);
+                        $this->fetchAllOnce($this->redirectQueue, $isRedirect=true);
-                                                        if (!empty($this->headerOnlyTypes) && !isset($this->requests[$orig]['wrongGuess']) && $this->possibleUnsupportedType($req_url)) {
+                }
-                                                                $_meth = HttpRequest::METH_HEAD;
+        }
-                                                        } else {
+        
-                                                                $_meth = HttpRequest::METH_GET;
+        // fetch all URLs without following redirects
-                                                                unset($this->requests[$orig]['wrongGuess']);
+        public function fetchAllOnce(array $urls, $isRedirect=false) {
-                                                        }
+                if (!$isRedirect) $urls = array_unique($urls);
-                                                        $httpRequest = new HttpRequest($req_url, $_meth, $this->requestOptions);
+                if (empty($urls)) return;
-                                                        // send cookies, if we have any
+                
-                                                        if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
+                //////////////////////////////////////////////////////
-                                                                $this->debug("......sending cookies: $cookies");
+                // parallel (HttpRequestPool)
-                                                                $httpRequest->addHeaders(array('Cookie' => $cookies));
+                if ($this->method == self::METHOD_REQUEST_POOL) {
-                                                        }
+                        $this->debug('Starting parallel fetch (HttpRequestPool)');
-                                                        //$httpRequest->addHeaders(array('User-Agent' => $this->userAgent));
+                        try {
-                                                        $httpRequest->addHeaders($this->getUserAgent($req_url, true));
+                                while (count($urls) > 0) {
-                                                        // add referer for picky sites
+                                        $this->debug('Processing set of '.min($this->maxParallelRequests, count($urls)));
-                                                        $httpRequest->addheaders(array('Referer' => $this->referer));
+                                        $subset = array_splice($urls, 0, $this->maxParallelRequests);
-                                                        $this->requests[$orig] = array('headers'=>null, 'body'=>null, 'httpRequest'=>$httpRequest);
+                                        $pool = new HttpRequestPool();
-                                                        $this->requests[$orig]['original_url'] = $orig;
+                                        foreach ($subset as $orig => $url) {
-                                                        $pool->attach($httpRequest);
+                                                if (!$isRedirect) $orig = $url;
-                                                }
+                                                unset($this->redirectQueue[$orig]);
-                                        }
+                                                $this->debug("...$url");
-                                        // did we get anything into the pool?
+                                                if (!$isRedirect && isset($this->requests[$url])) {
-                                        if (count($pool) > 0) {
+                                                        $this->debug("......in memory");
-                                                $this->debug('Sending request...');
+                                                /*
-                                                try {
+                                                } elseif ($this->isCached($url)) {
-                                                        $pool->send();
+                                                        $this->debug("......is cached");
-                                                } catch (HttpRequestPoolException $e) {
+                                                        if (!$this->minimiseMemoryUse) {
-                                                        // do nothing
+                                                                $this->requests[$url] = $this->getCached($url);
-                                                }
+                                                        }
-                                                $this->debug('Received responses');
+                                                */
-                                                foreach($subset as $orig => $url) {
+                                                } else {
-                                                        if (!$isRedirect) $orig = $url;
+                                                        $this->debug("......adding to pool");
-                                                        $request = $this->requests[$orig]['httpRequest'];
+                                                        $req_url = $this->rewriteUrls($url);
-                                                        //$this->requests[$orig]['headers'] = $this->headersToString($request->getResponseHeader());
+                                                        $req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url;
-                                                        // getResponseHeader() doesn't return status line, so, for consistency...
+                                                        $req_url = $this->removeFragment($req_url);
-                                                        $this->requests[$orig]['headers'] = substr($request->getRawResponseMessage(), 0, $request->getResponseInfo('header_size'));
+                                                        if (!empty($this->headerOnlyTypes) && !isset($this->requests[$orig]['wrongGuess']) && $this->possibleUnsupportedType($req_url)) {
-                                                        // check content type
+                                                                $_meth = HttpRequest::METH_HEAD;
-                                                        // TODO: use getResponseHeader('content-type') or getResponseInfo()
+                                                        } else {
-                                                        if ($this->headerOnlyType($this->requests[$orig]['headers'])) {
+                                                                $_meth = HttpRequest::METH_GET;
-                                                                $this->requests[$orig]['body'] = '';
+                                                                unset($this->requests[$orig]['wrongGuess']);
-                                                                $_header_only_type = true;
+                                                        }
-                                                                $this->debug('Header only type returned');
+                                                        $httpRequest = new HttpRequest($req_url, $_meth, $this->requestOptions);
-                                                        } else {
+                                                        // send cookies, if we have any
-                                                                $this->requests[$orig]['body'] = $request->getResponseBody();
+                                                        if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
-                                                                $_header_only_type = false;
+                                                                $this->debug("......sending cookies: $cookies");
-                                                        }
+                                                                $httpRequest->addHeaders(array('Cookie' => $cookies));
-                                                        $this->requests[$orig]['effective_url'] = $request->getResponseInfo('effective_url');
+                                                        }
-                                                        $this->requests[$orig]['status_code'] = $status_code = $request->getResponseCode();
+                                                        //$httpRequest->addHeaders(array('User-Agent' => $this->userAgent));
-                                                        // is redirect?
+                                                        $httpRequest->addHeaders($this->getUserAgent($req_url, true));
-                                                        if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && $request->getResponseHeader('location')) {
+                                                        // add referer for picky sites
-                                                                $redirectURL = $request->getResponseHeader('location');
+                                                        $httpRequest->addheaders(array('Referer' => $this->referer));
-                                                                if (!preg_match('!^https?://!i', $redirectURL)) {
+                                                        $this->requests[$orig] = array('headers'=>null, 'body'=>null, 'httpRequest'=>$httpRequest);
-                                                                        $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
+                                                        $this->requests[$orig]['original_url'] = $orig;
-                                                                }
+                                                        $pool->attach($httpRequest);
-                                                                if ($this->validateURL($redirectURL)) {
+                                                }
-                                                                        $this->debug('Redirect detected. Valid URL: '.$redirectURL);
+                                        }
-                                                                        // store any cookies
+                                        // did we get anything into the pool?
-                                                                        $cookies = $request->getResponseHeader('set-cookie');
+                                        if (count($pool) > 0) {
-                                                                        if ($cookies && !is_array($cookies)) $cookies = array($cookies);
+                                                $this->debug('Sending request...');
-                                                                        if ($cookies) $this->cookieJar->storeCookies($url, $cookies);
+                                                try {
-                                                                        $this->redirectQueue[$orig] = $redirectURL;
+                                                        $pool->send();
-                                                                } else {
+                                                } catch (HttpRequestPoolException $e) {
-                                                                        $this->debug('Redirect detected. Invalid URL: '.$redirectURL);
+                                                        // do nothing
-                                                                }
+                                                }
-                                                        } elseif (!$_header_only_type && $request->getMethod() === HttpRequest::METH_HEAD) {
+                                                $this->debug('Received responses');
-                                                                // the response content-type did not match our 'header only' types, 
+                                                foreach($subset as $orig => $url) {
-                                                                // but we'd issues a HEAD request because we assumed it would. So
+                                                        if (!$isRedirect) $orig = $url;
-                                                                // let's queue a proper GET request for this item...
+                                                        $request = $this->requests[$orig]['httpRequest'];
-                                                                $this->debug('Wrong guess at content-type, queing GET request');
+                                                        //$this->requests[$orig]['headers'] = $this->headersToString($request->getResponseHeader());
-                                                                $this->requests[$orig]['wrongGuess'] = true;
+                                                        // getResponseHeader() doesn't return status line, so, for consistency...
-                                                                $this->redirectQueue[$orig] = $this->requests[$orig]['effective_url'];
+                                                        $this->requests[$orig]['headers'] = substr($request->getRawResponseMessage(), 0, $request->getResponseInfo('header_size'));
-                                                        } elseif (strpos($this->requests[$orig]['effective_url'], '_escaped_fragment_') === false) {
+                                                        // check content type
-                                                                // check for <meta name='fragment' content='!'/>
+                                                        // TODO: use getResponseHeader('content-type') or getResponseInfo()
-                                                                // for AJAX sites, e.g. Blogger with its dynamic views templates.
+                                                        if ($this->headerOnlyType($this->requests[$orig]['headers'])) {
-                                                                // Based on Google's spec: https://developers.google.com/webmasters/ajax-crawling/docs/specification
+                                                                $this->requests[$orig]['body'] = '';
-                                                                if (isset($this->requests[$orig]['body'])) {
+                                                                $_header_only_type = true;
-                                                                        $redirectURL = $this->getUglyURL($this->requests[$orig]['effective_url'], substr($this->requests[$orig]['body'], 0, 4000));
+                                                                $this->debug('Header only type returned');
-                                                                        if ($redirectURL) {
+                                                        } else {
-                                                                                $this->debug('AJAX trigger (meta name="fragment" content="!") found. Queueing '.$redirectURL);
+                                                                $this->requests[$orig]['body'] = $request->getResponseBody();
-                                                                                $this->redirectQueue[$orig] = $redirectURL;
+                                                                $_header_only_type = false;
-                                                                        }
+                                                        }
-                                                                }
+                                                        $this->requests[$orig]['effective_url'] = $request->getResponseInfo('effective_url');
-                                                        }
+                                                        $this->requests[$orig]['status_code'] = $status_code = $request->getResponseCode();
-                                                        //die($url.' -multi- '.$request->getResponseInfo('effective_url'));
+                                                        // is redirect?
-                                                        $pool->detach($request);
+                                                        if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && $request->getResponseHeader('location')) {
-                                                        unset($this->requests[$orig]['httpRequest'], $request);
+                                                                $redirectURL = $request->getResponseHeader('location');
-                                                        /*
+                                                                if (!preg_match('!^https?://!i', $redirectURL)) {
-                                                        if ($this->minimiseMemoryUse) {
+                                                                        $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
-                                                                if ($this->cache($url)) {
+                                                                }
-                                                                        unset($this->requests[$url]);
+                                                                if ($this->validateURL($redirectURL)) {
-                                                                }
+                                                                        $this->debug('Redirect detected. Valid URL: '.$redirectURL);
-                                                        }
+                                                                        // store any cookies
-                                                        */
+                                                                        $cookies = $request->getResponseHeader('set-cookie');
-                                                }
+                                                                        if ($cookies && !is_array($cookies)) $cookies = array($cookies);
-                                        }
+                                                                        if ($cookies) $this->cookieJar->storeCookies($url, $cookies);
-                                }
+                                                                        $this->redirectQueue[$orig] = $redirectURL;
-                        } catch (HttpException $e) {
+                                                                } else {
-                                $this->debug($e);
+                                                                        $this->debug('Redirect detected. Invalid URL: '.$redirectURL);
-                                return false;
+                                                                }
-                        }
+                                                        } elseif (!$_header_only_type && $request->getMethod() === HttpRequest::METH_HEAD) {
-                }
+                                                                // the response content-type did not match our 'header only' types, 
-                
+                                                                // but we'd issued a HEAD request because we assumed it would. So
-                //////////////////////////////////////////////////////////
+                                                                // let's queue a proper GET request for this item...
-                // parallel (curl_multi_*)
+                                                                $this->debug('Wrong guess at content-type, queing GET request');
-                elseif ($this->method == self::METHOD_CURL_MULTI) {
+                                                                $this->requests[$orig]['wrongGuess'] = true;
-                        $this->debug('Starting parallel fetch (curl_multi_*)');
+                                                                $this->redirectQueue[$orig] = $this->requests[$orig]['effective_url'];
-                        while (count($urls) > 0) {
+                                                        } elseif (strpos($this->requests[$orig]['effective_url'], '_escaped_fragment_') === false) {
-                                $this->debug('Processing set of '.min($this->maxParallelRequests, count($urls)));
+                                                                // check for <meta name='fragment' content='!'/>
-                                $subset = array_splice($urls, 0, $this->maxParallelRequests);
+                                                                // for AJAX sites, e.g. Blogger with its dynamic views templates.
-                                $pool = new RollingCurl(array($this, 'handleCurlResponse'));
+                                                                // Based on Google's spec: https://developers.google.com/webmasters/ajax-crawling/docs/specification
-                                $pool->window_size = count($subset);            
+                                                                if (isset($this->requests[$orig]['body'])) {
-                                
+                                                                        $redirectURL = $this->getRedirectURLfromHTML($this->requests[$orig]['effective_url'], substr($this->requests[$orig]['body'], 0, 4000));
-                                foreach ($subset as $orig => $url) {
+                                                                        if ($redirectURL) {
-                                        if (!$isRedirect) $orig = $url;
+                                                                                $this->redirectQueue[$orig] = $redirectURL;
-                                        unset($this->redirectQueue[$orig]);
+                                                                        }
-                                        $this->debug("...$url");
+                                                                }
-                                        if (!$isRedirect && isset($this->requests[$url])) {
+                                                        }
-                                                $this->debug("......in memory");
+                                                        //die($url.' -multi- '.$request->getResponseInfo('effective_url'));
-                                        /*
+                                                        $pool->detach($request);
-                                        } elseif ($this->isCached($url)) {
+                                                        unset($this->requests[$orig]['httpRequest'], $request);
-                                                $this->debug("......is cached");
+                                                        /*
-                                                if (!$this->minimiseMemoryUse) {
+                                                        if ($this->minimiseMemoryUse) {
-                                                        $this->requests[$url] = $this->getCached($url);
+                                                                if ($this->cache($url)) {
-                                                }
+                                                                        unset($this->requests[$url]);
-                                        */
+                                                                }
-                                        } else {
+                                                        }
-                                                $this->debug("......adding to pool");
+                                                        */
-                                                $req_url = $this->rewriteUrls($url);
+                                                }
-                                                $req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url;
+                                        }
-                                                $req_url = $this->removeFragment($req_url);
+                                }
-                                                if (!empty($this->headerOnlyTypes) && !isset($this->requests[$orig]['wrongGuess']) && $this->possibleUnsupportedType($req_url)) {
+                        } catch (HttpException $e) {
-                                                        $_meth = 'HEAD';
+                                $this->debug($e);
-                                                } else {
+                                return false;
-                                                        $_meth = 'GET';
+                        }
-                                                        unset($this->requests[$orig]['wrongGuess']);
+                }
-                                                }                                               
+                
-                                                $headers = array();
+                //////////////////////////////////////////////////////////
-                                                //$headers[] = 'User-Agent: '.$this->userAgent;
+                // parallel (curl_multi_*)
-                                                $headers[] = $this->getUserAgent($req_url);
+                elseif ($this->method == self::METHOD_CURL_MULTI) {
-                                                // add referer for picky sites
+                        $this->debug('Starting parallel fetch (curl_multi_*)');
-                                                $headers[] = 'Referer: '.$this->referer;
+                        while (count($urls) > 0) {
-                                                // send cookies, if we have any
+                                $this->debug('Processing set of '.min($this->maxParallelRequests, count($urls)));
-                                                if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
+                                $subset = array_splice($urls, 0, $this->maxParallelRequests);
-                                                        $this->debug("......sending cookies: $cookies");
+                                $pool = new RollingCurl(array($this, 'handleCurlResponse'));
-                                                        $headers[] = 'Cookie: '.$cookies;
+                                $pool->window_size = count($subset);            
-                                                }
+                                
-                                                $httpRequest = new RollingCurlRequest($req_url, $_meth, null, $headers, array(
+                                foreach ($subset as $orig => $url) {
-                                                        CURLOPT_CONNECTTIMEOUT => $this->requestOptions['timeout'],
+                                        if (!$isRedirect) $orig = $url;
-                                                        CURLOPT_TIMEOUT => $this->requestOptions['timeout']
+                                        unset($this->redirectQueue[$orig]);
-                                                        ));
+                                        $this->debug("...$url");
-                                                $httpRequest->set_original_url($orig);
+                                        if (!$isRedirect && isset($this->requests[$url])) {
-                                                $this->requests[$orig] = array('headers'=>null, 'body'=>null, 'httpRequest'=>$httpRequest);
+                                                $this->debug("......in memory");
-                                                $this->requests[$orig]['original_url'] = $orig; // TODO: is this needed anymore?
+                                        /*
-                                                $pool->add($httpRequest);
+                                        } elseif ($this->isCached($url)) {
-                                        }
+                                                $this->debug("......is cached");
-                                }
+                                                if (!$this->minimiseMemoryUse) {
-                                // did we get anything into the pool?
+                                                        $this->requests[$url] = $this->getCached($url);
-                                if (count($pool) > 0) {
+                                                }
-                                        $this->debug('Sending request...');
+                                        */
-                                        $pool->execute(); // this will call handleCurlResponse() and populate $this->requests[$orig]
+                                        } else {
-                                        $this->debug('Received responses');
+                                                $this->debug("......adding to pool");
-                                        foreach($subset as $orig => $url) {
+                                                $req_url = $this->rewriteUrls($url);
-                                                if (!$isRedirect) $orig = $url;
+                                                $req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url;
-                                                // $this->requests[$orig]['headers']
+                                                $req_url = $this->removeFragment($req_url);
-                                                // $this->requests[$orig]['body']
+                                                if (!empty($this->headerOnlyTypes) && !isset($this->requests[$orig]['wrongGuess']) && $this->possibleUnsupportedType($req_url)) {
-                                                // $this->requests[$orig]['effective_url']
+                                                        $_meth = 'HEAD';
-                                                // check content type
+                                                } else {
-                                                if ($this->headerOnlyType($this->requests[$orig]['headers'])) {
+                                                        $_meth = 'GET';
-                                                        $this->requests[$orig]['body'] = '';
+                                                        unset($this->requests[$orig]['wrongGuess']);
-                                                        $_header_only_type = true;
+                                                }                                               
-                                                        $this->debug('Header only type returned');
+                                                $headers = array();
-                                                } else {
+                                                //$headers[] = 'User-Agent: '.$this->userAgent;
-                                                        $_header_only_type = false;
+                                                $headers[] = $this->getUserAgent($req_url);
-                                                }
+                                                // add referer for picky sites
-                                                $status_code = $this->requests[$orig]['status_code'];
+                                                $headers[] = 'Referer: '.$this->referer;
-                                                if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) {
+                                                // send cookies, if we have any
-                                                        $redirectURL = $this->requests[$orig]['location'];
+                                                if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
-                                                        if (!preg_match('!^https?://!i', $redirectURL)) {
+                                                        $this->debug("......sending cookies: $cookies");
-                                                                $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
+                                                        $headers[] = 'Cookie: '.$cookies;
-                                                        }
+                                                }
-                                                        if ($this->validateURL($redirectURL)) {
+                                                $httpRequest = new RollingCurlRequest($req_url, $_meth, null, $headers, array(
-                                                                $this->debug('Redirect detected. Valid URL: '.$redirectURL);
+                                                        CURLOPT_CONNECTTIMEOUT => $this->requestOptions['timeout'],
-                                                                // store any cookies
+                                                        CURLOPT_TIMEOUT => $this->requestOptions['timeout']
-                                                                $cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']);
+                                                        ));
-                                                                if (!empty($cookies)) $this->cookieJar->storeCookies($url, $cookies);                                                   
+                                                $httpRequest->set_original_url($orig);
-                                                                $this->redirectQueue[$orig] = $redirectURL;
+                                                $this->requests[$orig] = array('headers'=>null, 'body'=>null, 'httpRequest'=>$httpRequest);
-                                                        } else {
+                                                $this->requests[$orig]['original_url'] = $orig; // TODO: is this needed anymore?
-                                                                $this->debug('Redirect detected. Invalid URL: '.$redirectURL);
+                                                $pool->add($httpRequest);
-                                                        }
+                                        }
-                                                } elseif (!$_header_only_type && $this->requests[$orig]['method'] == 'HEAD') {
+                                }
-                                                        // the response content-type did not match our 'header only' types, 
+                                // did we get anything into the pool?
-                                                        // but we'd issues a HEAD request because we assumed it would. So
+                                if (count($pool) > 0) {
-                                                        // let's queue a proper GET request for this item...
+                                        $this->debug('Sending request...');
-                                                        $this->debug('Wrong guess at content-type, queing GET request');
+                                        $pool->execute(); // this will call handleCurlResponse() and populate $this->requests[$orig]
-                                                        $this->requests[$orig]['wrongGuess'] = true;
+                                        $this->debug('Received responses');
-                                                        $this->redirectQueue[$orig] = $this->requests[$orig]['effective_url'];
+                                        foreach($subset as $orig => $url) {
-                                                } elseif (strpos($this->requests[$orig]['effective_url'], '_escaped_fragment_') === false) {
+                                                if (!$isRedirect) $orig = $url;
-                                                        // check for <meta name='fragment' content='!'/>
+                                                // $this->requests[$orig]['headers']
-                                                        // for AJAX sites, e.g. Blogger with its dynamic views templates.
+                                                // $this->requests[$orig]['body']
-                                                        // Based on Google's spec: https://developers.google.com/webmasters/ajax-crawling/docs/specification
+                                                // $this->requests[$orig]['effective_url']
-                                                        if (isset($this->requests[$orig]['body'])) {
+                                                // check content type
-                                                                $redirectURL = $this->getUglyURL($this->requests[$orig]['effective_url'], substr($this->requests[$orig]['body'], 0, 4000));
+                                                if ($this->headerOnlyType($this->requests[$orig]['headers'])) {
-                                                                if ($redirectURL) {
+                                                        $this->requests[$orig]['body'] = '';
-                                                                        $this->debug('AJAX trigger (meta name="fragment" content="!") found. Queueing '.$redirectURL);
+                                                        $_header_only_type = true;
-                                                                        $this->redirectQueue[$orig] = $redirectURL;
+                                                        $this->debug('Header only type returned');
-                                                                }
+                                                } else {
-                                                        }
+                                                        $_header_only_type = false;
-                                                }
+                                                }
-                                                // die($url.' -multi- '.$request->getResponseInfo('effective_url'));
+                                                $status_code = $this->requests[$orig]['status_code'];
-                                                unset($this->requests[$orig]['httpRequest'], $this->requests[$orig]['method']);
+                                                if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) {
-                                        }
+                                                        $redirectURL = $this->requests[$orig]['location'];
-                                }
+                                                        if (!preg_match('!^https?://!i', $redirectURL)) {
-                        }
+                                                                $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
-                }
+                                                        }
+                                                        if ($this->validateURL($redirectURL)) {
-                //////////////////////////////////////////////////////
+                                                                $this->debug('Redirect detected. Valid URL: '.$redirectURL);
-                // sequential (file_get_contents)
+                                                                // store any cookies
-                else {
+                                                                $cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']);
-                        $this->debug('Starting sequential fetch (file_get_contents)');
+                                                                if (!empty($cookies)) $this->cookieJar->storeCookies($url, $cookies);                                                   
-                        $this->debug('Processing set of '.count($urls));
+                                                                $this->redirectQueue[$orig] = $redirectURL;
-                        foreach ($urls as $orig => $url) {
+                                                        } else {
-                                if (!$isRedirect) $orig = $url;
+                                                                $this->debug('Redirect detected. Invalid URL: '.$redirectURL);
-                                unset($this->redirectQueue[$orig]);
+                                                        }
-                                $this->debug("...$url");
+                                                } elseif (!$_header_only_type && $this->requests[$orig]['method'] == 'HEAD') {
-                                if (!$isRedirect && isset($this->requests[$url])) {
+                                                        // the response content-type did not match our 'header only' types, 
-                                        $this->debug("......in memory");
+                                                        // but we'd issued a HEAD request because we assumed it would. So
-                                /*
+                                                        // let's queue a proper GET request for this item...
-                                } elseif ($this->isCached($url)) {
+                                                        $this->debug('Wrong guess at content-type, queing GET request');
-                                        $this->debug("......is cached");
+                                                        $this->requests[$orig]['wrongGuess'] = true;
-                                        if (!$this->minimiseMemoryUse) {
+                                                        $this->redirectQueue[$orig] = $this->requests[$orig]['effective_url'];
-                                                $this->requests[$url] = $this->getCached($url);
+                                                } elseif (strpos($this->requests[$orig]['effective_url'], '_escaped_fragment_') === false) {
-                                        }
+                                                        // check for <meta name='fragment' content='!'/>
-                                */
+                                                        // for AJAX sites, e.g. Blogger with its dynamic views templates.
-                                } else {
+                                                        // Based on Google's spec: https://developers.google.com/webmasters/ajax-crawling/docs/specification
-                                        $this->debug("Sending request for $url");
+                                                        if (isset($this->requests[$orig]['body'])) {
-                                        $this->requests[$orig]['original_url'] = $orig;
+                                                                $redirectURL = $this->getRedirectURLfromHTML($this->requests[$orig]['effective_url'], substr($this->requests[$orig]['body'], 0, 4000));
-                                        $req_url = $this->rewriteUrls($url);
+                                                                if ($redirectURL) {
-                                        $req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url;
+                                                                        $this->redirectQueue[$orig] = $redirectURL;
-                                        $req_url = $this->removeFragment($req_url);
+                                                                }
-                                        // send cookies, if we have any
+                                                        }
-                                        $httpContext = $this->httpContext;
+                                                }
-                                        $httpContext['http']['header'] .= $this->getUserAgent($req_url)."\r\n";
+                                                // die($url.' -multi- '.$request->getResponseInfo('effective_url'));
-                                        // add referer for picky sites
+                                                unset($this->requests[$orig]['httpRequest'], $this->requests[$orig]['method']);
-                                        $httpContext['http']['header'] .= 'Referer: '.$this->referer."\r\n";
+                                        }
-                                        if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
+                                }
-                                                $this->debug("......sending cookies: $cookies");
+                        }
-                                                $httpContext['http']['header'] .= 'Cookie: '.$cookies."\r\n";
+                }
-                                        }
-                                        if (false !== ($html = @file_get_contents($req_url, false, stream_context_create($httpContext)))) {
+                //////////////////////////////////////////////////////
-                                                $this->debug('Received response');
+                // sequential (file_get_contents)
-                                                // get status code
+                else {
-                                                if (!isset($http_response_header[0]) || !preg_match('!^HTTP/\d+\.\d+\s+(\d+)!', trim($http_response_header[0]), $match)) {
+                        $this->debug('Starting sequential fetch (file_get_contents)');
-                                                        $this->debug('Error: no status code found');
+                        $this->debug('Processing set of '.count($urls));
-                                                        // TODO: handle error - no status code
+                        foreach ($urls as $orig => $url) {
-                                                } else {
+                                if (!$isRedirect) $orig = $url;
-                                                        $this->requests[$orig]['headers'] = $this->headersToString($http_response_header, false);
+                                unset($this->redirectQueue[$orig]);
-                                                        // check content type
+                                $this->debug("...$url");
-                                                        if ($this->headerOnlyType($this->requests[$orig]['headers'])) {
+                                if (!$isRedirect && isset($this->requests[$url])) {
-                                                                $this->requests[$orig]['body'] = '';
+                                        $this->debug("......in memory");
-                                                        } else {
+                                /*
-                                                                $this->requests[$orig]['body'] = $html;
+                                } elseif ($this->isCached($url)) {
-                                                        }
+                                        $this->debug("......is cached");
-                                                        $this->requests[$orig]['effective_url'] = $req_url;
+                                        if (!$this->minimiseMemoryUse) {
-                                                        $this->requests[$orig]['status_code'] = $status_code = (int)$match[1];
+                                                $this->requests[$url] = $this->getCached($url);
-                                                        unset($match);
+                                        }
-                                                        // handle redirect
+                                */
-                                                        if (preg_match('/^Location:(.*?)$/mi', $this->requests[$orig]['headers'], $match)) {
+                                } else {
-                                                                $this->requests[$orig]['location'] =  trim($match[1]);
+                                        $this->debug("Sending request for $url");
-                                                        }
+                                        $this->requests[$orig]['original_url'] = $orig;
-                                                        if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) {
+                                        $req_url = $this->rewriteUrls($url);
-                                                                $redirectURL = $this->requests[$orig]['location'];
+                                        $req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url;
-                                                                if (!preg_match('!^https?://!i', $redirectURL)) {
+                                        $req_url = $this->removeFragment($req_url);
-                                                                        $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
+                                        // send cookies, if we have any
-                                                                }
+                                        $httpContext = $this->httpContext;
-                                                                if ($this->validateURL($redirectURL)) {
+                                        $httpContext['http']['header'] .= $this->getUserAgent($req_url)."\r\n";
-                                                                        $this->debug('Redirect detected. Valid URL: '.$redirectURL);
+                                        // add referer for picky sites
-                                                                        // store any cookies
+                                        $httpContext['http']['header'] .= 'Referer: '.$this->referer."\r\n";
-                                                                        $cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']);
+                                        if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
-                                                                        if (!empty($cookies)) $this->cookieJar->storeCookies($url, $cookies);
+                                                $this->debug("......sending cookies: $cookies");
-                                                                        $this->redirectQueue[$orig] = $redirectURL;
+                                                $httpContext['http']['header'] .= 'Cookie: '.$cookies."\r\n";
-                                                                } else {
+                                        }
-                                                                        $this->debug('Redirect detected. Invalid URL: '.$redirectURL);
+                                        if (false !== ($html = @file_get_contents($req_url, false, stream_context_create($httpContext)))) {
-                                                                }
+                                                $this->debug('Received response');
-                                                        } elseif (strpos($this->requests[$orig]['effective_url'], '_escaped_fragment_') === false) {
+                                                // get status code
-                                                                // check for <meta name='fragment' content='!'/>
+                                                if (!isset($http_response_header[0]) || !preg_match('!^HTTP/\d+\.\d+\s+(\d+)!', trim($http_response_header[0]), $match)) {
-                                                                // for AJAX sites, e.g. Blogger with its dynamic views templates.
+                                                        $this->debug('Error: no status code found');
-                                                                // Based on Google's spec: https://developers.google.com/webmasters/ajax-crawling/docs/specification
+                                                        // TODO: handle error - no status code
-                                                                if (isset($this->requests[$orig]['body'])) {
+                                                } else {
-                                                                        $redirectURL = $this->getUglyURL($this->requests[$orig]['effective_url'], substr($this->requests[$orig]['body'], 0, 4000));
+                                                        $this->requests[$orig]['headers'] = $this->headersToString($http_response_header, false);
-                                                                        if ($redirectURL) {
+                                                        // check content type
-                                                                                $this->debug('AJAX trigger (meta name="fragment" content="!") found. Queueing '.$redirectURL);
+                                                        if ($this->headerOnlyType($this->requests[$orig]['headers'])) {
-                                                                                $this->redirectQueue[$orig] = $redirectURL;
+                                                                $this->requests[$orig]['body'] = '';
-                                                                        }
+                                                        } else {
-                                                                }
+                                                                $this->requests[$orig]['body'] = $html;
-                                                        }
+                                                        }
-                                                }
+                                                        $this->requests[$orig]['effective_url'] = $req_url;
-                                        } else {
+                                                        $this->requests[$orig]['status_code'] = $status_code = (int)$match[1];
-                                                $this->debug('Error retrieving URL');
+                                                        unset($match);
-                                                //print_r($req_url);
+                                                        // handle redirect
-                                                //print_r($http_response_header);
+                                                        if (preg_match('/^Location:(.*?)$/mi', $this->requests[$orig]['headers'], $match)) {
-                                                //print_r($html);
+                                                                $this->requests[$orig]['location'] =  trim($match[1]);
-                                                
+                                                        }
-                                                // TODO: handle error - failed to retrieve URL
+                                                        if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) {
-                                        }
+                                                                $redirectURL = $this->requests[$orig]['location'];
-                                }
+                                                                if (!preg_match('!^https?://!i', $redirectURL)) {
-                        }
+                                                                        $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
-                }
+                                                                }
-        }
+                                                                if ($this->validateURL($redirectURL)) {
-        
+                                                                        $this->debug('Redirect detected. Valid URL: '.$redirectURL);
-        public function handleCurlResponse($response, $info, $request) {
+                                                                        // store any cookies
-                $orig = $request->url_original;
+                                                                        $cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']);
-                $this->requests[$orig]['headers'] = substr($response, 0, $info['header_size']);
+                                                                        if (!empty($cookies)) $this->cookieJar->storeCookies($url, $cookies);
-                $this->requests[$orig]['body'] = substr($response, $info['header_size']);
+                                                                        $this->redirectQueue[$orig] = $redirectURL;
-                $this->requests[$orig]['method'] = $request->method;
+                                                                } else {
-                $this->requests[$orig]['effective_url'] = $info['url'];
+                                                                        $this->debug('Redirect detected. Invalid URL: '.$redirectURL);
-                $this->requests[$orig]['status_code'] = (int)$info['http_code'];
+                                                                }
-                if (preg_match('/^Location:(.*?)$/mi', $this->requests[$orig]['headers'], $match)) {
+                                                        } elseif (strpos($this->requests[$orig]['effective_url'], '_escaped_fragment_') === false) {
-                        $this->requests[$orig]['location'] =  trim($match[1]);
+                                                                // check for <meta name='fragment' content='!'/>
-                }
+                                                                // for AJAX sites, e.g. Blogger with its dynamic views templates.
-        }
+                                                                // Based on Google's spec: https://developers.google.com/webmasters/ajax-crawling/docs/specification
-        
+                                                                if (isset($this->requests[$orig]['body'])) {
-        protected function headersToString(array $headers, $associative=true) {
+                                                                        $redirectURL = $this->getRedirectURLfromHTML($this->requests[$orig]['effective_url'], substr($this->requests[$orig]['body'], 0, 4000));
-                if (!$associative) {
+                                                                        if ($redirectURL) {
-                        return implode("\n", $headers);
+                                                                                $this->redirectQueue[$orig] = $redirectURL;
-                } else {
+                                                                        }
-                        $str = '';
+                                                                }
-                        foreach ($headers as $key => $val) {
+                                                        }
-                                if (is_array($val)) {
+                                                }
-                                        foreach ($val as $v) $str .= "$key: $v\n";
+                                        } else {
-                                } else {
+                                                $this->debug('Error retrieving URL');
-                                        $str .= "$key: $val\n";
+                                                //print_r($req_url);
-                                }
+                                                //print_r($http_response_header);
-                        }
+                                                //print_r($html);
-                        return rtrim($str);
+                                                
-                }
+                                                // TODO: handle error - failed to retrieve URL
-        }
+                                        }
-        
+                                }
-        public function get($url, $remove=false, $gzdecode=true) {
+                        }
-                $url = "$url";
+                }
-                if (isset($this->requests[$url]) && isset($this->requests[$url]['body'])) {
+        }
-                        $this->debug("URL already fetched - in memory ($url, effective: {$this->requests[$url]['effective_url']})");
+        
-                        $response = $this->requests[$url];
+        public function handleCurlResponse($response, $info, $request) {
-                /*
+                $orig = $request->url_original;
-                } elseif ($this->isCached($url)) {
+                $this->requests[$orig]['headers'] = substr($response, 0, $info['header_size']);
-                        $this->debug("URL already fetched - in disk cache ($url)");
+                $this->requests[$orig]['body'] = substr($response, $info['header_size']);
-                        $response = $this->getCached($url);
+                $this->requests[$orig]['method'] = $request->method;
-                        $this->requests[$url] = $response;
+                $this->requests[$orig]['effective_url'] = $info['url'];
-                */
+                $this->requests[$orig]['status_code'] = (int)$info['http_code'];
-                } else {
+                if (preg_match('/^Location:(.*?)$/mi', $this->requests[$orig]['headers'], $match)) {
-                        $this->debug("Fetching URL ($url)");
+                        $this->requests[$orig]['location'] =  trim($match[1]);
-                        $this->fetchAll(array($url));
+                }
-                        if (isset($this->requests[$url]) && isset($this->requests[$url]['body'])) {
+        }
-                                $response = $this->requests[$url];
+        
-                        } else {
+        protected function headersToString(array $headers, $associative=true) {
-                                $this->debug("Request failed");
+                if (!$associative) {
-                                $response = false;
+                        return implode("\n", $headers);
-                        }
+                } else {
-                }
+                        $str = '';
-                /*
+                        foreach ($headers as $key => $val) {
-                if ($this->minimiseMemoryUse && $response) {
+                                if (is_array($val)) {
-                        $this->cache($url);
+                                        foreach ($val as $v) $str .= "$key: $v\n";
-                        unset($this->requests[$url]);
+                                } else {
-                }
+                                        $str .= "$key: $val\n";
-                */
+                                }
-                if ($remove && $response) unset($this->requests[$url]);
+                        }
-                if ($gzdecode && stripos($response['headers'], 'Content-Encoding: gzip')) {
+                        return rtrim($str);
-                        if ($html = gzdecode($response['body'])) {
+                }
-                                $response['body'] = $html;
+        }
-                        }
+        
-                }
+        public function get($url, $remove=false, $gzdecode=true) {
-                return $response;
+                $url = "$url";
-        }
+                if (isset($this->requests[$url]) && isset($this->requests[$url]['body'])) {
-        
+                        $this->debug("URL already fetched - in memory ($url, effective: {$this->requests[$url]['effective_url']})");
-        public function parallelSupport() {
+                        $response = $this->requests[$url];
-                return class_exists('HttpRequestPool') || function_exists('curl_multi_init');
+                /*
-        }
+                } elseif ($this->isCached($url)) {
-        
+                        $this->debug("URL already fetched - in disk cache ($url)");
-        private function headerOnlyType($headers) {
+                        $response = $this->getCached($url);
-                if (preg_match('!^Content-Type:\s*(([a-z-]+)/([^;\r\n ]+))!im', $headers, $match)) {
+                        $this->requests[$url] = $response;
-                        // look for full mime type (e.g. image/jpeg) or just type (e.g. image)
+                */
-                        $match[1] = strtolower(trim($match[1]));
+                } else {
-                        $match[2] = strtolower(trim($match[2]));
+                        $this->debug("Fetching URL ($url)");
-                        foreach (array($match[1], $match[2]) as $mime) {
+                        $this->fetchAll(array($url));
-                                if (in_array($mime, $this->headerOnlyTypes)) return true;
+                        if (isset($this->requests[$url]) && isset($this->requests[$url]['body'])) {
-                        }
+                                $response = $this->requests[$url];
-                }
+                        } else {
-                return false;
+                                $this->debug("Request failed");
-        }
+                                $response = false;
-        
+                        }
-        private function possibleUnsupportedType($url) {
+                }
-                $path = @parse_url($url, PHP_URL_PATH);
+                /*
-                if ($path && strpos($path, '.') !== false) {
+                if ($this->minimiseMemoryUse && $response) {
-                        $ext = strtolower(trim(pathinfo($path, PATHINFO_EXTENSION)));
+                        $this->cache($url);
-                        return in_array($ext, $this->headerOnlyClues);
+                        unset($this->requests[$url]);
-                }
+                }
-                return false;
+                */
-        }
+                if ($remove && $response) unset($this->requests[$url]);
-}
+                if ($gzdecode && stripos($response['headers'], 'Content-Encoding: gzip')) {
+                        if ($html = gzdecode($response['body'])) {
-// gzdecode from http://www.php.net/manual/en/function.gzdecode.php#82930
+                                $response['body'] = $html;
-if (!function_exists('gzdecode')) {
+                        }
-        function gzdecode($data,&$filename='',&$error='',$maxlength=null) 
+                }
-        {
+                return $response;
-                $len = strlen($data);
+        }
-                if ($len < 18 || strcmp(substr($data,0,2),"\x1f\x8b")) {
+        
-                        $error = "Not in GZIP format.";
+        public function parallelSupport() {
-                        return null;  // Not GZIP format (See RFC 1952)
+                return class_exists('HttpRequestPool') || function_exists('curl_multi_init');
-                }
+        }
-                $method = ord(substr($data,2,1));  // Compression method
+        
-                $flags  = ord(substr($data,3,1));  // Flags
+        private function headerOnlyType($headers) {
-                if ($flags & 31 != $flags) {
+                if (preg_match('!^Content-Type:\s*(([a-z-]+)/([^;\r\n ]+))!im', $headers, $match)) {
-                        $error = "Reserved bits not allowed.";
+                        // look for full mime type (e.g. image/jpeg) or just type (e.g. image)
-                        return null;
+                        $match[1] = strtolower(trim($match[1]));
-                }
+                        $match[2] = strtolower(trim($match[2]));
-                // NOTE: $mtime may be negative (PHP integer limitations)
+                        foreach (array($match[1], $match[2]) as $mime) {
-                $mtime = unpack("V", substr($data,4,4));
+                                if (in_array($mime, $this->headerOnlyTypes)) return true;
-                $mtime = $mtime[1];
+                        }
-                $xfl   = substr($data,8,1);
+                }
-                $os    = substr($data,8,1);
+                return false;
-                $headerlen = 10;
+        }
-                $extralen  = 0;
+        
-                $extra     = "";
+        private function possibleUnsupportedType($url) {
-                if ($flags & 4) {
+                $path = @parse_url($url, PHP_URL_PATH);
-                        // 2-byte length prefixed EXTRA data in header
+                if ($path && strpos($path, '.') !== false) {
-                        if ($len - $headerlen - 2 < 8) {
+                        $ext = strtolower(trim(pathinfo($path, PATHINFO_EXTENSION)));
-                                return false;  // invalid
+                        return in_array($ext, $this->headerOnlyClues);
-                        }
+                }
-                        $extralen = unpack("v",substr($data,8,2));
+                return false;
-                        $extralen = $extralen[1];
+        }
-                        if ($len - $headerlen - 2 - $extralen < 8) {
+}
-                                return false;  // invalid
-                        }
+// gzdecode from http://www.php.net/manual/en/function.gzdecode.php#82930
-                        $extra = substr($data,10,$extralen);
+if (!function_exists('gzdecode')) {
-                        $headerlen += 2 + $extralen;
+        function gzdecode($data,&$filename='',&$error='',$maxlength=null) 
-                }
+        {
-                $filenamelen = 0;
+                $len = strlen($data);
-                $filename = "";
+                if ($len < 18 || strcmp(substr($data,0,2),"\x1f\x8b")) {
-                if ($flags & 8) {
+                        $error = "Not in GZIP format.";
-                        // C-style string
+                        return null;  // Not GZIP format (See RFC 1952)
-                        if ($len - $headerlen - 1 < 8) {
+                }
-                                return false; // invalid
+                $method = ord(substr($data,2,1));  // Compression method
-                        }
+                $flags  = ord(substr($data,3,1));  // Flags
-                        $filenamelen = strpos(substr($data,$headerlen),chr(0));
+                if ($flags & 31 != $flags) {
-                        if ($filenamelen === false || $len - $headerlen - $filenamelen - 1 < 8) {
+                        $error = "Reserved bits not allowed.";
-                                return false; // invalid
+                        return null;
-                        }
+                }
-                        $filename = substr($data,$headerlen,$filenamelen);
+                // NOTE: $mtime may be negative (PHP integer limitations)
-                        $headerlen += $filenamelen + 1;
+                $mtime = unpack("V", substr($data,4,4));
-                }
+                $mtime = $mtime[1];
-                $commentlen = 0;
+                $xfl   = substr($data,8,1);
-                $comment = "";
+                $os    = substr($data,8,1);
-                if ($flags & 16) {
+                $headerlen = 10;
-                        // C-style string COMMENT data in header
+                $extralen  = 0;
-                        if ($len - $headerlen - 1 < 8) {
+                $extra     = "";
-                                return false;    // invalid
+                if ($flags & 4) {
-                        }
+                        // 2-byte length prefixed EXTRA data in header
-                        $commentlen = strpos(substr($data,$headerlen),chr(0));
+                        if ($len - $headerlen - 2 < 8) {
-                        if ($commentlen === false || $len - $headerlen - $commentlen - 1 < 8) {
+                                return false;  // invalid
-                                return false;    // Invalid header format
+                        }
-                        }
+                        $extralen = unpack("v",substr($data,8,2));
-                        $comment = substr($data,$headerlen,$commentlen);
+                        $extralen = $extralen[1];
-                        $headerlen += $commentlen + 1;
+                        if ($len - $headerlen - 2 - $extralen < 8) {
-                }
+                                return false;  // invalid
-                $headercrc = "";
+                        }
-                if ($flags & 2) {
+                        $extra = substr($data,10,$extralen);
-                        // 2-bytes (lowest order) of CRC32 on header present
+                        $headerlen += 2 + $extralen;
-                        if ($len - $headerlen - 2 < 8) {
+                }
-                                return false;    // invalid
+                $filenamelen = 0;
-                        }
+                $filename = "";
-                        $calccrc = crc32(substr($data,0,$headerlen)) & 0xffff;
+                if ($flags & 8) {
-                        $headercrc = unpack("v", substr($data,$headerlen,2));
+                        // C-style string
-                        $headercrc = $headercrc[1];
+                        if ($len - $headerlen - 1 < 8) {
-                        if ($headercrc != $calccrc) {
+                                return false; // invalid
-                                $error = "Header checksum failed.";
+                        }
-                                return false;    // Bad header CRC
+                        $filenamelen = strpos(substr($data,$headerlen),chr(0));
-                        }
+                        if ($filenamelen === false || $len - $headerlen - $filenamelen - 1 < 8) {
-                        $headerlen += 2;
+                                return false; // invalid
-                }
+                        }
-                // GZIP FOOTER
+                        $filename = substr($data,$headerlen,$filenamelen);
-                $datacrc = unpack("V",substr($data,-8,4));
+                        $headerlen += $filenamelen + 1;
-                $datacrc = sprintf('%u',$datacrc[1] & 0xFFFFFFFF);
+                }
-                $isize = unpack("V",substr($data,-4));
+                $commentlen = 0;
-                $isize = $isize[1];
+                $comment = "";
-                // decompression:
+                if ($flags & 16) {
-                $bodylen = $len-$headerlen-8;
+                        // C-style string COMMENT data in header
-                if ($bodylen < 1) {
+                        if ($len - $headerlen - 1 < 8) {
-                        // IMPLEMENTATION BUG!
+                                return false;    // invalid
-                        return null;
+                        }
-                }
+                        $commentlen = strpos(substr($data,$headerlen),chr(0));
-                $body = substr($data,$headerlen,$bodylen);
+                        if ($commentlen === false || $len - $headerlen - $commentlen - 1 < 8) {
-                $data = "";
+                                return false;    // Invalid header format
-                if ($bodylen > 0) {
+                        }
-                        switch ($method) {
+                        $comment = substr($data,$headerlen,$commentlen);
-                        case 8:
+                        $headerlen += $commentlen + 1;
-                                // Currently the only supported compression method:
+                }
-                                $data = gzinflate($body,$maxlength);
+                $headercrc = "";
-                                break;
+                if ($flags & 2) {
-                        default:
+                        // 2-bytes (lowest order) of CRC32 on header present
-                                $error = "Unknown compression method.";
+                        if ($len - $headerlen - 2 < 8) {
-                                return false;
+                                return false;    // invalid
-                        }
+                        }
-                }  // zero-byte body content is allowed
+                        $calccrc = crc32(substr($data,0,$headerlen)) & 0xffff;
-                // Verifiy CRC32
+                        $headercrc = unpack("v", substr($data,$headerlen,2));
-                $crc   = sprintf("%u",crc32($data));
+                        $headercrc = $headercrc[1];
-                $crcOK = $crc == $datacrc;
+                        if ($headercrc != $calccrc) {
-                $lenOK = $isize == strlen($data);
+                                $error = "Header checksum failed.";
-                if (!$lenOK || !$crcOK) {
+                                return false;    // Bad header CRC
-                        $error = ( $lenOK ? '' : 'Length check FAILED. ') . ( $crcOK ? '' : 'Checksum FAILED.');
+                        }
-                        return false;
+                        $headerlen += 2;
-                }
+                }
-                return $data;
+                // GZIP FOOTER
-        }
+                $datacrc = unpack("V",substr($data,-8,4));
-}
+                $datacrc = sprintf('%u',$datacrc[1] & 0xFFFFFFFF);
-?>
-\ No newline at end of file
+                $isize = unpack("V",substr($data,-4));
+                $isize = $isize[1];
+                // decompression:
+                $bodylen = $len-$headerlen-8;
+                if ($bodylen < 1) {
+                        // IMPLEMENTATION BUG!
+                        return null;
+                }
+                $body = substr($data,$headerlen,$bodylen);
+                $data = "";
+                if ($bodylen > 0) {
+                        switch ($method) {
+                        case 8:
+                                // Currently the only supported compression method:
+                                $data = gzinflate($body,$maxlength);
+                                break;
+                        default:
+                                $error = "Unknown compression method.";
+                                return false;
+                        }
+                }  // zero-byte body content is allowed
+                // Verify CRC32
+                $crc   = sprintf("%u",crc32($data));
+                $crcOK = $crc == $datacrc;
+                $lenOK = $isize == strlen($data);
+                if (!$lenOK || !$crcOK) {
+                        $error = ( $lenOK ? '' : 'Length check FAILED. ') . ( $crcOK ? '' : 'Checksum FAILED.');
+                        return false;
+                }
+                return $data;
+        }
+}
+\ No newline at end of file
diff --git a/inc/3rdparty/libraries/humble-http-agent/SimplePie_HumbleHttpAgent.php b/inc/3rdparty/libraries/humble-http-agent/SimplePie_HumbleHttpAgent.php
index ecd46d5f..c524a1ee 100644
--- a/inc/3rdparty/libraries/humble-http-agent/SimplePie_HumbleHttpAgent.php
+++ b/inc/3rdparty/libraries/humble-http-agent/SimplePie_HumbleHttpAgent.php
@@ -1,79 +1,78 @@
-<?php
+<?php
-/**
+/**
- * Humble HTTP Agent extension for SimplePie_File
+ * Humble HTTP Agent extension for SimplePie_File
- * 
+ * 
- * This class is designed to extend and override SimplePie_File
+ * This class is designed to extend and override SimplePie_File
- * in order to prevent duplicate HTTP requests being sent out.
+ * in order to prevent duplicate HTTP requests being sent out.
- * The idea is to initialise an instance of Humble HTTP Agent
+ * The idea is to initialise an instance of Humble HTTP Agent
- * and attach it, to a static class variable, of this class.
+ * and attach it, to a static class variable, of this class.
- * SimplePie will then automatically initialise this class
+ * SimplePie will then automatically initialise this class
- * 
+ * 
- * @date 2011-02-28
+ * @date 2011-02-28
- */
+ */
-class SimplePie_HumbleHttpAgent extends SimplePie_File
+class SimplePie_HumbleHttpAgent extends SimplePie_File
-{
+{
-        protected static $agent;
+        protected static $agent;
-        var $url;
+        var $url;
-        var $useragent;
+        var $useragent;
-        var $success = true;
+        var $success = true;
-        var $headers = array();
+        var $headers = array();
-        var $body;
+        var $body;
-        var $status_code;
+        var $status_code;
-        var $redirects = 0;
+        var $redirects = 0;
-        var $error;
+        var $error;
-        var $method = SIMPLEPIE_FILE_SOURCE_NONE;
+        var $method = SIMPLEPIE_FILE_SOURCE_NONE;
-        public static function set_agent(HumbleHttpAgent $agent) {
+        public static function set_agent(HumbleHttpAgent $agent) {
-                self::$agent = $agent;
+                self::$agent = $agent;
-        }
+        }
-        
+        
-        public function __construct($url, $timeout = 10, $redirects = 5, $headers = null, $useragent = null, $force_fsockopen = false) {
+        public function __construct($url, $timeout = 10, $redirects = 5, $headers = null, $useragent = null, $force_fsockopen = false) {
-                if (class_exists('idna_convert'))
+                if (class_exists('idna_convert'))
-                {
+                {
-                        $idn = new idna_convert();
+                        $idn = new idna_convert();
-                        $parsed = SimplePie_Misc::parse_url($url);
+                        $parsed = SimplePie_Misc::parse_url($url);
-                        $url = SimplePie_Misc::compress_parse_url($parsed['scheme'], $idn->encode($parsed['authority']), $parsed['path'], $parsed['query'], $parsed['fragment']);
+                        $url = SimplePie_Misc::compress_parse_url($parsed['scheme'], $idn->encode($parsed['authority']), $parsed['path'], $parsed['query'], $parsed['fragment']);
-                }
+                }
-                $this->url = $url;
+                $this->url = $url;
-                $this->useragent = $useragent;
+                $this->useragent = $useragent;
-                if (preg_match('/^http(s)?:\/\//i', $url))
+                if (preg_match('/^http(s)?:\/\//i', $url))
-                {
+                {
-                        if (!is_array($headers))
+                        if (!is_array($headers))
-                        {
+                        {
-                                $headers = array();
+                                $headers = array();
-                        }
+                        }
-                        $this->method = SIMPLEPIE_FILE_SOURCE_REMOTE | SIMPLEPIE_FILE_SOURCE_CURL;
+                        $this->method = SIMPLEPIE_FILE_SOURCE_REMOTE | SIMPLEPIE_FILE_SOURCE_CURL;
-                        $headers2 = array();
+                        $headers2 = array();
-                        foreach ($headers as $key => $value) {
+                        foreach ($headers as $key => $value) {
-                                $headers2[] = "$key: $value";
+                                $headers2[] = "$key: $value";
-                        }
+                        }
-                        //TODO: allow for HTTP headers
+                        //TODO: allow for HTTP headers
-                        // curl_setopt($fp, CURLOPT_HTTPHEADER, $headers2);
+                        // curl_setopt($fp, CURLOPT_HTTPHEADER, $headers2);
-                        $response = self::$agent->get($url);
+                        $response = self::$agent->get($url);
-                        
+                        
-                        if ($response === false || !isset($response['status_code'])) {
+                        if ($response === false || !isset($response['status_code'])) {
-                                $this->error = 'failed to fetch URL';
+                                $this->error = 'failed to fetch URL';
-                                $this->success = false;
+                                $this->success = false;
-                        } else {
+                        } else {
-                                // The extra lines at the end are there to satisfy SimplePie's HTTP parser.
+                                // The extra lines at the end are there to satisfy SimplePie's HTTP parser.
-                                // The class expects a full HTTP message, whereas we're giving it only
+                                // The class expects a full HTTP message, whereas we're giving it only
-                                // headers - the new lines indicate the start of the body.
+                                // headers - the new lines indicate the start of the body.
-                                $parser = new SimplePie_HTTP_Parser($response['headers']."\r\n\r\n");
+                                $parser = new SimplePie_HTTP_Parser($response['headers']."\r\n\r\n");
-                                if ($parser->parse()) {
+                                if ($parser->parse()) {
-                                        $this->headers = $parser->headers;
+                                        $this->headers = $parser->headers;
-                                        //$this->body = $parser->body;
+                                        //$this->body = $parser->body;
-                                        $this->body = $response['body'];
+                                        $this->body = $response['body'];
-                                        $this->status_code = $parser->status_code;
+                                        $this->status_code = $parser->status_code;
-                                }
+                                }
-                        }
+                        }
-                }
+                }
-                else
+                else
-                {
+                {
-                        $this->error = 'invalid URL';
+                        $this->error = 'invalid URL';
-                        $this->success = false;
+                        $this->success = false;
-                }
+                }
-        }
+        }
-}
+}
+\ No newline at end of file
-?>
-\ No newline at end of file
diff --git a/inc/3rdparty/libraries/language-detect/LanguageDetect.php b/inc/3rdparty/libraries/language-detect/LanguageDetect.php
index 09b11546..382d869c 100644
--- a/inc/3rdparty/libraries/language-detect/LanguageDetect.php
+++ b/inc/3rdparty/libraries/language-detect/LanguageDetect.php
@@ -6,23 +6,24 @@
 * Attempts to detect the language of a sample of text by correlating ranked
 * 3-gram frequencies to a table of 3-gram frequencies of known languages.
 *
- * Implements a version of a technique originally proposed by Cavnar & Trenkle 
+ * Implements a version of a technique originally proposed by Cavnar & Trenkle
- * (1994): "N-Gram-Based Text Categorization" 
+ * (1994): "N-Gram-Based Text Categorization"
 *
- * PHP versions 4 and 5
+ * PHP version 5
 *
- * @category   Text
+ * @category  Text
- * @package    Text_LanguageDetect
+ * @package   Text_LanguageDetect
- * @author     Nicholas Pisarro <infinityminusnine+pear@gmail.com>
+ * @author    Nicholas Pisarro <infinityminusnine+pear@gmail.com>
- * @copyright  2005-2006 Nicholas Pisarro
+ * @copyright 2005-2006 Nicholas Pisarro
- * @license    http://www.debian.org/misc/bsd.license BSD
+ * @license   http://www.debian.org/misc/bsd.license BSD
- * @version    CVS: $Id: LanguageDetect.php,v 1.20 2008/07/01 02:09:15 taak Exp $
+ * @version   SVN: $Id: LanguageDetect.php 322353 2012-01-16 08:41:43Z cweiske $
- * @link       http://pear.php.net/package/Text_LanguageDetect/
+ * @link      http://pear.php.net/package/Text_LanguageDetect/
- * @link       http://langdetect.blogspot.com/
+ * @link      http://langdetect.blogspot.com/
 */
-//require_once 'PEAR.php';
+require_once 'LanguageDetect/Exception.php';
-require_once 'Parser.php';
+require_once 'LanguageDetect/Parser.php';
+require_once 'LanguageDetect/ISO639.php';
 /**
 * Language detection class
@@ -41,9 +42,10 @@ require_once 'Parser.php';
 *
 * echo "Supported languages:\n";
 *
- * $langs = $l->getLanguages();
+ * try {
- * if (PEAR::isError($langs)) {
+ *     $langs = $l->getLanguages();
- *     die($langs->getMessage());
+ * } catch (Text_LanguageDetect_Exception $e) {
+ *     die($e->getMessage());
 * }
 *
 * sort($langs);
@@ -54,38 +56,38 @@ require_once 'Parser.php';
 * }
 * </code>
 *
- * @category   Text
+ * @category  Text
- * @package    Text_LanguageDetect
+ * @package   Text_LanguageDetect
- * @author     Nicholas Pisarro <infinityminusnine+pear@gmail.com>
+ * @author    Nicholas Pisarro <infinityminusnine+pear@gmail.com>
- * @copyright  2005 Nicholas Pisarro
+ * @copyright 2005 Nicholas Pisarro
- * @license    http://www.debian.org/misc/bsd.license BSD
+ * @license   http://www.debian.org/misc/bsd.license BSD
- * @version    Release: @package_version@
+ * @version   Release: @package_version@
- * @todo       allow users to generate their own language models
+ * @link      http://pear.php.net/package/Text_LanguageDetect/
+ * @todo      allow users to generate their own language models
 */
- 
 class Text_LanguageDetect
 {
-    /** 
+    /**
     * The filename that stores the trigram data for the detector
     *
-     * If this value starts with a slash (/) or a dot (.) the value of 
+     * If this value starts with a slash (/) or a dot (.) the value of
     * $this->_data_dir will be ignored
-     * 
+     *
     * @var      string
     * @access   private
     */
-    var $_db_filename = './lang.dat';
+    var $_db_filename = 'lang.dat';
    /**
     * The filename that stores the unicode block definitions
     *
-     * If this value starts with a slash (/) or a dot (.) the value of 
+     * If this value starts with a slash (/) or a dot (.) the value of
     * $this->_data_dir will be ignored
-     * 
+     *
     * @var string
     * @access private
     */
-    var $_unicode_db_filename = './unicode_blocks.dat';
+    var $_unicode_db_filename = 'unicode_blocks.dat';
    /**
     * The data directory
@@ -99,11 +101,8 @@ class Text_LanguageDetect
    /**
     * The trigram data for comparison
-     * 
-     * Will be loaded on start from $this->_db_filename
     *
-     * May be set to a PEAR_Error object if there is an error during its 
+     * Will be loaded on start from $this->_db_filename
-     * initialization
     *
     * @var      array
     * @access   private
@@ -120,7 +119,7 @@ class Text_LanguageDetect
    /**
     * The size of the trigram data arrays
-     * 
+     *
     * @var      int
     * @access   private
     */
@@ -140,7 +139,7 @@ class Text_LanguageDetect
    /**
     * Whether or not to simulate perl's Language::Guess exactly
-     * 
+     *
     * @access  private
     * @var     bool
     * @see     setPerlCompatible()
@@ -165,18 +164,24 @@ class Text_LanguageDetect
    var $_clusters;
    /**
+     * Which type of "language names" are accepted and returned:
+     *
+     * 0 - language name ("english")
+     * 2 - 2-letter ISO 639-1 code ("en")
+     * 3 - 3-letter ISO 639-2 code ("eng")
+     */
+    var $_name_mode = 0;
+    /**
     * Constructor
     *
     * Will attempt to load the language database. If it fails, you will get
-     * a PEAR_Error object returned when you try to use detect()
+     * an exception.
-     *
     */
-    function Text_LanguageDetect($db=null, $unicode_db=null)
+    function __construct()
    {
-                if (isset($db)) $this->_db_filename = $db;
-                if (isset($unicode_db)) $this->_unicode_db_filename = $unicode_db;
-                
        $data = $this->_readdb($this->_db_filename);
+        $this->_checkTrigram($data['trigram']);
        $this->_lang_db = $data['trigram'];
        if (isset($data['trigram-unicodemap'])) {
@@ -186,29 +191,32 @@ class Text_LanguageDetect
        // Not yet implemented:
        if (isset($data['trigram-clusters'])) {
            $this->_clusters = $data['trigram-clusters'];
-        }               
+        }
    }
    /**
     * Returns the path to the location of the database
     *
-     * @access    private
+     * @param string $fname File name to load
-     * @return    string    expected path to the language model database
+     *
+     * @return string expected path to the language model database
+     * @access private
     */
    function _get_data_loc($fname)
    {
-        return $fname;
+        return dirname(__FILE__).'/'.$fname;
    }
    /**
     * Loads the language trigram database from filename
     *
     * Trigram datbase should be a serialize()'d array
-     * 
+     *
-     * @access    private
+     * @param string $fname the filename where the data is stored
-     * @param     string      $fname   the filename where the data is stored
+     *
-     * @return    array                the language model data
+     * @return array the language model data
-     * @throws    PEAR_Error
+     * @throws Text_LanguageDetect_Exception
+     * @access private
     */
    function _readdb($fname)
    {
@@ -217,79 +225,74 @@ class Text_LanguageDetect
        // input check
        if (!file_exists($fname)) {
-            throw new Exception('Language database does not exist.');
+            throw new Text_LanguageDetect_Exception(
+                'Language database does not exist: ' . $fname,
+                Text_LanguageDetect_Exception::DB_NOT_FOUND
+            );
        } elseif (!is_readable($fname)) {
-            throw new Exception('Language database is not readable.');
+            throw new Text_LanguageDetect_Exception(
+                'Language database is not readable: ' . $fname,
+                Text_LanguageDetect_Exception::DB_NOT_READABLE
+            );
        }
-        if (function_exists('file_get_contents')) {
+        return unserialize(file_get_contents($fname));
-            return unserialize(file_get_contents($fname));
-        } else {
-            // if you don't have file_get_contents(), 
-            // then this is the next fastest way
-            ob_start();
-            readfile($fname);
-            $contents = ob_get_contents();
-            ob_end_clean();
-            return unserialize($contents);
-        }
    }
    /**
     * Checks if this object is ready to detect languages
-     * 
+     *
-     * @access   private
+     * @param array $trigram Trigram data from database
-     * @param    mixed   &$err  error object to be returned by reference, if any
+     *
-     * @return   bool           true if no errors
+     * @return void
+     * @access private
     */
-    function _setup_ok(&$err)
+    function _checkTrigram($trigram)
    {
-        if (!is_array($this->_lang_db)) {
+        if (!is_array($trigram)) {
            if (ini_get('magic_quotes_runtime')) {
-                throw new Exception('Error loading database. Try turning magic_quotes_runtime off.');
+                throw new Text_LanguageDetect_Exception(
-            } else {
+                    'Error loading database. Try turning magic_quotes_runtime off.',
-                throw new Exception('Language database is not an array.');
+                    Text_LanguageDetect_Exception::MAGIC_QUOTES
+                );
            }
-            return false;
+            throw new Text_LanguageDetect_Exception(
+                'Language database is not an array.',
-        } elseif (empty($this->_lang_db)) {
+                Text_LanguageDetect_Exception::DB_NOT_ARRAY
-            throw new Exception('Language database has no elements.');
+            );
-            return false;
+        } elseif (empty($trigram)) {
+            throw new Text_LanguageDetect_Exception(
-        } else {
+                'Language database has no elements.',
-            return true;
+                Text_LanguageDetect_Exception::DB_EMPTY
+            );
        }
    }
    /**
     * Omits languages
     *
-     * Pass this function the name of or an array of names of 
+     * Pass this function the name of or an array of names of
     * languages that you don't want considered
     *
-     * If you're only expecting a limited set of languages, this can greatly 
+     * If you're only expecting a limited set of languages, this can greatly
     * speed up processing
     *
-     * @access   public
+     * @param mixed $omit_list    language name or array of names to omit
-     * @param    mixed  $omit_list      language name or array of names to omit
+     * @param bool  $include_only if true will include (rather than
-     * @param    bool   $include_only   if true will include (rather than 
+     *                            exclude) only those in the list
-     *                                  exclude) only those in the list
+     *
-     * @return   int                    number of languages successfully deleted
+     * @return int number of languages successfully deleted
-     * @throws   PEAR_Error
+     * @throws Text_LanguageDetect_Exception
     */
-    function omitLanguages($omit_list, $include_only = false)
+    public function omitLanguages($omit_list, $include_only = false)
    {
-        // setup check
-        if (!$this->_setup_ok($err)) {
-            return $err;
-        }
        $deleted = 0;
-        // deleting the given languages
+        $omit_list = $this->_convertFromNameMode($omit_list);
        if (!$include_only) {
+            // deleting the given languages
            if (!is_array($omit_list)) {
                $omit_list = strtolower($omit_list); // case desensitize
                if (isset($this->_lang_db[$omit_list])) {
@@ -301,12 +304,12 @@ class Text_LanguageDetect
                    if (isset($this->_lang_db[$omit_lang])) {
                        unset($this->_lang_db[$omit_lang]);
                        $deleted++;
-                    } 
+                    }
                }
            }
-        // deleting all except the given languages
        } else {
+            // deleting all except the given languages
            if (!is_array($omit_list)) {
                $omit_list = array($omit_list);
            }
@@ -327,7 +330,7 @@ class Text_LanguageDetect
        // reset the cluster cache if the number of languages changes
        // this will then have to be recalculated
        if (isset($this->_clusters) && $deleted > 0) {
-            unset($this->_clusters);
+            $this->_clusters = null;
        }
        return $deleted;
@@ -339,49 +342,40 @@ class Text_LanguageDetect
     *
     * @access public
     * @return int            the number of languages
-     * @throws PEAR_Error
+     * @throws   Text_LanguageDetect_Exception
     */
    function getLanguageCount()
    {
-        if (!$this->_setup_ok($err)) {
+        return count($this->_lang_db);
-            return $err;
-        } else {
-            return count($this->_lang_db);
-        }
    }
    /**
-     * Returns true if a given language exists
+     * Checks if the language with the given name exists in the database
     *
-     * If passed an array of names, will return true only if all exist
+     * @param mixed $lang Language name or array of language names
     *
-     * @access    public
+     * @return bool true if language model exists
-     * @param     mixed       $lang    language name or array of language names
-     * @return    bool                 true if language model exists
-     * @throws    PEAR_Error
     */
-    function languageExists($lang)
+    public function languageExists($lang)
    {
-        if (!$this->_setup_ok($err)) {
+        $lang = $this->_convertFromNameMode($lang);
-            return $err;
-        } else {
-            // string
-            if (is_string($lang)) {
-                return isset($this->_lang_db[strtolower($lang)]);
-            // array
-            } elseif (is_array($lang)) {
-                foreach ($lang as $test_lang) {
-                    if (!isset($this->_lang_db[strtolower($test_lang)])) {
-                        return false;
-                    } 
-                }
-                return true;
-            // other (error)
+        if (is_string($lang)) {
-            } else {
+            return isset($this->_lang_db[strtolower($lang)]);
-                throw new Exception('Unknown type passed to languageExists()');
+        } elseif (is_array($lang)) {
+            foreach ($lang as $test_lang) {
+                if (!isset($this->_lang_db[strtolower($test_lang)])) {
+                    return false;
+                }
            }
+            return true;
+        } else {
+            throw new Text_LanguageDetect_Exception(
+                'Unsupported parameter type passed to languageExists()',
+                Text_LanguageDetect_Exception::PARAM_TYPE
+            );
        }
    }
@@ -389,25 +383,24 @@ class Text_LanguageDetect
     * Returns the list of detectable languages
     *
     * @access public
-     * @return array        the names of the languages known to this object
+     * @return array        the names of the languages known to this object
-     * @throws PEAR_Error
+     * @throws   Text_LanguageDetect_Exception
     */
    function getLanguages()
    {
-        if (!$this->_setup_ok($err)) {
+        return $this->_convertToNameMode(
-            return $err;
+            array_keys($this->_lang_db)
-        } else {
+        );
-            return array_keys($this->_lang_db);
-        }
    }
    /**
     * Make this object behave like Language::Guess
-     * 
+     *
-     * @access    public
+     * @param bool $setting false to turn off perl compatibility
-     * @param     bool     $setting     false to turn off perl compatibility
+     *
+     * @return void
     */
-    function setPerlCompatible($setting = true)
+    public function setPerlCompatible($setting = true)
    {
        if (is_bool($setting)) { // input check
            $this->_perl_compatible = $setting;
@@ -422,6 +415,21 @@ class Text_LanguageDetect
    }
    /**
+     * Sets the way how language names are accepted and returned.
+     *
+     * @param integer $name_mode One of the following modes:
+     *                           0 - language name ("english")
+     *                           2 - 2-letter ISO 639-1 code ("en")
+     *                           3 - 3-letter ISO 639-2 code ("eng")
+     *
+     * @return void
+     */
+    function setNameMode($name_mode)
+    {
+        $this->_name_mode = $name_mode;
+    }
+    /**
     * Whether to use unicode block ranges in detection
     *
     * Should speed up most detections if turned on (detault is on). In some
@@ -429,10 +437,11 @@ class Text_LanguageDetect
     * in languages that use latin scripts. In other cases it should speed up
     * detection noticeably.
     *
-     * @access  public
+     * @param bool $setting false to turn off
-     * @param   bool    $setting    false to turn off
+     *
+     * @return void
     */
-    function useUnicodeBlocks($setting = true)
+    public function useUnicodeBlocks($setting = true)
    {
        if (is_bool($setting)) {
            $this->_use_unicode_narrowing = $setting;
@@ -442,15 +451,15 @@ class Text_LanguageDetect
    /**
     * Converts a piece of text into trigrams
     *
-     * Superceded by the Text_LanguageDetect_Parser class 
+     * @param string $text text to convert
     *
-     * @access    private
+     * @return     array array of trigram frequencies
-     * @param     string    $text    text to convert
+     * @access     private
-     * @return    array              array of trigram frequencies
+     * @deprecated Superseded by the Text_LanguageDetect_Parser class
     */
    function _trigram($text)
    {
-        $s = new Text_LanguageDetect_Parser($text, $this->_db_filename, $this->_unicode_db_filename);
+        $s = new Text_LanguageDetect_Parser($text);
        $s->prepareTrigram();
        $s->prepareUnicode(false);
        $s->setPadStart(!$this->_perl_compatible);
@@ -463,11 +472,12 @@ class Text_LanguageDetect
     *
     * Thresholds (cuts off) the list at $this->_threshold
     *
-     * @access    protected
+     * @param array $arr array of trigram
-     * @param     array     $arr     array of trgram 
+     *
-     * @return    array              ranks of trigrams
+     * @return array ranks of trigrams
+     * @access protected
     */
-    function _arr_rank(&$arr)
+    function _arr_rank($arr)
    {
        // sorts alphabetically first as a standard way of breaking rank ties
@@ -494,14 +504,17 @@ class Text_LanguageDetect
    /**
     * Sorts an array by value breaking ties alphabetically
-     * 
+     *
-     * @access   private
+     * @param array &$arr the array to sort
-     * @param    array     &$arr     the array to sort
+     *
+     * @return void
+     * @access private
     */
    function _bub_sort(&$arr)
    {
        // should do the same as this perl statement:
-        // sort { $trigrams{$b} == $trigrams{$a} ?  $a cmp $b : $trigrams{$b} <=> $trigrams{$a} }
+        // sort { $trigrams{$b} == $trigrams{$a}
+        //   ?  $a cmp $b : $trigrams{$b} <=> $trigrams{$a} }
        // needs to sort by both key and value at once
        // using the key to break ties for the value
@@ -528,13 +541,14 @@ class Text_LanguageDetect
    /**
     * Sort function used by bubble sort
     *
-     * Callback function for usort(). 
+     * Callback function for usort().
     *
-     * @access   private
+     * @param array $a first param passed by usort()
-     * @param    array        first param passed by usort()
+     * @param array $b second param passed by usort()
-     * @param    array        second param passed by usort()
+     *
-     * @return   int          1 if $a is greater, -1 if not
+     * @return int 1 if $a is greater, -1 if not
-     * @see      _bub_sort()
+     * @see    _bub_sort()
+     * @access private
     */
    function _sort_func($a, $b)
    {
@@ -542,12 +556,12 @@ class Text_LanguageDetect
        list($a_key, $a_value) = $a;
        list($b_key, $b_value) = $b;
-        // if the values are the same, break ties using the key
        if ($a_value == $b_value) {
+            // if the values are the same, break ties using the key
            return strcmp($a_key, $b_key);
-        // if not, just sort normally
        } else {
+            // if not, just sort normally
            if ($a_value > $b_value) {
                return -1;
            } else {
@@ -559,23 +573,24 @@ class Text_LanguageDetect
    }
    /**
-     * Calculates a linear rank-order distance statistic between two sets of 
+     * Calculates a linear rank-order distance statistic between two sets of
     * ranked trigrams
     *
-     * Sums the differences in rank for each trigram. If the trigram does not 
+     * Sums the differences in rank for each trigram. If the trigram does not
     * appear in both, consider it a difference of $this->_threshold.
     *
     * This distance measure was proposed by Cavnar & Trenkle (1994). Despite
     * its simplicity it has been shown to be highly accurate for language
     * identification tasks.
     *
-     * @access  private
+     * @param array $arr1 the reference set of trigram ranks
-     * @param   array    $arr1  the reference set of trigram ranks
+     * @param array $arr2 the target set of trigram ranks
-     * @param   array    $arr2  the target set of trigram ranks
+     *
-     * @return  int             the sum of the differences between the ranks of
+     * @return int the sum of the differences between the ranks of
-     *                          the two trigram sets
+     *             the two trigram sets
+     * @access private
     */
-    function _distance(&$arr1, &$arr2)
+    function _distance($arr1, $arr2)
    {
        $sumdist = 0;
@@ -598,14 +613,15 @@ class Text_LanguageDetect
    /**
     * Normalizes the score returned by _distance()
-     * 
+     *
     * Different if perl compatible or not
     *
-     * @access    private
+     * @param int $score      the score from _distance()
-     * @param     int    $score          the score from _distance()
+     * @param int $base_count the number of trigrams being considered
-     * @param     int    $base_count     the number of trigrams being considered
+     *
-     * @return    float                  the normalized score
+     * @return float the normalized score
-     * @see       _distance()
+     * @see    _distance()
+     * @access private
     */
    function _normalize_score($score, $base_count = null)
    {
@@ -630,29 +646,24 @@ class Text_LanguageDetect
     *
     * If perl compatible, the score is 300-0, 0 being most similar.
     * Otherwise, it's 0-1 with 1 being most similar.
-     * 
+     *
     * The $sample text should be at least a few sentences in length;
     * should be ascii-7 or utf8 encoded, if another and the mbstring extension
     * is present it will try to detect and convert. However, experience has
-     * shown that mb_detect_encoding() *does not work very well* with at least 
+     * shown that mb_detect_encoding() *does not work very well* with at least
     * some types of encoding.
     *
-     * @access  public
+     * @param string $sample a sample of text to compare.
-     * @param   string  $sample a sample of text to compare.
+     * @param int    $limit  if specified, return an array of the most likely
-     * @param   int     $limit  if specified, return an array of the most likely
+     *                       $limit languages and their scores.
-     *                           $limit languages and their scores.
+     *
-     * @return  mixed       sorted array of language scores, blank array if no 
+     * @return mixed sorted array of language scores, blank array if no
-     *                      useable text was found, or PEAR_Error if error 
+     *               useable text was found
-     *                      with the object setup
+     * @see    _distance()
-     * @see     _distance()
+     * @throws Text_LanguageDetect_Exception
-     * @throws  PEAR_Error
     */
-    function detect($sample, $limit = 0)
+    public function detect($sample, $limit = 0)
    {
-        if (!$this->_setup_ok($err)) {
-            return $err;
-        }
        // input check
        if (!Text_LanguageDetect_Parser::validateString($sample)) {
            return array();
@@ -660,36 +671,27 @@ class Text_LanguageDetect
        // check char encoding
        // (only if mbstring extension is compiled and PHP > 4.0.6)
-        if (function_exists('mb_detect_encoding') 
+        if (function_exists('mb_detect_encoding')
-            && function_exists('mb_convert_encoding')) {
+            && function_exists('mb_convert_encoding')
+        ) {
            // mb_detect_encoding isn't very reliable, to say the least
-            // detection should still work with a sufficient sample of ascii characters
+            // detection should still work with a sufficient sample
+            //  of ascii characters
            $encoding = mb_detect_encoding($sample);
            // mb_detect_encoding() will return FALSE if detection fails
            // don't attempt conversion if that's the case
-            if ($encoding != 'ASCII' && $encoding != 'UTF-8' && $encoding !== false) {
+            if ($encoding != 'ASCII' && $encoding != 'UTF-8'
-            
+                && $encoding !== false
-                if (function_exists('mb_list_encodings')) {
+            ) {
- 
+                // verify the encoding exists in mb_list_encodings
-                    // verify the encoding exists in mb_list_encodings
+                if (in_array($encoding, mb_list_encodings())) {
-                    if (in_array($encoding, mb_list_encodings())) {
+                    $sample = mb_convert_encoding($sample, 'UTF-8', $encoding);
-                        $sample = mb_convert_encoding($sample, 'UTF-8', $encoding);
-                    }
-                    // if the previous condition failed:
-                    // somehow we detected an encoding that also we don't support
-                } else {
-                    // php 4 doesnt have mb_list_encodings()
-                    // so attempt with error suppression
-                    $sample = @mb_convert_encoding($sample, 'UTF-8', $encoding);
                }
            }
        }
-        $sample_obj = new Text_LanguageDetect_Parser($sample, $this->_db_filename, $this->_unicode_db_filename);
+        $sample_obj = new Text_LanguageDetect_Parser($sample);
        $sample_obj->prepareTrigram();
        if ($this->_use_unicode_narrowing) {
            $sample_obj->prepareUnicode();
@@ -713,7 +715,10 @@ class Text_LanguageDetect
            if (is_array($blocks)) {
                $present_blocks = array_keys($blocks);
            } else {
-                throw new Exception('Error during block detection');
+                throw new Text_LanguageDetect_Exception(
+                    'Error during block detection',
+                    Text_LanguageDetect_Exception::BLOCK_DETECTION
+                );
            }
            $possible_langs = array();
@@ -731,30 +736,30 @@ class Text_LanguageDetect
            }
            // could also try an intersect operation rather than a union
-            // in other words, choose languages whose trigrams contain 
+            // in other words, choose languages whose trigrams contain
            // ALL of the unicode blocks found in this sample
            // would improve speed but would be completely thrown off by an
            // unexpected character, like an umlaut appearing in english text
            $possible_langs = array_intersect(
-                        array_keys($this->_lang_db),
+                array_keys($this->_lang_db),
-                        array_unique($possible_langs)
+                array_unique($possible_langs)
            );
-            // needs to intersect it with the keys of _lang_db in case 
+            // needs to intersect it with the keys of _lang_db in case
            // languages have been omitted
-        // or just try 'em all
        } else {
+            // or just try 'em all
            $possible_langs = array_keys($this->_lang_db);
        }
        foreach ($possible_langs as $lang) {
-            $scores[$lang] =
+            $scores[$lang] = $this->_normalize_score(
-                $this->_normalize_score(
+                $this->_distance($this->_lang_db[$lang], $trigram_freqs),
-                        $this->_distance($this->_lang_db[$lang], $trigram_freqs),
+                $trigram_count
-                        $trigram_count);
+            );
        }
        unset($sample_obj);
@@ -772,7 +777,6 @@ class Text_LanguageDetect
            $limited_scores = array();
            $i = 0;
            foreach ($scores as $key => $value) {
                if ($i++ >= $limit) {
                    break;
@@ -781,9 +785,9 @@ class Text_LanguageDetect
                $limited_scores[$key] = $value;
            }
-            return $limited_scores;
+            return $this->_convertToNameMode($limited_scores, true);
        } else {
-            return $scores;
+            return $this->_convertToNameMode($scores, true);
        }
    }
@@ -791,35 +795,33 @@ class Text_LanguageDetect
     * Returns only the most similar language to the text sample
     *
     * Calls $this->detect() and returns only the top result
-     * 
+     *
-     * @access   public
+     * @param string $sample text to detect the language of
-     * @param    string    $sample    text to detect the language of
+     *
-     * @return   string               the name of the most likely language
+     * @return string the name of the most likely language
-     *                                or null if no language is similar
+     *                or null if no language is similar
-     * @see      detect()
+     * @see    detect()
-     * @throws   PEAR_Error
+     * @throws Text_LanguageDetect_Exception
     */
-    function detectSimple($sample)
+    public function detectSimple($sample)
    {
        $scores = $this->detect($sample, 1);
        // if top language has the maximum possible score,
        // then the top score will have been picked at random
-        if (    !is_array($scores) 
+        if (!is_array($scores) || empty($scores)
-                || empty($scores) 
+            || current($scores) == $this->_max_score
-                || current($scores) == $this->_max_score) {
+        ) {
            return null;
        } else {
-            return ucfirst(key($scores));
+            return key($scores);
        }
    }
    /**
     * Returns an array containing the most similar language and a confidence
     * rating
-     * 
+     *
     * Confidence is a simple measure calculated from the similarity score
     * minus the similarity score from the next most similar language
     * divided by the highest possible score. Languages that have closely
@@ -827,46 +829,43 @@ class Text_LanguageDetect
     * confidence scores.
     *
     * The similarity score answers the question "How likely is the text the
-     * returned language regardless of the other languages considered?" The 
+     * returned language regardless of the other languages considered?" The
     * confidence score is one way of answering the question "how likely is the
     * text the detected language relative to the rest of the language model
     * set?"
     *
     * To see how similar languages are a priori, see languageSimilarity()
-     * 
+     *
-     * @access   public
+     * @param string $sample text for which language will be detected
-     * @param    string    $sample    text for which language will be detected
+     *
-     * @return   array     most similar language, score and confidence rating
+     * @return array most similar language, score and confidence rating
-     *                     or null if no language is similar
+     *               or null if no language is similar
-     * @see      detect()
+     * @see    detect()
-     * @throws   PEAR_Error
+     * @throws Text_LanguageDetect_Exception
     */
-    function detectConfidence($sample)
+    public function detectConfidence($sample)
    {
        $scores = $this->detect($sample, 2);
-        // if most similar language has the max score, it 
+        // if most similar language has the max score, it
        // will have been picked at random
-        if (    !is_array($scores) 
+        if (!is_array($scores) || empty($scores)
-                || empty($scores) 
+            || current($scores) == $this->_max_score
-                || current($scores) == $this->_max_score) {
+        ) {
            return null;
        }
-        $arr['language'] = ucfirst(key($scores));
+        $arr['language'] = key($scores);
        $arr['similarity'] = current($scores);
        if (next($scores) !== false) { // if false then no next element
            // the goal is to return a higher value if the distance between
            // the similarity of the first score and the second score is high
            if ($this->_perl_compatible) {
+                $arr['confidence'] = (current($scores) - $arr['similarity'])
-                $arr['confidence'] =
+                    / $this->_max_score;
-                    (current($scores) - $arr['similarity']) / $this->_max_score;
            } else {
                $arr['confidence'] = $arr['similarity'] - current($scores);
            }
@@ -882,32 +881,26 @@ class Text_LanguageDetect
     * Returns the distribution of unicode blocks in a given utf8 string
     *
     * For the block name of a single char, use unicodeBlockName()
-     * 
+     *
-     * @access public
+     * @param string $str          input string. Must be ascii or utf8
-     * @param string $str input string. Must be ascii or utf8
+     * @param bool   $skip_symbols if true, skip ascii digits, symbols and
-     * @param bool $skip_symbols if true, skip ascii digits, symbols and 
+     *                             non-printing characters. Includes spaces,
-     *                           non-printing characters. Includes spaces,
+     *                             newlines and common punctutation characters.
-     *                           newlines and common punctutation characters.
+     *
     * @return array
-     * @throws PEAR_Error
+     * @throws Text_LanguageDetect_Exception
     */
-    function detectUnicodeBlocks($str, $skip_symbols)
+    public function detectUnicodeBlocks($str, $skip_symbols)
    {
-        // input check
+        $skip_symbols = (bool)$skip_symbols;
-        if (!is_bool($skip_symbols)) {
+        $str          = (string)$str;
-            throw new Exception('Second parameter must be boolean');
-        } 
-        if (!is_string($str)) {
-            throw new Exception('First parameter was not a string');
-        }
-        $sample_obj = new Text_LanguageDetect_Parser($str, $this->_db_filename, $this->_unicode_db_filename);
+        $sample_obj = new Text_LanguageDetect_Parser($str);
        $sample_obj->prepareUnicode();
        $sample_obj->prepareTrigram(false);
        $sample_obj->setUnicodeSkipSymbols($skip_symbols);
        $sample_obj->analyze();
-        $blocks =& $sample_obj->getUnicodeBlocks();
+        $blocks = $sample_obj->getUnicodeBlocks();
        unset($sample_obj);
        return $blocks;
    }
@@ -915,38 +908,37 @@ class Text_LanguageDetect
    /**
     * Returns the block name for a given unicode value
     *
-     * If passed a string, will assume it is being passed a UTF8-formatted 
+     * If passed a string, will assume it is being passed a UTF8-formatted
     * character and will automatically convert. Otherwise it will assume it
     * is being passed a numeric unicode value.
     *
     * Make sure input is of the correct type!
     *
-     * @access public
     * @param mixed $unicode unicode value or utf8 char
+     *
     * @return mixed the block name string or false if not found
-     * @throws PEAR_Error
+     * @throws Text_LanguageDetect_Exception
     */
-    function unicodeBlockName($unicode) {
+    public function unicodeBlockName($unicode)
+    {
        if (is_string($unicode)) {
            // assume it is being passed a utf8 char, so convert it
+            if (self::utf8strlen($unicode) > 1) {
-            // input check
+                throw new Text_LanguageDetect_Exception(
-            if ($this->utf8strlen($unicode) > 1) {
+                    'Pass a single char only to this method',
-                throw new Exception('Pass this function only a single char');
+                    Text_LanguageDetect_Exception::PARAM_TYPE
+                );
            }
            $unicode = $this->_utf8char2unicode($unicode);
-            if ($unicode == -1) {
-                throw new Exception('Malformatted char');
-            }
-        // input check
        } elseif (!is_int($unicode)) {
-            throw new Exception('Input must be of type string or int.');
+            throw new Text_LanguageDetect_Exception(
+                'Input must be of type string or int.',
+                Text_LanguageDetect_Exception::PARAM_TYPE
+            );
        }
-        $blocks =& $this->_read_unicode_block_db();
+        $blocks = $this->_read_unicode_block_db();
        $result = $this->_unicode_block_name($unicode, $blocks);
@@ -964,14 +956,17 @@ class Text_LanguageDetect
     * the public interface for this function, which does input checks which
     * this function omits for speed.
     *
-     * @access  protected
+     * @param int   $unicode     the unicode value
-     * @param   int     $unicode the unicode value
+     * @param array $blocks      the block database
-     * @param   array   &$blocks the block database
+     * @param int   $block_count the number of defined blocks in the database
-     * @param   int     $block_count the number of defined blocks in the database
+     *
-     * @see     unicodeBlockName()
+     * @return mixed Block name, -1 if it failed
+     * @see    unicodeBlockName()
+     * @access protected
     */
-    function _unicode_block_name($unicode, &$blocks, $block_count = -1) {
+    function _unicode_block_name($unicode, $blocks, $block_count = -1)
-        // for a reference, see 
+    {
+        // for a reference, see
        // http://www.unicode.org/Public/UNIDATA/Blocks.txt
        // assume that ascii characters are the most common
@@ -994,35 +989,36 @@ class Text_LanguageDetect
        while ($low <= $high) {
            $mid = floor(($low + $high) / 2);
-            // if it's lower than the lower bound
            if ($unicode < $blocks[$mid][0]) {
+                // if it's lower than the lower bound
                $high = $mid - 1;
-            // if it's higher than the upper bound
            } elseif ($unicode > $blocks[$mid][1]) {
+                // if it's higher than the upper bound
                $low = $mid + 1;
-            // found it
            } else {
+                // found it
                return $blocks[$mid];
            }
        }
-        // failed to find the block 
+        // failed to find the block
        return -1;
-        // todo: differentiate when it's out of range or when it falls 
+        // todo: differentiate when it's out of range or when it falls
        //       into an unassigned range?
    }
    /**
     * Brings up the unicode block database
     *
-     * @access protected
     * @return array the database of unicode block definitions
-     * @throws PEAR_Error
+     * @throws Text_LanguageDetect_Exception
+     * @access protected
     */
-    function &_read_unicode_block_db() {
+    function _read_unicode_block_db()
+    {
        // since the unicode definitions are always going to be the same,
        // might as well share the memory for the db with all other instances
        // of this class
@@ -1037,29 +1033,27 @@ class Text_LanguageDetect
    /**
     * Calculate the similarities between the language models
-     * 
+     *
     * Use this function to see how similar languages are to each other.
     *
     * If passed 2 language names, will return just those languages compared.
     * If passed 1 language name, will return that language compared to
     * all others.
-     * If passed none, will return an array of every language model compared 
+     * If passed none, will return an array of every language model compared
     * to every other one.
     *
-     * @access  public
+     * @param string $lang1 the name of the first language to be compared
-     * @param   string   $lang1   the name of the first language to be compared
+     * @param string $lang2 the name of the second language to be compared
-     * @param   string   $lang2   the name of the second language to be compared
+     *
-     * @return  array    scores of every language compared
+     * @return array scores of every language compared
-     *                   or the score of just the provided languages
+     *               or the score of just the provided languages
-     *                   or null if one of the supplied languages does not exist
+     *               or null if one of the supplied languages does not exist
-     * @throws  PEAR_Error
+     * @throws Text_LanguageDetect_Exception
     */
-    function languageSimilarity($lang1 = null, $lang2 = null)
+    public function languageSimilarity($lang1 = null, $lang2 = null)
    {
-        if (!$this->_setup_ok($err)) {
+        $lang1 = $this->_convertFromNameMode($lang1);
-            return $err;
+        $lang2 = $this->_convertFromNameMode($lang2);
-        }
        if ($lang1 != null) {
            $lang1 = strtolower($lang1);
@@ -1069,12 +1063,8 @@ class Text_LanguageDetect
            }
            if ($lang2 != null) {
+                if (!isset($this->_lang_db[$lang2])) {
-                // can't only set the second param
+                    // check if language model exists
-                if ($lang1 == null) {
-                    return null;
-                // check if language model exists
-                } elseif (!isset($this->_lang_db[$lang2])) {
                    return null;
                }
@@ -1088,14 +1078,15 @@ class Text_LanguageDetect
                    )
                );
-            // compare just $lang1 to all languages
            } else {
+                // compare just $lang1 to all languages
                $return_arr = array();
                foreach ($this->_lang_db as $key => $value) {
-                    if ($key != $lang1) { // don't compare a language to itself
+                    if ($key != $lang1) {
+                        // don't compare a language to itself
                        $return_arr[$key] = $this->_normalize_score(
-                            $this->_distance($this->_lang_db[$lang1], $value));
+                            $this->_distance($this->_lang_db[$lang1], $value)
+                        );
                    }
                }
                asort($return_arr);
@@ -1104,30 +1095,27 @@ class Text_LanguageDetect
            }
-        // compare all languages to each other
        } else {
+            // compare all languages to each other
            $return_arr = array();
            foreach (array_keys($this->_lang_db) as $lang1) {
                foreach (array_keys($this->_lang_db) as $lang2) {
                    // skip comparing languages to themselves
-                    if ($lang1 != $lang2) { 
+                    if ($lang1 != $lang2) {
-                    
-                        // don't re-calculate what's already been done
-                        if (isset($return_arr[$lang2][$lang1])) {
-                            $return_arr[$lang1][$lang2] =
+                        if (isset($return_arr[$lang2][$lang1])) {
-                                $return_arr[$lang2][$lang1];
+                            // don't re-calculate what's already been done
+                            $return_arr[$lang1][$lang2]
+                                = $return_arr[$lang2][$lang1];
-                        // calculate
                        } else {
+                            // calculate
-                            $return_arr[$lang1][$lang2] = 
+                            $return_arr[$lang1][$lang2]
-                                $this->_normalize_score(
+                                = $this->_normalize_score(
-                                        $this->_distance(
+                                    $this->_distance(
-                                            $this->_lang_db[$lang1],
+                                        $this->_lang_db[$lang1],
-                                            $this->_lang_db[$lang2]
+                                        $this->_lang_db[$lang2]
-                                        )
+                                    )
                                );
                        }
@@ -1150,20 +1138,14 @@ class Text_LanguageDetect
     *
     * @access      public
     * @return      array language cluster data
-     * @throws      PEAR_Error
+     * @throws      Text_LanguageDetect_Exception
     * @see         languageSimilarity()
-     * @deprecated  this function will eventually be removed and placed into 
+     * @deprecated  this function will eventually be removed and placed into
     *              the model generation class
     */
    function clusterLanguages()
    {
        // todo: set the maximum number of clusters
-        // setup check
-        if (!$this->_setup_ok($err)) {
-            return $err;
-        }
        // return cached result, if any
        if (isset($this->_clusters)) {
            return $this->_clusters;
@@ -1177,7 +1159,10 @@ class Text_LanguageDetect
        foreach ($langs as $lang) {
            if (!isset($this->_lang_db[$lang])) {
-                throw new Exception("missing $lang!\n");
+                throw new Text_LanguageDetect_Exception(
+                    "missing $lang!",
+                    Text_LanguageDetect_Exception::UNKNOWN_LANGUAGE
+                );
            }
        }
@@ -1186,7 +1171,9 @@ class Text_LanguageDetect
            $langs[$lang1] = $lang1;
            unset($langs[$old_key]);
        }
-        
+        $result_data = $really_map = array();
        $i = 0;
        while (count($langs) > 2 && $i++ < 200) {
            $highest_score = -1;
@@ -1194,18 +1181,22 @@ class Text_LanguageDetect
            $highest_key2 = '';
            foreach ($langs as $lang1) {
                foreach ($langs as $lang2) {
-                    if (    $lang1 != $lang2 
+                    if ($lang1 != $lang2
-                            && $arr[$lang1][$lang2] > $highest_score) {
+                        && $arr[$lang1][$lang2] > $highest_score
+                    ) {
                        $highest_score = $arr[$lang1][$lang2];
                        $highest_key1 = $lang1;
                        $highest_key2 = $lang2;
                    }
                }
            }
-            
            if (!$highest_key1) {
                // should not ever happen
-                throw new Exception("no highest key? (step: $i)");
+                throw new Text_LanguageDetect_Exception(
+                    "no highest key? (step: $i)",
+                    Text_LanguageDetect_Exception::NO_HIGHEST_KEY
+                );
            }
            if ($highest_score == 0) {
@@ -1217,7 +1208,7 @@ class Text_LanguageDetect
            $sum1 = array_sum($arr[$highest_key1]);
            $sum2 = array_sum($arr[$highest_key2]);
-            // use the score for the one that is most similar to the rest of 
+            // use the score for the one that is most similar to the rest of
            // the field as the score for the group
            // todo: could try averaging or "centroid" method instead
            // seems like that might make more sense
@@ -1248,7 +1239,7 @@ class Text_LanguageDetect
            $really_lang = $replaceme;
            while (isset($really_map[$really_lang])) {
                $really_lang = $really_map[$really_lang];
-            } 
+            }
            $really_map[$newkey] = $really_lang;
@@ -1259,8 +1250,8 @@ class Text_LanguageDetect
                        $arr[$key1][$newkey] = $arr[$key1][$key2];
                        unset($arr[$key1][$key2]);
                        // replacing $arr[$key1][$key2] with $arr[$key1][$newkey]
-                    } 
+                    }
-                    
                    if ($key1 == $replaceme) {
                        $arr[$newkey][$key2] = $arr[$key1][$key2];
                        unset($arr[$key1][$key2]);
@@ -1273,7 +1264,7 @@ class Text_LanguageDetect
                    }
                }
            }
-                        
            unset($langs[$highest_key1]);
            unset($langs[$highest_key2]);
@@ -1293,7 +1284,7 @@ class Text_LanguageDetect
        }
        $return_val = array(
-                'open_forks' => $langs, 
+                'open_forks' => $langs,
                        // the top level of clusters
                        // clusters that are mutually exclusive
                        // or specified by a specific maximum
@@ -1323,11 +1314,11 @@ class Text_LanguageDetect
     * use, and it may disappear or its functionality may change in future
     * releases without notice.
     *
-     * This compares the sample text to top the top level of clusters. If the 
+     * This compares the sample text to top the top level of clusters. If the
     * sample is similar to the cluster it will drop down and compare it to the
     * languages in the cluster, and so on until it hits a leaf node.
     *
-     * this should find the language in considerably fewer compares 
+     * this should find the language in considerably fewer compares
     * (the equivalent of a binary search), however clusterLanguages() is costly
     * and the loss of accuracy from this technique is significant.
     *
@@ -1337,15 +1328,14 @@ class Text_LanguageDetect
     * was very large, however in such cases some method of Bayesian inference
     * might be more helpful.
     *
-     * @see     clusterLanguages()
+     * @param string $str input string
-     * @access  public
+     *
-     * @param   string $str input string
+     * @return array language scores (only those compared)
-     * @return  array language scores (only those compared)
+     * @throws Text_LanguageDetect_Exception
-     * @throws  PEAR_Error
+     * @see    clusterLanguages()
     */
-    function clusteredSearch($str)
+    public function clusteredSearch($str)
    {
        // input check
        if (!Text_LanguageDetect_Parser::validateString($str)) {
            return array();
@@ -1359,7 +1349,7 @@ class Text_LanguageDetect
        $dendogram_data  = $result['fork_data'];
        $dendogram_alias = $result['name_map'];
-        $sample_obj = new Text_LanguageDetect_Parser($str, $this->_db_filename, $this->_unicode_db_filename);
+        $sample_obj = new Text_LanguageDetect_Parser($str);
        $sample_obj->prepareTrigram();
        $sample_obj->setPadStart(!$this->_perl_compatible);
        $sample_obj->analyze();
@@ -1372,7 +1362,7 @@ class Text_LanguageDetect
        }
        $i = 0; // counts the number of steps
-        
        foreach ($dendogram_start as $lang) {
            if (isset($dendogram_alias[$lang])) {
                $lang_key = $dendogram_alias[$lang];
@@ -1382,7 +1372,8 @@ class Text_LanguageDetect
            $scores[$lang] = $this->_normalize_score(
                $this->_distance($this->_lang_db[$lang_key], $sample_result),
-                $sample_count);
+                $sample_count
+            );
            $i++;
        }
@@ -1411,7 +1402,8 @@ class Text_LanguageDetect
                $scores[$lang] = $this->_normalize_score(
                    $this->_distance($this->_lang_db[$lang_key], $sample_result),
-                    $sample_count);
+                    $sample_count
+                );
                //todo: does not need to do same comparison again
            }
@@ -1428,8 +1420,8 @@ class Text_LanguageDetect
            $diff = $scores[$cur_key] - $scores[$loser_key];
-            // $cur_key ({$dendogram_alias[$cur_key]}) wins 
+            // $cur_key ({$dendogram_alias[$cur_key]}) wins
-            // over $loser_key ({$dendogram_alias[$loser_key]}) 
+            // over $loser_key ({$dendogram_alias[$loser_key]})
            // with a difference of $diff
        }
@@ -1439,9 +1431,9 @@ class Text_LanguageDetect
        // which paths the algorithm decided to take along the tree
        // but sometimes the last item is only the second highest
-        if (   ($this->_perl_compatible  && (end($scores) > prev($scores)))
+        if (($this->_perl_compatible  && (end($scores) > prev($scores)))
-            || (!$this->_perl_compatible && (end($scores) < prev($scores)))) {
+            || (!$this->_perl_compatible && (end($scores) < prev($scores)))
+        ) {
            $real_last_score = current($scores);
            $real_last_key = key($scores);
@@ -1449,7 +1441,7 @@ class Text_LanguageDetect
            unset($scores[$real_last_key]);
            $scores[$real_last_key] = $real_last_score;
        }
-            
        if (!$this->_perl_compatible) {
            $scores = array_reverse($scores, true);
@@ -1464,12 +1456,11 @@ class Text_LanguageDetect
     *
     * Returns the numbers of characters (not bytes) in a utf8 string
     *
-     * @static
+     * @param string $str string to get the length of
-     * @access  public
+     *
-     * @param   string $str string to get the length of
+     * @return int number of chars
-     * @return  int         number of chars
     */
-    function utf8strlen($str)
+    public static function utf8strlen($str)
    {
        // utf8_decode() will convert unknown chars to '?', which is actually
        // ideal for counting.
@@ -1482,53 +1473,45 @@ class Text_LanguageDetect
    /**
     * Returns the unicode value of a utf8 char
     *
-     * @access  protected
+     * @param string $char a utf8 (possibly multi-byte) char
-     * @param   string $char a utf8 (possibly multi-byte) char
+     *
-     * @return  int          unicode value or -1 if malformatted
+     * @return int unicode value
+     * @access protected
+     * @link   http://en.wikipedia.org/wiki/UTF-8
     */
-    function _utf8char2unicode($char) {
+    function _utf8char2unicode($char)
+    {
        // strlen() here will actually get the binary length of a single char
        switch (strlen($char)) {
+        case 1:
-            // for a reference, see http://en.wikipedia.org/wiki/UTF-8
+            // normal ASCII-7 byte
+            // 0xxxxxxx -->  0xxxxxxx
-            case 1:
+            return ord($char{0});
-                // normal ASCII-7 byte
-                // 0xxxxxxx -->  0xxxxxxx
+        case 2:
-                return ord($char{0});
+            // 2 byte unicode
+            // 110zzzzx 10xxxxxx --> 00000zzz zxxxxxxx
-            case 2:
+            $z = (ord($char{0}) & 0x000001F) << 6;
-                // 2 byte unicode
+            $x = (ord($char{1}) & 0x0000003F);
-                // 110zzzzx 10xxxxxx --> 00000zzz zxxxxxxx
+            return ($z | $x);
-                $z = (ord($char{0}) & 0x000001F) << 6;
-                $x = (ord($char{1}) & 0x0000003F);
+        case 3:
+            // 3 byte unicode
-                return ($z | $x);
+            // 1110zzzz 10zxxxxx 10xxxxxx --> zzzzzxxx xxxxxxxx
+            $z =  (ord($char{0}) & 0x0000000F) << 12;
-            case 3:
+            $x1 = (ord($char{1}) & 0x0000003F) << 6;
-                // 3 byte unicode
+            $x2 = (ord($char{2}) & 0x0000003F);
-                // 1110zzzz 10zxxxxx 10xxxxxx --> zzzzzxxx xxxxxxxx 
+            return ($z | $x1 | $x2);
-                $z =  (ord($char{0}) & 0x0000000F) << 12;
-                $x1 = (ord($char{1}) & 0x0000003F) << 6;
+        case 4:
-                $x2 = (ord($char{2}) & 0x0000003F);
+            // 4 byte unicode
+            // 11110zzz 10zzxxxx 10xxxxxx 10xxxxxx -->
-                return ($z | $x1 | $x2);
+            // 000zzzzz xxxxxxxx xxxxxxxx
+            $z1 = (ord($char{0}) & 0x00000007) << 18;
-            case 4:
+            $z2 = (ord($char{1}) & 0x0000003F) << 12;
-                // 4 byte unicode
+            $x1 = (ord($char{2}) & 0x0000003F) << 6;
-                // 11110zzz 10zzxxxx 10xxxxxx 10xxxxxx -->
+            $x2 = (ord($char{3}) & 0x0000003F);
-                // 000zzzzz xxxxxxxx xxxxxxxx
+            return ($z1 | $z2 | $x1 | $x2);
-                $z1 = (ord($char{0}) & 0x00000007) << 18;
-                $z2 = (ord($char{1}) & 0x0000003F) << 12;
-                $x1 = (ord($char{2}) & 0x0000003F) << 6;
-                $x2 = (ord($char{3}) & 0x0000003F);
-                return ($z1 | $z2 | $x1 | $x2);
-            default:
-                // error: malformatted char?
-                return -1;
        }
    }
@@ -1536,18 +1519,18 @@ class Text_LanguageDetect
     * utf8-safe fast character iterator
     *
     * Will get the next character starting from $counter, which will then be
-     * incremented. If a multi-byte char the bytes will be concatenated and 
+     * incremented. If a multi-byte char the bytes will be concatenated and
     * $counter will be incremeted by the number of bytes in the char.
     *
-     * @access  private
+     * @param string $str             the string being iterated over
-     * @param   string  &$str        the string being iterated over
+     * @param int    &$counter        the iterator, will increment by reference
-     * @param   int     &$counter    the iterator, will increment by reference
+     * @param bool   $special_convert whether to do special conversions
-     * @param   bool    $special_convert whether to do special conversions
+     *
-     * @return  char    the next (possibly multi-byte) char from $counter
+     * @return char the next (possibly multi-byte) char from $counter
+     * @access private
     */
-    function _next_char(&$str, &$counter, $special_convert = false)
+    static function _next_char($str, &$counter, $special_convert = false)
    {
        $char = $str{$counter++};
        $ord = ord($char);
@@ -1556,7 +1539,6 @@ class Text_LanguageDetect
        // normal ascii one byte char
        if ($ord <= 127) {
            // special conversions needed for this package
            // (that only apply to regular ascii characters)
            // lower case, and convert all non-alphanumeric characters
@@ -1571,8 +1553,8 @@ class Text_LanguageDetect
            return $char;
-        // multi-byte chars
        } elseif ($ord >> 5 == 6) { // two-byte char
+            // multi-byte chars
            $nextchar = $str{$counter++}; // get next byte
            // lower-casing of non-ascii characters is still incomplete
@@ -1582,27 +1564,27 @@ class Text_LanguageDetect
                if ($ord == 195) {
                    $nextord = ord($nextchar);
                    $nextord_adj = $nextord + 64;
-                    // for a reference, see 
+                    // for a reference, see
                    // http://www.ramsch.org/martin/uni/fmi-hp/iso8859-1.html
                    // &Agrave; - &THORN; but not &times;
-                    if (    $nextord_adj >= 192
+                    if ($nextord_adj >= 192
-                            && $nextord_adj <= 222 
+                        && $nextord_adj <= 222
-                            && $nextord_adj != 215) {
+                        && $nextord_adj != 215
+                    ) {
-                        $nextchar = chr($nextord + 32); 
+                        $nextchar = chr($nextord + 32);
                    }
-                // lower case cyrillic alphabet
                } elseif ($ord == 208) {
+                    // lower case cyrillic alphabet
                    $nextord = ord($nextchar);
                    // if A - Pe
                    if ($nextord >= 144 && $nextord <= 159) {
                        // lower case
                        $nextchar = chr($nextord + 32);
-                    // if Er - Ya
                    } elseif ($nextord >= 160 && $nextord <= 175) {
+                        // if Er - Ya
                        // lower case
                        $char = chr(209); // == $ord++
                        $nextchar = chr($nextord - 32);
@@ -1611,12 +1593,11 @@ class Text_LanguageDetect
            }
            // tag on next byte
-            return $char . $nextchar; 
+            return $char . $nextchar;
        } elseif ($ord >> 4  == 14) { // three-byte char
-            
            // tag on next 2 bytes
-            return $char . $str{$counter++} . $str{$counter++}; 
+            return $char . $str{$counter++} . $str{$counter++};
        } elseif ($ord >> 3 == 30) { // four-byte char
@@ -1628,8 +1609,85 @@ class Text_LanguageDetect
        }
    }
-}
+    /**
+     * Translates a language identifier given in the configured name mode
+     * into the language name that is used internally.
+     *
+     * Accepts a single string or an array of identifiers.
+     *
+     * @param string|array $lang       A language description ("english"/"en"/"eng")
+     * @param boolean      $convertKey For array input: when true the array keys
+     *                                 are converted, otherwise the values are.
+     *
+     * @return string|array Language name(s); a code the lookup does not know
+     *                      becomes '' because the result is cast to string
+     */
+    function _convertFromNameMode($lang, $convertKey = false)
+    {
+        // Name mode 0: the value is handed back untouched.
+        if ($this->_name_mode == 0) {
+            return $lang;
+        }
+        // Mode 2 selects the 2-letter lookup, every other mode the 3-letter one.
+        $lookup = ($this->_name_mode == 2) ? 'code2ToName' : 'code3ToName';
+        if (is_string($lang)) {
+            return (string)Text_LanguageDetect_ISO639::$lookup($lang);
+        }
+        $converted = array();
+        foreach ($lang as $code => $item) {
+            if (!$convertKey) {
+                // Values are the identifiers; keys are kept as they are.
+                $converted[$code] = (string)Text_LanguageDetect_ISO639::$lookup($item);
+                continue;
+            }
+            // Keys are the identifiers; values are carried over unchanged.
+            $name = (string)Text_LanguageDetect_ISO639::$lookup($code);
+            $converted[$name] = $item;
+        }
+        return $converted;
+    }
-/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
+    /**
+     * Translates an internally used language name into the configured
+     * name mode for output.
+     *
+     * Accepts a single string or an array of names.
+     *
+     * @param string|array $lang       A language name as used internally
+     * @param boolean      $convertKey For array input: when true the array keys
+     *                                 are converted, otherwise the values are.
+     *
+     * @return string|array The input unchanged in name mode 0, otherwise the
+     *                      looked-up code(s); NULL where the lookup finds nothing
+     */
+    function _convertToNameMode($lang, $convertKey = false)
+    {
+        // Name mode 0: the value is handed back untouched.
+        if ($this->_name_mode == 0) {
+            return $lang;
+        }
+        // Mode 2 selects the 2-letter lookup, every other mode the 3-letter one.
+        $lookup = ($this->_name_mode == 2) ? 'nameToCode2' : 'nameToCode3';
+        if (is_string($lang)) {
+            return Text_LanguageDetect_ISO639::$lookup($lang);
+        }
+        $converted = array();
+        foreach ($lang as $name => $item) {
+            if (!$convertKey) {
+                // Values are the names; keys are kept as they are.
+                $converted[$name] = Text_LanguageDetect_ISO639::$lookup($item);
+                continue;
+            }
+            // Keys are the names; values are carried over unchanged.
+            $code = Text_LanguageDetect_ISO639::$lookup($name);
+            $converted[$code] = $item;
+        }
+        return $converted;
+    }
+}
-?>
+/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
+\ No newline at end of file
diff --git a/inc/3rdparty/libraries/language-detect/LanguageDetect/Exception.php b/inc/3rdparty/libraries/language-detect/LanguageDetect/Exception.php
new file mode 100644
index 00000000..196d994f
--- /dev/null
+++ b/inc/3rdparty/libraries/language-detect/LanguageDetect/Exception.php
@@ -0,0 +1,57 @@
+<?php
+/**
+ * Exception class of the Text_LanguageDetect package.
+ *
+ * Adds no behaviour of its own; it only defines integer constants that
+ * name the individual error conditions (presumably used as the exception
+ * code by the throwing code - not visible here, confirm against callers).
+ */
+class Text_LanguageDetect_Exception extends Exception
+{
+    /**
+     * Database file could not be found
+     */
+    const DB_NOT_FOUND = 10;
+    /**
+     * Database file found, but not readable
+     */
+    const DB_NOT_READABLE = 11;
+    /**
+     * Database file is empty
+     */
+    const DB_EMPTY = 12;
+    /**
+     * Database contents are not a PHP array
+     */
+    const DB_NOT_ARRAY = 13;
+    /**
+     * Magic quotes are activated
+     */
+    const MAGIC_QUOTES = 14;
+    /**
+     * Parameter of invalid type passed to method
+     */
+    const PARAM_TYPE = 20;
+    /**
+     * Character in parameter is invalid
+     */
+    const INVALID_CHAR = 21;
+    /**
+     * Language is not in the database
+     */
+    const UNKNOWN_LANGUAGE = 30;
+    /**
+     * Error during block detection
+     */
+    const BLOCK_DETECTION = 40;
+    /**
+     * Error while clustering languages
+     */
+    const NO_HIGHEST_KEY = 50;
+}
diff --git a/inc/3rdparty/libraries/language-detect/LanguageDetect/ISO639.php b/inc/3rdparty/libraries/language-detect/LanguageDetect/ISO639.php
new file mode 100644
index 00000000..05b0590d
--- /dev/null
+++ b/inc/3rdparty/libraries/language-detect/LanguageDetect/ISO639.php
@@ -0,0 +1,339 @@
+<?php
+/**
+ * Part of Text_LanguageDetect
+ *
+ * PHP version 5
+ *
+ * @category  Text
+ * @package   Text_LanguageDetect
+ * @author    Christian Weiske <cweiske@php.net>
+ * @copyright 2011 Christian Weiske <cweiske@php.net>
+ * @license   http://www.debian.org/misc/bsd.license BSD
+ * @version   SVN: $Id$
+ * @link      http://pear.php.net/package/Text_LanguageDetect/
+ */
+/**
+ * Provides a mapping between the languages from lang.dat and the
+ * ISO 639-1 and ISO-639-2 codes.
+ *
+ * Note that this class contains only languages that exist in lang.dat.
+ *
+ * @category  Text
+ * @package   Text_LanguageDetect
+ * @author    Christian Weiske <cweiske@php.net>
+ * @copyright 2011 Christian Weiske <cweiske@php.net>
+ * @license   http://www.debian.org/misc/bsd.license BSD
+ * @link      http://www.loc.gov/standards/iso639-2/php/code_list.php
+ */
+class Text_LanguageDetect_ISO639
+{
+    /**
+     * Maps all language names from the language database to the
+     * ISO 639-1 2-letter language code.
+     *
+     * NULL indicates that there is no 2-letter code.
+     *
+     * @var array
+     */
+    public static $nameToCode2 = array(
+        'albanian'   => 'sq',
+        'arabic'     => 'ar',
+        'azeri'      => 'az',
+        'bengali'    => 'bn',
+        'bulgarian'  => 'bg',
+        'cebuano'    => null,
+        'croatian'   => 'hr',
+        'czech'      => 'cs',
+        'danish'     => 'da',
+        'dutch'      => 'nl',
+        'english'    => 'en',
+        'estonian'   => 'et',
+        'farsi'      => 'fa',
+        'finnish'    => 'fi',
+        'french'     => 'fr',
+        'german'     => 'de',
+        'hausa'      => 'ha',
+        'hawaiian'   => null,
+        'hindi'      => 'hi',
+        'hungarian'  => 'hu',
+        'icelandic'  => 'is',
+        'indonesian' => 'id',
+        'italian'    => 'it',
+        'kazakh'     => 'kk',
+        'kyrgyz'     => 'ky',
+        'latin'      => 'la',
+        'latvian'    => 'lv',
+        'lithuanian' => 'lt',
+        'macedonian' => 'mk',
+        'mongolian'  => 'mn',
+        'nepali'     => 'ne',
+        'norwegian'  => 'no',
+        'pashto'     => 'ps',
+        'pidgin'     => null,
+        'polish'     => 'pl',
+        'portuguese' => 'pt',
+        'romanian'   => 'ro',
+        'russian'    => 'ru',
+        'serbian'    => 'sr',
+        'slovak'     => 'sk',
+        'slovene'    => 'sl',
+        'somali'     => 'so',
+        'spanish'    => 'es',
+        'swahili'    => 'sw',
+        'swedish'    => 'sv',
+        'tagalog'    => 'tl',
+        'turkish'    => 'tr',
+        'ukrainian'  => 'uk',
+        'urdu'       => 'ur',
+        'uzbek'      => 'uz',
+        'vietnamese' => 'vi',
+        'welsh'      => 'cy',
+    );
+    /**
+     * Maps all language names from the language database to the
+     * ISO 639-2 3-letter language code.
+     *
+     * @var array
+     */
+    public static $nameToCode3 = array(
+        'albanian'   => 'sqi',
+        'arabic'     => 'ara',
+        'azeri'      => 'aze',
+        'bengali'    => 'ben',
+        'bulgarian'  => 'bul',
+        'cebuano'    => 'ceb',
+        'croatian'   => 'hrv',
+        'czech'      => 'ces',
+        'danish'     => 'dan',
+        'dutch'      => 'nld',
+        'english'    => 'eng',
+        'estonian'   => 'est',
+        'farsi'      => 'fas',
+        'finnish'    => 'fin',
+        'french'     => 'fra',
+        'german'     => 'deu',
+        'hausa'      => 'hau',
+        'hawaiian'   => 'haw',
+        'hindi'      => 'hin',
+        'hungarian'  => 'hun',
+        'icelandic'  => 'isl',
+        'indonesian' => 'ind',
+        'italian'    => 'ita',
+        'kazakh'     => 'kaz',
+        'kyrgyz'     => 'kir',
+        'latin'      => 'lat',
+        'latvian'    => 'lav',
+        'lithuanian' => 'lit',
+        'macedonian' => 'mkd',
+        'mongolian'  => 'mon',
+        'nepali'     => 'nep',
+        'norwegian'  => 'nor',
+        'pashto'     => 'pus',
+        'pidgin'     => 'crp',
+        'polish'     => 'pol',
+        'portuguese' => 'por',
+        'romanian'   => 'ron',
+        'russian'    => 'rus',
+        'serbian'    => 'srp',
+        'slovak'     => 'slk',
+        'slovene'    => 'slv',
+        'somali'     => 'som',
+        'spanish'    => 'spa',
+        'swahili'    => 'swa',
+        'swedish'    => 'swe',
+        'tagalog'    => 'tgl',
+        'turkish'    => 'tur',
+        'ukrainian'  => 'ukr',
+        'urdu'       => 'urd',
+        'uzbek'      => 'uzb',
+        'vietnamese' => 'vie',
+        'welsh'      => 'cym',
+    );
+    /**
+     * Maps ISO 639-1 2-letter language codes to the language names
+     * in the language database
+     *
+     * Not all languages have a 2 letter code, so some are missing
+     *
+     * @var array
+     */
+    public static $code2ToName = array(
+        'ar' => 'arabic',
+        'az' => 'azeri',
+        'bg' => 'bulgarian',
+        'bn' => 'bengali',
+        'cs' => 'czech',
+        'cy' => 'welsh',
+        'da' => 'danish',
+        'de' => 'german',
+        'en' => 'english',
+        'es' => 'spanish',
+        'et' => 'estonian',
+        'fa' => 'farsi',
+        'fi' => 'finnish',
+        'fr' => 'french',
+        'ha' => 'hausa',
+        'hi' => 'hindi',
+        'hr' => 'croatian',
+        'hu' => 'hungarian',
+        'id' => 'indonesian',
+        'is' => 'icelandic',
+        'it' => 'italian',
+        'kk' => 'kazakh',
+        'ky' => 'kyrgyz',
+        'la' => 'latin',
+        'lt' => 'lithuanian',
+        'lv' => 'latvian',
+        'mk' => 'macedonian',
+        'mn' => 'mongolian',
+        'ne' => 'nepali',
+        'nl' => 'dutch',
+        'no' => 'norwegian',
+        'pl' => 'polish',
+        'ps' => 'pashto',
+        'pt' => 'portuguese',
+        'ro' => 'romanian',
+        'ru' => 'russian',
+        'sk' => 'slovak',
+        'sl' => 'slovene',
+        'so' => 'somali',
+        'sq' => 'albanian',
+        'sr' => 'serbian',
+        'sv' => 'swedish',
+        'sw' => 'swahili',
+        'tl' => 'tagalog',
+        'tr' => 'turkish',
+        'uk' => 'ukrainian',
+        'ur' => 'urdu',
+        'uz' => 'uzbek',
+        'vi' => 'vietnamese',
+    );
+    /**
+     * Maps ISO 639-2 3-letter language codes to the language names
+     * in the language database.
+     *
+     * Must stay the exact inverse of $nameToCode3. Romanian is keyed by
+     * "ron", the code $nameToCode3 emits ("rom" is the code for Romany).
+     *
+     * @var array
+     */
+    public static $code3ToName = array(
+        'ara' => 'arabic',
+        'aze' => 'azeri',
+        'ben' => 'bengali',
+        'bul' => 'bulgarian',
+        'ceb' => 'cebuano',
+        'ces' => 'czech',
+        'crp' => 'pidgin',
+        'cym' => 'welsh',
+        'dan' => 'danish',
+        'deu' => 'german',
+        'eng' => 'english',
+        'est' => 'estonian',
+        'fas' => 'farsi',
+        'fin' => 'finnish',
+        'fra' => 'french',
+        'hau' => 'hausa',
+        'haw' => 'hawaiian',
+        'hin' => 'hindi',
+        'hrv' => 'croatian',
+        'hun' => 'hungarian',
+        'ind' => 'indonesian',
+        'isl' => 'icelandic',
+        'ita' => 'italian',
+        'kaz' => 'kazakh',
+        'kir' => 'kyrgyz',
+        'lat' => 'latin',
+        'lav' => 'latvian',
+        'lit' => 'lithuanian',
+        'mkd' => 'macedonian',
+        'mon' => 'mongolian',
+        'nep' => 'nepali',
+        'nld' => 'dutch',
+        'nor' => 'norwegian',
+        'pol' => 'polish',
+        'por' => 'portuguese',
+        'pus' => 'pashto',
+        'ron' => 'romanian',
+        'rus' => 'russian',
+        'slk' => 'slovak',
+        'slv' => 'slovene',
+        'som' => 'somali',
+        'spa' => 'spanish',
+        'sqi' => 'albanian',
+        'srp' => 'serbian',
+        'swa' => 'swahili',
+        'swe' => 'swedish',
+        'tgl' => 'tagalog',
+        'tur' => 'turkish',
+        'ukr' => 'ukrainian',
+        'urd' => 'urdu',
+        'uzb' => 'uzbek',
+        'vie' => 'vietnamese',
+    );
+    /**
+     * Looks up the ISO 639-1 two-letter code for a language name.
+     *
+     * The name is lower-cased before the lookup.
+     *
+     * @param string $lang English language name like "swedish"
+     *
+     * @return string Two-letter language code (e.g. "sv"); NULL if the name is
+     *                unknown or the table holds no two-letter code for it
+     */
+    public static function nameToCode2($lang)
+    {
+        $name = strtolower($lang);
+        // isset() is also false for the NULL entries, so both cases yield NULL.
+        return isset(self::$nameToCode2[$name]) ? self::$nameToCode2[$name] : null;
+    }
+    /**
+     * Looks up the ISO 639-2 three-letter code for a language name.
+     *
+     * The name is lower-cased before the lookup.
+     *
+     * @param string $lang English language name like "swedish"
+     *
+     * @return string Three-letter language code (e.g. "swe") or NULL if not found
+     */
+    public static function nameToCode3($lang)
+    {
+        $name = strtolower($lang);
+        return isset(self::$nameToCode3[$name]) ? self::$nameToCode3[$name] : null;
+    }
+    /**
+     * Returns the language name for the given 2-letter ISO 639-1 code.
+     *
+     * The code is lower-cased before the lookup, so "SV" and "sv" are
+     * treated alike (matching nameToCode2()/nameToCode3()).
+     *
+     * @param string $code Two-letter language code (e.g. "sv")
+     *
+     * @return string English language name like "swedish" or NULL if not found
+     */
+    public static function code2ToName($code)
+    {
+        // The lower-cased value must be the one used as the array key;
+        // previously it was stored in an unused variable.
+        $code = strtolower($code);
+        if (!isset(self::$code2ToName[$code])) {
+            return null;
+        }
+        return self::$code2ToName[$code];
+    }
+    /**
+     * Returns the language name for the given 3-letter ISO 639-2 code.
+     *
+     * The code is lower-cased before the lookup, so "SWE" and "swe" are
+     * treated alike (matching nameToCode2()/nameToCode3()).
+     *
+     * @param string $code Three-letter language code (e.g. "swe")
+     *
+     * @return string English language name like "swedish" or NULL if not found
+     */
+    public static function code3ToName($code)
+    {
+        // The lower-cased value must be the one used as the array key;
+        // previously it was stored in an unused variable.
+        $code = strtolower($code);
+        if (!isset(self::$code3ToName[$code])) {
+            return null;
+        }
+        return self::$code3ToName[$code];
+    }
+}
+\ No newline at end of file
diff --git a/inc/3rdparty/libraries/language-detect/Parser.php b/inc/3rdparty/libraries/language-detect/LanguageDetect/Parser.php
index 7f15fa98..fb0e1e20 100644
--- a/inc/3rdparty/libraries/language-detect/Parser.php
+++ b/inc/3rdparty/libraries/language-detect/LanguageDetect/Parser.php
@@ -8,7 +8,7 @@
 * @author      Nicholas Pisarro
 * @copyright   2006
 * @license     BSD
- * @version     CVS: $Id: Parser.php,v 1.5 2006/03/11 05:45:05 taak Exp $
+ * @version     CVS: $Id: Parser.php 322327 2012-01-15 17:55:59Z cweiske $
 * @link        http://pear.php.net/package/Text_LanguageDetect/
 * @link        http://langdetect.blogspot.com/
 */
@@ -28,7 +28,7 @@
 * @author      Nicholas Pisarro
 * @copyright   2006
 * @license     BSD
- * @version     release: 0.2.3
+ * @version     release: 0.3.0
 */
 class Text_LanguageDetect_Parser extends Text_LanguageDetect
 {
@@ -102,21 +102,17 @@ class Text_LanguageDetect_Parser extends Text_LanguageDetect
     * @access  private
     * @param   string  $string     string to be parsed
     */
-    function Text_LanguageDetect_Parser($string, $db=null, $unicode_db=null) {
+    function Text_LanguageDetect_Parser($string) {
-                if (isset($db)) $this->_db_filename = $db;
-                if (isset($unicode_db)) $this->_unicode_db_filename = $unicode_db;      
        $this->_string = $string;
    }
    /**
     * Returns true if a string is suitable for parsing
     *
-     * @static
-     * @access  public
     * @param   string  $str    input string to test
     * @return  bool            true if acceptable, false if not
     */
-    function validateString($str) {
+    public static function validateString($str) {
        if (!empty($str) && strlen($str) > 3 && preg_match('/\S/', $str)) {
            return true;
        } else {
@@ -222,8 +218,7 @@ class Text_LanguageDetect_Parser extends Text_LanguageDetect
        // unicode startup
        if ($this->_compile_unicode) {
-            $blocks =& $this->_read_unicode_block_db();
+            $blocks = $this->_read_unicode_block_db();
            $block_count = count($blocks);
            $skipped_count = 0;
@@ -349,6 +344,4 @@ class Text_LanguageDetect_Parser extends Text_LanguageDetect
    }
 }
-/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
+/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
+\ No newline at end of file
-?>
diff --git a/inc/3rdparty/libraries/readability/Readability.php b/inc/3rdparty/libraries/readability/Readability.php
index 2e8991cc..d0f09d74 100644
--- a/inc/3rdparty/libraries/readability/Readability.php
+++ b/inc/3rdparty/libraries/readability/Readability.php
@@ -1,1138 +1,1138 @@
-<?php
+<?php
-/** 
+/** 
-* Arc90's Readability ported to PHP for FiveFilters.org
+* Arc90's Readability ported to PHP for FiveFilters.org
-* Based on readability.js version 1.7.1 (without multi-page support)
+* Based on readability.js version 1.7.1 (without multi-page support)
-* Updated to allow HTML5 parsing with html5lib
+* Updated to allow HTML5 parsing with html5lib
-* Updated with lightClean mode to preserve more images and youtube/vimeo/viddler embeds
+* Updated with lightClean mode to preserve more images and youtube/vimeo/viddler embeds
-* ------------------------------------------------------
+* ------------------------------------------------------
-* Original URL: http://lab.arc90.com/experiments/readability/js/readability.js
+* Original URL: http://lab.arc90.com/experiments/readability/js/readability.js
-* Arc90's project URL: http://lab.arc90.com/experiments/readability/
+* Arc90's project URL: http://lab.arc90.com/experiments/readability/
-* JS Source: http://code.google.com/p/arc90labs-readability
+* JS Source: http://code.google.com/p/arc90labs-readability
-* Ported by: Keyvan Minoukadeh, http://www.keyvan.net
+* Ported by: Keyvan Minoukadeh, http://www.keyvan.net
-* More information: http://fivefilters.org/content-only/
+* More information: http://fivefilters.org/content-only/
-* License: Apache License, Version 2.0
+* License: Apache License, Version 2.0
-* Requires: PHP5
+* Requires: PHP5
-* Date: 2012-09-19
+* Date: 2012-09-19
-* 
+* 
-* Differences between the PHP port and the original
+* Differences between the PHP port and the original
-* ------------------------------------------------------
+* ------------------------------------------------------
-* Arc90's Readability is designed to run in the browser. It works on the DOM 
+* Arc90's Readability is designed to run in the browser. It works on the DOM 
-* tree (the parsed HTML) after the page's CSS styles have been applied and 
+* tree (the parsed HTML) after the page's CSS styles have been applied and 
-* Javascript code executed. This PHP port does not run inside a browser. 
+* Javascript code executed. This PHP port does not run inside a browser. 
-* We use PHP's ability to parse HTML to build our DOM tree, but we cannot 
+* We use PHP's ability to parse HTML to build our DOM tree, but we cannot 
-* rely on CSS or Javascript support. As such, the results will not always 
+* rely on CSS or Javascript support. As such, the results will not always 
-* match Arc90's Readability. (For example, if a web page contains CSS style 
+* match Arc90's Readability. (For example, if a web page contains CSS style 
-* rules or Javascript code which hide certain HTML elements from display, 
+* rules or Javascript code which hide certain HTML elements from display, 
-* Arc90's Readability will dismiss those from consideration but our PHP port, 
+* Arc90's Readability will dismiss those from consideration but our PHP port, 
-* unable to understand CSS or Javascript, will not know any better.)
+* unable to understand CSS or Javascript, will not know any better.)
-* 
+* 
-* Another significant difference is that the aim of Arc90's Readability is 
+* Another significant difference is that the aim of Arc90's Readability is 
-* to re-present the main content block of a given web page so users can 
+* to re-present the main content block of a given web page so users can 
-* read it more easily in their browsers. Correct identification, clean up, 
+* read it more easily in their browsers. Correct identification, clean up, 
-* and separation of the content block is only a part of this process. 
+* and separation of the content block is only a part of this process. 
-* This PHP port is only concerned with this part, it does not include code 
+* This PHP port is only concerned with this part, it does not include code 
-* that relates to presentation in the browser - Arc90 already do 
+* that relates to presentation in the browser - Arc90 already do 
-* that extremely well, and for PDF output there's FiveFilters.org's 
+* that extremely well, and for PDF output there's FiveFilters.org's 
-* PDF Newspaper: http://fivefilters.org/pdf-newspaper/.
+* PDF Newspaper: http://fivefilters.org/pdf-newspaper/.
-* 
+* 
-* Finally, this class contains methods that might be useful for developers 
+* Finally, this class contains methods that might be useful for developers 
-* working on HTML document fragments. So without deviating too much from 
+* working on HTML document fragments. So without deviating too much from 
-* the original code (which I don't want to do because it makes debugging 
+* the original code (which I don't want to do because it makes debugging 
-* and updating more difficult), I've tried to make it a little more 
+* and updating more difficult), I've tried to make it a little more 
-* developer friendly. You should be able to use the methods here on 
+* developer friendly. You should be able to use the methods here on 
-* existing DOMElement objects without passing an entire HTML document to 
+* existing DOMElement objects without passing an entire HTML document to 
-* be parsed.
+* be parsed.
-*/
+*/
-// This class allows us to do JavaScript like assignements to innerHTML
+// This class allows us to do JavaScript like assignements to innerHTML
-require_once(dirname(__FILE__).'/JSLikeHTMLElement.php');
+require_once(dirname(__FILE__).'/JSLikeHTMLElement.php');
-// Alternative usage (for testing only!)
+// Alternative usage (for testing only!)
-// uncomment the lines below and call Readability.php in your browser 
+// uncomment the lines below and call Readability.php in your browser 
-// passing it the URL of the page you'd like content from, e.g.:
+// passing it the URL of the page you'd like content from, e.g.:
-// Readability.php?url=http://medialens.org/alerts/09/090615_the_guardian_climate.php
+// Readability.php?url=http://medialens.org/alerts/09/090615_the_guardian_climate.php
-/*
+/*
-if (!isset($_GET['url']) || $_GET['url'] == '') {
+if (!isset($_GET['url']) || $_GET['url'] == '') {
-        die('Please pass a URL to the script. E.g. Readability.php?url=bla.com/story.html');
+        die('Please pass a URL to the script. E.g. Readability.php?url=bla.com/story.html');
-}
+}
-$url = $_GET['url'];
+$url = $_GET['url'];
-if (!preg_match('!^https?://!i', $url)) $url = 'http://'.$url;
+if (!preg_match('!^https?://!i', $url)) $url = 'http://'.$url;
-$html = file_get_contents($url);
+$html = file_get_contents($url);
-$r = new Readability($html, $url);
+$r = new Readability($html, $url);
-$r->init();
+$r->init();
-echo $r->articleContent->innerHTML;
+echo $r->articleContent->innerHTML;
-*/
+*/
-class Readability
+class Readability
-{
+{
-        public $version = '1.7.1-without-multi-page';
+        public $version = '1.7.1-without-multi-page';
-        public $convertLinksToFootnotes = false;
+        public $convertLinksToFootnotes = false;
-        public $revertForcedParagraphElements = true;
+        public $revertForcedParagraphElements = true;
-        public $articleTitle;
+        public $articleTitle;
-        public $articleContent;
+        public $articleContent;
-        public $dom;
+        public $dom;
-        public $url = null; // optional - URL where HTML was retrieved
+        public $url = null; // optional - URL where HTML was retrieved
-        public $debug = false;
+        public $debug = false;
-        public $lightClean = true; // preserves more content (experimental) added 2012-09-19
+        public $lightClean = true; // preserves more content (experimental) added 2012-09-19
-        protected $body = null; // 
+        protected $body = null; // 
-        protected $bodyCache = null; // Cache the body HTML in case we need to re-use it later
+        protected $bodyCache = null; // Cache the body HTML in case we need to re-use it later
-        protected $flags = 7; // 1 | 2 | 4;   // Start with all flags set.
+        protected $flags = 7; // 1 | 2 | 4;   // Start with all flags set.
-        protected $success = false; // indicates whether we were able to extract or not
+        protected $success = false; // indicates whether we were able to extract or not
-        
+        
-        /**
+        /**
-        * All of the regular expressions in use within readability.
+        * All of the regular expressions in use within readability.
-        * Defined up here so we don't instantiate them repeatedly in loops.
+        * Defined up here so we don't instantiate them repeatedly in loops.
-        **/
+        **/
-        public $regexps = array(
+        public $regexps = array(
-                'unlikelyCandidates' => '/combx|comment|community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup/i',
+                'unlikelyCandidates' => '/combx|comment|community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup/i',
-                'okMaybeItsACandidate' => '/and|article|body|column|main|shadow/i',
+                'okMaybeItsACandidate' => '/and|article|body|column|main|shadow/i',
-                'positive' => '/article|body|content|entry|hentry|main|page|attachment|pagination|post|text|blog|story/i',
+                'positive' => '/article|body|content|entry|hentry|main|page|attachment|pagination|post|text|blog|story/i',
-                'negative' => '/combx|comment|com-|contact|foot|footer|_nav|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget/i',
+                'negative' => '/combx|comment|com-|contact|foot|footer|_nav|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget/i',
-                'divToPElements' => '/<(a|blockquote|dl|div|img|ol|p|pre|table|ul)/i',
+                'divToPElements' => '/<(a|blockquote|dl|div|img|ol|p|pre|table|ul)/i',
-                'replaceBrs' => '/(<br[^>]*>[ \n\r\t]*){2,}/i',
+                'replaceBrs' => '/(<br[^>]*>[ \n\r\t]*){2,}/i',
-                'replaceFonts' => '/<(\/?)font[^>]*>/i',
+                'replaceFonts' => '/<(\/?)font[^>]*>/i',
-                // 'trimRe' => '/^\s+|\s+$/g', // PHP has trim()
+                // 'trimRe' => '/^\s+|\s+$/g', // PHP has trim()
-                'normalize' => '/\s{2,}/',
+                'normalize' => '/\s{2,}/',
-                'killBreaks' => '/(<br\s*\/?>(\s|&nbsp;?)*){1,}/',
+                'killBreaks' => '/(<br\s*\/?>(\s|&nbsp;?)*){1,}/',
-                'video' => '!//(player\.|www\.)?(youtube|vimeo|viddler)\.com!i',
+                'video' => '!//(player\.|www\.)?(youtube|vimeo|viddler)\.com!i',
-                'skipFootnoteLink' => '/^\s*(\[?[a-z0-9]{1,2}\]?|^|edit|citation needed)\s*$/i'
+                'skipFootnoteLink' => '/^\s*(\[?[a-z0-9]{1,2}\]?|^|edit|citation needed)\s*$/i'
-        );      
+        );      
-        
+        
-        /* constants */
+        /* constants */
-        const FLAG_STRIP_UNLIKELYS = 1;
+        const FLAG_STRIP_UNLIKELYS = 1;
-        const FLAG_WEIGHT_CLASSES = 2;
+        const FLAG_WEIGHT_CLASSES = 2;
-        const FLAG_CLEAN_CONDITIONALLY = 4;
+        const FLAG_CLEAN_CONDITIONALLY = 4;
-        
+        
-        /**
+        /**
-        * Create instance of Readability
+        * Create instance of Readability
-        * @param string UTF-8 encoded string
+        * @param string UTF-8 encoded string
-        * @param string (optional) URL associated with HTML (used for footnotes)
+        * @param string (optional) URL associated with HTML (used for footnotes)
-        * @param string which parser to use for turning raw HTML into a DOMDocument (either 'libxml' or 'html5lib')
+        * @param string which parser to use for turning raw HTML into a DOMDocument (either 'libxml' or 'html5lib')
-        */      
+        */      
-        function __construct($html, $url=null, $parser='libxml')
+        function __construct($html, $url=null, $parser='libxml')
-        {
+        {
-                $this->url = $url;
+                $this->url = $url;
-                /* Turn all double br's into p's */
+                /* Turn all double br's into p's */
-                $html = preg_replace($this->regexps['replaceBrs'], '</p><p>', $html);
+                $html = preg_replace($this->regexps['replaceBrs'], '</p><p>', $html);
-                $html = preg_replace($this->regexps['replaceFonts'], '<$1span>', $html);
+                $html = preg_replace($this->regexps['replaceFonts'], '<$1span>', $html);
-                $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
+                $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
-                if (trim($html) == '') $html = '<html></html>';
+                if (trim($html) == '') $html = '<html></html>';
-                if ($parser=='html5lib' && ($this->dom = HTML5_Parser::parse($html))) {
+                if ($parser=='html5lib' && ($this->dom = HTML5_Parser::parse($html))) {
-                        // all good
+                        // all good
-                } else {
+                } else {
-                        $this->dom = new DOMDocument();
+                        $this->dom = new DOMDocument();
-                        $this->dom->preserveWhiteSpace = false;
+                        $this->dom->preserveWhiteSpace = false;
-                        @$this->dom->loadHTML($html);
+                        @$this->dom->loadHTML($html);
-                }
+                }
-                $this->dom->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
+                $this->dom->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
-        }
+        }
-        /**
+        /**
-        * Get article title element
+        * Get article title element
-        * @return DOMElement
+        * @return DOMElement
-        */
+        */
-        public function getTitle() {
+        public function getTitle() {
-                return $this->articleTitle;
+                return $this->articleTitle;
-        }
+        }
-        
+        
-        /**
+        /**
-        * Get article content element
+        * Get article content element
-        * @return DOMElement
+        * @return DOMElement
-        */
+        */
-        public function getContent() {
+        public function getContent() {
-                return $this->articleContent;
+                return $this->articleContent;
-        }       
+        }       
-        
+        
-        /**
+        /**
-        * Runs readability.
+        * Runs readability.
-        * 
+        * 
-        * Workflow:
+        * Workflow:
-        *  1. Prep the document by removing script tags, css, etc.
+        *  1. Prep the document by removing script tags, css, etc.
-        *  2. Build readability's DOM tree.
+        *  2. Build readability's DOM tree.
-        *  3. Grab the article content from the current dom tree.
+        *  3. Grab the article content from the current dom tree.
-        *  4. Replace the current DOM tree with the new one.
+        *  4. Replace the current DOM tree with the new one.
-        *  5. Read peacefully.
+        *  5. Read peacefully.
-        *
+        *
-        * @return boolean true if we found content, false otherwise
+        * @return boolean true if we found content, false otherwise
-        **/
+        **/
-        public function init()
+        public function init()
-        {
+        {
-                if (!isset($this->dom->documentElement)) return false;
+                if (!isset($this->dom->documentElement)) return false;
-                $this->removeScripts($this->dom);
+                $this->removeScripts($this->dom);
-                //die($this->getInnerHTML($this->dom->documentElement));
+                //die($this->getInnerHTML($this->dom->documentElement));
-                
+                
-                // Assume successful outcome
+                // Assume successful outcome
-                $this->success = true;
+                $this->success = true;
-                $bodyElems = $this->dom->getElementsByTagName('body');
+                $bodyElems = $this->dom->getElementsByTagName('body');
-                if ($bodyElems->length > 0) {
+                if ($bodyElems->length > 0) {
-                        if ($this->bodyCache == null) {
+                        if ($this->bodyCache == null) {
-                                $this->bodyCache = $bodyElems->item(0)->innerHTML;
+                                $this->bodyCache = $bodyElems->item(0)->innerHTML;
-                        }
+                        }
-                        if ($this->body == null) {
+                        if ($this->body == null) {
-                                $this->body = $bodyElems->item(0);
+                                $this->body = $bodyElems->item(0);
-                        }
+                        }
-                }
+                }
-                $this->prepDocument();
+                $this->prepDocument();
-                
+                
-                //die($this->dom->documentElement->parentNode->nodeType);
+                //die($this->dom->documentElement->parentNode->nodeType);
-                //$this->setInnerHTML($this->dom->documentElement, $this->getInnerHTML($this->dom->documentElement));
+                //$this->setInnerHTML($this->dom->documentElement, $this->getInnerHTML($this->dom->documentElement));
-                //die($this->getInnerHTML($this->dom->documentElement));
+                //die($this->getInnerHTML($this->dom->documentElement));
-                /* Build readability's DOM tree */
+                /* Build readability's DOM tree */
-                $overlay        = $this->dom->createElement('div');
+                $overlay        = $this->dom->createElement('div');
-                $innerDiv       = $this->dom->createElement('div');
+                $innerDiv       = $this->dom->createElement('div');
-                $articleTitle   = $this->getArticleTitle();
+                $articleTitle   = $this->getArticleTitle();
-                $articleContent = $this->grabArticle();
+                $articleContent = $this->grabArticle();
-                if (!$articleContent) {
+                if (!$articleContent) {
-                        $this->success = false;
+                        $this->success = false;
-                        $articleContent = $this->dom->createElement('div');
+                        $articleContent = $this->dom->createElement('div');
-                        $articleContent->setAttribute('id', 'readability-content');
+                        $articleContent->setAttribute('id', 'readability-content');
-                        $articleContent->innerHTML = '<p>Sorry, Readability was unable to parse this page for content.</p>';            
+                        $articleContent->innerHTML = '<p>Sorry, Readability was unable to parse this page for content.</p>';            
-                }
+                }
-                
+                
-                $overlay->setAttribute('id', 'readOverlay');
+                $overlay->setAttribute('id', 'readOverlay');
-                $innerDiv->setAttribute('id', 'readInner');
+                $innerDiv->setAttribute('id', 'readInner');
-                /* Glue the structure of our document together. */
+                /* Glue the structure of our document together. */
-                $innerDiv->appendChild($articleTitle);
+                $innerDiv->appendChild($articleTitle);
-                $innerDiv->appendChild($articleContent);
+                $innerDiv->appendChild($articleContent);
-                $overlay->appendChild($innerDiv);
+                $overlay->appendChild($innerDiv);
-                
+                
-                /* Clear the old HTML, insert the new content. */
+                /* Clear the old HTML, insert the new content. */
-                $this->body->innerHTML = '';
+                $this->body->innerHTML = '';
-                $this->body->appendChild($overlay);
+                $this->body->appendChild($overlay);
-                //document.body.insertBefore(overlay, document.body.firstChild);
+                //document.body.insertBefore(overlay, document.body.firstChild);
-                $this->body->removeAttribute('style');
+                $this->body->removeAttribute('style');
-                $this->postProcessContent($articleContent);
+                $this->postProcessContent($articleContent);
-                
+                
-                // Set title and content instance variables
+                // Set title and content instance variables
-                $this->articleTitle = $articleTitle;
+                $this->articleTitle = $articleTitle;
-                $this->articleContent = $articleContent;
+                $this->articleContent = $articleContent;
-                
+                
-                return $this->success;
+                return $this->success;
-        }
+        }
-        
+        
-        /**
+        /**
-        * Debug
+        * Debug
-        */
+        */
-        protected function dbg($msg) {
+        protected function dbg($msg) {
-                if ($this->debug) echo '* ',$msg, "\n";
+                if ($this->debug) echo '* ',$msg, "\n";
-        }
+        }
-        
+        
-        /**
+        /**
-        * Run any post-process modifications to article content as necessary.
+        * Run any post-process modifications to article content as necessary.
-        *
+        *
-        * @param DOMElement
+        * @param DOMElement
-        * @return void
+        * @return void
-        */
+        */
-        public function postProcessContent($articleContent) {
+        public function postProcessContent($articleContent) {
-                if ($this->convertLinksToFootnotes && !preg_match('/wikipedia\.org/', @$this->url)) { 
+                if ($this->convertLinksToFootnotes && !preg_match('/wikipedia\.org/', @$this->url)) { 
-                        $this->addFootnotes($articleContent);
+                        $this->addFootnotes($articleContent);
-                }
+                }
-        }
+        }
-        
+        
-        /**
+        /**
-        * Get the article title as an H1.
+        * Get the article title as an H1.
-        *
+        *
-        * @return DOMElement
+        * @return DOMElement
-        */
+        */
-        protected function getArticleTitle() {
+        protected function getArticleTitle() {
-                $curTitle = '';
+                $curTitle = '';
-                $origTitle = '';
+                $origTitle = '';
-                try {
+                try {
-                        $curTitle = $origTitle = $this->getInnerText($this->dom->getElementsByTagName('title')->item(0));
+                        $curTitle = $origTitle = $this->getInnerText($this->dom->getElementsByTagName('title')->item(0));
-                } catch(Exception $e) {}
+                } catch(Exception $e) {}
-                
+                
-                if (preg_match('/ [\|\-] /', $curTitle))
+                if (preg_match('/ [\|\-] /', $curTitle))
-                {
+                {
-                        $curTitle = preg_replace('/(.*)[\|\-] .*/i', '$1', $origTitle);
+                        $curTitle = preg_replace('/(.*)[\|\-] .*/i', '$1', $origTitle);
-                        
+                        
-                        if (count(explode(' ', $curTitle)) < 3) {
+                        if (count(explode(' ', $curTitle)) < 3) {
-                                $curTitle = preg_replace('/[^\|\-]*[\|\-](.*)/i', '$1', $origTitle);
+                                $curTitle = preg_replace('/[^\|\-]*[\|\-](.*)/i', '$1', $origTitle);
-                        }
+                        }
-                }
+                }
-                else if (strpos($curTitle, ': ') !== false)
+                else if (strpos($curTitle, ': ') !== false)
-                {
+                {
-                        $curTitle = preg_replace('/.*:(.*)/i', '$1', $origTitle);
+                        $curTitle = preg_replace('/.*:(.*)/i', '$1', $origTitle);
-                        if (count(explode(' ', $curTitle)) < 3) {
+                        if (count(explode(' ', $curTitle)) < 3) {
-                                $curTitle = preg_replace('/[^:]*[:](.*)/i','$1', $origTitle);
+                                $curTitle = preg_replace('/[^:]*[:](.*)/i','$1', $origTitle);
-                        }
+                        }
-                }
+                }
-                else if(strlen($curTitle) > 150 || strlen($curTitle) < 15)
+                else if(strlen($curTitle) > 150 || strlen($curTitle) < 15)
-                {
+                {
-                        $hOnes = $this->dom->getElementsByTagName('h1');
+                        $hOnes = $this->dom->getElementsByTagName('h1');
-                        if($hOnes->length == 1)
+                        if($hOnes->length == 1)
-                        {
+                        {
-                                $curTitle = $this->getInnerText($hOnes->item(0));
+                                $curTitle = $this->getInnerText($hOnes->item(0));
-                        }
+                        }
-                }
+                }
-                $curTitle = trim($curTitle);
+                $curTitle = trim($curTitle);
-                if (count(explode(' ', $curTitle)) <= 4) {
+                if (count(explode(' ', $curTitle)) <= 4) {
-                        $curTitle = $origTitle;
+                        $curTitle = $origTitle;
-                }
+                }
-                
+                
-                $articleTitle = $this->dom->createElement('h1');
+                $articleTitle = $this->dom->createElement('h1');
-                $articleTitle->innerHTML = $curTitle;
+                $articleTitle->innerHTML = $curTitle;
-                
+                
-                return $articleTitle;
+                return $articleTitle;
-        }
+        }
-        
+        
-        /**
+        /**
-        * Prepare the HTML document for readability to scrape it.
+        * Prepare the HTML document for readability to scrape it.
-        * This includes things like stripping javascript, CSS, and handling terrible markup.
+        * This includes things like stripping javascript, CSS, and handling terrible markup.
-        * 
+        * 
-        * @return void
+        * @return void
-        **/
+        **/
-        protected function prepDocument() {
+        protected function prepDocument() {
-                /**
+                /**
-                * In some cases a body element can't be found (if the HTML is totally hosed for example)
+                * In some cases a body element can't be found (if the HTML is totally hosed for example)
-                * so we create a new body node and append it to the document.
+                * so we create a new body node and append it to the document.
-                */
+                */
-                if ($this->body == null)
+                if ($this->body == null)
-                {
+                {
-                        $this->body = $this->dom->createElement('body');
+                        $this->body = $this->dom->createElement('body');
-                        $this->dom->documentElement->appendChild($this->body);
+                        $this->dom->documentElement->appendChild($this->body);
-                }
+                }
-                $this->body->setAttribute('id', 'readabilityBody');
+                $this->body->setAttribute('id', 'readabilityBody');
-                /* Remove all style tags in head */
+                /* Remove all style tags in head */
-                $styleTags = $this->dom->getElementsByTagName('style');
+                $styleTags = $this->dom->getElementsByTagName('style');
-                for ($i = $styleTags->length-1; $i >= 0; $i--)
+                for ($i = $styleTags->length-1; $i >= 0; $i--)
-                {
+                {
-                        $styleTags->item($i)->parentNode->removeChild($styleTags->item($i));
+                        $styleTags->item($i)->parentNode->removeChild($styleTags->item($i));
-                }
+                }
-                /* Turn all double br's into p's */
+                /* Turn all double br's into p's */
-                /* Note, this is pretty costly as far as processing goes. Maybe optimize later. */
+                /* Note, this is pretty costly as far as processing goes. Maybe optimize later. */
-                //document.body.innerHTML = document.body.innerHTML.replace(readability.regexps.replaceBrs, '</p><p>').replace(readability.regexps.replaceFonts, '<$1span>');
+                //document.body.innerHTML = document.body.innerHTML.replace(readability.regexps.replaceBrs, '</p><p>').replace(readability.regexps.replaceFonts, '<$1span>');
-                // We do this in the constructor for PHP as that's when we have raw HTML - before parsing it into a DOM tree.
+                // We do this in the constructor for PHP as that's when we have raw HTML - before parsing it into a DOM tree.
-                // Manipulating innerHTML as it's done in JS is not possible in PHP.
+                // Manipulating innerHTML as it's done in JS is not possible in PHP.
-        }
+        }
-        /**
+        /**
-        * For easier reading, convert this document to have footnotes at the bottom rather than inline links.
+        * For easier reading, convert this document to have footnotes at the bottom rather than inline links.
-        * @see http://www.roughtype.com/archives/2010/05/experiments_in.php
+        * @see http://www.roughtype.com/archives/2010/05/experiments_in.php
-        *
+        *
-        * @return void
+        * @return void
-        **/
+        **/
-        public function addFootnotes($articleContent) {
+        public function addFootnotes($articleContent) {
-                $footnotesWrapper = $this->dom->createElement('div');
+                $footnotesWrapper = $this->dom->createElement('div');
-                $footnotesWrapper->setAttribute('id', 'readability-footnotes');
+                $footnotesWrapper->setAttribute('id', 'readability-footnotes');
-                $footnotesWrapper->innerHTML = '<h3>References</h3>';
+                $footnotesWrapper->innerHTML = '<h3>References</h3>';
-                
+                
-                $articleFootnotes = $this->dom->createElement('ol');
+                $articleFootnotes = $this->dom->createElement('ol');
-                $articleFootnotes->setAttribute('id', 'readability-footnotes-list');
+                $articleFootnotes->setAttribute('id', 'readability-footnotes-list');
-                $footnotesWrapper->appendChild($articleFootnotes);
+                $footnotesWrapper->appendChild($articleFootnotes);
-                
+                
-                $articleLinks = $articleContent->getElementsByTagName('a');
+                $articleLinks = $articleContent->getElementsByTagName('a');
-                
+                
-                $linkCount = 0;
+                $linkCount = 0;
-                for ($i = 0; $i < $articleLinks->length; $i++)
+                for ($i = 0; $i < $articleLinks->length; $i++)
-                {
+                {
-                        $articleLink  = $articleLinks->item($i);
+                        $articleLink  = $articleLinks->item($i);
-                        $footnoteLink = $articleLink->cloneNode(true);
+                        $footnoteLink = $articleLink->cloneNode(true);
-                        $refLink      = $this->dom->createElement('a');
+                        $refLink      = $this->dom->createElement('a');
-                        $footnote     = $this->dom->createElement('li');
+                        $footnote     = $this->dom->createElement('li');
-                        $linkDomain   = @parse_url($footnoteLink->getAttribute('href'), PHP_URL_HOST);
+                        $linkDomain   = @parse_url($footnoteLink->getAttribute('href'), PHP_URL_HOST);
-                        if (!$linkDomain && isset($this->url)) $linkDomain = @parse_url($this->url, PHP_URL_HOST);
+                        if (!$linkDomain && isset($this->url)) $linkDomain = @parse_url($this->url, PHP_URL_HOST);
-                        //linkDomain   = footnoteLink.host ? footnoteLink.host : document.location.host,
+                        //linkDomain   = footnoteLink.host ? footnoteLink.host : document.location.host,
-                        $linkText     = $this->getInnerText($articleLink);
+                        $linkText     = $this->getInnerText($articleLink);
-                        
+                        
-                        if ((strpos($articleLink->getAttribute('class'), 'readability-DoNotFootnote') !== false) || preg_match($this->regexps['skipFootnoteLink'], $linkText)) {
+                        if ((strpos($articleLink->getAttribute('class'), 'readability-DoNotFootnote') !== false) || preg_match($this->regexps['skipFootnoteLink'], $linkText)) {
-                                continue;
+                                continue;
-                        }
+                        }
-                        
+                        
-                        $linkCount++;
+                        $linkCount++;
-                        /** Add a superscript reference after the article link */
+                        /** Add a superscript reference after the article link */
-                        $refLink->setAttribute('href', '#readabilityFootnoteLink-' . $linkCount);
+                        $refLink->setAttribute('href', '#readabilityFootnoteLink-' . $linkCount);
-                        $refLink->innerHTML = '<small><sup>[' . $linkCount . ']</sup></small>';
+                        $refLink->innerHTML = '<small><sup>[' . $linkCount . ']</sup></small>';
-                        $refLink->setAttribute('class', 'readability-DoNotFootnote');
+                        $refLink->setAttribute('class', 'readability-DoNotFootnote');
-                        $refLink->setAttribute('style', 'color: inherit;');
+                        $refLink->setAttribute('style', 'color: inherit;');
-                        
+                        
-                        //TODO: does this work or should we use DOMNode.isSameNode()?
+                        //TODO: does this work or should we use DOMNode.isSameNode()?
-                        if ($articleLink->parentNode->lastChild == $articleLink) {
+                        if ($articleLink->parentNode->lastChild == $articleLink) {
-                                $articleLink->parentNode->appendChild($refLink);
+                                $articleLink->parentNode->appendChild($refLink);
-                        } else {
+                        } else {
-                                $articleLink->parentNode->insertBefore($refLink, $articleLink->nextSibling);
+                                $articleLink->parentNode->insertBefore($refLink, $articleLink->nextSibling);
-                        }
+                        }
-                        $articleLink->setAttribute('style', 'color: inherit; text-decoration: none;');
+                        $articleLink->setAttribute('style', 'color: inherit; text-decoration: none;');
-                        $articleLink->setAttribute('name', 'readabilityLink-' . $linkCount);
+                        $articleLink->setAttribute('name', 'readabilityLink-' . $linkCount);
-                        $footnote->innerHTML = '<small><sup><a href="#readabilityLink-' . $linkCount . '" title="Jump to Link in Article">^</a></sup></small> ';
+                        $footnote->innerHTML = '<small><sup><a href="#readabilityLink-' . $linkCount . '" title="Jump to Link in Article">^</a></sup></small> ';
-                        $footnoteLink->innerHTML = ($footnoteLink->getAttribute('title') != '' ? $footnoteLink->getAttribute('title') : $linkText);
+                        $footnoteLink->innerHTML = ($footnoteLink->getAttribute('title') != '' ? $footnoteLink->getAttribute('title') : $linkText);
-                        $footnoteLink->setAttribute('name', 'readabilityFootnoteLink-' . $linkCount);
+                        $footnoteLink->setAttribute('name', 'readabilityFootnoteLink-' . $linkCount);
-                        
+                        
-                        $footnote->appendChild($footnoteLink);
+                        $footnote->appendChild($footnoteLink);
-                        if ($linkDomain) $footnote->innerHTML = $footnote->innerHTML . '<small> (' . $linkDomain . ')</small>';
+                        if ($linkDomain) $footnote->innerHTML = $footnote->innerHTML . '<small> (' . $linkDomain . ')</small>';
-                        
+                        
-                        $articleFootnotes->appendChild($footnote);
+                        $articleFootnotes->appendChild($footnote);
-                }
+                }
-                if ($linkCount > 0) {
+                if ($linkCount > 0) {
-                        $articleContent->appendChild($footnotesWrapper);           
+                        $articleContent->appendChild($footnotesWrapper);           
-                }
+                }
-        }
+        }
-        /**
+        /**
-        * Reverts P elements with class 'readability-styled'
+        * Reverts P elements with class 'readability-styled'
-        * to text nodes - which is what they were before.
+        * to text nodes - which is what they were before.
-        *
+        *
-        * @param DOMElement
+        * @param DOMElement
-        * @return void
+        * @return void
-        */
+        */
-        function revertReadabilityStyledElements($articleContent) {
+        function revertReadabilityStyledElements($articleContent) {
-                $xpath = new DOMXPath($articleContent->ownerDocument);
+                $xpath = new DOMXPath($articleContent->ownerDocument);
-                $elems = $xpath->query('.//p[@class="readability-styled"]', $articleContent);
+                $elems = $xpath->query('.//p[@class="readability-styled"]', $articleContent);
-                //$elems = $articleContent->getElementsByTagName('p');
+                //$elems = $articleContent->getElementsByTagName('p');
-                for ($i = $elems->length-1; $i >= 0; $i--) {
+                for ($i = $elems->length-1; $i >= 0; $i--) {
-                        $e = $elems->item($i);
+                        $e = $elems->item($i);
-                        $e->parentNode->replaceChild($articleContent->ownerDocument->createTextNode($e->textContent), $e);
+                        $e->parentNode->replaceChild($articleContent->ownerDocument->createTextNode($e->textContent), $e);
-                        //if ($e->hasAttribute('class') && $e->getAttribute('class') == 'readability-styled') {
+                        //if ($e->hasAttribute('class') && $e->getAttribute('class') == 'readability-styled') {
-                        //      $e->parentNode->replaceChild($this->dom->createTextNode($e->textContent), $e);
+                        //      $e->parentNode->replaceChild($this->dom->createTextNode($e->textContent), $e);
-                        //}
+                        //}
-                }
+                }
-        }
+        }
-        
+        
-        /**
+        /**
-        * Prepare the article node for display. Clean out any inline styles,
+        * Prepare the article node for display. Clean out any inline styles,
-        * iframes, forms, strip extraneous <p> tags, etc.
+        * iframes, forms, strip extraneous <p> tags, etc.
-        *
+        *
-        * @param DOMElement
+        * @param DOMElement
-        * @return void
+        * @return void
-        */
+        */
-        function prepArticle($articleContent) {
+        function prepArticle($articleContent) {
-                $this->cleanStyles($articleContent);
+                $this->cleanStyles($articleContent);
-                $this->killBreaks($articleContent);
+                $this->killBreaks($articleContent);
-                if ($this->revertForcedParagraphElements) {
+                if ($this->revertForcedParagraphElements) {
-                        $this->revertReadabilityStyledElements($articleContent);
+                        $this->revertReadabilityStyledElements($articleContent);
-                }
+                }
-                /* Clean out junk from the article content */
+                /* Clean out junk from the article content */
-                $this->cleanConditionally($articleContent, 'form');
+                $this->cleanConditionally($articleContent, 'form');
-                $this->clean($articleContent, 'object');
+                $this->clean($articleContent, 'object');
-                $this->clean($articleContent, 'h1');
+                $this->clean($articleContent, 'h1');
-                /**
+                /**
-                * If there is only one h2, they are probably using it
+                * If there is only one h2, they are probably using it
-                * as a header and not a subheader, so remove it since we already have a header.
+                * as a header and not a subheader, so remove it since we already have a header.
-                ***/
+                ***/
-                if (!$this->lightClean && ($articleContent->getElementsByTagName('h2')->length == 1)) {
+                if (!$this->lightClean && ($articleContent->getElementsByTagName('h2')->length == 1)) {
-                        $this->clean($articleContent, 'h2'); 
+                        $this->clean($articleContent, 'h2'); 
-                }
+                }
-                $this->clean($articleContent, 'iframe');
+                $this->clean($articleContent, 'iframe');
-                $this->cleanHeaders($articleContent);
+                $this->cleanHeaders($articleContent);
-                /* Do these last as the previous stuff may have removed junk that will affect these */
+                /* Do these last as the previous stuff may have removed junk that will affect these */
-                $this->cleanConditionally($articleContent, 'table');
+                $this->cleanConditionally($articleContent, 'table');
-                $this->cleanConditionally($articleContent, 'ul');
+                $this->cleanConditionally($articleContent, 'ul');
-                $this->cleanConditionally($articleContent, 'div');
+                $this->cleanConditionally($articleContent, 'div');
-                /* Remove extra paragraphs */
+                /* Remove extra paragraphs */
-                $articleParagraphs = $articleContent->getElementsByTagName('p');
+                $articleParagraphs = $articleContent->getElementsByTagName('p');
-                for ($i = $articleParagraphs->length-1; $i >= 0; $i--)
+                for ($i = $articleParagraphs->length-1; $i >= 0; $i--)
-                {
+                {
-                        $imgCount    = $articleParagraphs->item($i)->getElementsByTagName('img')->length;
+                        $imgCount    = $articleParagraphs->item($i)->getElementsByTagName('img')->length;
-                        $embedCount  = $articleParagraphs->item($i)->getElementsByTagName('embed')->length;
+                        $embedCount  = $articleParagraphs->item($i)->getElementsByTagName('embed')->length;
-                        $objectCount = $articleParagraphs->item($i)->getElementsByTagName('object')->length;
+                        $objectCount = $articleParagraphs->item($i)->getElementsByTagName('object')->length;
-                        $iframeCount = $articleParagraphs->item($i)->getElementsByTagName('iframe')->length;
+                        $iframeCount = $articleParagraphs->item($i)->getElementsByTagName('iframe')->length;
-                        
+                        
-                        if ($imgCount === 0 && $embedCount === 0 && $objectCount === 0 && $iframeCount === 0 && $this->getInnerText($articleParagraphs->item($i), false) == '')
+                        if ($imgCount === 0 && $embedCount === 0 && $objectCount === 0 && $iframeCount === 0 && $this->getInnerText($articleParagraphs->item($i), false) == '')
-                        {
+                        {
-                                $articleParagraphs->item($i)->parentNode->removeChild($articleParagraphs->item($i));
+                                $articleParagraphs->item($i)->parentNode->removeChild($articleParagraphs->item($i));
-                        }
+                        }
-                }
+                }
-                try {
+                try {
-                        $articleContent->innerHTML = preg_replace('/<br[^>]*>\s*<p/i', '<p', $articleContent->innerHTML);
+                        $articleContent->innerHTML = preg_replace('/<br[^>]*>\s*<p/i', '<p', $articleContent->innerHTML);
-                        //articleContent.innerHTML = articleContent.innerHTML.replace(/<br[^>]*>\s*<p/gi, '<p');      
+                        //articleContent.innerHTML = articleContent.innerHTML.replace(/<br[^>]*>\s*<p/gi, '<p');      
-                }
+                }
-                catch (Exception $e) {
+                catch (Exception $e) {
-                        $this->dbg("Cleaning innerHTML of breaks failed. This is an IE strict-block-elements bug. Ignoring.: " . $e);
+                        $this->dbg("Cleaning innerHTML of breaks failed. This is an IE strict-block-elements bug. Ignoring.: " . $e);
-                }
+                }
-        }
+        }
-        
+        
-        /**
+        /**
-        * Initialize a node with the readability object. Also checks the
+        * Initialize a node with the readability object. Also checks the
-        * className/id for special names to add to its score.
+        * className/id for special names to add to its score.
-        *
+        *
-        * @param Element
+        * @param Element
-        * @return void
+        * @return void
-        **/
+        **/
-        protected function initializeNode($node) {
+        protected function initializeNode($node) {
-                $readability = $this->dom->createAttribute('readability');
+                $readability = $this->dom->createAttribute('readability');
-                $readability->value = 0; // this is our contentScore
+                $readability->value = 0; // this is our contentScore
-                $node->setAttributeNode($readability);                   
+                $node->setAttributeNode($readability);                   
-                switch (strtoupper($node->tagName)) { // unsure if strtoupper is needed, but using it just in case
+                switch (strtoupper($node->tagName)) { // unsure if strtoupper is needed, but using it just in case
-                        case 'DIV':
+                        case 'DIV':
-                                $readability->value += 5;
+                                $readability->value += 5;
-                                break;
+                                break;
-                        case 'PRE':
+                        case 'PRE':
-                        case 'TD':
+                        case 'TD':
-                        case 'BLOCKQUOTE':
+                        case 'BLOCKQUOTE':
-                                $readability->value += 3;
+                                $readability->value += 3;
-                                break;
+                                break;
-                                
+                                
-                        case 'ADDRESS':
+                        case 'ADDRESS':
-                        case 'OL':
+                        case 'OL':
-                        case 'UL':
+                        case 'UL':
-                        case 'DL':
+                        case 'DL':
-                        case 'DD':
+                        case 'DD':
-                        case 'DT':
+                        case 'DT':
-                        case 'LI':
+                        case 'LI':
-                        case 'FORM':
+                        case 'FORM':
-                                $readability->value -= 3;
+                                $readability->value -= 3;
-                                break;
+                                break;
-                        case 'H1':
+                        case 'H1':
-                        case 'H2':
+                        case 'H2':
-                        case 'H3':
+                        case 'H3':
-                        case 'H4':
+                        case 'H4':
-                        case 'H5':
+                        case 'H5':
-                        case 'H6':
+                        case 'H6':
-                        case 'TH':
+                        case 'TH':
-                                $readability->value -= 5;
+                                $readability->value -= 5;
-                                break;
+                                break;
-                }
+                }
-                $readability->value += $this->getClassWeight($node);
+                $readability->value += $this->getClassWeight($node);
-        }
+        }
-        
+        
-        /***
+        /***
-        * grabArticle - Using a variety of metrics (content score, classname, element types), find the content that is
+        * grabArticle - Using a variety of metrics (content score, classname, element types), find the content that is
-        *               most likely to be the stuff a user wants to read. Then return it wrapped up in a div.
+        *               most likely to be the stuff a user wants to read. Then return it wrapped up in a div.
-        *
+        *
-        * @return DOMElement
+        * @return DOMElement
-        **/
+        **/
-        protected function grabArticle($page=null) {
+        protected function grabArticle($page=null) {
-                $stripUnlikelyCandidates = $this->flagIsActive(self::FLAG_STRIP_UNLIKELYS);
+                $stripUnlikelyCandidates = $this->flagIsActive(self::FLAG_STRIP_UNLIKELYS);
-                if (!$page) $page = $this->dom;
+                if (!$page) $page = $this->dom;
-                $allElements = $page->getElementsByTagName('*');
+                $allElements = $page->getElementsByTagName('*');
-                /**
+                /**
-                * First, node prepping. Trash nodes that look cruddy (like ones with the class name "comment", etc), and turn divs
+                * First, node prepping. Trash nodes that look cruddy (like ones with the class name "comment", etc), and turn divs
-                * into P tags where they have been used inappropriately (as in, where they contain no other block level elements.)
+                * into P tags where they have been used inappropriately (as in, where they contain no other block level elements.)
-                *
+                *
-                * Note: Assignment from index for performance. See http://www.peachpit.com/articles/article.aspx?p=31567&seqNum=5
+                * Note: Assignment from index for performance. See http://www.peachpit.com/articles/article.aspx?p=31567&seqNum=5
-                * TODO: Shouldn't this be a reverse traversal?
+                * TODO: Shouldn't this be a reverse traversal?
-                **/
+                **/
-                $node = null;
+                $node = null;
-                $nodesToScore = array();
+                $nodesToScore = array();
-                for ($nodeIndex = 0; ($node = $allElements->item($nodeIndex)); $nodeIndex++) {
+                for ($nodeIndex = 0; ($node = $allElements->item($nodeIndex)); $nodeIndex++) {
-                //for ($nodeIndex=$targetList->length-1; $nodeIndex >= 0; $nodeIndex--) {
+                //for ($nodeIndex=$targetList->length-1; $nodeIndex >= 0; $nodeIndex--) {
-                        //$node = $targetList->item($nodeIndex);
+                        //$node = $targetList->item($nodeIndex);
-                        $tagName = strtoupper($node->tagName);
+                        $tagName = strtoupper($node->tagName);
-                        /* Remove unlikely candidates */
+                        /* Remove unlikely candidates */
-                        if ($stripUnlikelyCandidates) {
+                        if ($stripUnlikelyCandidates) {
-                                $unlikelyMatchString = $node->getAttribute('class') . $node->getAttribute('id');
+                                $unlikelyMatchString = $node->getAttribute('class') . $node->getAttribute('id');
-                                if (
+                                if (
-                                        preg_match($this->regexps['unlikelyCandidates'], $unlikelyMatchString) &&
+                                        preg_match($this->regexps['unlikelyCandidates'], $unlikelyMatchString) &&
-                                        !preg_match($this->regexps['okMaybeItsACandidate'], $unlikelyMatchString) &&
+                                        !preg_match($this->regexps['okMaybeItsACandidate'], $unlikelyMatchString) &&
-                                        $tagName != 'BODY'
+                                        $tagName != 'BODY'
-                                )
+                                )
-                                {
+                                {
-                                        $this->dbg('Removing unlikely candidate - ' . $unlikelyMatchString);
+                                        $this->dbg('Removing unlikely candidate - ' . $unlikelyMatchString);
-                                        //$nodesToRemove[] = $node;
+                                        //$nodesToRemove[] = $node;
-                                        $node->parentNode->removeChild($node);
+                                        $node->parentNode->removeChild($node);
-                                        $nodeIndex--;
+                                        $nodeIndex--;
-                                        continue;
+                                        continue;
-                                }               
+                                }               
-                        }
+                        }
-                        if ($tagName == 'P' || $tagName == 'TD' || $tagName == 'PRE') {
+                        if ($tagName == 'P' || $tagName == 'TD' || $tagName == 'PRE') {
-                                $nodesToScore[] = $node;
+                                $nodesToScore[] = $node;
-                        }
+                        }
-                        /* Turn all divs that don't have children block level elements into p's */
+                        /* Turn all divs that don't have children block level elements into p's */
-                        if ($tagName == 'DIV') {
+                        if ($tagName == 'DIV') {
-                                if (!preg_match($this->regexps['divToPElements'], $node->innerHTML)) {
+                                if (!preg_match($this->regexps['divToPElements'], $node->innerHTML)) {
-                                        //$this->dbg('Altering div to p');
+                                        //$this->dbg('Altering div to p');
-                                        $newNode = $this->dom->createElement('p');
+                                        $newNode = $this->dom->createElement('p');
-                                        try {
+                                        try {
-                                                $newNode->innerHTML = $node->innerHTML;
+                                                $newNode->innerHTML = $node->innerHTML;
-                                                //$nodesToReplace[] = array('new'=>$newNode, 'old'=>$node);
+                                                //$nodesToReplace[] = array('new'=>$newNode, 'old'=>$node);
-                                                $node->parentNode->replaceChild($newNode, $node);
+                                                $node->parentNode->replaceChild($newNode, $node);
-                                                $nodeIndex--;
+                                                $nodeIndex--;
-                                                $nodesToScore[] = $node; // or $newNode?
+                                                $nodesToScore[] = $node; // or $newNode?
-                                        }
+                                        }
-                                        catch(Exception $e) {
+                                        catch(Exception $e) {
-                                                $this->dbg('Could not alter div to p, reverting back to div.: ' . $e);
+                                                $this->dbg('Could not alter div to p, reverting back to div.: ' . $e);
-                                        }
+                                        }
-                                }
+                                }
-                                else
+                                else
-                                {
+                                {
-                                        /* EXPERIMENTAL */
+                                        /* EXPERIMENTAL */
-                                        // TODO: change these p elements back to text nodes after processing
+                                        // TODO: change these p elements back to text nodes after processing
-                                        for ($i = 0, $il = $node->childNodes->length; $i < $il; $i++) {
+                                        for ($i = 0, $il = $node->childNodes->length; $i < $il; $i++) {
-                                                $childNode = $node->childNodes->item($i);
+                                                $childNode = $node->childNodes->item($i);
-                                                if ($childNode->nodeType == 3) { // XML_TEXT_NODE
+                                                if ($childNode->nodeType == 3) { // XML_TEXT_NODE
-                                                        //$this->dbg('replacing text node with a p tag with the same content.');
+                                                        //$this->dbg('replacing text node with a p tag with the same content.');
-                                                        $p = $this->dom->createElement('p');
+                                                        $p = $this->dom->createElement('p');
-                                                        $p->innerHTML = $childNode->nodeValue;
+                                                        $p->innerHTML = $childNode->nodeValue;
-                                                        $p->setAttribute('style', 'display: inline;');
+                                                        $p->setAttribute('style', 'display: inline;');
-                                                        $p->setAttribute('class', 'readability-styled');
+                                                        $p->setAttribute('class', 'readability-styled');
-                                                        $childNode->parentNode->replaceChild($p, $childNode);
+                                                        $childNode->parentNode->replaceChild($p, $childNode);
-                                                }
+                                                }
-                                        }
+                                        }
-                                }
+                                }
-                        }
+                        }
-                }
+                }
-                
+                
-                /**
+                /**
-                * Loop through all paragraphs, and assign a score to them based on how content-y they look.
+                * Loop through all paragraphs, and assign a score to them based on how content-y they look.
-                * Then add their score to their parent node.
+                * Then add their score to their parent node.
-                *
+                *
-                * A score is determined by things like number of commas, class names, etc. Maybe eventually link density.
+                * A score is determined by things like number of commas, class names, etc. Maybe eventually link density.
-                **/
+                **/
-                $candidates = array();
+                $candidates = array();
-                for ($pt=0; $pt < count($nodesToScore); $pt++) {
+                for ($pt=0; $pt < count($nodesToScore); $pt++) {
-                        $parentNode      = $nodesToScore[$pt]->parentNode;
+                        $parentNode      = $nodesToScore[$pt]->parentNode;
-                        // $grandParentNode = $parentNode ? $parentNode->parentNode : null;
+                        // $grandParentNode = $parentNode ? $parentNode->parentNode : null;
-                        $grandParentNode = !$parentNode ? null : (($parentNode->parentNode instanceof DOMElement) ? $parentNode->parentNode : null);
+                        $grandParentNode = !$parentNode ? null : (($parentNode->parentNode instanceof DOMElement) ? $parentNode->parentNode : null);
-                        $innerText       = $this->getInnerText($nodesToScore[$pt]);
+                        $innerText       = $this->getInnerText($nodesToScore[$pt]);
-                        if (!$parentNode || !isset($parentNode->tagName)) {
+                        if (!$parentNode || !isset($parentNode->tagName)) {
-                                continue;
+                                continue;
-                        }
+                        }
-                        /* If this paragraph is less than 25 characters, don't even count it. */
+                        /* If this paragraph is less than 25 characters, don't even count it. */
-                        if(strlen($innerText) < 25) {
+                        if(strlen($innerText) < 25) {
-                                continue;
+                                continue;
-                        }
+                        }
-                        /* Initialize readability data for the parent. */
+                        /* Initialize readability data for the parent. */
-                        if (!$parentNode->hasAttribute('readability')) 
+                        if (!$parentNode->hasAttribute('readability')) 
-                        {
+                        {
-                                $this->initializeNode($parentNode);
+                                $this->initializeNode($parentNode);
-                                $candidates[] = $parentNode;
+                                $candidates[] = $parentNode;
-                        }
+                        }
-                        /* Initialize readability data for the grandparent. */
+                        /* Initialize readability data for the grandparent. */
-                        if ($grandParentNode && !$grandParentNode->hasAttribute('readability') && isset($grandParentNode->tagName))
+                        if ($grandParentNode && !$grandParentNode->hasAttribute('readability') && isset($grandParentNode->tagName))
-                        {
+                        {
-                                $this->initializeNode($grandParentNode);
+                                $this->initializeNode($grandParentNode);
-                                $candidates[] = $grandParentNode;
+                                $candidates[] = $grandParentNode;
-                        }
+                        }
-                        $contentScore = 0;
+                        $contentScore = 0;
-                        /* Add a point for the paragraph itself as a base. */
+                        /* Add a point for the paragraph itself as a base. */
-                        $contentScore++;
+                        $contentScore++;
-                        /* Add points for any commas within this paragraph */
+                        /* Add points for any commas within this paragraph */
-                        $contentScore += count(explode(',', $innerText));
+                        $contentScore += count(explode(',', $innerText));
-                        
+                        
-                        /* For every 100 characters in this paragraph, add another point. Up to 3 points. */
+                        /* For every 100 characters in this paragraph, add another point. Up to 3 points. */
-                        $contentScore += min(floor(strlen($innerText) / 100), 3);
+                        $contentScore += min(floor(strlen($innerText) / 100), 3);
-                        
+                        
-                        /* Add the score to the parent. The grandparent gets half. */
+                        /* Add the score to the parent. The grandparent gets half. */
-                        $parentNode->getAttributeNode('readability')->value += $contentScore;
+                        $parentNode->getAttributeNode('readability')->value += $contentScore;
-                        if ($grandParentNode) {
+                        if ($grandParentNode) {
-                                $grandParentNode->getAttributeNode('readability')->value += $contentScore/2;             
+                                $grandParentNode->getAttributeNode('readability')->value += $contentScore/2;             
-                        }
+                        }
-                }
+                }
-                /**
+                /**
-                * After we've calculated scores, loop through all of the possible candidate nodes we found
+                * After we've calculated scores, loop through all of the possible candidate nodes we found
-                * and find the one with the highest score.
+                * and find the one with the highest score.
-                **/
+                **/
-                $topCandidate = null;
+                $topCandidate = null;
-                for ($c=0, $cl=count($candidates); $c < $cl; $c++)
+                for ($c=0, $cl=count($candidates); $c < $cl; $c++)
-                {
+                {
-                        /**
+                        /**
-                        * Scale the final candidates score based on link density. Good content should have a
+                        * Scale the final candidates score based on link density. Good content should have a
-                        * relatively small link density (5% or less) and be mostly unaffected by this operation.
+                        * relatively small link density (5% or less) and be mostly unaffected by this operation.
-                        **/
+                        **/
-                        $readability = $candidates[$c]->getAttributeNode('readability');
+                        $readability = $candidates[$c]->getAttributeNode('readability');
-                        $readability->value = $readability->value * (1-$this->getLinkDensity($candidates[$c]));
+                        $readability->value = $readability->value * (1-$this->getLinkDensity($candidates[$c]));
-                        $this->dbg('Candidate: ' . $candidates[$c]->tagName . ' (' . $candidates[$c]->getAttribute('class') . ':' . $candidates[$c]->getAttribute('id') . ') with score ' . $readability->value);
+                        $this->dbg('Candidate: ' . $candidates[$c]->tagName . ' (' . $candidates[$c]->getAttribute('class') . ':' . $candidates[$c]->getAttribute('id') . ') with score ' . $readability->value);
-                        if (!$topCandidate || $readability->value > (int)$topCandidate->getAttribute('readability')) {
+                        if (!$topCandidate || $readability->value > (int)$topCandidate->getAttribute('readability')) {
-                                $topCandidate = $candidates[$c];
+                                $topCandidate = $candidates[$c];
-                        }
+                        }
-                }
+                }
-                /**
+                /**
-                * If we still have no top candidate, just use the body as a last resort.
+                * If we still have no top candidate, just use the body as a last resort.
-                * We also have to copy the body node so it is something we can modify.
+                * We also have to copy the body node so it is something we can modify.
-                **/
+                **/
-                if ($topCandidate === null || strtoupper($topCandidate->tagName) == 'BODY')
+                if ($topCandidate === null || strtoupper($topCandidate->tagName) == 'BODY')
-                {
+                {
-                        $topCandidate = $this->dom->createElement('div');
+                        $topCandidate = $this->dom->createElement('div');
-                        if ($page instanceof DOMDocument) {
+                        if ($page instanceof DOMDocument) {
-                                if (!isset($page->documentElement)) {
+                                if (!isset($page->documentElement)) {
-                                        // we don't have a body either? what a mess! :)
+                                        // we don't have a body either? what a mess! :)
-                                } else {
+                                } else {
-                                        $topCandidate->innerHTML = $page->documentElement->innerHTML;
+                                        $topCandidate->innerHTML = $page->documentElement->innerHTML;
-                                        $page->documentElement->innerHTML = '';
+                                        $page->documentElement->innerHTML = '';
-                                        $page->documentElement->appendChild($topCandidate);
+                                        $page->documentElement->appendChild($topCandidate);
-                                }
+                                }
-                        } else {
+                        } else {
-                                $topCandidate->innerHTML = $page->innerHTML;
+                                $topCandidate->innerHTML = $page->innerHTML;
-                                $page->innerHTML = '';
+                                $page->innerHTML = '';
-                                $page->appendChild($topCandidate);
+                                $page->appendChild($topCandidate);
-                        }
+                        }
-                        $this->initializeNode($topCandidate);
+                        $this->initializeNode($topCandidate);
-                }
+                }
-                /**
+                /**
-                * Now that we have the top candidate, look through its siblings for content that might also be related.
+                * Now that we have the top candidate, look through its siblings for content that might also be related.
-                * Things like preambles, content split by ads that we removed, etc.
+                * Things like preambles, content split by ads that we removed, etc.
-                **/
+                **/
-                $articleContent        = $this->dom->createElement('div');
+                $articleContent        = $this->dom->createElement('div');
-                $articleContent->setAttribute('id', 'readability-content');
+                $articleContent->setAttribute('id', 'readability-content');
-                $siblingScoreThreshold = max(10, ((int)$topCandidate->getAttribute('readability')) * 0.2);
+                $siblingScoreThreshold = max(10, ((int)$topCandidate->getAttribute('readability')) * 0.2);
-                $siblingNodes          = $topCandidate->parentNode->childNodes;
+                $siblingNodes          = $topCandidate->parentNode->childNodes;
-                if (!isset($siblingNodes)) {
+                if (!isset($siblingNodes)) {
-                        $siblingNodes = new stdClass;
+                        $siblingNodes = new stdClass;
-                        $siblingNodes->length = 0;
+                        $siblingNodes->length = 0;
-                }
+                }
-                for ($s=0, $sl=$siblingNodes->length; $s < $sl; $s++)
+                for ($s=0, $sl=$siblingNodes->length; $s < $sl; $s++)
-                {
+                {
-                        $siblingNode = $siblingNodes->item($s);
+                        $siblingNode = $siblingNodes->item($s);
-                        $append      = false;
+                        $append      = false;
-                        $this->dbg('Looking at sibling node: ' . $siblingNode->nodeName . (($siblingNode->nodeType === XML_ELEMENT_NODE && $siblingNode->hasAttribute('readability')) ? (' with score ' . $siblingNode->getAttribute('readability')) : ''));
+                        $this->dbg('Looking at sibling node: ' . $siblingNode->nodeName . (($siblingNode->nodeType === XML_ELEMENT_NODE && $siblingNode->hasAttribute('readability')) ? (' with score ' . $siblingNode->getAttribute('readability')) : ''));
-                        //dbg('Sibling has score ' . ($siblingNode->readability ? siblingNode.readability.contentScore : 'Unknown'));
+                        //dbg('Sibling has score ' . ($siblingNode->readability ? siblingNode.readability.contentScore : 'Unknown'));
-                        if ($siblingNode === $topCandidate)
+                        if ($siblingNode === $topCandidate)
-                        // or if ($siblingNode->isSameNode($topCandidate))
+                        // or if ($siblingNode->isSameNode($topCandidate))
-                        {
+                        {
-                                $append = true;
+                                $append = true;
-                        }
+                        }
-                        $contentBonus = 0;
+                        $contentBonus = 0;
-                        /* Give a bonus if sibling nodes and top candidates have the example same classname */
+                        /* Give a bonus if sibling nodes and top candidates have the example same classname */
-                        if ($siblingNode->nodeType === XML_ELEMENT_NODE && $siblingNode->getAttribute('class') == $topCandidate->getAttribute('class') && $topCandidate->getAttribute('class') != '') {
+                        if ($siblingNode->nodeType === XML_ELEMENT_NODE && $siblingNode->getAttribute('class') == $topCandidate->getAttribute('class') && $topCandidate->getAttribute('class') != '') {
-                                $contentBonus += ((int)$topCandidate->getAttribute('readability')) * 0.2;
+                                $contentBonus += ((int)$topCandidate->getAttribute('readability')) * 0.2;
-                        }
+                        }
-                        if ($siblingNode->nodeType === XML_ELEMENT_NODE && $siblingNode->hasAttribute('readability') && (((int)$siblingNode->getAttribute('readability')) + $contentBonus) >= $siblingScoreThreshold)
+                        if ($siblingNode->nodeType === XML_ELEMENT_NODE && $siblingNode->hasAttribute('readability') && (((int)$siblingNode->getAttribute('readability')) + $contentBonus) >= $siblingScoreThreshold)
-                        {
+                        {
-                                $append = true;
+                                $append = true;
-                        }
+                        }
-                        
+                        
-                        if (strtoupper($siblingNode->nodeName) == 'P') {
+                        if (strtoupper($siblingNode->nodeName) == 'P') {
-                                $linkDensity = $this->getLinkDensity($siblingNode);
+                                $linkDensity = $this->getLinkDensity($siblingNode);
-                                $nodeContent = $this->getInnerText($siblingNode);
+                                $nodeContent = $this->getInnerText($siblingNode);
-                                $nodeLength  = strlen($nodeContent);
+                                $nodeLength  = strlen($nodeContent);
-                                
+                                
-                                if ($nodeLength > 80 && $linkDensity < 0.25)
+                                if ($nodeLength > 80 && $linkDensity < 0.25)
-                                {
+                                {
-                                        $append = true;
+                                        $append = true;
-                                }
+                                }
-                                else if ($nodeLength < 80 && $linkDensity === 0 && preg_match('/\.( |$)/', $nodeContent))
+                                else if ($nodeLength < 80 && $linkDensity === 0 && preg_match('/\.( |$)/', $nodeContent))
-                                {
+                                {
-                                        $append = true;
+                                        $append = true;
-                                }
+                                }
-                        }
+                        }
-                        if ($append)
+                        if ($append)
-                        {
+                        {
-                                $this->dbg('Appending node: ' . $siblingNode->nodeName);
+                                $this->dbg('Appending node: ' . $siblingNode->nodeName);
-                                $nodeToAppend = null;
+                                $nodeToAppend = null;
-                                $sibNodeName = strtoupper($siblingNode->nodeName);
+                                $sibNodeName = strtoupper($siblingNode->nodeName);
-                                if ($sibNodeName != 'DIV' && $sibNodeName != 'P') {
+                                if ($sibNodeName != 'DIV' && $sibNodeName != 'P') {
-                                        /* We have a node that isn't a common block level element, like a form or td tag. Turn it into a div so it doesn't get filtered out later by accident. */
+                                        /* We have a node that isn't a common block level element, like a form or td tag. Turn it into a div so it doesn't get filtered out later by accident. */
-                                        
+                                        
-                                        $this->dbg('Altering siblingNode of ' . $sibNodeName . ' to div.');
+                                        $this->dbg('Altering siblingNode of ' . $sibNodeName . ' to div.');
-                                        $nodeToAppend = $this->dom->createElement('div');
+                                        $nodeToAppend = $this->dom->createElement('div');
-                                        try {
+                                        try {
-                                                $nodeToAppend->setAttribute('id', $siblingNode->getAttribute('id'));
+                                                $nodeToAppend->setAttribute('id', $siblingNode->getAttribute('id'));
-                                                $nodeToAppend->innerHTML = $siblingNode->innerHTML;
+                                                $nodeToAppend->innerHTML = $siblingNode->innerHTML;
-                                        }
+                                        }
-                                        catch(Exception $e)
+                                        catch(Exception $e)
-                                        {
+                                        {
-                                                $this->dbg('Could not alter siblingNode to div, reverting back to original.');
+                                                $this->dbg('Could not alter siblingNode to div, reverting back to original.');
-                                                $nodeToAppend = $siblingNode;
+                                                $nodeToAppend = $siblingNode;
-                                                $s--;
+                                                $s--;
-                                                $sl--;
+                                                $sl--;
-                                        }
+                                        }
-                                } else {
+                                } else {
-                                        $nodeToAppend = $siblingNode;
+                                        $nodeToAppend = $siblingNode;
-                                        $s--;
+                                        $s--;
-                                        $sl--;
+                                        $sl--;
-                                }
+                                }
-                                
+                                
-                                /* To ensure a node does not interfere with readability styles, remove its classnames */
+                                /* To ensure a node does not interfere with readability styles, remove its classnames */
-                                $nodeToAppend->removeAttribute('class');
+                                $nodeToAppend->removeAttribute('class');
-                                /* Append sibling and subtract from our list because it removes the node when you append to another node */
+                                /* Append sibling and subtract from our list because it removes the node when you append to another node */
-                                $articleContent->appendChild($nodeToAppend);
+                                $articleContent->appendChild($nodeToAppend);
-                        }
+                        }
-                }
+                }
-                /**
+                /**
-                * So we have all of the content that we need. Now we clean it up for presentation.
+                * So we have all of the content that we need. Now we clean it up for presentation.
-                **/
+                **/
-                $this->prepArticle($articleContent);
+                $this->prepArticle($articleContent);
-                /**
+                /**
-                * Now that we've gone through the full algorithm, check to see if we got any meaningful content.
+                * Now that we've gone through the full algorithm, check to see if we got any meaningful content.
-                * If we didn't, we may need to re-run grabArticle with different flags set. This gives us a higher
+                * If we didn't, we may need to re-run grabArticle with different flags set. This gives us a higher
-                * likelihood of finding the content, and the sieve approach gives us a higher likelihood of
+                * likelihood of finding the content, and the sieve approach gives us a higher likelihood of
-                * finding the -right- content.
+                * finding the -right- content.
-                **/
+                **/
-                if (strlen($this->getInnerText($articleContent, false)) < 250)
+                if (strlen($this->getInnerText($articleContent, false)) < 250)
-                {
+                {
-                        // TODO: find out why element disappears sometimes, e.g. for this URL http://www.businessinsider.com/6-hedge-fund-etfs-for-average-investors-2011-7
+                        // TODO: find out why element disappears sometimes, e.g. for this URL http://www.businessinsider.com/6-hedge-fund-etfs-for-average-investors-2011-7
-                        // in the meantime, we check and create an empty element if it's not there.
+                        // in the meantime, we check and create an empty element if it's not there.
-                        if (!isset($this->body->childNodes)) $this->body = $this->dom->createElement('body');
+                        if (!isset($this->body->childNodes)) $this->body = $this->dom->createElement('body');
-                        $this->body->innerHTML = $this->bodyCache;
+                        $this->body->innerHTML = $this->bodyCache;
-                        
+                        
-                        if ($this->flagIsActive(self::FLAG_STRIP_UNLIKELYS)) {
+                        if ($this->flagIsActive(self::FLAG_STRIP_UNLIKELYS)) {
-                                $this->removeFlag(self::FLAG_STRIP_UNLIKELYS);
+                                $this->removeFlag(self::FLAG_STRIP_UNLIKELYS);
-                                return $this->grabArticle($this->body);
+                                return $this->grabArticle($this->body);
-                        }
+                        }
-                        else if ($this->flagIsActive(self::FLAG_WEIGHT_CLASSES)) {
+                        else if ($this->flagIsActive(self::FLAG_WEIGHT_CLASSES)) {
-                                $this->removeFlag(self::FLAG_WEIGHT_CLASSES);
+                                $this->removeFlag(self::FLAG_WEIGHT_CLASSES);
-                                return $this->grabArticle($this->body);              
+                                return $this->grabArticle($this->body);              
-                        }
+                        }
-                        else if ($this->flagIsActive(self::FLAG_CLEAN_CONDITIONALLY)) {
+                        else if ($this->flagIsActive(self::FLAG_CLEAN_CONDITIONALLY)) {
-                                $this->removeFlag(self::FLAG_CLEAN_CONDITIONALLY);
+                                $this->removeFlag(self::FLAG_CLEAN_CONDITIONALLY);
-                                return $this->grabArticle($this->body);
+                                return $this->grabArticle($this->body);
-                        }
+                        }
-                        else {
+                        else {
-                                return false;
+                                return false;
-                        }
+                        }
-                }
+                }
-                return $articleContent;
+                return $articleContent;
-        }
+        }
-        
+        
-        /**
+        /**
-        * Remove script tags from document
+        * Remove script tags from document
-        *
+        *
-        * @param DOMElement
+        * @param DOMElement
-        * @return void
+        * @return void
-        */
+        */
-        public function removeScripts($doc) {
+        public function removeScripts($doc) {
-                $scripts = $doc->getElementsByTagName('script');
+                $scripts = $doc->getElementsByTagName('script');
-                for($i = $scripts->length-1; $i >= 0; $i--)
+                for($i = $scripts->length-1; $i >= 0; $i--)
-                {
+                {
-                        $scripts->item($i)->parentNode->removeChild($scripts->item($i));
+                        $scripts->item($i)->parentNode->removeChild($scripts->item($i));
-                }
+                }
-        }
+        }
-        
+        
-        /**
+        /**
-        * Get the inner text of a node.
+        * Get the inner text of a node.
-        * This also strips out any excess whitespace to be found.
+        * This also strips out any excess whitespace to be found.
-        *
+        *
-        * @param DOMElement $
+        * @param DOMElement $e
-        * @param boolean $normalizeSpaces (default: true)
+        * @param boolean $normalizeSpaces (default: true)
-        * @return string
+        * @return string
-        **/
+        **/
-        public function getInnerText($e, $normalizeSpaces=true) {
+        public function getInnerText($e, $normalizeSpaces=true) {
-                $textContent = '';
+                $textContent = '';
-                if (!isset($e->textContent) || $e->textContent == '') {
+                if (!isset($e->textContent) || $e->textContent == '') {
-                        return '';
+                        return '';
-                }
+                }
-                $textContent = trim($e->textContent);
+                $textContent = trim($e->textContent);
-                if ($normalizeSpaces) {
+                if ($normalizeSpaces) {
-                        return preg_replace($this->regexps['normalize'], ' ', $textContent);
+                        return preg_replace($this->regexps['normalize'], ' ', $textContent);
-                } else {
+                } else {
-                        return $textContent;
+                        return $textContent;
-                }
+                }
-        }
+        }
-        /**
+        /**
-        * Get the number of times a string $s appears in the node $e.
+        * Get the number of times a string $s appears in the node $e.
-        *
+        *
-        * @param DOMElement $e
+        * @param DOMElement $e
-        * @param string - what to count. Default is ","
+        * @param string $s what to count. Default is ","
-        * @return number (integer)
+        * @return number (integer)
-        **/
+        **/
-        public function getCharCount($e, $s=',') {
+        public function getCharCount($e, $s=',') {
-                return substr_count($this->getInnerText($e), $s);
+                return substr_count($this->getInnerText($e), $s);
-        }
+        }
-        /**
+        /**
-        * Remove the style attribute on every $e and under.
+        * Remove the style attribute on every $e and under.
-        *
+        *
-        * @param DOMElement $e
+        * @param DOMElement $e
-        * @return void
+        * @return void
-        */
+        */
-        public function cleanStyles($e) {
+        public function cleanStyles($e) {
-                if (!is_object($e)) return;
+                if (!is_object($e)) return;
-                $elems = $e->getElementsByTagName('*');
+                $elems = $e->getElementsByTagName('*');
-                foreach ($elems as $elem) {
+                foreach ($elems as $elem) {
-                        $elem->removeAttribute('style');
+                        $elem->removeAttribute('style');
-                }
+                }
-        }
+        }
-        
+        
-        /**
+        /**
-        * Get the density of links as a percentage of the content
+        * Get the density of links as a percentage of the content
-        * This is the amount of text that is inside a link divided by the total text in the node.
+        * This is the amount of text that is inside a link divided by the total text in the node.
-        * 
+        * 
-        * @param DOMElement $e
+        * @param DOMElement $e
-        * @return number (float)
+        * @return number (float)
-        */
+        */
-        public function getLinkDensity($e) {
+        public function getLinkDensity($e) {
-                $links      = $e->getElementsByTagName('a');
+                $links      = $e->getElementsByTagName('a');
-                $textLength = strlen($this->getInnerText($e));
+                $textLength = strlen($this->getInnerText($e));
-                $linkLength = 0;
+                $linkLength = 0;
-                for ($i=0, $il=$links->length; $i < $il; $i++)
+                for ($i=0, $il=$links->length; $i < $il; $i++)
-                {
+                {
-                        $linkLength += strlen($this->getInnerText($links->item($i)));
+                        $linkLength += strlen($this->getInnerText($links->item($i)));
-                }
+                }
-                if ($textLength > 0) {
+                if ($textLength > 0) {
-                        return $linkLength / $textLength;
+                        return $linkLength / $textLength;
-                } else {
+                } else {
-                        return 0;
+                        return 0;
-                }
+                }
-        }
+        }
-        
+        
-        /**
+        /**
-        * Get an elements class/id weight. Uses regular expressions to tell if this 
+        * Get an element's class/id weight. Uses regular expressions to tell if this 
-        * element looks good or bad.
+        * element looks good or bad.
-        *
+        *
-        * @param DOMElement $e
+        * @param DOMElement $e
-        * @return number (Integer)
+        * @return number (Integer)
-        */
+        */
-        public function getClassWeight($e) {
+        public function getClassWeight($e) {
-                if(!$this->flagIsActive(self::FLAG_WEIGHT_CLASSES)) {
+                if(!$this->flagIsActive(self::FLAG_WEIGHT_CLASSES)) {
-                        return 0;
+                        return 0;
-                }
+                }
-                $weight = 0;
+                $weight = 0;
-                /* Look for a special classname */
+                /* Look for a special classname */
-                if ($e->hasAttribute('class') && $e->getAttribute('class') != '')
+                if ($e->hasAttribute('class') && $e->getAttribute('class') != '')
-                {
+                {
-                        if (preg_match($this->regexps['negative'], $e->getAttribute('class'))) {
+                        if (preg_match($this->regexps['negative'], $e->getAttribute('class'))) {
-                                $weight -= 25;
+                                $weight -= 25;
-                        }
+                        }
-                        if (preg_match($this->regexps['positive'], $e->getAttribute('class'))) {
+                        if (preg_match($this->regexps['positive'], $e->getAttribute('class'))) {
-                                $weight += 25;
+                                $weight += 25;
-                        }
+                        }
-                }
+                }
-                /* Look for a special ID */
+                /* Look for a special ID */
-                if ($e->hasAttribute('id') && $e->getAttribute('id') != '')
+                if ($e->hasAttribute('id') && $e->getAttribute('id') != '')
-                {
+                {
-                        if (preg_match($this->regexps['negative'], $e->getAttribute('id'))) {
+                        if (preg_match($this->regexps['negative'], $e->getAttribute('id'))) {
-                                $weight -= 25;
+                                $weight -= 25;
-                        }
+                        }
-                        if (preg_match($this->regexps['positive'], $e->getAttribute('id'))) {
+                        if (preg_match($this->regexps['positive'], $e->getAttribute('id'))) {
-                                $weight += 25;
+                                $weight += 25;
-                        }
+                        }
-                }
+                }
-                return $weight;
+                return $weight;
-        }
+        }
-        /**
+        /**
-        * Remove extraneous break tags from a node.
+        * Remove extraneous break tags from a node.
-        *
+        *
-        * @param DOMElement $node
+        * @param DOMElement $node
-        * @return void
+        * @return void
-        */
+        */
-        public function killBreaks($node) {
+        public function killBreaks($node) {
-                $html = $node->innerHTML;
+                $html = $node->innerHTML;
-                $html = preg_replace($this->regexps['killBreaks'], '<br />', $html);
+                $html = preg_replace($this->regexps['killBreaks'], '<br />', $html);
-                $node->innerHTML = $html;
+                $node->innerHTML = $html;
-        }
+        }
-        /**
+        /**
-        * Clean a node of all elements of type "tag".
+        * Clean a node of all elements of type "tag".
-        * (Unless it's a youtube/vimeo video. People love movies.)
+        * (Unless it's a youtube/vimeo video. People love movies.)
-        *
+        *
-        * Updated 2012-09-18 to preserve youtube/vimeo iframes
+        * Updated 2012-09-18 to preserve youtube/vimeo iframes
-        *
+        *
-        * @param DOMElement $e
+        * @param DOMElement $e
-        * @param string $tag
+        * @param string $tag
-        * @return void
+        * @return void
-        */
+        */
-        public function clean($e, $tag) {
+        public function clean($e, $tag) {
-                $targetList = $e->getElementsByTagName($tag);
+                $targetList = $e->getElementsByTagName($tag);
-                $isEmbed = ($tag == 'iframe' || $tag == 'object' || $tag == 'embed');
+                $isEmbed = ($tag == 'iframe' || $tag == 'object' || $tag == 'embed');
-                
+                
-                for ($y=$targetList->length-1; $y >= 0; $y--) {
+                for ($y=$targetList->length-1; $y >= 0; $y--) {
-                        /* Allow youtube and vimeo videos through as people usually want to see those. */
+                        /* Allow youtube and vimeo videos through as people usually want to see those. */
-                        if ($isEmbed) {
+                        if ($isEmbed) {
-                                $attributeValues = '';
+                                $attributeValues = '';
-                                for ($i=0, $il=$targetList->item($y)->attributes->length; $i < $il; $i++) {
+                                for ($i=0, $il=$targetList->item($y)->attributes->length; $i < $il; $i++) {
-                                        $attributeValues .= $targetList->item($y)->attributes->item($i)->value . '|'; // DOMAttr? (TODO: test)
+                                        $attributeValues .= $targetList->item($y)->attributes->item($i)->value . '|'; // DOMAttr? (TODO: test)
-                                }
+                                }
-                                
+                                
-                                /* First, check the elements attributes to see if any of them contain youtube or vimeo */
+                                /* First, check the element's attributes to see if any of them contain youtube or vimeo */
-                                if (preg_match($this->regexps['video'], $attributeValues)) {
+                                if (preg_match($this->regexps['video'], $attributeValues)) {
-                                        continue;
+                                        continue;
-                                }
+                                }
-                                /* Then check the elements inside this element for the same. */
+                                /* Then check the elements inside this element for the same. */
-                                if (preg_match($this->regexps['video'], $targetList->item($y)->innerHTML)) {
+                                if (preg_match($this->regexps['video'], $targetList->item($y)->innerHTML)) {
-                                        continue;
+                                        continue;
-                                }
+                                }
-                        }
+                        }
-                        $targetList->item($y)->parentNode->removeChild($targetList->item($y));
+                        $targetList->item($y)->parentNode->removeChild($targetList->item($y));
-                }
+                }
-        }
+        }
-        
+        
-        /**
+        /**
-        * Clean an element of all tags of type "tag" if they look fishy.
+        * Clean an element of all tags of type "tag" if they look fishy.
-        * "Fishy" is an algorithm based on content length, classnames, 
+        * "Fishy" is an algorithm based on content length, classnames, 
-        * link density, number of images & embeds, etc.
+        * link density, number of images & embeds, etc.
-        *
+        *
-        * @param DOMElement $e
+        * @param DOMElement $e
-        * @param string $tag
+        * @param string $tag
-        * @return void
+        * @return void
-        */
+        */
-        public function cleanConditionally($e, $tag) {
+        public function cleanConditionally($e, $tag) {
-                if (!$this->flagIsActive(self::FLAG_CLEAN_CONDITIONALLY)) {
+                if (!$this->flagIsActive(self::FLAG_CLEAN_CONDITIONALLY)) {
-                        return;
+                        return;
-                }
+                }
-                $tagsList = $e->getElementsByTagName($tag);
+                $tagsList = $e->getElementsByTagName($tag);
-                $curTagsLength = $tagsList->length;
+                $curTagsLength = $tagsList->length;
-                /**
+                /**
-                * Gather counts for other typical elements embedded within.
+                * Gather counts for other typical elements embedded within.
-                * Traverse backwards so we can remove nodes at the same time without effecting the traversal.
+                * Traverse backwards so we can remove nodes at the same time without affecting the traversal.
-                *
+                *
-                * TODO: Consider taking into account original contentScore here.
+                * TODO: Consider taking into account original contentScore here.
-                */
+                */
-                for ($i=$curTagsLength-1; $i >= 0; $i--) {
+                for ($i=$curTagsLength-1; $i >= 0; $i--) {
-                        $weight = $this->getClassWeight($tagsList->item($i));
+                        $weight = $this->getClassWeight($tagsList->item($i));
-                        $contentScore = ($tagsList->item($i)->hasAttribute('readability')) ? (int)$tagsList->item($i)->getAttribute('readability') : 0;
+                        $contentScore = ($tagsList->item($i)->hasAttribute('readability')) ? (int)$tagsList->item($i)->getAttribute('readability') : 0;
-                        
+                        
-                        $this->dbg('Cleaning Conditionally ' . $tagsList->item($i)->tagName . ' (' . $tagsList->item($i)->getAttribute('class') . ':' . $tagsList->item($i)->getAttribute('id') . ')' . (($tagsList->item($i)->hasAttribute('readability')) ? (' with score ' . $tagsList->item($i)->getAttribute('readability')) : ''));
+                        $this->dbg('Cleaning Conditionally ' . $tagsList->item($i)->tagName . ' (' . $tagsList->item($i)->getAttribute('class') . ':' . $tagsList->item($i)->getAttribute('id') . ')' . (($tagsList->item($i)->hasAttribute('readability')) ? (' with score ' . $tagsList->item($i)->getAttribute('readability')) : ''));
-                        if ($weight + $contentScore < 0) {
+                        if ($weight + $contentScore < 0) {
-                                $tagsList->item($i)->parentNode->removeChild($tagsList->item($i));
+                                $tagsList->item($i)->parentNode->removeChild($tagsList->item($i));
-                        }
+                        }
-                        else if ( $this->getCharCount($tagsList->item($i), ',') < 10) {
+                        else if ( $this->getCharCount($tagsList->item($i), ',') < 10) {
-                                /**
+                                /**
-                                * If there are not very many commas, and the number of
+                                * If there are not very many commas, and the number of
-                                * non-paragraph elements is more than paragraphs or other ominous signs, remove the element.
+                                * non-paragraph elements is more than paragraphs or other ominous signs, remove the element.
-                                **/
+                                **/
-                                $p      = $tagsList->item($i)->getElementsByTagName('p')->length;
+                                $p      = $tagsList->item($i)->getElementsByTagName('p')->length;
-                                $img    = $tagsList->item($i)->getElementsByTagName('img')->length;
+                                $img    = $tagsList->item($i)->getElementsByTagName('img')->length;
-                                $li     = $tagsList->item($i)->getElementsByTagName('li')->length-100;
+                                $li     = $tagsList->item($i)->getElementsByTagName('li')->length-100;
-                                $input  = $tagsList->item($i)->getElementsByTagName('input')->length;
+                                $input  = $tagsList->item($i)->getElementsByTagName('input')->length;
-                                $a              = $tagsList->item($i)->getElementsByTagName('a')->length;
+                                $a              = $tagsList->item($i)->getElementsByTagName('a')->length;
-                                $embedCount = 0;
+                                $embedCount = 0;
-                                $embeds = $tagsList->item($i)->getElementsByTagName('embed');
+                                $embeds = $tagsList->item($i)->getElementsByTagName('embed');
-                                for ($ei=0, $il=$embeds->length; $ei < $il; $ei++) {
+                                for ($ei=0, $il=$embeds->length; $ei < $il; $ei++) {
-                                        if (preg_match($this->regexps['video'], $embeds->item($ei)->getAttribute('src'))) {
+                                        if (preg_match($this->regexps['video'], $embeds->item($ei)->getAttribute('src'))) {
-                                                $embedCount++; 
+                                                $embedCount++; 
-                                        }
+                                        }
-                                }
+                                }
-                                $embeds = $tagsList->item($i)->getElementsByTagName('iframe');
+                                $embeds = $tagsList->item($i)->getElementsByTagName('iframe');
-                                for ($ei=0, $il=$embeds->length; $ei < $il; $ei++) {
+                                for ($ei=0, $il=$embeds->length; $ei < $il; $ei++) {
-                                        if (preg_match($this->regexps['video'], $embeds->item($ei)->getAttribute('src'))) {
+                                        if (preg_match($this->regexps['video'], $embeds->item($ei)->getAttribute('src'))) {
-                                                $embedCount++; 
+                                                $embedCount++; 
-                                        }
+                                        }
-                                }
+                                }
-                                $linkDensity   = $this->getLinkDensity($tagsList->item($i));
+                                $linkDensity   = $this->getLinkDensity($tagsList->item($i));
-                                $contentLength = strlen($this->getInnerText($tagsList->item($i)));
+                                $contentLength = strlen($this->getInnerText($tagsList->item($i)));
-                                $toRemove      = false;
+                                $toRemove      = false;
-                                if ($this->lightClean) {
+                                if ($this->lightClean) {
-                                        $this->dbg('Light clean...');
+                                        $this->dbg('Light clean...');
-                                        if ( ($img > $p) && ($img > 4) ) {
+                                        if ( ($img > $p) && ($img > 4) ) {
-                                                $this->dbg(' more than 4 images and more image elements than paragraph elements');
+                                                $this->dbg(' more than 4 images and more image elements than paragraph elements');
-                                                $toRemove = true;
+                                                $toRemove = true;
-                                        } else if ($li > $p && $tag != 'ul' && $tag != 'ol') {
+                                        } else if ($li > $p && $tag != 'ul' && $tag != 'ol') {
-                                                $this->dbg(' too many <li> elements, and parent is not <ul> or <ol>');
+                                                $this->dbg(' too many <li> elements, and parent is not <ul> or <ol>');
-                                                $toRemove = true;
+                                                $toRemove = true;
-                                        } else if ( $input > floor($p/3) ) {
+                                        } else if ( $input > floor($p/3) ) {
-                                                $this->dbg(' too many <input> elements');
+                                                $this->dbg(' too many <input> elements');
-                                                $toRemove = true; 
+                                                $toRemove = true; 
-                                        } else if ($contentLength < 25 && ($embedCount === 0 && ($img === 0 || $img > 2))) {
+                                        } else if ($contentLength < 10 && ($embedCount === 0 && ($img === 0 || $img > 2))) {
-                                                $this->dbg(' content length less than 25 chars, 0 embeds and either 0 images or more than 2 images');
+                                                $this->dbg(' content length less than 10 chars, 0 embeds and either 0 images or more than 2 images');
-                                                $toRemove = true;
+                                                $toRemove = true;
-                                        } else if($weight < 25 && $linkDensity > 0.2) {
+                                        } else if($weight < 25 && $linkDensity > 0.2) {
-                                                $this->dbg(' weight smaller than 25 and link density above 0.2');
+                                                $this->dbg(' weight smaller than 25 and link density above 0.2');
-                                                $toRemove = true;
+                                                $toRemove = true;
-                                        } else if($a > 2 && ($weight >= 25 && $linkDensity > 0.5)) {
+                                        } else if($a > 2 && ($weight >= 25 && $linkDensity > 0.5)) {
-                                                $this->dbg(' more than 2 links and weight above 25 but link density greater than 0.5');
+                                                $this->dbg(' more than 2 links and weight above 25 but link density greater than 0.5');
-                                                $toRemove = true;
+                                                $toRemove = true;
-                                        } else if($embedCount > 3) {
+                                        } else if($embedCount > 3) {
-                                                $this->dbg(' more than 3 embeds');
+                                                $this->dbg(' more than 3 embeds');
-                                                $toRemove = true;
+                                                $toRemove = true;
-                                        }
+                                        }
-                                } else {
+                                } else {
-                                        $this->dbg('Standard clean...');
+                                        $this->dbg('Standard clean...');
-                                        if ( $img > $p ) {
+                                        if ( $img > $p ) {
-                                                $this->dbg(' more image elements than paragraph elements');
+                                                $this->dbg(' more image elements than paragraph elements');
-                                                $toRemove = true;
+                                                $toRemove = true;
-                                        } else if ($li > $p && $tag != 'ul' && $tag != 'ol') {
+                                        } else if ($li > $p && $tag != 'ul' && $tag != 'ol') {
-                                                $this->dbg(' too many <li> elements, and parent is not <ul> or <ol>');
+                                                $this->dbg(' too many <li> elements, and parent is not <ul> or <ol>');
-                                                $toRemove = true;
+                                                $toRemove = true;
-                                        } else if ( $input > floor($p/3) ) {
+                                        } else if ( $input > floor($p/3) ) {
-                                                $this->dbg(' too many <input> elements');
+                                                $this->dbg(' too many <input> elements');
-                                                $toRemove = true; 
+                                                $toRemove = true; 
-                                        } else if ($contentLength < 25 && ($img === 0 || $img > 2) ) {
+                                        } else if ($contentLength < 25 && ($img === 0 || $img > 2) ) {
-                                                $this->dbg(' content length less than 25 chars and 0 images, or more than 2 images');
+                                                $this->dbg(' content length less than 25 chars and 0 images, or more than 2 images');
-                                                $toRemove = true;
+                                                $toRemove = true;
-                                        } else if($weight < 25 && $linkDensity > 0.2) {
+                                        } else if($weight < 25 && $linkDensity > 0.2) {
-                                                $this->dbg(' weight smaller than 25 and link density above 0.2');
+                                                $this->dbg(' weight smaller than 25 and link density above 0.2');
-                                                $toRemove = true;
+                                                $toRemove = true;
-                                        } else if($weight >= 25 && $linkDensity > 0.5) {
+                                        } else if($weight >= 25 && $linkDensity > 0.5) {
-                                                $this->dbg(' weight above 25 but link density greater than 0.5');
+                                                $this->dbg(' weight above 25 but link density greater than 0.5');
-                                                $toRemove = true;
+                                                $toRemove = true;
-                                        } else if(($embedCount == 1 && $contentLength < 75) || $embedCount > 1) {
+                                        } else if(($embedCount == 1 && $contentLength < 75) || $embedCount > 1) {
-                                                $this->dbg(' 1 embed and content length smaller than 75 chars, or more than one embed');
+                                                $this->dbg(' 1 embed and content length smaller than 75 chars, or more than one embed');
-                                                $toRemove = true;
+                                                $toRemove = true;
-                                        }
+                                        }
-                                }
+                                }
-                                if ($toRemove) {
+                                if ($toRemove) {
-                                        //$this->dbg('Removing: '.$tagsList->item($i)->innerHTML);
+                                        //$this->dbg('Removing: '.$tagsList->item($i)->innerHTML);
-                                        $tagsList->item($i)->parentNode->removeChild($tagsList->item($i));
+                                        $tagsList->item($i)->parentNode->removeChild($tagsList->item($i));
-                                }
+                                }
-                        }
+                        }
-                }
+                }
-        }
+        }
-        /**
+        /**
-        * Clean out spurious headers from an Element. Checks things like classnames and link density.
+        * Clean out spurious headers from an Element: removes every <h1> and <h2> whose class weight is negative or whose link density exceeds 0.33.
-        *
+        *
-        * @param DOMElement $e
+        * @param DOMElement $e
-        * @return void
+        * @return void
-        */
+        */
-        public function cleanHeaders($e) {
+        public function cleanHeaders($e) {
-                for ($headerIndex = 1; $headerIndex < 3; $headerIndex++) {
+                for ($headerIndex = 1; $headerIndex < 3; $headerIndex++) {
-                        $headers = $e->getElementsByTagName('h' . $headerIndex);
+                        $headers = $e->getElementsByTagName('h' . $headerIndex);
-                        for ($i=$headers->length-1; $i >=0; $i--) {
+                        for ($i=$headers->length-1; $i >=0; $i--) {
-                                if ($this->getClassWeight($headers->item($i)) < 0 || $this->getLinkDensity($headers->item($i)) > 0.33) {
+                                if ($this->getClassWeight($headers->item($i)) < 0 || $this->getLinkDensity($headers->item($i)) > 0.33) {
-                                        $headers->item($i)->parentNode->removeChild($headers->item($i));
+                                        $headers->item($i)->parentNode->removeChild($headers->item($i));
-                                }
+                                }
-                        }
+                        }
-                }
+                }
-        }
+        }
-        public function flagIsActive($flag) {
+        public function flagIsActive($flag) {
-                return ($this->flags & $flag) > 0;
+                return ($this->flags & $flag) > 0;
-        }
+        }
-        
+        
-        public function addFlag($flag) {
+        public function addFlag($flag) {
-                $this->flags = $this->flags | $flag;
+                $this->flags = $this->flags | $flag;
-        }
+        }
-        
+        
-        public function removeFlag($flag) {
+        public function removeFlag($flag) {
-                $this->flags = $this->flags & ~$flag;
+                $this->flags = $this->flags & ~$flag;
-        }
+        }
-}
+}
 ?>
 \ No newline at end of file
diff --git a/inc/3rdparty/makefulltextfeed.php b/inc/3rdparty/makefulltextfeed.php
index 4faad6d9..7a56be8c 100755
--- a/inc/3rdparty/makefulltextfeed.php
+++ b/inc/3rdparty/makefulltextfeed.php
@@ -3,8 +3,8 @@
 // Author: Keyvan Minoukadeh
 // Copyright (c) 2013 Keyvan Minoukadeh
 // License: AGPLv3
-// Version: 3.1
+// Version: 3.2
-// Date: 2013-03-05
+// Date: 2013-05-13
 // More info: http://fivefilters.org/content-only/
 // Help: http://help.fivefilters.org
@@ -25,12 +25,8 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>.
 // Usage
 // -----
-// Request this file passing it your feed in the querystring: makefulltextfeed.php?url=mysite.org
+// Request this file passing it a web page or feed URL in the querystring: makefulltextfeed.php?url=example.org/article
-// The following options can be passed in the querystring:
+// For more request parameters, see http://help.fivefilters.org/customer/portal/articles/226660-usage
-// * URL: url=[feed or website url] (required, should be URL-encoded - in php: urlencode($url))
-// * URL points to HTML (not feed): html=true (optional, by default it's automatically detected)
-// * API key: key=[api key] (optional, refer to config.php)
-// * Max entries to process: max=[max number of items] (optional)
 error_reporting(E_ALL ^ E_NOTICE);
 ini_set("display_errors", 1);
@@ -76,8 +72,8 @@ header('X-Robots-Tag: noindex, nofollow');
 ////////////////////////////////
 // Check if service is enabled
 ////////////////////////////////
-if (!$options->enabled) { 
+if (!$options->enabled) {
-        die('The full-text RSS service is currently disabled'); 
+        die('The full-text RSS service is currently disabled');
 }
 ////////////////////////////////
@@ -121,8 +117,8 @@ $options->smart_cache = $options->smart_cache && function_exists('apc_inc');
 ////////////////////////////////
 // Check for feed URL
 ////////////////////////////////
-if (!isset($_GET['url'])) { 
+if (!isset($_GET['url'])) {
-        die('No URL supplied'); 
+        die('No URL supplied');
 }
 $url = trim($_GET['url']);
 if (strtolower(substr($url, 0, 7)) == 'feed://') {
@@ -161,10 +157,12 @@ if (isset($_GET['key']) && ($key_index = array_search($_GET['key'], $options->ap
        if (isset($_GET['links'])) $redirect .= '&links='.urlencode($_GET['links']);
        if (isset($_GET['exc'])) $redirect .= '&exc='.urlencode($_GET['exc']);
        if (isset($_GET['format'])) $redirect .= '&format='.urlencode($_GET['format']);
-        if (isset($_GET['callback'])) $redirect .= '&callback='.urlencode($_GET['callback']);   
+        if (isset($_GET['callback'])) $redirect .= '&callback='.urlencode($_GET['callback']);
        if (isset($_GET['l'])) $redirect .= '&l='.urlencode($_GET['l']);
        if (isset($_GET['xss'])) $redirect .= '&xss';
        if (isset($_GET['use_extracted_title'])) $redirect .= '&use_extracted_title';
+        if (isset($_GET['content'])) $redirect .= '&content='.urlencode($_GET['content']);
+        if (isset($_GET['summary'])) $redirect .= '&summary='.urlencode($_GET['summary']);
        if (isset($_GET['debug'])) $redirect .= '&debug';
        if ($debug_mode) {
                debug('Redirecting to hide access key, follow URL below to continue');
@@ -177,7 +175,7 @@ if (isset($_GET['key']) && ($key_index = array_search($_GET['key'], $options->ap
 ///////////////////////////////////////////////
 // Set timezone.
-// Prevents warnings, but needs more testing - 
+// Prevents warnings, but needs more testing -
 // perhaps if timezone is set in php.ini we
 // don't need to set it at all...
 ///////////////////////////////////////////////
@@ -199,7 +197,7 @@ if (isset($_GET['key']) && isset($_GET['hash']) && isset($options->api_keys[(int
 }
 $key_index = ($valid_key) ? (int)$_GET['key'] : 0;
 if (!$valid_key && $options->key_required) {
-        die('A valid key must be supplied'); 
+        die('A valid key must be supplied');
 }
 if (!$valid_key && isset($_GET['key']) && $_GET['key'] != '') {
        die('The entered key is invalid');
@@ -251,6 +249,28 @@ if ($options->favour_feed_titles == 'user') {
 }
 ///////////////////////////////////////////////
+// Include full content in output?
+///////////////////////////////////////////////
+if ($options->content === 'user') {
+        if (isset($_GET['content']) && $_GET['content'] === '0') {
+                $options->content = false;
+        } else {
+                $options->content = true;
+        }
+}
+///////////////////////////////////////////////
+// Include summaries in output?
+///////////////////////////////////////////////
+if ($options->summary === 'user') {
+        if (isset($_GET['summary']) && $_GET['summary'] === '1') {
+                $options->summary = true;
+        } else {
+                $options->summary = false;
+        }
+}
+///////////////////////////////////////////////
 // Exclude items if extraction fails
 ///////////////////////////////////////////////
 if ($options->exclude_items_on_fail === 'user') {
@@ -272,15 +292,6 @@ if ($options->detect_language === 'user') {
        $detect_language = $options->detect_language;
 }
-if ($detect_language >= 2) {
-        $language_codes = array('albanian' => 'sq','arabic' => 'ar','azeri' => 'az','bengali' => 'bn','bulgarian' => 'bg',
-        'cebuano' => 'ceb', // ISO 639-2
-        'croatian' => 'hr','czech' => 'cs','danish' => 'da','dutch' => 'nl','english' => 'en','estonian' => 'et','farsi' => 'fa','finnish' => 'fi','french' => 'fr','german' => 'de','hausa' => 'ha',
-        'hawaiian' => 'haw', // ISO 639-2 
-        'hindi' => 'hi','hungarian' => 'hu','icelandic' => 'is','indonesian' => 'id','italian' => 'it','kazakh' => 'kk','kyrgyz' => 'ky','latin' => 'la','latvian' => 'lv','lithuanian' => 'lt','macedonian' => 'mk','mongolian' => 'mn','nepali' => 'ne','norwegian' => 'no','pashto' => 'ps',
-        'pidgin' => 'cpe', // ISO 639-2  
-        'polish' => 'pl','portuguese' => 'pt','romanian' => 'ro','russian' => 'ru','serbian' => 'sr','slovak' => 'sk','slovene' => 'sl','somali' => 'so','spanish' => 'es','swahili' => 'sw','swedish' => 'sv','tagalog' => 'tl','turkish' => 'tr','ukrainian' => 'uk','urdu' => 'ur','uzbek' => 'uz','vietnamese' => 'vi','welsh' => 'cy');
-}
 $use_cld = extension_loaded('cld') && (version_compare(PHP_VERSION, '5.3.0') >= 0);
 /////////////////////////////////////
@@ -330,7 +341,7 @@ if ($options->cors) header('Access-Control-Allow-Origin: *');
 //////////////////////////////////
 if ($options->caching) {
        debug('Caching is enabled...');
-        $cache_id = md5($max.$url.$valid_key.$links.$favour_feed_titles.$xss_filter.$exclude_on_fail.$format.$detect_language.(int)isset($_GET['pubsub']));
+        $cache_id = md5($max.$url.(int)$valid_key.$links.(int)$favour_feed_titles.(int)$options->content.(int)$options->summary.(int)$xss_filter.(int)$exclude_on_fail.$format.$detect_language.(int)isset($_GET['pubsub']));
        $check_cache = true;
        if ($options->apc && $options->smart_cache) {
                apc_add("cache.$cache_id", 0, 10*60);
@@ -468,7 +479,7 @@ if ($img_url = $feed->get_image_url()) {
 ////////////////////////////////////////////
 // Loop through feed items
 ////////////////////////////////////////////
-$items = $feed->get_items(0, $max);     
+$items = $feed->get_items(0, $max);
 // Request all feed items in parallel (if supported)
 $urls_sanitized = array();
 $urls = array();
@@ -550,24 +561,43 @@ foreach ($items as $key => $item) {
                        $is_single_page = false;
                        if ($single_page_response = getSinglePage($item, $html, $effective_url)) {
                                $is_single_page = true;
-                                $html = $single_page_response['body'];
-                                // remove strange things
-                                $html = str_replace('</[>', '', $html); 
-                                $html = convert_to_utf8($html, $single_page_response['headers']);
                                $effective_url = $single_page_response['effective_url'];
-                                debug("Retrieved single-page view from $effective_url");
+                                // check if action defined for returned Content-Type
+                                $mime_info = get_mime_action_info($single_page_response['headers']);
+                                if (isset($mime_info['action'])) {
+                                        if ($mime_info['action'] == 'exclude') {
+                                                continue; // skip this feed item entry
+                                        } elseif ($mime_info['action'] == 'link') {
+                                                if ($mime_info['type'] == 'image') {
+                                                        $html = "<a href=\"$effective_url\"><img src=\"$effective_url\" alt=\"{$mime_info['name']}\" /></a>";
+                                                } else {
+                                                        $html = "<a href=\"$effective_url\">Download {$mime_info['name']}</a>";
+                                                }
+                                                $extracted_title = $mime_info['name'];
+                                                $do_content_extraction = false;
+                                        }
+                                }
+                                if ($do_content_extraction) {
+                                        $html = $single_page_response['body'];
+                                        // remove strange things
+                                        $html = str_replace('</[>', '', $html);
+                                        $html = convert_to_utf8($html, $single_page_response['headers']);
+                                        debug("Retrieved single-page view from $effective_url");
+                                }
                                unset($single_page_response);
                        }
+                }
+                if ($do_content_extraction) {
                        debug('--------');
                        debug('Attempting to extract content');
                        $extract_result = $extractor->process($html, $effective_url);
                        $readability = $extractor->readability;
-                        $content_block = ($extract_result) ? $extractor->getContent() : null;                   
+                        $content_block = ($extract_result) ? $extractor->getContent() : null;
                        $extracted_title = ($extract_result) ? $extractor->getTitle() : '';
                        // Deal with multi-page articles
                        //die('Next: '.$extractor->getNextPageUrl());
                        $is_multi_page = (!$is_single_page && $extract_result && $extractor->getNextPageUrl());
-                        if ($options->multipage && $is_multi_page) {
+                        if ($options->multipage && $is_multi_page && $options->content) {
                                debug('--------');
                                debug('Attempting to process multi-page article');
                                $multi_page_urls = array();
@@ -580,7 +610,7 @@ foreach ($items as $key => $item) {
                                                // check it's not what we have already!
                                                if (!in_array($next_page_url, $multi_page_urls)) {
                                                        // it's not, so let's attempt to fetch it
-                                                        $multi_page_urls[] = $next_page_url;                                            
+                                                        $multi_page_urls[] = $next_page_url;
                                                        $_prev_ref = $http->referer;
                                                        if (($response = $http->get($next_page_url, true)) && $response['status_code'] < 300) {
                                                                // make sure mime type is not something with a different action associated
@@ -605,13 +635,15 @@ foreach ($items as $key => $item) {
                                // did we successfully deal with this multi-page article?
                                if (empty($multi_page_content)) {
                                        debug('Failed to extract all parts of multi-page article, so not going to include them');
-                                        $multi_page_content[] = $readability->dom->createElement('p')->innerHTML = '<em>This article appears to continue on subsequent pages which we could not extract</em>';
+                                        $_page = $readability->dom->createElement('p');
+                                        $_page->innerHTML = '<em>This article appears to continue on subsequent pages which we could not extract</em>';
+                                        $multi_page_content[] = $_page;
                                }
                                foreach ($multi_page_content as $_page) {
                                        $_page = $content_block->ownerDocument->importNode($_page, true);
                                        $content_block->appendChild($_page);
                                }
-                                unset($multi_page_urls, $multi_page_content, $page_mime_info, $next_page_url);
+                                unset($multi_page_urls, $multi_page_content, $page_mime_info, $next_page_url, $_page);
                        }
                }
                // use extracted title for both feed and item title if we're using single-item dummy feed
@@ -658,7 +690,7 @@ foreach ($items as $key => $item) {
                        } else {
                                $html = $content_block->ownerDocument->saveXML($content_block); // essentially outerHTML
                        }
-                        unset($content_block);
+                        //unset($content_block); // intentionally kept: $content_block is read again further down when building the item summary
                        // post-processing cleanup
                        $html = preg_replace('!<p>[\s\h\v]*</p>!u', '', $html);
                        if ($links == 'remove') {
@@ -671,130 +703,155 @@ foreach ($items as $key => $item) {
                }
        }
-                if ($valid_key && isset($_GET['pubsub'])) { // used only on fivefilters.org at the moment
+        if ($valid_key && isset($_GET['pubsub'])) { // used only on fivefilters.org at the moment
-                        $newitem->addElement('guid', 'http://fivefilters.org/content-only/redirect.php?url='.urlencode($item->get_permalink()), array('isPermaLink'=>'false'));
+                $newitem->addElement('guid', 'http://fivefilters.org/content-only/redirect.php?url='.urlencode($item->get_permalink()), array('isPermaLink'=>'false'));
+        } else {
+                $newitem->addElement('guid', $item->get_permalink(), array('isPermaLink'=>'true'));
+        }
+        // filter xss?
+        if ($xss_filter) {
+                debug('Filtering HTML to remove XSS');
+                $html = htmLawed::hl($html, array('safe'=>1, 'deny_attribute'=>'style', 'comment'=>1, 'cdata'=>1));
+        }
+        // add content
+        if ($options->summary === true) {
+                // get summary
+                $summary = '';
+                if (!$do_content_extraction) {
+                        $summary = $html;
                } else {
-                        $newitem->addElement('guid', $item->get_permalink(), array('isPermaLink'=>'true'));
+                        // Try to get first few paragraphs
-                }
+                        if (isset($content_block) && ($content_block instanceof DOMElement)) {
-                // filter xss?
+                                $_paras = $content_block->getElementsByTagName('p');
-                if ($xss_filter) {
+                                foreach ($_paras as $_para) {
-                        debug('Filtering HTML to remove XSS');
+                                        $summary .= preg_replace("/[\n\r\t ]+/", ' ', $_para->textContent).' ';
-                        $html = htmLawed::hl($html, array('safe'=>1, 'deny_attribute'=>'style', 'comment'=>1, 'cdata'=>1));
+                                        if (strlen($summary) > 200) break;
-                }
-                $newitem->setDescription($html);
-                
-                // set date
-                if ((int)$item->get_date('U') > 0) {
-                        $newitem->setDate((int)$item->get_date('U'));
-                } elseif ($extractor->getDate()) {
-                        $newitem->setDate($extractor->getDate());
-                }
-                
-                // add authors
-                if ($authors = $item->get_authors()) {
-                        foreach ($authors as $author) {
-                                // for some feeds, SimplePie stores author's name as email, e.g. http://feeds.feedburner.com/nymag/intel
-                                if ($author->get_name() !== null) {
-                                        $newitem->addElement('dc:creator', $author->get_name());
-                                } elseif ($author->get_email() !== null) {
-                                        $newitem->addElement('dc:creator', $author->get_email());
                                }
+                        } else {
+                                $summary = $html;
                        }
-                } elseif ($authors = $extractor->getAuthors()) {
+                }
-                        //TODO: make sure the list size is reasonable
+                unset($_paras, $_para);
-                        foreach ($authors as $author) {
+                $summary = get_excerpt($summary);
-                                // TODO: xpath often selects authors from other articles linked from the page.
+                $newitem->setDescription($summary);
-                                // for now choose first item
+                if ($options->content) $newitem->setElement('content:encoded', $html);
-                                $newitem->addElement('dc:creator', $author);
+        } else {
-                                break;
+                if ($options->content) $newitem->setDescription($html);
+        }
+        // set date
+        if ((int)$item->get_date('U') > 0) {
+                $newitem->setDate((int)$item->get_date('U'));
+        } elseif ($extractor->getDate()) {
+                $newitem->setDate($extractor->getDate());
+        }
+        // add authors
+        if ($authors = $item->get_authors()) {
+                foreach ($authors as $author) {
+                        // for some feeds, SimplePie stores author's name as email, e.g. http://feeds.feedburner.com/nymag/intel
+                        if ($author->get_name() !== null) {
+                                $newitem->addElement('dc:creator', $author->get_name());
+                        } elseif ($author->get_email() !== null) {
+                                $newitem->addElement('dc:creator', $author->get_email());
                        }
                }
-                
+        } elseif ($authors = $extractor->getAuthors()) {
-                // add language
+                //TODO: make sure the list size is reasonable
-                if ($detect_language) {
+                foreach ($authors as $author) {
-                        $language = $extractor->getLanguage();
+                        // TODO: xpath often selects authors from other articles linked from the page.
-                        if (!$language) $language = $feed->get_language();
+                        // for now choose first item
-                        if (($detect_language == 3 || (!$language && $detect_language == 2)) && $text_sample) {
+                        $newitem->addElement('dc:creator', $author);
-                                try {
+                        break;
-                                        if ($use_cld) {
+                }
-                                                // Use PHP-CLD extension
+        }
-                                                $php_cld = 'CLD\detect'; // in quotes to prevent PHP 5.2 parse error
-                                                $res = $php_cld($text_sample);
+        // add language
-                                                if (is_array($res) && count($res) > 0) {
+        if ($detect_language) {
-                                                        $language = $res[0]['code'];
+                $language = $extractor->getLanguage();
-                                                }       
+                if (!$language) $language = $feed->get_language();
-                                        } else {
+                if (($detect_language == 3 || (!$language && $detect_language == 2)) && $text_sample) {
-                                                //die('what');
+                        try {
-                                                // Use PEAR's Text_LanguageDetect
+                                if ($use_cld) {
-                                                if (!isset($l)) {
+                                        // Use PHP-CLD extension
-                                                        $l = new Text_LanguageDetect('libraries/language-detect/lang.dat', 'libraries/language-detect/unicode_blocks.dat');
+                                        $php_cld = 'CLD\detect'; // in quotes to prevent PHP 5.2 parse error
-                                                }
+                                        $res = $php_cld($text_sample);
-                                                $l_result = $l->detect($text_sample, 1);
+                                        if (is_array($res) && count($res) > 0) {
-                                                if (count($l_result) > 0) {
+                                                $language = $res[0]['code'];
-                                                        $language = $language_codes[key($l_result)];
+                                        }
-                                                }
+                                } else {
+                                        //die('what');
+                                        // Use PEAR's Text_LanguageDetect
+                                        if (!isset($l)) {
+                                          $l = new Text_LanguageDetect();
+                                          $l->setNameMode(2); // return ISO 639-1 codes (e.g. "en")
+                                        }
+                                        $l_result = $l->detect($text_sample, 1);
+                                        if (count($l_result) > 0) {
+                                                $language = key($l_result);
                                        }
-                                } catch (Exception $e) {
-                                        //die('error: '.$e);    
-                                        // do nothing
                                }
-                        }
+                        } catch (Exception $e) {
-                        if ($language && (strlen($language) < 7)) {     
+                                //die('error: '.$e);
-                                $newitem->addElement('dc:language', $language);
+                                // do nothing
                        }
                }
-                
+                if ($language && (strlen($language) < 7)) {
-                // add MIME type (if it appeared in our exclusions lists)
+                        $newitem->addElement('dc:language', $language);
-                if (isset($mime_info['mime'])) $newitem->addElement('dc:format', $mime_info['mime']);
-                // add effective URL (URL after redirects)
-                if (isset($effective_url)) {
-                        //TODO: ensure $effective_url is valid witout - sometimes it causes problems, e.g.
-                        //http://www.siasat.pk/forum/showthread.php?108883-Pakistan-Chowk-by-Rana-Mubashir-�-25th-March-2012-Special-Program-from-Liari-(Karachi)
-                        //temporary measure: use utf8_encode()
-                        $newitem->addElement('dc:identifier', remove_url_cruft(utf8_encode($effective_url)));
-                } else {
-                        $newitem->addElement('dc:identifier', remove_url_cruft($item->get_permalink()));
                }
-                
+        }
-                // add categories
-                if ($categories = $item->get_categories()) {
+        // add MIME type (if it appeared in our exclusions lists)
-                        foreach ($categories as $category) {
+        if (isset($mime_info['mime'])) $newitem->addElement('dc:format', $mime_info['mime']);
-                                if ($category->get_label() !== null) {
+        // add effective URL (URL after redirects)
-                                        $newitem->addElement('category', $category->get_label());
+        if (isset($effective_url)) {
-                                }
+                //TODO: ensure $effective_url is valid without - sometimes it causes problems, e.g.
+                //http://www.siasat.pk/forum/showthread.php?108883-Pakistan-Chowk-by-Rana-Mubashir-�-25th-March-2012-Special-Program-from-Liari-(Karachi)
+                //temporary measure: use utf8_encode()
+                $newitem->addElement('dc:identifier', remove_url_cruft(utf8_encode($effective_url)));
+        } else {
+                $newitem->addElement('dc:identifier', remove_url_cruft($item->get_permalink()));
+        }
+        // add categories
+        if ($categories = $item->get_categories()) {
+                foreach ($categories as $category) {
+                        if ($category->get_label() !== null) {
+                                $newitem->addElement('category', $category->get_label());
                        }
                }
-                
+        }
-                // check for enclosures
-                if ($options->keep_enclosures) {
+        // check for enclosures
-                        if ($enclosures = $item->get_enclosures()) {
+        if ($options->keep_enclosures) {
-                                foreach ($enclosures as $enclosure) {
+                if ($enclosures = $item->get_enclosures()) {
-                                        // thumbnails
+                        foreach ($enclosures as $enclosure) {
-                                        foreach ((array)$enclosure->get_thumbnails() as $thumbnail) {
+                                // thumbnails
-                                                $newitem->addElement('media:thumbnail', '', array('url'=>$thumbnail));
+                                foreach ((array)$enclosure->get_thumbnails() as $thumbnail) {
-                                        }
+                                        $newitem->addElement('media:thumbnail', '', array('url'=>$thumbnail));
-                                        if (!$enclosure->get_link()) continue;
-                                        $enc = array();
-                                        // Media RSS spec ($enc): http://search.yahoo.com/mrss
-                                        // SimplePie methods ($enclosure): http://simplepie.org/wiki/reference/start#methods4
-                                        $enc['url'] = $enclosure->get_link();
-                                        if ($enclosure->get_length()) $enc['fileSize'] = $enclosure->get_length();
-                                        if ($enclosure->get_type()) $enc['type'] = $enclosure->get_type();
-                                        if ($enclosure->get_medium()) $enc['medium'] = $enclosure->get_medium();
-                                        if ($enclosure->get_expression()) $enc['expression'] = $enclosure->get_expression();
-                                        if ($enclosure->get_bitrate()) $enc['bitrate'] = $enclosure->get_bitrate();
-                                        if ($enclosure->get_framerate()) $enc['framerate'] = $enclosure->get_framerate();
-                                        if ($enclosure->get_sampling_rate()) $enc['samplingrate'] = $enclosure->get_sampling_rate();
-                                        if ($enclosure->get_channels()) $enc['channels'] = $enclosure->get_channels();
-                                        if ($enclosure->get_duration()) $enc['duration'] = $enclosure->get_duration();
-                                        if ($enclosure->get_height()) $enc['height'] = $enclosure->get_height();
-                                        if ($enclosure->get_width()) $enc['width'] = $enclosure->get_width();
-                                        if ($enclosure->get_language()) $enc['lang'] = $enclosure->get_language();
-                                        $newitem->addElement('media:content', '', $enc);
                                }
+                                if (!$enclosure->get_link()) continue;
+                                $enc = array();
+                                // Media RSS spec ($enc): http://search.yahoo.com/mrss
+                                // SimplePie methods ($enclosure): http://simplepie.org/wiki/reference/start#methods4
+                                $enc['url'] = $enclosure->get_link();
+                                if ($enclosure->get_length()) $enc['fileSize'] = $enclosure->get_length();
+                                if ($enclosure->get_type()) $enc['type'] = $enclosure->get_type();
+                                if ($enclosure->get_medium()) $enc['medium'] = $enclosure->get_medium();
+                                if ($enclosure->get_expression()) $enc['expression'] = $enclosure->get_expression();
+                                if ($enclosure->get_bitrate()) $enc['bitrate'] = $enclosure->get_bitrate();
+                                if ($enclosure->get_framerate()) $enc['framerate'] = $enclosure->get_framerate();
+                                if ($enclosure->get_sampling_rate()) $enc['samplingrate'] = $enclosure->get_sampling_rate();
+                                if ($enclosure->get_channels()) $enc['channels'] = $enclosure->get_channels();
+                                if ($enclosure->get_duration()) $enc['duration'] = $enclosure->get_duration();
+                                if ($enclosure->get_height()) $enc['height'] = $enclosure->get_height();
+                                if ($enclosure->get_width()) $enc['width'] = $enclosure->get_width();
+                                if ($enclosure->get_language()) $enc['lang'] = $enclosure->get_language();
+                                $newitem->addElement('media:content', '', $enc);
                        }
                }
-        /* } */
+        }
        $output->addItem($newitem);
        unset($html);
        $item_count++;
diff --git a/inc/3rdparty/makefulltextfeedHelpers.php b/inc/3rdparty/makefulltextfeedHelpers.php
index 1c11b8f6..4e985372 100755
--- a/inc/3rdparty/makefulltextfeedHelpers.php
+++ b/inc/3rdparty/makefulltextfeedHelpers.php
@@ -66,6 +66,38 @@ class DummySingleItem {
 // HELPER FUNCTIONS
 ///////////////////////////////
+// Adapted from WordPress
+// http://core.trac.wordpress.org/browser/tags/3.5.1/wp-includes/formatting.php#L2173
+/**
+ * Build a plain-text excerpt of at most $num_words words from (possibly HTML) text.
+ *
+ * Tags are stripped, runs of newline/carriage-return/tab/space are collapsed to a
+ * single space, and $more is appended only when the text was actually shortened.
+ *
+ * @param string      $text      Source text; may contain HTML markup.
+ * @param int         $num_words Maximum number of whitespace-separated words to keep.
+ * @param string|null $more      Suffix appended when truncated; null means '&hellip;'.
+ * @return string Excerpt with leading/trailing Unicode separator and "other"
+ *                (control, format, ...) characters removed.
+ */
+function get_excerpt($text, $num_words=55, $more=null) {
+        if (null === $more) $more = '&hellip;';
+        $text = strip_tags($text);
+        //TODO: Check if word count is based on single characters (East Asian characters)
+        // Split into at most $num_words + 1 pieces: the extra piece holds the remainder,
+        // and its presence tells us the text is longer than the limit.
+        $words_array = preg_split("/[\n\r\t ]+/", $text, $num_words + 1, PREG_SPLIT_NO_EMPTY);
+        $sep = ' ';
+        if (count($words_array) > $num_words) {
+                // drop the remainder piece and mark the truncation
+                array_pop($words_array);
+                $text = implode($sep, $words_array).$more;
+        } else {
+                $text = implode($sep, $words_array);
+        }
+        // trim whitespace at beginning or end of string
+        // See: http://stackoverflow.com/questions/4166896/trim-unicode-whitespace-in-php-5-2
+        $trimmed = preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $text);
+        // With the /u modifier preg_replace() returns null when $text is not valid UTF-8;
+        // fall back to a byte-wise trim so the caller always receives a string.
+        return (null === $trimmed) ? trim($text) : $trimmed;
+}
 function url_allowed($url) {
        global $options;
        if (!empty($options->allowed_urls)) {
@@ -165,14 +197,6 @@ function convert_to_utf8($html, $header=null)
                        if (strtolower($encoding) != 'utf-8') {
                                debug('Converting to UTF-8');
                                $html = SimplePie_Misc::change_encoding($html, $encoding, 'utf-8');
-                                /*
-                                if (function_exists('iconv')) {
-                                        // iconv appears to handle certain character encodings better than mb_convert_encoding
-                                        $html = iconv($encoding, 'utf-8', $html);
-                                } else {
-                                        $html = mb_convert_encoding($html, 'utf-8', $encoding);
-                                }
-                                */
                        }
                }
        }
@@ -196,7 +220,7 @@ function makeAbsolute($base, $elem) {
 }
 function makeAbsoluteAttr($base, $e, $attr) {
        if ($e->hasAttribute($attr)) {
-                // Trim leading and trailing white space. I don't really like this but 
+                // Trim leading and trailing white space. I don't really like this but
                // unfortunately it does appear on some sites. e.g.  <img src=" /path/to/image.jpg" />
                $url = trim(str_replace('%20', ' ', $e->getAttribute($attr)));
                $url = str_replace(' ', '%20', $url);
diff --git a/inc/3rdparty/site_config/custom/dailymotion.com.txt b/inc/3rdparty/site_config/custom/dailymotion.com.txt
new file mode 100755
index 00000000..0cad808f
--- /dev/null
+++ b/inc/3rdparty/site_config/custom/dailymotion.com.txt
@@ -0,0 +1,12 @@
+title: //title
+body: //iframe
+replace_string(<![CDATA[): _
+replace_string(]]>): _
+single_page_link: //link[@type='application/xml+oembed']
+prune: no
+tidy: no
+test_url: http://www.dailymotion.com/video/x1vk5oh_before-they-were-on-game-of-thrones_people
diff --git a/inc/3rdparty/site_config/custom/index.php b/inc/3rdparty/site_config/custom/index.php
new file mode 100644
index 00000000..a3d5f739
--- /dev/null
+++ b/inc/3rdparty/site_config/custom/index.php
@@ -0,0 +1,3 @@
+<?php
+// this is here to prevent directory listing over the web
+?>
+\ No newline at end of file
diff --git a/inc/3rdparty/site_config/custom/ted.com.txt b/inc/3rdparty/site_config/custom/ted.com.txt
new file mode 100755
index 00000000..4940d2bc
--- /dev/null
+++ b/inc/3rdparty/site_config/custom/ted.com.txt
@@ -0,0 +1,11 @@
+title: //title
+body: //div[@class='talk-article__body talk-transcript__body'] | //div[@class='media__image media__image--thumb talk-link__image']
+strip_id_or_class: talk-transcript__para__time
+single_page_link: //a[@id='hero-transcript-link']
+#prune: no
+tidy: no
+test_url: http://www.ted.com/talks/andrew_solomon_how_the_worst_moments_in_our_lives_make_us_who_we_are
diff --git a/inc/3rdparty/site_config/index.php b/inc/3rdparty/site_config/index.php
index a1b767fd..76ca8b3c 100644
--- a/inc/3rdparty/site_config/index.php
+++ b/inc/3rdparty/site_config/index.php
@@ -1,3 +1,2 @@
-<?php
+<?php
-// this is here to prevent directory listing over the web
+// this is here to prevent directory listing over the web
+\ No newline at end of file
-?>
-\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/version.txt b/inc/3rdparty/site_config/standard/version.txt
index bf0d87ab..eaf01ebd 100644
--- a/inc/3rdparty/site_config/standard/version.txt
+++ b/inc/3rdparty/site_config/standard/version.txt
@@ -1 +1 @@
-4
-\ No newline at end of file
+2013-05-12T22:53:07Z
+\ No newline at end of file
diff --git a/inc/poche/Poche.class.php b/inc/poche/Poche.class.php
index 1b69cd61..37cf66a3 100755
--- a/inc/poche/Poche.class.php
+++ b/inc/poche/Poche.class.php
@@ -1142,11 +1142,12 @@ class Poche
     * return new purifier object with actual config
     */
    protected function getPurifier() {
-      $config = HTMLPurifier_Config::createDefault();
+      $config = HTMLPurifier_Config::createDefault();
-      $config->set('Cache.SerializerPath', CACHE);
+      $config->set('Cache.SerializerPath', CACHE);
-      $config->set('HTML.SafeIframe', true);
+      $config->set('HTML.SafeIframe', true);
-      $config->set('URI.SafeIframeRegexp', '%^(https?:)?//(www\.youtube(?:-nocookie)?\.com/embed/|player\.vimeo\.com/video/)%'); //allow YouTube and Vimeo$purifier = new HTMLPurifier($config);
+      //allow YouTube, Vimeo and Dailymotion embed URLs as iframe sources
+      $config->set('URI.SafeIframeRegexp', '%^(https?:)?//(www\.youtube(?:-nocookie)?\.com/embed/|player\.vimeo\.com/video/|www\.dailymotion\.com/embed/video/)%');
      return new HTMLPurifier($config);
    }
author	Nicolas Lœuillet <nicolas@loeuillet.org>	2014-05-29 12:50:28 +0200
committer	Nicolas Lœuillet <nicolas@loeuillet.org>	2014-05-29 12:50:28 +0200
commit	87f01ea2e97715ac5df4ef7a6741cc26f3a5cd1b (patch)
tree	558818975ac41403e7d55ad07c5b0ac29806e907 /inc
parent	ab157bbb75ba226917145c9bf906cbf764a85cd0 (diff)
parent	0b9bb8cb7868f24137c5d8b85c39cc88ea877411 (diff)
download	wallabag-87f01ea2e97715ac5df4ef7a6741cc26f3a5cd1b.tar.gz wallabag-87f01ea2e97715ac5df4ef7a6741cc26f3a5cd1b.tar.zst wallabag-87f01ea2e97715ac5df4ef7a6741cc26f3a5cd1b.zip