From 230fa05eb736ab58d4183ddb2932bc7c620a2dd4 Mon Sep 17 00:00:00 2001
From: tcitworld <tcit@tcit.fr>
Date: Sat, 17 May 2014 20:32:38 +0200
Subject: Better Grammar

---
 CONTRIBUTING.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 76708404..9ccb0b14 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,4 +1,4 @@
-# How contributing
+# How to contribute
 
 ## You found a bug
 Please [open a new issue](https://github.com/wallabag/wallabag/issues/new).
-- 
cgit v1.2.3


From 007f26e582251895ea7d12b509c8aee24c4b1f47 Mon Sep 17 00:00:00 2001
From: tcit <tcit@tcit.fr>
Date: Sun, 18 May 2014 22:11:56 +0200
Subject: Security fix for Download Images

---
 inc/poche/pochePictures.php | 36 +++++++++++++++++++++++++++++++++---
 1 file changed, 33 insertions(+), 3 deletions(-)

diff --git a/inc/poche/pochePictures.php b/inc/poche/pochePictures.php
index e4b0b160..3202f2cc 100644
--- a/inc/poche/pochePictures.php
+++ b/inc/poche/pochePictures.php
@@ -72,9 +72,39 @@ function download_pictures($absolute_path, $fullpath)
     if(file_exists($fullpath)) {
         unlink($fullpath);
     }
-    $fp = fopen($fullpath, 'x');
-    fwrite($fp, $rawdata);
-    fclose($fp);
+    
+    // check extension
+    $file_ext = strrchr($fullpath, '.');
+    $whitelist = array(".jpg",".jpeg",".gif",".png"); 
+    if (!(in_array($file_ext, $whitelist))) {
+        Tools::logm('processed image with not allowed extension. Skipping ' . $fullpath);
+    } else {
+        // check headers
+        $imageinfo = getimagesize($absolute_path);
+        if ($imageinfo['mime'] != 'image/gif' && $imageinfo['mime'] != 'image/jpeg'&& $imageinfo['mime'] != 'image/jpg'&& $imageinfo['mime'] != 'image/png') {
+            Tools::logm('processed image with bad header. Skipping ' . $fullpath);
+        } else {
+            // regenerate image
+            $im = imagecreatefromstring($rawdata);
+            if ($im) {
+                switch ($imageinfo['mime']) {
+                    case 'image/gif':
+                        imagegif($im, $fullpath);
+                        break;
+                    case 'image/jpeg':
+                    case 'image/jpg':
+                        imagejpeg($im, $fullpath); // default quality is 75%
+                        break;
+                    case 'image/png':
+                        imagepng($im, $fullpath);
+                        break;
+                }
+                imagedestroy($im);
+            } else {
+             Tools::logm('error while regenerating image ' . $fullpath);
+            }
+        }
+    }
 }
 
 /**
-- 
cgit v1.2.3


From 1d6a9ac25aa0ee1a51b3fcc70bc4247ff14c54e2 Mon Sep 17 00:00:00 2001
From: tcit <tcit@tcit.fr>
Date: Mon, 19 May 2014 15:24:11 +0200
Subject: Option for setting quality

---
 inc/poche/config.inc.default.php | 1 +
 inc/poche/pochePictures.php      | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/inc/poche/config.inc.default.php b/inc/poche/config.inc.default.php
index edc42fc9..0e82f0cd 100755
--- a/inc/poche/config.inc.default.php
+++ b/inc/poche/config.inc.default.php
@@ -31,6 +31,7 @@
 @define ('MODE_DEMO', FALSE);
 @define ('DEBUG_POCHE', FALSE);
 @define ('DOWNLOAD_PICTURES', FALSE);
+@define ('REGENERATE_PICTURES_QUALITY'), 75);
 @define ('CONVERT_LINKS_FOOTNOTES', FALSE);
 @define ('REVERT_FORCED_PARAGRAPH_ELEMENTS', FALSE);
 @define ('SHARE_TWITTER', TRUE);
diff --git a/inc/poche/pochePictures.php b/inc/poche/pochePictures.php
index f10cc25e..97eb56ac 100644
--- a/inc/poche/pochePictures.php
+++ b/inc/poche/pochePictures.php
@@ -94,10 +94,10 @@ function download_pictures($absolute_path, $fullpath)
                         break;
                     case 'image/jpeg':
                     case 'image/jpg':
-                        imagejpeg($im, $fullpath); // default quality is 75%
+                        imagejpeg($im, $fullpath, REGENERATE_PICTURES_QUALITY);
                         break;
                     case 'image/png':
-                        imagepng($im, $fullpath);
+                        imagepng($im, $fullpath, REGENERATE_PICTURES_QUALITY);
                         break;
                 }
                 imagedestroy($im);
-- 
cgit v1.2.3


From 6caba976ec0c2333c33b64bc4de26c64b19f2f49 Mon Sep 17 00:00:00 2001
From: tcit <tcit@tcit.fr>
Date: Mon, 19 May 2014 15:34:49 +0200
Subject: Bug with bracket

---
 inc/poche/config.inc.default.php | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/inc/poche/config.inc.default.php b/inc/poche/config.inc.default.php
index 0e82f0cd..6e25b2f7 100755
--- a/inc/poche/config.inc.default.php
+++ b/inc/poche/config.inc.default.php
@@ -31,7 +31,7 @@
 @define ('MODE_DEMO', FALSE);
 @define ('DEBUG_POCHE', FALSE);
 @define ('DOWNLOAD_PICTURES', FALSE);
-@define ('REGENERATE_PICTURES_QUALITY'), 75);
+@define ('REGENERATE_PICTURES_QUALITY', 75);
 @define ('CONVERT_LINKS_FOOTNOTES', FALSE);
 @define ('REVERT_FORCED_PARAGRAPH_ELEMENTS', FALSE);
 @define ('SHARE_TWITTER', TRUE);
-- 
cgit v1.2.3


From e3b00bcaf580177ecdbdb2ee90dfc263b1c2d79e Mon Sep 17 00:00:00 2001
From: tcit <tcit@tcit.fr>
Date: Mon, 19 May 2014 15:59:18 +0200
Subject: Fixed bug for png images

---
 inc/poche/pochePictures.php | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/inc/poche/pochePictures.php b/inc/poche/pochePictures.php
index 97eb56ac..8f86d2f2 100644
--- a/inc/poche/pochePictures.php
+++ b/inc/poche/pochePictures.php
@@ -97,7 +97,7 @@ function download_pictures($absolute_path, $fullpath)
                         imagejpeg($im, $fullpath, REGENERATE_PICTURES_QUALITY);
                         break;
                     case 'image/png':
-                        imagepng($im, $fullpath, REGENERATE_PICTURES_QUALITY);
+                        imagepng($im, $fullpath, ceil(REGENERATE_PICTURES_QUALITY / 100 * 9));
                         break;
                 }
                 imagedestroy($im);
-- 
cgit v1.2.3


From 0bf0dfe10d0dd4aaafcc7da7deb5be8ef76ad602 Mon Sep 17 00:00:00 2001
From: Simon Leblanc <contact@leblanc-simon.eu>
Date: Tue, 20 May 2014 00:42:51 +0200
Subject: Optimisation et gestion des erreurs

---
 inc/poche/pochePictures.php | 73 ++++++++++++++++++++++++++++-----------------
 1 file changed, 45 insertions(+), 28 deletions(-)

diff --git a/inc/poche/pochePictures.php b/inc/poche/pochePictures.php
index 8f86d2f2..7c319a85 100644
--- a/inc/poche/pochePictures.php
+++ b/inc/poche/pochePictures.php
@@ -14,6 +14,7 @@
 function filtre_picture($content, $url, $id)
 {
     $matches = array();
+    $processing_pictures = array(); // list of processing image to avoid processing the same pictures twice
     preg_match_all('#<\s*(img)[^>]+src="([^"]*)"[^>]*>#Si', $content, $matches, PREG_SET_ORDER);
     foreach($matches as $i => $link) {
         $link[1] = trim($link[1]);
@@ -22,8 +23,17 @@ function filtre_picture($content, $url, $id)
             $filename = basename(parse_url($absolute_path, PHP_URL_PATH));
             $directory = create_assets_directory($id);
             $fullpath = $directory . '/' . $filename;
-            download_pictures($absolute_path, $fullpath);
-            $content = str_replace($matches[$i][2], $fullpath, $content);
+            
+            if (in_array($absolute_path, $processing_pictures) === true) {
+                // this picture URL was already handled earlier in this loop: skip it and go to the next picture
+                continue;
+            }
+            
+            if (download_pictures($absolute_path, $fullpath) === true) {
+                $content = str_replace($matches[$i][2], $fullpath, $content);
+            }
+            
+            $processing_pictures[] = $absolute_path;
         }
 
     }
@@ -64,6 +74,8 @@ function get_absolute_link($relative_link, $url) {
 
 /**
  * Téléchargement des images
+ * 
+ * @return bool true if the download and processing succeeded, false otherwise
  */
 function download_pictures($absolute_path, $fullpath)
 {
@@ -79,33 +91,38 @@ function download_pictures($absolute_path, $fullpath)
     $whitelist = array(".jpg",".jpeg",".gif",".png"); 
     if (!(in_array($file_ext, $whitelist))) {
         Tools::logm('processed image with not allowed extension. Skipping ' . $fullpath);
-    } else {
-        // check headers
-        $imageinfo = getimagesize($absolute_path);
-        if ($imageinfo['mime'] != 'image/gif' && $imageinfo['mime'] != 'image/jpeg'&& $imageinfo['mime'] != 'image/jpg'&& $imageinfo['mime'] != 'image/png') {
-            Tools::logm('processed image with bad header. Skipping ' . $fullpath);
-        } else {
-            // regenerate image
-            $im = imagecreatefromstring($rawdata);
-            if ($im) {
-                switch ($imageinfo['mime']) {
-                    case 'image/gif':
-                        imagegif($im, $fullpath);
-                        break;
-                    case 'image/jpeg':
-                    case 'image/jpg':
-                        imagejpeg($im, $fullpath, REGENERATE_PICTURES_QUALITY);
-                        break;
-                    case 'image/png':
-                        imagepng($im, $fullpath, ceil(REGENERATE_PICTURES_QUALITY / 100 * 9));
-                        break;
-                }
-                imagedestroy($im);
-            } else {
-             Tools::logm('error while regenerating image ' . $fullpath);
-            }
-        }
+        return false;
+    }
+    
+    // check headers
+    $imageinfo = getimagesize($absolute_path);
+    if ($imageinfo['mime'] != 'image/gif' && $imageinfo['mime'] != 'image/jpeg'&& $imageinfo['mime'] != 'image/jpg'&& $imageinfo['mime'] != 'image/png') {
+        Tools::logm('processed image with bad header. Skipping ' . $fullpath);
+        return false;
     }
+    
+    // regenerate image
+    $im = imagecreatefromstring($rawdata);
+    if ($im === false) {
+        Tools::logm('error while regenerating image ' . $fullpath);
+        return false;
+    }
+    
+    switch ($imageinfo['mime']) {
+        case 'image/gif':
+            $result = imagegif($im, $fullpath);
+            break;
+        case 'image/jpeg':
+        case 'image/jpg':
+            $result = imagejpeg($im, $fullpath, REGENERATE_PICTURES_QUALITY);
+            break;
+        case 'image/png':
+            $result = imagepng($im, $fullpath, ceil(REGENERATE_PICTURES_QUALITY / 100 * 9));
+            break;
+    }
+    imagedestroy($im);
+    
+    return $result;
 }
 
 /**
-- 
cgit v1.2.3


From 38eecef26ba33a052475c52dead699e434b2362a Mon Sep 17 00:00:00 2001
From: tcitworld <tcit@tcit.fr>
Date: Tue, 20 May 2014 11:46:05 +0200
Subject: Added info for DOWNLOAD_PICTURES

We regenerate pictures, it might take some time
---
 inc/poche/config.inc.default.php | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/inc/poche/config.inc.default.php b/inc/poche/config.inc.default.php
index 6e25b2f7..ffcd205d 100755
--- a/inc/poche/config.inc.default.php
+++ b/inc/poche/config.inc.default.php
@@ -30,7 +30,7 @@
 
 @define ('MODE_DEMO', FALSE);
 @define ('DEBUG_POCHE', FALSE);
-@define ('DOWNLOAD_PICTURES', FALSE);
+@define ('DOWNLOAD_PICTURES', FALSE); # This can slow down the process of adding articles
 @define ('REGENERATE_PICTURES_QUALITY', 75);
 @define ('CONVERT_LINKS_FOOTNOTES', FALSE);
 @define ('REVERT_FORCED_PARAGRAPH_ELEMENTS', FALSE);
-- 
cgit v1.2.3


From f61ffec3529cb422f6703057fedcc7e23188e724 Mon Sep 17 00:00:00 2001
From: Guillaume Boudreau <guillaume@mobilogie.com>
Date: Mon, 19 May 2014 06:59:49 -0400
Subject: Fixed Baggy theme CSS for Chrome Extension (and < 500px width layout)

---
 themes/baggy/css/main.css | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/themes/baggy/css/main.css b/themes/baggy/css/main.css
index 6d3a56e1..6d320cd2 100755
--- a/themes/baggy/css/main.css
+++ b/themes/baggy/css/main.css
@@ -979,8 +979,8 @@ blockquote {
     content: none;
   }
   .logo {
-    width: 1.5em;
-    height: 1.5em;
+    width: 1.25em;
+    height: 1.25em;
     left: 0;
     top: 0;
   }
@@ -1030,6 +1030,7 @@ blockquote {
     margin-left: 1.5em;
     padding-right: 1.5em;
     position: static;
+    margin-top: 3em;
   }
   #article_toolbar .topPosF {
     display: none;
-- 
cgit v1.2.3


From 3ec62cf95ab4436923d4c665fad7aef226cbb822 Mon Sep 17 00:00:00 2001
From: Maryana Rozhankivska <mariroz@mr.lviv.ua>
Date: Thu, 22 May 2014 17:16:38 +0300
Subject: update to 3.2 version of full-text-rss, issue #694

---
 inc/3rdparty/config.php                            |  104 +-
 .../content-extractor/ContentExtractor.php         | 1455 +++++++------
 .../libraries/content-extractor/SiteConfig.php     |  681 +++---
 inc/3rdparty/libraries/feedwriter/FeedItem.php     |  100 +-
 inc/3rdparty/libraries/feedwriter/FeedWriter.php   |   17 +-
 inc/3rdparty/libraries/html5/TreeBuilder.php       |   13 +-
 .../libraries/humble-http-agent/CookieJar.php      |  807 ++++---
 .../humble-http-agent/HumbleHttpAgent.php          | 1589 +++++++-------
 .../SimplePie_HumbleHttpAgent.php                  |  157 +-
 .../libraries/language-detect/LanguageDetect.php   |  992 +++++----
 inc/3rdparty/libraries/readability/Readability.php | 2274 ++++++++++----------
 inc/3rdparty/makefulltextfeed.php                  |  349 +--
 inc/3rdparty/makefulltextfeedHelpers.php           |   42 +-
 inc/3rdparty/site_config/index.php                 |    5 +-
 inc/3rdparty/site_config/standard/version.txt      |    2 +-
 15 files changed, 4415 insertions(+), 4172 deletions(-)
 mode change 100644 => 100755 inc/3rdparty/libraries/feedwriter/FeedItem.php

diff --git a/inc/3rdparty/config.php b/inc/3rdparty/config.php
index e618117b..ec680d86 100755
--- a/inc/3rdparty/config.php
+++ b/inc/3rdparty/config.php
@@ -19,7 +19,7 @@ if (!isset($options)) $options = new stdClass();
 // Enable service
 // ----------------------
 // Set this to false if you want to disable the service.
-// If set to false, no feed is produced and users will 
+// If set to false, no feed is produced and users will
 // be told that the service is disabled.
 $options->enabled = true;
 
@@ -43,10 +43,64 @@ $options->default_entries = 5;
 // ----------------------
 // The maximum number of feed items to process when no access key is supplied.
 // This limits the user-supplied &max=x value. For example, if the user
-// asks for 20 items to be processed (&max=20), if max_entries is set to 
+// asks for 20 items to be processed (&max=20), if max_entries is set to
 // 10, only 10 will be processed.
 $options->max_entries = 10;
 
+// Full content
+// ----------------------
+// By default Full-Text RSS includes the extracted content in the output.
+// You can exclude this from the output by passing '&content=0' in the querystring.
+//
+// Possible values...
+// Always include: true
+// Never include: false
+// Include unless user overrides (&content=0): 'user' (default)
+//
+// Note: currently this does not disable full content extraction. It simply omits it
+// from the output.
+$options->content = 'user';
+
+// Excerpts
+// ----------------------
+// By default Full-Text RSS does not include excerpts in the output.
+// You can enable this by passing '&summary=1' in the querystring.
+// This will include a plain text excerpt from the extracted content.
+//
+// Possible values...
+// Always include: true (recommended for new users)
+// Never include: false
+// Don't include unless user overrides (&summary=1): 'user' (default)
+//
+// Important: if both content and excerpts are requested, the excerpt will be
+// placed in the description element and the full content inside content:encoded.
+// If excerpts are not requested, the full content will go inside the description element.
+//
+// Why are we not returning both excerpts and content by default?
+// Mainly for backward compatibility.
+// Excerpts should appear in the feed item's description element. Previous versions
+// of Full-Text RSS did not return excerpts, so the description element was always
+// used for the full content (as recommended by the RSS advisory). When returning both,
+// we need somewhere else to place the content (content:encoded).
+// Having both enabled should not create any problems for news readers, but it may create
+// problems for developers upgrading from one of our earlier versions who may now find
+// their applications are returning excerpts instead of the full content they were
+// expecting. To avoid such surprises for users who are upgrading Full-Text RSS,
+// excerpts must be explicitly requested in the querystring by default.
+//
+// Why not use a different element name for excerpts?
+// According to the RSS advisory:
+// "Publishers who employ summaries should store the summary in description and
+// the full content in content:encoded, ordering description first within the item.
+// On items with no summary, the full content should be stored in description."
+// See: http://www.rssboard.org/rss-profile#namespace-elements-content-encoded
+//
+// For more consistent element naming, we recommend new users set this option to true.
+// The full content can still be excluded via the querystring, but the element names
+// will not change: when $options->summary = true, the description element will always
+// be reserved for the excerpt and content:encoded always for full content.
+$options->summary = 'user';
+
 // Rewrite relative URLs
 // ----------------------
 // With this enabled relative URLs found in the extracted content
@@ -67,7 +121,7 @@ $options->exclude_items_on_fail = 'user';
 // Enable multi-page support
 // -------------------------
 // If enabled, we will try to follow next page links on multi-page articles.
-// Currently this only happens for sites where next_page_link has been defined 
+// Currently this only happens for sites where next_page_link has been defined
 // in a site config file.
 $options->multipage = true;
 
@@ -125,10 +179,10 @@ $options->detect_language = 1;
 
 // Registration key
 // ---------------
-// The registration key is optional. It is not required to use Full-Text RSS, 
-// and does not affect the normal operation of Full-Text RSS. It is currently 
-// only used on admin pages which help you update site patterns with the 
-// latest version offered by FiveFilters.org. For these admin-related 
+// The registration key is optional. It is not required to use Full-Text RSS,
+// and does not affect the normal operation of Full-Text RSS. It is currently
+// only used on admin pages which help you update site patterns with the
+// latest version offered by FiveFilters.org. For these admin-related
 // tasks to complete, we will require a valid registration key.
 // If you would like one, you can purchase the latest version of Full-Text RSS
 // at http://fivefilters.org/content-only/
@@ -144,12 +198,12 @@ $options->registration_key = '';
 // ----------------------
 // Certain pages/actions, e.g. updating site patterns with our online tool, will require admin credentials.
 // To use these pages, enter a password here and you'll be prompted for it when you try to access those pages.
-// If no password or username is set, pages requiring admin privelages will be inaccessible. 
+// If no password or username is set, pages requiring admin privileges will be inaccessible.
 // The default username is 'admin'.
 // If overriding with an environment variable, separate username and password with a colon, e.g.:
 // ftr_admin_credentials: admin:my-secret-password
 // Example: $options->admin_credentials = array('username'=>'admin', 'password'=>'my-secret-password');
-$options->admin_credentials = array('username'=>'admin', 'password'=>'admin');
+$options->admin_credentials = array('username'=>'admin', 'password'=>'');
 
 // URLs to allow
 // ----------------------
@@ -178,12 +232,12 @@ $options->key_required = false;
 // ----------------------
 // By default, when processing feeds, we assume item titles in the feed
 // have not been truncated. So after processing web pages, the extracted titles
-// are not used in the generated feed. If you prefer to have extracted titles in 
-// the feed you can either set this to false, in which case we will always favour 
-// extracted titles. Alternatively, if set to 'user' (default) we'll use the 
+// are not used in the generated feed. If you prefer to have extracted titles in
+// the feed you can either set this to false, in which case we will always favour
+// extracted titles. Alternatively, if set to 'user' (default) we'll use the
 // extracted title if you pass '&use_extracted_title' in the querystring.
 // Possible values:
-// * Favour feed titles: true 
+// * Favour feed titles: true
 // * Favour extracted titles: false
 // * Favour feed titles with user override: 'user' (default)
 // Note: this has no effect when the input URL is to a web page - in these cases
@@ -192,17 +246,17 @@ $options->favour_feed_titles = 'user';
 
 // Access keys (password protected access)
 // ------------------------------------
-// NOTE: You do not need an API key from fivefilters.org to run your own 
+// NOTE: You do not need an API key from fivefilters.org to run your own
 // copy of the code. This is here if you'd like to restrict access to
 // _your_ copy.
 // Keys let you group users - those with a key and those without - and
 // restrict access to the service to those without a key.
 // If you want everyone to access the service in the same way, you can
 // leave the array below empty and ignore the access key options further down.
-// The options further down let you control how the service should behave 
+// The options further down let you control how the service should behave
 // in each mode.
-// Note: Explicitly including the index number (1 and 2 in the examples below) 
-// is highly recommended (when generating feeds, we encode the key and 
+// Note: Explicitly including the index number (1 and 2 in the examples below)
+// is highly recommended (when generating feeds, we encode the key and
 // refer to it by index number and hash).
 $options->api_keys = array();
 // Example:
@@ -232,13 +286,13 @@ $options->max_entries_with_key = 10;
 // filter the resulting HTML for XSS attacks, making it redundant for
 // Full-Text RSS do the same. Similarly with frameworks/CMS which display
 // feed content - the content should be treated like any other user-submitted content.
-// 
+//
 // If you are writing an application yourself which is processing feeds generated by
 // Full-Text RSS, you can either filter the HTML yourself to remove potential XSS attacks
 // or enable this option. This might be useful if you are processing our generated
 // feeds with JavaScript on the client side - although there's client side xss
 // filtering available too, e.g. https://code.google.com/p/google-caja/wiki/JsHtmlSanitizer
-// 
+//
 // If enabled, we'll pass retrieved HTML content through htmLawed with
 // safe flag on and style attributes denied, see
 // http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed/htmLawed_README.htm#s3.6
@@ -253,8 +307,8 @@ $options->xss_filter = 'user';
 // Allowed parsers
 // ----------------------
 // Full-Text RSS attempts to use PHP's libxml extension to process HTML.
-// While fast, on some sites it may not always produce good results. 
-// For these sites, you can specify an alternative HTML parser: 
+// While fast, on some sites it may not always produce good results.
+// For these sites, you can specify an alternative HTML parser:
 // parser: html5lib
 // The html5lib parser is bundled with Full-Text RSS.
 // see http://code.google.com/p/html5lib/
@@ -273,7 +327,7 @@ $options->cors = false;
 
 // Use APC user cache?
 // ----------------------
-// If enabled we will store site config files (when requested 
+// If enabled we will store site config files (when requested
 // for the first time) in APC's user cache. Keys prefixed with 'sc.'
 // This improves performance by reducing disk access.
 // Note: this has no effect if APC is unavailable on your server.
@@ -346,7 +400,7 @@ $options->rewrite_url = array(
 // Valid actions:
 // * 'exclude' - exclude this item from the result
 // * 'link' - create HTML link to the item
-$options->content_type_exc = array( 
+$options->content_type_exc = array(
 							   'application/pdf' => array('action'=>'link', 'name'=>'PDF'),
 							   'image' => array('action'=>'link', 'name'=>'Image'),
 							   'audio' => array('action'=>'link', 'name'=>'Audio'),
@@ -375,13 +429,13 @@ $options->cache_cleanup = 100;
 /// DO NOT CHANGE ANYTHING BELOW THIS ///////////
 /////////////////////////////////////////////////
 
-if (!defined('_FF_FTR_VERSION')) define('_FF_FTR_VERSION', '3.1');
+if (!defined('_FF_FTR_VERSION')) define('_FF_FTR_VERSION', '3.2');
 
 if (basename(__FILE__) == 'config.php') {
 	if (file_exists(dirname(__FILE__).'/custom_config.php')) {
 		require_once dirname(__FILE__).'/custom_config.php';
 	}
-	
+
 	// check for environment variables - often used on cloud platforms
 	// environment variables should be prefixed with 'ftr_', e.g.
 	// ftr_max_entries: 1
diff --git a/inc/3rdparty/libraries/content-extractor/ContentExtractor.php b/inc/3rdparty/libraries/content-extractor/ContentExtractor.php
index ddd33bb5..21e693e7 100644
--- a/inc/3rdparty/libraries/content-extractor/ContentExtractor.php
+++ b/inc/3rdparty/libraries/content-extractor/ContentExtractor.php
@@ -1,728 +1,727 @@
-<?php
-/**
- * Content Extractor
- * 
- * Uses patterns specified in site config files and auto detection (hNews/PHP Readability) 
- * to extract content from HTML files.
- * 
- * @version 1.0
- * @date 2013-02-05
- * @author Keyvan Minoukadeh
- * @copyright 2013 Keyvan Minoukadeh
- * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
- */
-
-class ContentExtractor
-{
-	protected static $tidy_config = array(
-				 'clean' => true,
-				 'output-xhtml' => true,
-				 'logical-emphasis' => true,
-				 'show-body-only' => false,
-				 'new-blocklevel-tags' => 'article, aside, footer, header, hgroup, menu, nav, section, details, datagrid',
-				 'new-inline-tags' => 'mark, time, meter, progress, data',
-				 'wrap' => 0,
-				 'drop-empty-paras' => true,
-				 'drop-proprietary-attributes' => false,
-				 'enclose-text' => true,
-				 'enclose-block-text' => true,
-				 'merge-divs' => true,
-				 'merge-spans' => true,
-				 'char-encoding' => 'utf8',
-				 'hide-comments' => true
-				 );
-	protected $html;
-	protected $config;
-	protected $title;
-	protected $author = array();
-	protected $language;
-	protected $date;
-	protected $body;
-	protected $success = false;
-	protected $nextPageUrl;
-	public $allowedParsers = array('libxml', 'html5lib');
-	public $fingerprints = array();
-	public $readability;
-	public $debug = false;
-	public $debugVerbose = false;
-
-	function __construct($path, $fallback=null) {
-		SiteConfig::set_config_path($path, $fallback);	
-	}
-	
-	protected function debug($msg) {
-		if ($this->debug) {
-			$mem = round(memory_get_usage()/1024, 2);
-			$memPeak = round(memory_get_peak_usage()/1024, 2);
-			echo '* ',$msg;
-			if ($this->debugVerbose) echo ' - mem used: ',$mem," (peak: $memPeak)";
-			echo "\n";
-			ob_flush();
-			flush();
-		}
-	}
-	
-	public function reset() {
-		$this->html = null;
-		$this->readability = null;
-		$this->config = null;
-		$this->title = null;
-		$this->body = null;
-		$this->author = array();
-		$this->language = null;
-		$this->date = null;
-		$this->nextPageUrl = null;
-		$this->success = false;
-	}
-
-	public function findHostUsingFingerprints($html) {
-		$this->debug('Checking fingerprints...');
-		$head = substr($html, 0, 8000);
-		foreach ($this->fingerprints as $_fp => $_fphost) {
-			$lookin = 'html';
-			if (is_array($_fphost)) {
-				if (isset($_fphost['head']) && $_fphost['head']) {
-					$lookin = 'head';
-				}
-				$_fphost = $_fphost['hostname'];
-			}
-			if (strpos($$lookin, $_fp) !== false) {
-				$this->debug("Found match: $_fphost");
-				return $_fphost;
-			}
-		}
-		$this->debug('No fingerprint matches');
-		return false;
-	}
-	
-	// returns SiteConfig instance (joined in order: exact match, wildcard, fingerprint, global, default)
-	public function buildSiteConfig($url, $html='', $add_to_cache=true) {
-		// extract host name
-		$host = @parse_url($url, PHP_URL_HOST);
-		$host = strtolower($host);
-		if (substr($host, 0, 4) == 'www.') $host = substr($host, 4);
-		// is merged version already cached?
-		if (SiteConfig::is_cached("$host.merged")) {
-			$this->debug("Returning cached and merged site config for $host");
-			return SiteConfig::build("$host.merged");
-		}
-		// let's build from site_config/custom/ and standard/
-		$config = SiteConfig::build($host);
-		if ($add_to_cache && $config && !SiteConfig::is_cached("$host")) {
-			SiteConfig::add_to_cache($host, $config);
-		}
-		// if no match, use defaults
-		if (!$config) $config = new SiteConfig();
-		// load fingerprint config?
-		if ($config->autodetect_on_failure()) {
-			// check HTML for fingerprints
-			if (!empty($this->fingerprints) && ($_fphost = $this->findHostUsingFingerprints($html))) {
-				if ($config_fingerprint = SiteConfig::build($_fphost)) {
-					$this->debug("Appending site config settings from $_fphost (fingerprint match)");
-					$config->append($config_fingerprint);
-					if ($add_to_cache && !SiteConfig::is_cached($_fphost)) {
-						//$config_fingerprint->cache_in_apc = true;
-						SiteConfig::add_to_cache($_fphost, $config_fingerprint);
-					}
-				}
-			}
-		}
-		// load global config?
-		if ($config->autodetect_on_failure()) {
-			if ($config_global = SiteConfig::build('global', true)) {
-				$this->debug('Appending site config settings from global.txt');
-				$config->append($config_global);
-				if ($add_to_cache && !SiteConfig::is_cached('global')) {
-					//$config_global->cache_in_apc = true;
-					SiteConfig::add_to_cache('global', $config_global);
-				}
-			}
-		}
-		// store copy of merged config
-		if ($add_to_cache) {
-			// do not store in APC if wildcard match
-			$use_apc = ($host == $config->cache_key);
-			$config->cache_key = null;
-			SiteConfig::add_to_cache("$host.merged", $config, $use_apc);
-		}
-		return $config;
-	}
-	
-	// returns true on success, false on failure
-	// $smart_tidy indicates that if tidy is used and no results are produced, we will
-	// try again without it. Tidy helps us deal with PHP's patchy HTML parsing most of the time
-	// but it has problems of its own which we try to avoid with this option.
-	public function process($html, $url, $smart_tidy=true) {
-		$this->reset();
-		$this->config = $this->buildSiteConfig($url, $html);
-		
-		// do string replacements
-		if (!empty($this->config->find_string)) {
-			if (count($this->config->find_string) == count($this->config->replace_string)) {
-				$html = str_replace($this->config->find_string, $this->config->replace_string, $html, $_count);
-				$this->debug("Strings replaced: $_count (find_string and/or replace_string)");
-			} else {
-				$this->debug('Skipped string replacement - incorrect number of find-replace strings in site config');
-			}
-			unset($_count);
-		}
-		
-		// use tidy (if it exists)?
-		// This fixes problems with some sites which would otherwise
-		// trouble DOMDocument's HTML parsing. (Although sometimes it
-		// makes matters worse, which is why you can override it in site config files.)
-		$tidied = false;
-		if ($this->config->tidy() && function_exists('tidy_parse_string') && $smart_tidy) {
-			$this->debug('Using Tidy');
-			$tidy = tidy_parse_string($html, self::$tidy_config, 'UTF8');
-			if (tidy_clean_repair($tidy)) {
-				$original_html = $html;
-				$tidied = true;
-				$html = $tidy->value;
-			}
-			unset($tidy);
-		}
-		
-		// load and parse html
-		$_parser = $this->config->parser();
-		if (!in_array($_parser, $this->allowedParsers)) {
-			$this->debug("HTML parser $_parser not listed, using libxml instead");
-			$_parser = 'libxml';
-		}
-		$this->debug("Attempting to parse HTML with $_parser");
-		$this->readability = new Readability($html, $url, $_parser);
-		
-		// we use xpath to find elements in the given HTML document
-		// see http://en.wikipedia.org/wiki/XPath_1.0
-		$xpath = new DOMXPath($this->readability->dom);
-
-		// try to get next page link
-		foreach ($this->config->next_page_link as $pattern) {
-			$elems = @$xpath->evaluate($pattern, $this->readability->dom);
-			if (is_string($elems)) {
-				$this->nextPageUrl = trim($elems);
-				break;
-			} elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
-				foreach ($elems as $item) {
-					if ($item instanceof DOMElement && $item->hasAttribute('href')) {
-						$this->nextPageUrl = $item->getAttribute('href');
-						break 2;
-					} elseif ($item instanceof DOMAttr && $item->value) {
-						$this->nextPageUrl = $item->value;
-						break 2;
-					}
-				}
-			}
-		}
-		
-		// try to get title
-		foreach ($this->config->title as $pattern) {
-			// $this->debug("Trying $pattern");
-			$elems = @$xpath->evaluate($pattern, $this->readability->dom);
-			if (is_string($elems)) {
-				$this->title = trim($elems);
-				$this->debug('Title expression evaluated as string: '.$this->title);
-				$this->debug("...XPath match: $pattern");
-				break;
-			} elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
-				$this->title = $elems->item(0)->textContent;
-				$this->debug('Title matched: '.$this->title);
-				$this->debug("...XPath match: $pattern");
-				// remove title from document
-				try {
-					$elems->item(0)->parentNode->removeChild($elems->item(0));
-				} catch (DOMException $e) {
-					// do nothing
-				}
-				break;
-			}
-		}
-		
-		// try to get author (if it hasn't already been set)
-		if (empty($this->author)) {
-			foreach ($this->config->author as $pattern) {
-				$elems = @$xpath->evaluate($pattern, $this->readability->dom);
-				if (is_string($elems)) {
-					if (trim($elems) != '') {
-						$this->author[] = trim($elems);
-						$this->debug('Author expression evaluated as string: '.trim($elems));
-						$this->debug("...XPath match: $pattern");
-						break;
-					}
-				} elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
-					foreach ($elems as $elem) {
-						if (!isset($elem->parentNode)) continue;
-						$this->author[] = trim($elem->textContent);
-						$this->debug('Author matched: '.trim($elem->textContent));
-					}
-					if (!empty($this->author)) {
-						$this->debug("...XPath match: $pattern");
-						break;
-					}
-				}
-			}
-		}
-		
-		// try to get language
-		$_lang_xpath = array('//html[@lang]/@lang', '//meta[@name="DC.language"]/@content');
-		foreach ($_lang_xpath as $pattern) {
-			$elems = @$xpath->evaluate($pattern, $this->readability->dom);
-			if (is_string($elems)) {
-				if (trim($elems) != '') {
-					$this->language = trim($elems);
-					$this->debug('Language matched: '.$this->language);
-					break;
-				}
-			} elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
-				foreach ($elems as $elem) {
-					if (!isset($elem->parentNode)) continue;
-					$this->language = trim($elem->textContent);
-					$this->debug('Language matched: '.$this->language);					
-				}
-				if ($this->language) break;
-			}
-		}
-		
-		// try to get date
-		foreach ($this->config->date as $pattern) {
-			$elems = @$xpath->evaluate($pattern, $this->readability->dom);
-			if (is_string($elems)) {
-				$this->date = strtotime(trim($elems, "; \t\n\r\0\x0B"));				
-			} elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
-				$this->date = $elems->item(0)->textContent;
-				$this->date = strtotime(trim($this->date, "; \t\n\r\0\x0B"));
-				// remove date from document
-				// $elems->item(0)->parentNode->removeChild($elems->item(0));
-			}
-			if (!$this->date) {
-				$this->date = null;
-			} else {
-				$this->debug('Date matched: '.date('Y-m-d H:i:s', $this->date));
-				$this->debug("...XPath match: $pattern");
-				break;
-			}
-		}
-
-		// strip elements (using xpath expressions)
-		foreach ($this->config->strip as $pattern) {
-			$elems = @$xpath->query($pattern, $this->readability->dom);
-			// check for matches
-			if ($elems && $elems->length > 0) {
-				$this->debug('Stripping '.$elems->length.' elements (strip)');
-				for ($i=$elems->length-1; $i >= 0; $i--) {
-					$elems->item($i)->parentNode->removeChild($elems->item($i));
-				}
-			}
-		}
-		
-		// strip elements (using id and class attribute values)
-		foreach ($this->config->strip_id_or_class as $string) {
-			$string = strtr($string, array("'"=>'', '"'=>''));
-			$elems = @$xpath->query("//*[contains(@class, '$string') or contains(@id, '$string')]", $this->readability->dom);
-			// check for matches
-			if ($elems && $elems->length > 0) {
-				$this->debug('Stripping '.$elems->length.' elements (strip_id_or_class)');
-				for ($i=$elems->length-1; $i >= 0; $i--) {
-					$elems->item($i)->parentNode->removeChild($elems->item($i));
-				}
-			}
-		}
-		
-		// strip images (using src attribute values)
-		foreach ($this->config->strip_image_src as $string) {
-			$string = strtr($string, array("'"=>'', '"'=>''));
-			$elems = @$xpath->query("//img[contains(@src, '$string')]", $this->readability->dom);
-			// check for matches
-			if ($elems && $elems->length > 0) {
-				$this->debug('Stripping '.$elems->length.' image elements');
-				for ($i=$elems->length-1; $i >= 0; $i--) {
-					$elems->item($i)->parentNode->removeChild($elems->item($i));
-				}
-			}
-		}
-		// strip elements using Readability.com and Instapaper.com ignore class names
-		// .entry-unrelated and .instapaper_ignore
-		// See https://www.readability.com/publishers/guidelines/#view-plainGuidelines
-		// and http://blog.instapaper.com/post/730281947
-		$elems = @$xpath->query("//*[contains(concat(' ',normalize-space(@class),' '),' entry-unrelated ') or contains(concat(' ',normalize-space(@class),' '),' instapaper_ignore ')]", $this->readability->dom);
-		// check for matches
-		if ($elems && $elems->length > 0) {
-			$this->debug('Stripping '.$elems->length.' .entry-unrelated,.instapaper_ignore elements');
-			for ($i=$elems->length-1; $i >= 0; $i--) {
-				$elems->item($i)->parentNode->removeChild($elems->item($i));
-			}
-		}
-		
-		// strip elements that contain style="display: none;"
-		$elems = @$xpath->query("//*[contains(@style,'display:none')]", $this->readability->dom);
-		// check for matches
-		if ($elems && $elems->length > 0) {
-			$this->debug('Stripping '.$elems->length.' elements with inline display:none style');
-			for ($i=$elems->length-1; $i >= 0; $i--) {
-				$elems->item($i)->parentNode->removeChild($elems->item($i));
-			}
-		}
-		
-		// try to get body
-		foreach ($this->config->body as $pattern) {
-			$elems = @$xpath->query($pattern, $this->readability->dom);
-			// check for matches
-			if ($elems && $elems->length > 0) {
-				$this->debug('Body matched');
-				$this->debug("...XPath match: $pattern");
-				if ($elems->length == 1) {				
-					$this->body = $elems->item(0);
-					// prune (clean up elements that may not be content)
-					if ($this->config->prune()) {
-						$this->debug('...pruning content');
-						$this->readability->prepArticle($this->body);
-					}
-					break;
-				} else {
-					$this->body = $this->readability->dom->createElement('div');
-					$this->debug($elems->length.' body elems found');
-					foreach ($elems as $elem) {
-						if (!isset($elem->parentNode)) continue;
-						$isDescendant = false;
-						foreach ($this->body->childNodes as $parent) {
-							if ($this->isDescendant($parent, $elem)) {
-								$isDescendant = true;
-								break;
-							}
-						}
-						if ($isDescendant) {
-							$this->debug('...element is child of another body element, skipping.');
-						} else {
-							// prune (clean up elements that may not be content)
-							if ($this->config->prune()) {
-								$this->debug('Pruning content');
-								$this->readability->prepArticle($elem);
-							}
-							$this->debug('...element added to body');
-							$this->body->appendChild($elem);
-						}
-					}
-					if ($this->body->hasChildNodes()) break;
-				}
-			}
-		}		
-		
-		// auto detect?
-		$detect_title = $detect_body = $detect_author = $detect_date = false;
-		// detect title?
-		if (!isset($this->title)) {
-			if (empty($this->config->title) || $this->config->autodetect_on_failure()) {
-				$detect_title = true;
-			}
-		}
-		// detect body?
-		if (!isset($this->body)) {
-			if (empty($this->config->body) || $this->config->autodetect_on_failure()) {
-				$detect_body = true;
-			}
-		}
-		// detect author?
-		if (empty($this->author)) {
-			if (empty($this->config->author) || $this->config->autodetect_on_failure()) {
-				$detect_author = true;
-			}
-		}
-		// detect date?
-		if (!isset($this->date)) {
-			if (empty($this->config->date) || $this->config->autodetect_on_failure()) {
-				$detect_date = true;
-			}
-		}
-
-		// check for hNews
-		if ($detect_title || $detect_body) {
-			// check for hentry
-			$elems = @$xpath->query("//*[contains(concat(' ',normalize-space(@class),' '),' hentry ')]", $this->readability->dom);
-			if ($elems && $elems->length > 0) {
-				$this->debug('hNews: found hentry');
-				$hentry = $elems->item(0);
-				
-				if ($detect_title) {
-					// check for entry-title
-					$elems = @$xpath->query(".//*[contains(concat(' ',normalize-space(@class),' '),' entry-title ')]", $hentry);
-					if ($elems && $elems->length > 0) {
-						$this->title = $elems->item(0)->textContent;
-						$this->debug('hNews: found entry-title: '.$this->title);
-						// remove title from document
-						$elems->item(0)->parentNode->removeChild($elems->item(0));
-						$detect_title = false;
-					}
-				}
-				
-				if ($detect_date) {
-					// check for time element with pubdate attribute
-					$elems = @$xpath->query(".//time[@pubdate] | .//abbr[contains(concat(' ',normalize-space(@class),' '),' published ')]", $hentry);
-					if ($elems && $elems->length > 0) {
-						$this->date = strtotime(trim($elems->item(0)->textContent));
-						// remove date from document
-						//$elems->item(0)->parentNode->removeChild($elems->item(0));
-						if ($this->date) {
-							$this->debug('hNews: found publication date: '.date('Y-m-d H:i:s', $this->date));
-							$detect_date = false;
-						} else {
-							$this->date = null;
-						}
-					}
-				}
-
-				if ($detect_author) {
-					// check for time element with pubdate attribute
-					$elems = @$xpath->query(".//*[contains(concat(' ',normalize-space(@class),' '),' vcard ') and (contains(concat(' ',normalize-space(@class),' '),' author ') or contains(concat(' ',normalize-space(@class),' '),' byline '))]", $hentry);
-					if ($elems && $elems->length > 0) {
-						$author = $elems->item(0);
-						$fn = @$xpath->query(".//*[contains(concat(' ',normalize-space(@class),' '),' fn ')]", $author);
-						if ($fn && $fn->length > 0) {
-							foreach ($fn as $_fn) {
-								if (trim($_fn->textContent) != '') {
-									$this->author[] = trim($_fn->textContent);
-									$this->debug('hNews: found author: '.trim($_fn->textContent));
-								}
-							}
-						} else {
-							if (trim($author->textContent) != '') {
-								$this->author[] = trim($author->textContent);
-								$this->debug('hNews: found author: '.trim($author->textContent));
-							}
-						}
-						$detect_author = empty($this->author);
-					}
-				}
-				
-				// check for entry-content.
-				// according to hAtom spec, if there are multiple elements marked entry-content,
-				// we include all of these in the order they appear - see http://microformats.org/wiki/hatom#Entry_Content
-				if ($detect_body) {
-					$elems = @$xpath->query(".//*[contains(concat(' ',normalize-space(@class),' '),' entry-content ')]", $hentry);
-					if ($elems && $elems->length > 0) {
-						$this->debug('hNews: found entry-content');
-						if ($elems->length == 1) {
-							// what if it's empty? (some sites misuse hNews - place their content outside an empty entry-content element)
-							$e = $elems->item(0);
-							if (($e->tagName == 'img') || (trim($e->textContent) != '')) {
-								$this->body = $elems->item(0);
-								// prune (clean up elements that may not be content)
-								if ($this->config->prune()) {
-									$this->debug('Pruning content');
-									$this->readability->prepArticle($this->body);
-								}
-								$detect_body = false;
-							} else {
-								$this->debug('hNews: skipping entry-content - appears not to contain content');
-							}
-							unset($e);
-						} else {
-							$this->body = $this->readability->dom->createElement('div');
-							$this->debug($elems->length.' entry-content elems found');
-							foreach ($elems as $elem) {
-								if (!isset($elem->parentNode)) continue;
-								$isDescendant = false;
-								foreach ($this->body->childNodes as $parent) {
-									if ($this->isDescendant($parent, $elem)) {
-										$isDescendant = true;
-										break;
-									}
-								}
-								if ($isDescendant) {
-									$this->debug('Element is child of another body element, skipping.');
-								} else {
-									// prune (clean up elements that may not be content)
-									if ($this->config->prune()) {
-										$this->debug('Pruning content');
-										$this->readability->prepArticle($elem);
-									}								
-									$this->debug('Element added to body');									
-									$this->body->appendChild($elem);
-								}
-							}
-							$detect_body = false;
-						}
-					}
-				}
-			}
-		}
-
-		// check for elements marked with instapaper_title
-		if ($detect_title) {
-			// check for instapaper_title
-			$elems = @$xpath->query("//*[contains(concat(' ',normalize-space(@class),' '),' instapaper_title ')]", $this->readability->dom);
-			if ($elems && $elems->length > 0) {
-				$this->title = $elems->item(0)->textContent;
-				$this->debug('Title found (.instapaper_title): '.$this->title);
-				// remove title from document
-				$elems->item(0)->parentNode->removeChild($elems->item(0));
-				$detect_title = false;
-			}
-		}
-		// check for elements marked with instapaper_body
-		if ($detect_body) {
-			$elems = @$xpath->query("//*[contains(concat(' ',normalize-space(@class),' '),' instapaper_body ')]", $this->readability->dom);
-			if ($elems && $elems->length > 0) {
-				$this->debug('body found (.instapaper_body)');
-				$this->body = $elems->item(0);
-				// prune (clean up elements that may not be content)
-				if ($this->config->prune()) {
-					$this->debug('Pruning content');
-					$this->readability->prepArticle($this->body);
-				}
-				$detect_body = false;
-			}
-		}
-		
-		// Find author in rel="author" marked element
-		// We only use this if there's exactly one.
-		// If there's more than one, it could indicate more than
-		// one author, but it could also indicate that we're processing
-		// a page listing different articles with different authors.
-		if ($detect_author) {
-			$elems = @$xpath->query("//a[contains(concat(' ',normalize-space(@rel),' '),' author ')]", $this->readability->dom);
-			if ($elems && $elems->length == 1) {
-				$author = trim($elems->item(0)->textContent);
-				if ($author != '') {
-					$this->debug("Author found (rel=\"author\"): $author");
-					$this->author[] = $author;
-					$detect_author = false;
-				}
-			}
-		}
-
-		// Find date in pubdate marked time element
-		// For the same reason given above, we only use this
-		// if there's exactly one element.
-		if ($detect_date) {
-			$elems = @$xpath->query("//time[@pubdate]", $this->readability->dom);
-			if ($elems && $elems->length == 1) {
-				$this->date = strtotime(trim($elems->item(0)->textContent));
-				// remove date from document
-				//$elems->item(0)->parentNode->removeChild($elems->item(0));
-				if ($this->date) {
-					$this->debug('Date found (pubdate marked time element): '.date('Y-m-d H:i:s', $this->date));
-					$detect_date = false;
-				} else {
-					$this->date = null;
-				}
-			}
-		}
-
-		// still missing title or body, so we detect using Readability
-		if ($detect_title || $detect_body) {
-			$this->debug('Using Readability');
-			// clone body if we're only using Readability for title (otherwise it may interfere with body element)
-			if (isset($this->body)) $this->body = $this->body->cloneNode(true);
-			$success = $this->readability->init();
-		}
-		if ($detect_title) {
-			$this->debug('Detecting title');
-			$this->title = $this->readability->getTitle()->textContent;
-		}
-		if ($detect_body && $success) {
-			$this->debug('Detecting body');
-			$this->body = $this->readability->getContent();
-			if ($this->body->childNodes->length == 1 && $this->body->firstChild->nodeType === XML_ELEMENT_NODE) {
-				$this->body = $this->body->firstChild;
-			}
-			// prune (clean up elements that may not be content)
-			if ($this->config->prune()) {
-				$this->debug('Pruning content');
-				$this->readability->prepArticle($this->body);
-			}
-		}
-		if (isset($this->body)) {
-			// remove scripts
-			$this->readability->removeScripts($this->body);
-			// remove any h1-h6 elements that appear as first thing in the body
-			// and which match our title
-			if (isset($this->title) && ($this->title != '')) {
-				$firstChild = $this->body->firstChild;
-				while ($firstChild->nodeType && ($firstChild->nodeType !== XML_ELEMENT_NODE)) {
-					$firstChild = $firstChild->nextSibling;
-				}
-				if (($firstChild->nodeType === XML_ELEMENT_NODE)
-					&& in_array(strtolower($firstChild->tagName), array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'))
-					&& (strtolower(trim($firstChild->textContent)) == strtolower(trim($this->title)))) {
-						$this->body->removeChild($firstChild);
-				}
-			}
-			// prevent self-closing iframes
-			$elems = $this->body->getElementsByTagName('iframe');
-			for ($i = $elems->length-1; $i >= 0; $i--) {
-				$e = $elems->item($i);
-				if (!$e->hasChildNodes()) {
-					$e->appendChild($this->body->ownerDocument->createTextNode('[embedded content]'));
-				}
-			}
-			// remove image lazy loading - WordPress plugin http://wordpress.org/extend/plugins/lazy-load/
-			// the plugin replaces the src attribute to point to a 1x1 gif and puts the original src
-			// inside the data-lazy-src attribute. It also places the original image inside a noscript element 
-			// next to the amended one.
-			$elems = @$xpath->query("//img[@data-lazy-src]", $this->body);
-			for ($i = $elems->length-1; $i >= 0; $i--) {
-				$e = $elems->item($i);
-				// let's see if we can grab image from noscript
-				if ($e->nextSibling !== null && $e->nextSibling->nodeName === 'noscript') {
-					$_new_elem = $e->ownerDocument->createDocumentFragment();
-					@$_new_elem->appendXML($e->nextSibling->innerHTML);
-					$e->nextSibling->parentNode->replaceChild($_new_elem, $e->nextSibling);
-					$e->parentNode->removeChild($e);
-				} else {
-					// Use data-lazy-src as src value
-					$e->setAttribute('src', $e->getAttribute('data-lazy-src'));
-					$e->removeAttribute('data-lazy-src');
-				}
-			}
-		
-			$this->success = true;
-		}
-		
-		// if we've had no success and we've used tidy, there's a chance
-		// that tidy has messed up. So let's try again without tidy...
-		if (!$this->success && $tidied && $smart_tidy) {
-			$this->debug('Trying again without tidy');
-			$this->process($original_html, $url, false);
-		}
-
-		return $this->success;
-	}
-	
-	private function isDescendant(DOMElement $parent, DOMElement $child) {
-		$node = $child->parentNode;
-		while ($node != null) {
-			if ($node->isSameNode($parent))	return true;
-			$node = $node->parentNode;
-		}
-		return false;
-	}
-
-	public function getContent() {
-		return $this->body;
-	}
-	
-	public function getTitle() {
-		return $this->title;
-	}
-	
-	public function getAuthors() {
-		return $this->author;
-	}
-	
-	public function getLanguage() {
-		return $this->language;
-	}
-	
-	public function getDate() {
-		return $this->date;
-	}
-	
-	public function getSiteConfig() {
-		return $this->config;
-	}
-	
-	public function getNextPageUrl() {
-		return $this->nextPageUrl;
-	}
-}
-?>
\ No newline at end of file
+<?php
+/**
+ * Content Extractor
+ * 
+ * Uses patterns specified in site config files and auto detection (hNews/PHP Readability) 
+ * to extract content from HTML files.
+ * 
+ * @version 1.0
+ * @date 2013-02-05
+ * @author Keyvan Minoukadeh
+ * @copyright 2013 Keyvan Minoukadeh
+ * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
+ */
+
+class ContentExtractor
+{
+	// Options handed to tidy_parse_string() when process() cleans the HTML with Tidy.
+	protected static $tidy_config = array(
+				 'clean' => true,
+				 'output-xhtml' => true,
+				 'logical-emphasis' => true,
+				 'show-body-only' => false,
+				 'new-blocklevel-tags' => 'article, aside, footer, header, hgroup, menu, nav, section, details, datagrid',
+				 'new-inline-tags' => 'mark, time, meter, progress, data',
+				 'wrap' => 0,
+				 'drop-empty-paras' => true,
+				 'drop-proprietary-attributes' => false,
+				 'enclose-text' => true,
+				 'enclose-block-text' => true,
+				 'merge-divs' => true,
+				 'merge-spans' => true,
+				 'char-encoding' => 'utf8',
+				 'hide-comments' => true
+				 );
+	// cleared by reset(); NOTE(review): no other write is visible in this part of the file
+	protected $html;
+	// site config returned by buildSiteConfig() for the page being processed
+	protected $config;
+	// title found by process()
+	protected $title;
+	// list of author names found by process()
+	protected $author = array();
+	// language taken from the html lang attribute or the DC.language meta element
+	protected $language;
+	// timestamp produced by strtotime(), or null when no date could be parsed
+	protected $date;
+	// cleared by reset(); presumably the extracted article body - confirm against process()
+	protected $body;
+	// set to false by reset(); presumably the outcome of the last process() call - confirm
+	protected $success = false;
+	// URL or href value matched by the site config's next_page_link patterns
+	protected $nextPageUrl;
+	// parser names a site config may request; anything else falls back to 'libxml'
+	public $allowedParsers = array('libxml', 'html5lib');
+	// HTML fragment => host name, or => array('hostname' => ..., 'head' => bool); see findHostUsingFingerprints()
+	public $fingerprints = array();
+	// Readability instance created by process()
+	public $readability;
+	// when true, debug() echoes its messages
+	public $debug = false;
+	// when true, debug() also appends memory usage to each message
+	public $debugVerbose = false;
+
+	/**
+	 * Passes the site config location on to SiteConfig.
+	 *
+	 * @param mixed $path     first argument for SiteConfig::set_config_path()
+	 * @param mixed $fallback second argument for SiteConfig::set_config_path() (default null)
+	 */
+	public function __construct($path, $fallback=null) {
+		SiteConfig::set_config_path($path, $fallback);
+	}
+	
+	/**
+	 * Prints a debug line and flushes it to the client straight away.
+	 *
+	 * Nothing is printed unless $this->debug is truthy. When $this->debugVerbose is
+	 * set, current and peak memory usage (bytes / 1024, two decimals) are appended.
+	 *
+	 * @param string $msg message to print
+	 */
+	protected function debug($msg) {
+		if (!$this->debug) return;
+		echo '* ',$msg;
+		if ($this->debugVerbose) {
+			$mem = round(memory_get_usage()/1024, 2);
+			$memPeak = round(memory_get_peak_usage()/1024, 2);
+			echo ' - mem used: ',$mem," (peak: $memPeak)";
+		}
+		echo "\n";
+		// ob_flush() raises a notice when no output buffer is active, so only call it inside one
+		if (ob_get_level() > 0) ob_flush();
+		flush();
+	}
+	
+	/**
+	 * Restores every per-document property to its initial value.
+	 * process() calls this first on each run.
+	 */
+	public function reset() {
+		// property name => initial value, applied in the listed order
+		$initial = array(
+			'html' => null,
+			'readability' => null,
+			'config' => null,
+			'title' => null,
+			'body' => null,
+			'author' => array(),
+			'language' => null,
+			'date' => null,
+			'nextPageUrl' => null,
+			'success' => false
+		);
+		foreach ($initial as $property => $value) {
+			$this->$property = $value;
+		}
+	}
+
+	/**
+	 * Searches the HTML for a known fingerprint and returns the host name tied to it.
+	 *
+	 * $this->fingerprints maps an HTML fragment either to a host name or to an array
+	 * with a 'hostname' entry and an optional 'head' entry; when 'head' is truthy only
+	 * the first 8000 bytes of $html are searched.
+	 *
+	 * @param string $html HTML to search
+	 * @return string|false host name of the first matching fingerprint, false when none matches
+	 */
+	public function findHostUsingFingerprints($html) {
+		$this->debug('Checking fingerprints...');
+		$head = substr($html, 0, 8000);
+		foreach ($this->fingerprints as $_fp => $_fphost) {
+			$haystack = $html;
+			if (is_array($_fphost)) {
+				// an entry without a host name cannot yield a usable result
+				if (!isset($_fphost['hostname'])) continue;
+				if (!empty($_fphost['head'])) $haystack = $head;
+				$_fphost = $_fphost['hostname'];
+			}
+			// numeric-looking array keys arrive as integers, and strpos() before PHP 8
+			// treats an integer needle as a character code; an empty needle is not a usable fingerprint
+			$needle = (string) $_fp;
+			if ($needle === '') continue;
+			if (strpos($haystack, $needle) !== false) {
+				$this->debug("Found match: $_fphost");
+				return $_fphost;
+			}
+		}
+		$this->debug('No fingerprint matches');
+		return false;
+	}
+	
+	/**
+	 * Builds the SiteConfig used for the given URL.
+	 *
+	 * Settings are joined in order: exact match, wildcard, fingerprint, global, default.
+	 * If a "$host.merged" config is already cached it is returned straight away.
+	 *
+	 * @param string $url          URL of the page; only its host name is used
+	 * @param string $html         page HTML, searched for fingerprints
+	 * @param bool   $add_to_cache when true, the configs built here are passed to SiteConfig::add_to_cache()
+	 * @return SiteConfig merged site config (a new default SiteConfig when none is found for the host)
+	 */
+	public function buildSiteConfig($url, $html='', $add_to_cache=true) {
+		// extract host name; parse_url() gives null/false when there is no usable host,
+		// so cast to string before any string handling
+		$host = strtolower((string) @parse_url($url, PHP_URL_HOST));
+		if (substr($host, 0, 4) == 'www.') $host = substr($host, 4);
+		// is merged version already cached?
+		if (SiteConfig::is_cached("$host.merged")) {
+			$this->debug("Returning cached and merged site config for $host");
+			return SiteConfig::build("$host.merged");
+		}
+		// let's build from site_config/custom/ and standard/
+		$config = SiteConfig::build($host);
+		if ($add_to_cache && $config && !SiteConfig::is_cached("$host")) {
+			SiteConfig::add_to_cache($host, $config);
+		}
+		// if no match, use defaults
+		if (!$config) $config = new SiteConfig();
+		// load fingerprint config?
+		if ($config->autodetect_on_failure()) {
+			// check HTML for fingerprints
+			if (!empty($this->fingerprints) && ($_fphost = $this->findHostUsingFingerprints($html))) {
+				if ($config_fingerprint = SiteConfig::build($_fphost)) {
+					$this->debug("Appending site config settings from $_fphost (fingerprint match)");
+					$config->append($config_fingerprint);
+					if ($add_to_cache && !SiteConfig::is_cached($_fphost)) {
+						SiteConfig::add_to_cache($_fphost, $config_fingerprint);
+					}
+				}
+			}
+		}
+		// load global config?
+		if ($config->autodetect_on_failure()) {
+			if ($config_global = SiteConfig::build('global', true)) {
+				$this->debug('Appending site config settings from global.txt');
+				$config->append($config_global);
+				if ($add_to_cache && !SiteConfig::is_cached('global')) {
+					SiteConfig::add_to_cache('global', $config_global);
+				}
+			}
+		}
+		// store copy of merged config
+		if ($add_to_cache) {
+			// do not store in APC if wildcard match
+			$use_apc = ($host == $config->cache_key);
+			$config->cache_key = null;
+			SiteConfig::add_to_cache("$host.merged", $config, $use_apc);
+		}
+		return $config;
+	}
+	
+	// returns true on success, false on failure
+	// $smart_tidy indicates that if tidy is used and no results are produced, we will
+	// try again without it. Tidy helps us deal with PHP's patchy HTML parsing most of the time
+	// but it has problems of its own which we try to avoid with this option.
+	public function process($html, $url, $smart_tidy=true) {
+		$this->reset();
+		$this->config = $this->buildSiteConfig($url, $html);
+		
+		// do string replacements
+		if (!empty($this->config->find_string)) {
+			if (count($this->config->find_string) == count($this->config->replace_string)) {
+				$html = str_replace($this->config->find_string, $this->config->replace_string, $html, $_count);
+				$this->debug("Strings replaced: $_count (find_string and/or replace_string)");
+			} else {
+				$this->debug('Skipped string replacement - incorrect number of find-replace strings in site config');
+			}
+			unset($_count);
+		}
+		
+		// use tidy (if it exists)?
+		// This fixes problems with some sites which would otherwise
+		// trouble DOMDocument's HTML parsing. (Although sometimes it
+		// makes matters worse, which is why you can override it in site config files.)
+		$tidied = false;
+		if ($this->config->tidy() && function_exists('tidy_parse_string') && $smart_tidy) {
+			$this->debug('Using Tidy');
+			$tidy = tidy_parse_string($html, self::$tidy_config, 'UTF8');
+			if (tidy_clean_repair($tidy)) {
+				$original_html = $html;
+				$tidied = true;
+				$html = $tidy->value;
+			}
+			unset($tidy);
+		}
+		
+		// load and parse html
+		$_parser = $this->config->parser();
+		if (!in_array($_parser, $this->allowedParsers)) {
+			$this->debug("HTML parser $_parser not listed, using libxml instead");
+			$_parser = 'libxml';
+		}
+		$this->debug("Attempting to parse HTML with $_parser");
+		$this->readability = new Readability($html, $url, $_parser);
+		
+		// we use xpath to find elements in the given HTML document
+		// see http://en.wikipedia.org/wiki/XPath_1.0
+		$xpath = new DOMXPath($this->readability->dom);
+
+		// try to get next page link
+		foreach ($this->config->next_page_link as $pattern) {
+			$elems = @$xpath->evaluate($pattern, $this->readability->dom);
+			if (is_string($elems)) {
+				$this->nextPageUrl = trim($elems);
+				break;
+			} elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
+				foreach ($elems as $item) {
+					if ($item instanceof DOMElement && $item->hasAttribute('href')) {
+						$this->nextPageUrl = $item->getAttribute('href');
+						break 2;
+					} elseif ($item instanceof DOMAttr && $item->value) {
+						$this->nextPageUrl = $item->value;
+						break 2;
+					}
+				}
+			}
+		}
+		
+		// try to get title
+		foreach ($this->config->title as $pattern) {
+			// $this->debug("Trying $pattern");
+			$elems = @$xpath->evaluate($pattern, $this->readability->dom);
+			if (is_string($elems)) {
+				$this->title = trim($elems);
+				$this->debug('Title expression evaluated as string: '.$this->title);
+				$this->debug("...XPath match: $pattern");
+				break;
+			} elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
+				$this->title = $elems->item(0)->textContent;
+				$this->debug('Title matched: '.$this->title);
+				$this->debug("...XPath match: $pattern");
+				// remove title from document
+				try {
+					@$elems->item(0)->parentNode->removeChild($elems->item(0));
+				} catch (DOMException $e) {
+					// do nothing
+				}
+				break;
+			}
+		}
+		
+		// try to get author (if it hasn't already been set)
+		if (empty($this->author)) {
+			foreach ($this->config->author as $pattern) {
+				$elems = @$xpath->evaluate($pattern, $this->readability->dom);
+				if (is_string($elems)) {
+					if (trim($elems) != '') {
+						$this->author[] = trim($elems);
+						$this->debug('Author expression evaluated as string: '.trim($elems));
+						$this->debug("...XPath match: $pattern");
+						break;
+					}
+				} elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
+					foreach ($elems as $elem) {
+						if (!isset($elem->parentNode)) continue;
+						$this->author[] = trim($elem->textContent);
+						$this->debug('Author matched: '.trim($elem->textContent));
+					}
+					if (!empty($this->author)) {
+						$this->debug("...XPath match: $pattern");
+						break;
+					}
+				}
+			}
+		}
+		
+		// try to get language
+		$_lang_xpath = array('//html[@lang]/@lang', '//meta[@name="DC.language"]/@content');
+		foreach ($_lang_xpath as $pattern) {
+			$elems = @$xpath->evaluate($pattern, $this->readability->dom);
+			if (is_string($elems)) {
+				if (trim($elems) != '') {
+					$this->language = trim($elems);
+					$this->debug('Language matched: '.$this->language);
+					break;
+				}
+			} elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
+				foreach ($elems as $elem) {
+					if (!isset($elem->parentNode)) continue;
+					$this->language = trim($elem->textContent);
+					$this->debug('Language matched: '.$this->language);					
+				}
+				if ($this->language) break;
+			}
+		}
+		
+		// try to get date
+		foreach ($this->config->date as $pattern) {
+			$elems = @$xpath->evaluate($pattern, $this->readability->dom);
+			if (is_string($elems)) {
+				$this->date = strtotime(trim($elems, "; \t\n\r\0\x0B"));				
+			} elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
+				$this->date = $elems->item(0)->textContent;
+				$this->date = strtotime(trim($this->date, "; \t\n\r\0\x0B"));
+				// remove date from document
+				// $elems->item(0)->parentNode->removeChild($elems->item(0));
+			}
+			if (!$this->date) {
+				$this->date = null;
+			} else {
+				$this->debug('Date matched: '.date('Y-m-d H:i:s', $this->date));
+				$this->debug("...XPath match: $pattern");
+				break;
+			}
+		}
+
+		// strip elements (using xpath expressions)
+		foreach ($this->config->strip as $pattern) {
+			$elems = @$xpath->query($pattern, $this->readability->dom);
+			// check for matches
+			if ($elems && $elems->length > 0) {
+				$this->debug('Stripping '.$elems->length.' elements (strip)');
+				for ($i=$elems->length-1; $i >= 0; $i--) {
+					$elems->item($i)->parentNode->removeChild($elems->item($i));
+				}
+			}
+		}
+		
+		// strip elements (using id and class attribute values)
+		foreach ($this->config->strip_id_or_class as $string) {
+			$string = strtr($string, array("'"=>'', '"'=>''));
+			$elems = @$xpath->query("//*[contains(@class, '$string') or contains(@id, '$string')]", $this->readability->dom);
+			// check for matches
+			if ($elems && $elems->length > 0) {
+				$this->debug('Stripping '.$elems->length.' elements (strip_id_or_class)');
+				for ($i=$elems->length-1; $i >= 0; $i--) {
+					$elems->item($i)->parentNode->removeChild($elems->item($i));
+				}
+			}
+		}
+		
+		// strip images (using src attribute values)
+		foreach ($this->config->strip_image_src as $string) {
+			$string = strtr($string, array("'"=>'', '"'=>''));
+			$elems = @$xpath->query("//img[contains(@src, '$string')]", $this->readability->dom);
+			// check for matches
+			if ($elems && $elems->length > 0) {
+				$this->debug('Stripping '.$elems->length.' image elements');
+				for ($i=$elems->length-1; $i >= 0; $i--) {
+					$elems->item($i)->parentNode->removeChild($elems->item($i));
+				}
+			}
+		}
+		// strip elements using Readability.com and Instapaper.com ignore class names
+		// .entry-unrelated and .instapaper_ignore
+		// See https://www.readability.com/publishers/guidelines/#view-plainGuidelines
+		// and http://blog.instapaper.com/post/730281947
+		$elems = @$xpath->query("//*[contains(concat(' ',normalize-space(@class),' '),' entry-unrelated ') or contains(concat(' ',normalize-space(@class),' '),' instapaper_ignore ')]", $this->readability->dom);
+		// check for matches
+		if ($elems && $elems->length > 0) {
+			$this->debug('Stripping '.$elems->length.' .entry-unrelated,.instapaper_ignore elements');
+			for ($i=$elems->length-1; $i >= 0; $i--) {
+				$elems->item($i)->parentNode->removeChild($elems->item($i));
+			}
+		}
+		
+		// strip elements whose inline style contains 'display:none' (exact substring, no space after the colon)
+		$elems = @$xpath->query("//*[contains(@style,'display:none')]", $this->readability->dom);
+		// check for matches
+		if ($elems && $elems->length > 0) {
+			$this->debug('Stripping '.$elems->length.' elements with inline display:none style');
+			for ($i=$elems->length-1; $i >= 0; $i--) {
+				$elems->item($i)->parentNode->removeChild($elems->item($i));
+			}
+		}
+		
+		// try to get body
+		foreach ($this->config->body as $pattern) {
+			$elems = @$xpath->query($pattern, $this->readability->dom);
+			// check for matches
+			if ($elems && $elems->length > 0) {
+				$this->debug('Body matched');
+				$this->debug("...XPath match: $pattern");
+				if ($elems->length == 1) {				
+					$this->body = $elems->item(0);
+					// prune (clean up elements that may not be content)
+					if ($this->config->prune()) {
+						$this->debug('...pruning content');
+						$this->readability->prepArticle($this->body);
+					}
+					break;
+				} else {
+					$this->body = $this->readability->dom->createElement('div');
+					$this->debug($elems->length.' body elems found');
+					foreach ($elems as $elem) {
+						if (!isset($elem->parentNode)) continue;
+						$isDescendant = false;
+						foreach ($this->body->childNodes as $parent) {
+							if ($this->isDescendant($parent, $elem)) {
+								$isDescendant = true;
+								break;
+							}
+						}
+						if ($isDescendant) {
+							$this->debug('...element is child of another body element, skipping.');
+						} else {
+							// prune (clean up elements that may not be content)
+							if ($this->config->prune()) {
+								$this->debug('Pruning content');
+								$this->readability->prepArticle($elem);
+							}
+							$this->debug('...element added to body');
+							$this->body->appendChild($elem);
+						}
+					}
+					if ($this->body->hasChildNodes()) break;
+				}
+			}
+		}		
+		
+		// auto detect?
+		$detect_title = $detect_body = $detect_author = $detect_date = false;
+		// detect title?
+		if (!isset($this->title)) {
+			if (empty($this->config->title) || $this->config->autodetect_on_failure()) {
+				$detect_title = true;
+			}
+		}
+		// detect body?
+		if (!isset($this->body)) {
+			if (empty($this->config->body) || $this->config->autodetect_on_failure()) {
+				$detect_body = true;
+			}
+		}
+		// detect author?
+		if (empty($this->author)) {
+			if (empty($this->config->author) || $this->config->autodetect_on_failure()) {
+				$detect_author = true;
+			}
+		}
+		// detect date?
+		if (!isset($this->date)) {
+			if (empty($this->config->date) || $this->config->autodetect_on_failure()) {
+				$detect_date = true;
+			}
+		}
+
+		// check for hNews
+		if ($detect_title || $detect_body) {
+			// check for hentry
+			$elems = @$xpath->query("//*[contains(concat(' ',normalize-space(@class),' '),' hentry ')]", $this->readability->dom);
+			if ($elems && $elems->length > 0) {
+				$this->debug('hNews: found hentry');
+				$hentry = $elems->item(0);
+				
+				if ($detect_title) {
+					// check for entry-title
+					$elems = @$xpath->query(".//*[contains(concat(' ',normalize-space(@class),' '),' entry-title ')]", $hentry);
+					if ($elems && $elems->length > 0) {
+						$this->title = $elems->item(0)->textContent;
+						$this->debug('hNews: found entry-title: '.$this->title);
+						// remove title from document
+						$elems->item(0)->parentNode->removeChild($elems->item(0));
+						$detect_title = false;
+					}
+				}
+				
+				if ($detect_date) {
+					// check for time element with pubdate attribute
+					$elems = @$xpath->query(".//time[@pubdate] | .//abbr[contains(concat(' ',normalize-space(@class),' '),' published ')]", $hentry);
+					if ($elems && $elems->length > 0) {
+						$this->date = strtotime(trim($elems->item(0)->textContent));
+						// remove date from document
+						//$elems->item(0)->parentNode->removeChild($elems->item(0));
+						if ($this->date) {
+							$this->debug('hNews: found publication date: '.date('Y-m-d H:i:s', $this->date));
+							$detect_date = false;
+						} else {
+							$this->date = null;
+						}
+					}
+				}
+
+				if ($detect_author) {
+					// check for vcard element marked as author or byline
+					$elems = @$xpath->query(".//*[contains(concat(' ',normalize-space(@class),' '),' vcard ') and (contains(concat(' ',normalize-space(@class),' '),' author ') or contains(concat(' ',normalize-space(@class),' '),' byline '))]", $hentry);
+					if ($elems && $elems->length > 0) {
+						$author = $elems->item(0);
+						$fn = @$xpath->query(".//*[contains(concat(' ',normalize-space(@class),' '),' fn ')]", $author);
+						if ($fn && $fn->length > 0) {
+							foreach ($fn as $_fn) {
+								if (trim($_fn->textContent) != '') {
+									$this->author[] = trim($_fn->textContent);
+									$this->debug('hNews: found author: '.trim($_fn->textContent));
+								}
+							}
+						} else {
+							if (trim($author->textContent) != '') {
+								$this->author[] = trim($author->textContent);
+								$this->debug('hNews: found author: '.trim($author->textContent));
+							}
+						}
+						$detect_author = empty($this->author);
+					}
+				}
+				
+				// check for entry-content.
+				// according to hAtom spec, if there are multiple elements marked entry-content,
+				// we include all of these in the order they appear - see http://microformats.org/wiki/hatom#Entry_Content
+				if ($detect_body) {
+					$elems = @$xpath->query(".//*[contains(concat(' ',normalize-space(@class),' '),' entry-content ')]", $hentry);
+					if ($elems && $elems->length > 0) {
+						$this->debug('hNews: found entry-content');
+						if ($elems->length == 1) {
+							// what if it's empty? (some sites misuse hNews - place their content outside an empty entry-content element)
+							$e = $elems->item(0);
+							if (($e->tagName == 'img') || (trim($e->textContent) != '')) {
+								$this->body = $elems->item(0);
+								// prune (clean up elements that may not be content)
+								if ($this->config->prune()) {
+									$this->debug('Pruning content');
+									$this->readability->prepArticle($this->body);
+								}
+								$detect_body = false;
+							} else {
+								$this->debug('hNews: skipping entry-content - appears not to contain content');
+							}
+							unset($e);
+						} else {
+							$this->body = $this->readability->dom->createElement('div');
+							$this->debug($elems->length.' entry-content elems found');
+							foreach ($elems as $elem) {
+								if (!isset($elem->parentNode)) continue;
+								$isDescendant = false;
+								foreach ($this->body->childNodes as $parent) {
+									if ($this->isDescendant($parent, $elem)) {
+										$isDescendant = true;
+										break;
+									}
+								}
+								if ($isDescendant) {
+									$this->debug('Element is child of another body element, skipping.');
+								} else {
+									// prune (clean up elements that may not be content)
+									if ($this->config->prune()) {
+										$this->debug('Pruning content');
+										$this->readability->prepArticle($elem);
+									}								
+									$this->debug('Element added to body');									
+									$this->body->appendChild($elem);
+								}
+							}
+							$detect_body = false;
+						}
+					}
+				}
+			}
+		}
+
+		// check for elements marked with instapaper_title
+		if ($detect_title) {
+			// check for instapaper_title
+			$elems = @$xpath->query("//*[contains(concat(' ',normalize-space(@class),' '),' instapaper_title ')]", $this->readability->dom);
+			if ($elems && $elems->length > 0) {
+				$this->title = $elems->item(0)->textContent;
+				$this->debug('Title found (.instapaper_title): '.$this->title);
+				// remove title from document
+				$elems->item(0)->parentNode->removeChild($elems->item(0));
+				$detect_title = false;
+			}
+		}
+		// check for elements marked with instapaper_body
+		if ($detect_body) {
+			$elems = @$xpath->query("//*[contains(concat(' ',normalize-space(@class),' '),' instapaper_body ')]", $this->readability->dom);
+			if ($elems && $elems->length > 0) {
+				$this->debug('body found (.instapaper_body)');
+				$this->body = $elems->item(0);
+				// prune (clean up elements that may not be content)
+				if ($this->config->prune()) {
+					$this->debug('Pruning content');
+					$this->readability->prepArticle($this->body);
+				}
+				$detect_body = false;
+			}
+		}
+		
+		// Find author in rel="author" marked element
+		// We only use this if there's exactly one.
+		// If there's more than one, it could indicate more than
+		// one author, but it could also indicate that we're processing
+		// a page listing different articles with different authors.
+		if ($detect_author) {
+			$elems = @$xpath->query("//a[contains(concat(' ',normalize-space(@rel),' '),' author ')]", $this->readability->dom);
+			if ($elems && $elems->length == 1) {
+				$author = trim($elems->item(0)->textContent);
+				if ($author != '') {
+					$this->debug("Author found (rel=\"author\"): $author");
+					$this->author[] = $author;
+					$detect_author = false;
+				}
+			}
+		}
+
+		// Find date in pubdate marked time element
+		// For the same reason given above, we only use this
+		// if there's exactly one element.
+		if ($detect_date) {
+			$elems = @$xpath->query("//time[@pubdate]", $this->readability->dom);
+			if ($elems && $elems->length == 1) {
+				$this->date = strtotime(trim($elems->item(0)->textContent));
+				// remove date from document
+				//$elems->item(0)->parentNode->removeChild($elems->item(0));
+				if ($this->date) {
+					$this->debug('Date found (pubdate marked time element): '.date('Y-m-d H:i:s', $this->date));
+					$detect_date = false;
+				} else {
+					$this->date = null;
+				}
+			}
+		}
+
+		// still missing title or body, so we detect using Readability
+		if ($detect_title || $detect_body) {
+			$this->debug('Using Readability');
+			// clone body if we're only using Readability for title (otherwise it may interfere with body element)
+			if (isset($this->body)) $this->body = $this->body->cloneNode(true);
+			$success = $this->readability->init();
+		}
+		if ($detect_title) {
+			$this->debug('Detecting title');
+			$this->title = $this->readability->getTitle()->textContent;
+		}
+		if ($detect_body && $success) {
+			$this->debug('Detecting body');
+			$this->body = $this->readability->getContent();
+			if ($this->body->childNodes->length == 1 && $this->body->firstChild->nodeType === XML_ELEMENT_NODE) {
+				$this->body = $this->body->firstChild;
+			}
+			// prune (clean up elements that may not be content)
+			if ($this->config->prune()) {
+				$this->debug('Pruning content');
+				$this->readability->prepArticle($this->body);
+			}
+		}
+		if (isset($this->body)) {
+			// remove scripts
+			$this->readability->removeScripts($this->body);
+			// remove any h1-h6 elements that appear as first thing in the body
+			// and which match our title
+			if (isset($this->title) && ($this->title != '')) {
+				$firstChild = $this->body->firstChild;
+				while ($firstChild->nodeType && ($firstChild->nodeType !== XML_ELEMENT_NODE)) {
+					$firstChild = $firstChild->nextSibling;
+				}
+				if (($firstChild->nodeType === XML_ELEMENT_NODE)
+					&& in_array(strtolower($firstChild->tagName), array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'))
+					&& (strtolower(trim($firstChild->textContent)) == strtolower(trim($this->title)))) {
+						$this->body->removeChild($firstChild);
+				}
+			}
+			// prevent self-closing iframes
+			$elems = $this->body->getElementsByTagName('iframe');
+			for ($i = $elems->length-1; $i >= 0; $i--) {
+				$e = $elems->item($i);
+				if (!$e->hasChildNodes()) {
+					$e->appendChild($this->body->ownerDocument->createTextNode('[embedded content]'));
+				}
+			}
+			// remove image lazy loading - WordPress plugin http://wordpress.org/extend/plugins/lazy-load/
+			// the plugin replaces the src attribute to point to a 1x1 gif and puts the original src
+			// inside the data-lazy-src attribute. It also places the original image inside a noscript element 
+			// next to the amended one.
+			$elems = @$xpath->query("//img[@data-lazy-src]", $this->body);
+			for ($i = $elems->length-1; $i >= 0; $i--) {
+				$e = $elems->item($i);
+				// let's see if we can grab image from noscript
+				if ($e->nextSibling !== null && $e->nextSibling->nodeName === 'noscript') {
+					$_new_elem = $e->ownerDocument->createDocumentFragment();
+					@$_new_elem->appendXML($e->nextSibling->innerHTML);
+					$e->nextSibling->parentNode->replaceChild($_new_elem, $e->nextSibling);
+					$e->parentNode->removeChild($e);
+				} else {
+					// Use data-lazy-src as src value
+					$e->setAttribute('src', $e->getAttribute('data-lazy-src'));
+					$e->removeAttribute('data-lazy-src');
+				}
+			}
+		
+			$this->success = true;
+		}
+		
+		// if we've had no success and we've used tidy, there's a chance
+		// that tidy has messed up. So let's try again without tidy...
+		if (!$this->success && $tidied && $smart_tidy) {
+			$this->debug('Trying again without tidy');
+			$this->process($original_html, $url, false);
+		}
+
+		return $this->success;
+	}
+	
+	private function isDescendant(DOMElement $parent, DOMElement $child) { // returns true if $parent is an ancestor of $child, false otherwise
+		$node = $child->parentNode; // start at the immediate parent, so passing the same node twice yields false
+		while ($node != null) { // stop once parentNode is null, i.e. there are no more ancestors to check
+			if ($node->isSameNode($parent))	return true;
+			$node = $node->parentNode;
+		}
+		return false; // ran out of ancestors without meeting $parent
+	}
+
+	public function getContent() { // returns $this->body, the DOM element process() selected as article body (presumably null/unset if none was found - confirm)
+		return $this->body;
+	}
+	
+	public function getTitle() { // returns $this->title, the title text process() matched or detected
+		return $this->title;
+	}
+	
+	public function getAuthors() { // returns $this->author, to which process() appends each trimmed author name it finds
+		return $this->author;
+	}
+	
+	public function getLanguage() { // returns $this->language, the trimmed value process() read from html@lang or the DC.language meta tag
+		return $this->language;
+	}
+	
+	public function getDate() { // returns $this->date: a strtotime() timestamp when process() parsed a date; process() resets it to null on a failed parse
+		return $this->date;
+	}
+	
+	public function getSiteConfig() { // returns $this->config, the object process() reads its extraction patterns from (presumably a SiteConfig - confirm)
+		return $this->config;
+	}
+	
+	public function getNextPageUrl() { // returns $this->nextPageUrl; NOTE(review): assigned outside the code visible here - presumably from next_page_link patterns, confirm
+		return $this->nextPageUrl;
+	}
+}
\ No newline at end of file
diff --git a/inc/3rdparty/libraries/content-extractor/SiteConfig.php b/inc/3rdparty/libraries/content-extractor/SiteConfig.php
index c5e300d7..1f6a7603 100644
--- a/inc/3rdparty/libraries/content-extractor/SiteConfig.php
+++ b/inc/3rdparty/libraries/content-extractor/SiteConfig.php
@@ -1,338 +1,343 @@
-<?php
-/**
- * Site Config
- * 
- * Each instance of this class should hold extraction patterns and other directives
- * for a website. See ContentExtractor class to see how it's used.
- * 
- * @version 0.7
- * @date 2012-08-27
- * @author Keyvan Minoukadeh
- * @copyright 2012 Keyvan Minoukadeh
- * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
- */
-
-class SiteConfig
-{
-	// Use first matching element as title (0 or more xpath expressions)
-	public $title = array();
-	
-	// Use first matching element as body (0 or more xpath expressions)
-	public $body = array();
-	
-	// Use first matching element as author (0 or more xpath expressions)
-	public $author = array();
-	
-	// Use first matching element as date (0 or more xpath expressions)
-	public $date = array();
-	
-	// Strip elements matching these xpath expressions (0 or more)
-	public $strip = array();
-	
-	// Strip elements which contain these strings (0 or more) in the id or class attribute 
-	public $strip_id_or_class = array();
-	
-	// Strip images which contain these strings (0 or more) in the src attribute 
-	public $strip_image_src = array();
-	
-	// Additional HTTP headers to send
-	// NOT YET USED
-	public $http_header = array();
-	
-	// Process HTML with tidy before creating DOM (bool or null if undeclared)
-	public $tidy = null;
-	
-	protected $default_tidy = true; // used if undeclared
-	
-	// Autodetect title/body if xpath expressions fail to produce results.
-	// Note that this applies to title and body separately, ie. 
-	//   * if we get a body match but no title match, this option will determine whether we autodetect title 
-	//   * if neither match, this determines whether we autodetect title and body.
-	// Also note that this only applies when there is at least one xpath expression in title or body, ie.
-	//   * if title and body are both empty (no xpath expressions), this option has no effect (both title and body will be auto-detected)
-	//   * if there's an xpath expression for title and none for body, body will be auto-detected and this option will determine whether we auto-detect title if the xpath expression for it fails to produce results.
-	// Usage scenario: you want to extract something specific from a set of URLs, e.g. a table, and if the table is not found, you want to ignore the entry completely. Auto-detection is unlikely to succeed here, so you construct your patterns and set this option to false. Another scenario may be a site where auto-detection has proven to fail (or worse, picked up the wrong content).
-	// bool or null if undeclared
-	public $autodetect_on_failure = null;
-	protected $default_autodetect_on_failure = true; // used if undeclared
-	
-	// Clean up content block - attempt to remove elements that appear to be superfluous
-	// bool or null if undeclared
-	public $prune = null;
-	protected $default_prune = true; // used if undeclared
-	
-	// Test URL - if present, can be used to test the config above
-	public $test_url = array();
-	
-	// Single-page link - should identify a link element or URL pointing to the page holding the entire article
-	// This is useful for sites which split their articles across multiple pages. Links to such pages tend to 
-	// display the first page with links to the other pages at the bottom. Often there is also a link to a page
-	// which displays the entire article on one page (e.g. 'print view').
-	// This should be an XPath expression identifying the link to that page. If present and we find a match,
-	// we will retrieve that page and the rest of the options in this config will be applied to the new page.
-	public $single_page_link = array();
-	
-	public $next_page_link = array();
-	
-	// Single-page link in feed? - same as above, but patterns applied to item description HTML taken from feed
-	public $single_page_link_in_feed = array();
-	
-	// Which parser to use for turning raw HTML into a DOMDocument (either 'libxml' or 'html5lib')
-	// string or null if undeclared
-	public $parser = null;
-	protected $default_parser = 'libxml'; // used if undeclared
-	
-	// Strings to search for in HTML before processing begins (used with $replace_string)
-	public $find_string = array();
-	// Strings to replace those found in $find_string before HTML processing begins
-	public $replace_string = array();
-	
-	// the options below cannot be set in the config files which this class represents
-	
-	//public $cache_in_apc = false; // used to decide if we should cache in apc or not
-	public $cache_key = null;
-	public static $debug = false;
-	protected static $apc = false;
-	protected static $config_path;
-	protected static $config_path_fallback;
-	protected static $config_cache = array();
-	const HOSTNAME_REGEX = '/^(([a-zA-Z0-9-]*[a-zA-Z0-9])\.)*([A-Za-z0-9-]*[A-Za-z0-9])$/';
-	
-	protected static function debug($msg) {
-		if (self::$debug) {
-			//$mem = round(memory_get_usage()/1024, 2);
-			//$memPeak = round(memory_get_peak_usage()/1024, 2);
-			echo '* ',$msg;
-			//echo ' - mem used: ',$mem," (peak: $memPeak)\n";
-			echo "\n";
-			ob_flush();
-			flush();
-		}
-	}
-	
-	// enable APC caching of certain site config files?
-	// If enabled the following site config files will be 
-	// cached in APC cache (when requested for first time):
-	// * anything in site_config/custom/ and its corresponding file in site_config/standard/
-	// * the site config files associated with HTML fingerprints
-	// * the global site config file
-	// returns true if enabled, false otherwise
-	public static function use_apc($apc=true) {
-		if (!function_exists('apc_add')) {
-			if ($apc) self::debug('APC will not be used (function apc_add does not exist)');
-			return false;
-		}
-		self::$apc = $apc;
-		return $apc;
-	}
-	
-	// return bool or null
-	public function tidy($use_default=true) {
-		if ($use_default) return (isset($this->tidy)) ? $this->tidy : $this->default_tidy;
-		return $this->tidy;
-	}
-	
-	// return bool or null
-	public function prune($use_default=true) {
-		if ($use_default) return (isset($this->prune)) ? $this->prune : $this->default_prune;
-		return $this->prune;
-	}
-	
-	// return string or null
-	public function parser($use_default=true) {
-		if ($use_default) return (isset($this->parser)) ? $this->parser : $this->default_parser;
-		return $this->parser;
-	}
-
-	// return bool or null
-	public function autodetect_on_failure($use_default=true) {
-		if ($use_default) return (isset($this->autodetect_on_failure)) ? $this->autodetect_on_failure : $this->default_autodetect_on_failure;
-		return $this->autodetect_on_failure;
-	}
-	
-	public static function set_config_path($path, $fallback=null) {
-		self::$config_path = $path;
-		self::$config_path_fallback = $fallback;
-	}
-	
-	public static function add_to_cache($key, SiteConfig $config, $use_apc=true) {
-		$key = strtolower($key);
-		if (substr($key, 0, 4) == 'www.') $key = substr($key, 4);
-		if ($config->cache_key) $key = $config->cache_key;
-		self::$config_cache[$key] = $config;
-		if (self::$apc && $use_apc) {
-			self::debug("Adding site config to APC cache with key sc.$key");
-			apc_add("sc.$key", $config);
-		}
-		self::debug("Cached site config with key $key");
-	}
-	
-	public static function is_cached($key) {
-		$key = strtolower($key);
-		if (substr($key, 0, 4) == 'www.') $key = substr($key, 4);
-		if (array_key_exists($key, self::$config_cache)) {
-			return true;
-		} elseif (self::$apc && (bool)apc_fetch("sc.$key")) {
-			return true;
-		}
-		return false;
-	}
-	
-	public function append(SiteConfig $newconfig) {
-		// check for commands where we accept multiple statements (no test_url)
-		foreach (array('title', 'body', 'author', 'date', 'strip', 'strip_id_or_class', 'strip_image_src', 'single_page_link', 'single_page_link_in_feed', 'next_page_link', 'http_header', 'find_string', 'replace_string') as $var) {
-			// append array elements for this config variable from $newconfig to this config
-			//$this->$var = $this->$var + $newconfig->$var;
-			$this->$var = array_unique(array_merge($this->$var, $newconfig->$var));
-		}
-		// check for single statement commands
-		// we do not overwrite existing non null values
-		foreach (array('tidy', 'prune', 'parser', 'autodetect_on_failure') as $var) {
-			if ($this->$var === null) $this->$var = $newconfig->$var;
-		}
-	}
-	
-	// returns SiteConfig instance if an appropriate one is found, false otherwise
-	// if $exact_host_match is true, we will not look for wildcard config matches
-	// by default if host is 'test.example.org' we will look for and load '.example.org.txt' if it exists
-	public static function build($host, $exact_host_match=false) {
-		$host = strtolower($host);
-		if (substr($host, 0, 4) == 'www.') $host = substr($host, 4);
-		if (!$host || (strlen($host) > 200) || !preg_match(self::HOSTNAME_REGEX, ltrim($host, '.'))) return false;
-		// check for site configuration
-		$try = array($host);
-		// should we look for wildcard matches 
-		if (!$exact_host_match) {
-			$split = explode('.', $host);
-			if (count($split) > 1) {
-				array_shift($split);
-				$try[] = '.'.implode('.', $split);
-			}
-		}
-		
-		// look for site config file in primary folder
-		self::debug(". looking for site config for $host in primary folder");
-		foreach ($try as $h) {
-			if (array_key_exists($h, self::$config_cache)) {
-				self::debug("... site config for $h already loaded in this request");
-				return self::$config_cache[$h];
-			} elseif (self::$apc && ($sconfig = apc_fetch("sc.$h"))) {
-				self::debug("... site config for $h in APC cache");
-				return $sconfig;
-			} elseif (file_exists(self::$config_path."/$h.txt")) {
-				self::debug("... found site config ($h.txt)");
-				$file_primary = self::$config_path."/$h.txt";
-				$matched_name = $h;
-				break;
-			}
-		}
-		
-		// if we found site config, process it
-		if (isset($file_primary)) {
-			$config_lines = file($file_primary, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
-			if (!$config_lines || !is_array($config_lines)) return false;
-			$config = self::build_from_array($config_lines);
-			// if APC caching is available and enabled, mark this for cache
-			//$config->cache_in_apc = true;
-			$config->cache_key = $matched_name;
-			
-			// if autodetec on failure is off (on by default) we do not need to look
-			// in secondary folder
-			if (!$config->autodetect_on_failure()) {
-				self::debug('... autodetect on failure is disabled (no other site config files will be loaded)');
-				return $config;
-			}
-		}
-		
-		// look for site config file in secondary folder
-		if (isset(self::$config_path_fallback)) {
-			self::debug(". looking for site config for $host in secondary folder");
-			foreach ($try as $h) {
-				if (file_exists(self::$config_path_fallback."/$h.txt")) {
-					self::debug("... found site config in secondary folder ($h.txt)");
-					$file_secondary = self::$config_path_fallback."/$h.txt";
-					$matched_name = $h;
-					break;
-				}
-			}
-			if (!isset($file_secondary)) {
-				self::debug("... no site config match in secondary folder");
-			}
-		}
-		
-		// return false if no config file found
-		if (!isset($file_primary) && !isset($file_secondary)) {
-			self::debug("... no site config match for $host");
-			return false;
-		}
-		
-		// return primary config if secondary not found
-		if (!isset($file_secondary) && isset($config)) {
-			return $config;
-		}
-		
-		// process secondary config file
-		$config_lines = file($file_secondary, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
-		if (!$config_lines || !is_array($config_lines)) {
-			// failed to process secondary
-			if (isset($config)) {
-				// return primary config
-				return $config;
-			} else {
-				return false;
-			}
-		}
-		
-		// merge with primary and return
-		if (isset($config)) {
-			self::debug('. merging config files');
-			$config->append(self::build_from_array($config_lines));
-			return $config;
-		} else {
-			// return just secondary
-			$config = self::build_from_array($config_lines);
-			// if APC caching is available and enabled, mark this for cache
-			//$config->cache_in_apc = true;
-			$config->cache_key = $matched_name;
-			return $config;
-		}
-	}
-	
-	public static function build_from_array(array $lines) {
-		$config = new SiteConfig();
-		foreach ($lines as $line) {
-			$line = trim($line);
-			
-			// skip comments, empty lines
-			if ($line == '' || $line[0] == '#') continue;
-			
-			// get command
-			$command = explode(':', $line, 2);
-			// if there's no colon ':', skip this line
-			if (count($command) != 2) continue;
-			$val = trim($command[1]);
-			$command = trim($command[0]);
-			if ($command == '' || $val == '') continue;
-			
-			// check for commands where we accept multiple statements
-			if (in_array($command, array('title', 'body', 'author', 'date', 'strip', 'strip_id_or_class', 'strip_image_src', 'single_page_link', 'single_page_link_in_feed', 'next_page_link', 'http_header', 'test_url', 'find_string', 'replace_string'))) {
-				array_push($config->$command, $val);
-			// check for single statement commands that evaluate to true or false
-			} elseif (in_array($command, array('tidy', 'prune', 'autodetect_on_failure'))) {
-				$config->$command = ($val == 'yes');
-			// check for single statement commands stored as strings
-			} elseif (in_array($command, array('parser'))) {
-				$config->$command = $val;
-			// check for replace_string(find): replace
-			} elseif ((substr($command, -1) == ')') && preg_match('!^([a-z0-9_]+)\((.*?)\)$!i', $command, $match)) {
-				if (in_array($match[1], array('replace_string'))) {
-					$command = $match[1];
-					array_push($config->find_string, $match[2]);
-					array_push($config->$command, $val);
-				}
-			}
-		}
-		return $config;
-	}
-}
-?>
\ No newline at end of file
+<?php
+/**
+ * Site Config
+ * 
+ * Each instance of this class should hold extraction patterns and other directives
+ * for a website. See ContentExtractor class to see how it's used.
+ * 
+ * @version 0.8
+ * @date 2013-04-16
+ * @author Keyvan Minoukadeh
+ * @copyright 2013 Keyvan Minoukadeh
+ * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
+ */
+
+class SiteConfig
+{
+	// Use first matching element as title (0 or more xpath expressions)
+	public $title = array();
+	
+	// Use first matching element as body (0 or more xpath expressions)
+	public $body = array();
+	
+	// Use first matching element as author (0 or more xpath expressions)
+	public $author = array();
+	
+	// Use first matching element as date (0 or more xpath expressions)
+	public $date = array();
+	
+	// Strip elements matching these xpath expressions (0 or more)
+	public $strip = array();
+	
+	// Strip elements which contain these strings (0 or more) in the id or class attribute 
+	public $strip_id_or_class = array();
+	
+	// Strip images which contain these strings (0 or more) in the src attribute 
+	public $strip_image_src = array();
+	
+	// Additional HTTP headers to send
+	// NOT YET USED
+	public $http_header = array();
+	
+	// Process HTML with tidy before creating DOM (bool or null if undeclared)
+	public $tidy = null;
+	
+	protected $default_tidy = true; // used if undeclared
+	
+	// Autodetect title/body if xpath expressions fail to produce results.
+	// Note that this applies to title and body separately, ie. 
+	//   * if we get a body match but no title match, this option will determine whether we autodetect title 
+	//   * if neither match, this determines whether we autodetect title and body.
+	// Also note that this only applies when there is at least one xpath expression in title or body, ie.
+	//   * if title and body are both empty (no xpath expressions), this option has no effect (both title and body will be auto-detected)
+	//   * if there's an xpath expression for title and none for body, body will be auto-detected and this option will determine whether we auto-detect title if the xpath expression for it fails to produce results.
+	// Usage scenario: you want to extract something specific from a set of URLs, e.g. a table, and if the table is not found, you want to ignore the entry completely. Auto-detection is unlikely to succeed here, so you construct your patterns and set this option to false. Another scenario may be a site where auto-detection has proven to fail (or worse, picked up the wrong content).
+	// bool or null if undeclared
+	public $autodetect_on_failure = null;
+	protected $default_autodetect_on_failure = true; // used if undeclared
+	
+	// Clean up content block - attempt to remove elements that appear to be superfluous
+	// bool or null if undeclared
+	public $prune = null;
+	protected $default_prune = true; // used if undeclared
+	
+	// Test URL - if present, can be used to test the config above
+	public $test_url = array();
+	
+	// Single-page link - should identify a link element or URL pointing to the page holding the entire article
+	// This is useful for sites which split their articles across multiple pages. Links to such pages tend to 
+	// display the first page with links to the other pages at the bottom. Often there is also a link to a page
+	// which displays the entire article on one page (e.g. 'print view').
+	// This should be an XPath expression identifying the link to that page. If present and we find a match,
+	// we will retrieve that page and the rest of the options in this config will be applied to the new page.
+	public $single_page_link = array();
+	
+	public $next_page_link = array();
+	
+	// Single-page link in feed? - same as above, but patterns applied to item description HTML taken from feed
+	public $single_page_link_in_feed = array();
+	
+	// Which parser to use for turning raw HTML into a DOMDocument (either 'libxml' or 'html5lib')
+	// string or null if undeclared
+	public $parser = null;
+	protected $default_parser = 'libxml'; // used if undeclared
+	
+	// Strings to search for in HTML before processing begins (used with $replace_string)
+	public $find_string = array();
+	// Strings to replace those found in $find_string before HTML processing begins
+	public $replace_string = array();
+	
+	// the options below cannot be set in the config files which this class represents
+	
+	//public $cache_in_apc = false; // used to decide if we should cache in apc or not
+	public $cache_key = null;
+	public static $debug = false;
+	protected static $apc = false;
+	protected static $config_path;
+	protected static $config_path_fallback;
+	protected static $config_cache = array();
+	const HOSTNAME_REGEX = '/^(([a-zA-Z0-9-]*[a-zA-Z0-9])\.)*([A-Za-z0-9-]*[A-Za-z0-9])$/';
+	
+	protected static function debug($msg) {
+		if (self::$debug) {
+			//$mem = round(memory_get_usage()/1024, 2);
+			//$memPeak = round(memory_get_peak_usage()/1024, 2);
+			echo '* ',$msg;
+			//echo ' - mem used: ',$mem," (peak: $memPeak)\n";
+			echo "\n";
+			ob_flush();
+			flush();
+		}
+	}
+	
+	// enable APC caching of certain site config files?
+	// If enabled the following site config files will be 
+	// cached in APC cache (when requested for first time):
+	// * anything in site_config/custom/ and its corresponding file in site_config/standard/
+	// * the site config files associated with HTML fingerprints
+	// * the global site config file
+	// returns true if enabled, false otherwise
+	public static function use_apc($apc=true) {
+		if (!function_exists('apc_add')) {
+			if ($apc) self::debug('APC will not be used (function apc_add does not exist)');
+			return false;
+		}
+		self::$apc = $apc;
+		return $apc;
+	}
+	
+	// return bool or null
+	public function tidy($use_default=true) {
+		if ($use_default) return (isset($this->tidy)) ? $this->tidy : $this->default_tidy;
+		return $this->tidy;
+	}
+	
+	// return bool or null
+	public function prune($use_default=true) {
+		if ($use_default) return (isset($this->prune)) ? $this->prune : $this->default_prune;
+		return $this->prune;
+	}
+	
+	// return string or null
+	public function parser($use_default=true) {
+		if ($use_default) return (isset($this->parser)) ? $this->parser : $this->default_parser;
+		return $this->parser;
+	}
+
+	// return bool or null
+	public function autodetect_on_failure($use_default=true) {
+		if ($use_default) return (isset($this->autodetect_on_failure)) ? $this->autodetect_on_failure : $this->default_autodetect_on_failure;
+		return $this->autodetect_on_failure;
+	}
+	
+	public static function set_config_path($path, $fallback=null) {
+		self::$config_path = $path;
+		self::$config_path_fallback = $fallback;
+	}
+	
+	public static function add_to_cache($key, SiteConfig $config, $use_apc=true) {
+		$key = strtolower($key);
+		if (substr($key, 0, 4) == 'www.') $key = substr($key, 4);
+		if ($config->cache_key) $key = $config->cache_key;
+		self::$config_cache[$key] = $config;
+		if (self::$apc && $use_apc) {
+			self::debug("Adding site config to APC cache with key sc.$key");
+			apc_add("sc.$key", $config);
+		}
+		self::debug("Cached site config with key $key");
+	}
+	
+	public static function is_cached($key) {
+		$key = strtolower($key);
+		if (substr($key, 0, 4) == 'www.') $key = substr($key, 4);
+		if (array_key_exists($key, self::$config_cache)) {
+			return true;
+		} elseif (self::$apc && (bool)apc_fetch("sc.$key")) {
+			return true;
+		}
+		return false;
+	}
+	
+	public function append(SiteConfig $newconfig) {
+		// check for commands where we accept multiple statements (no test_url)
+		foreach (array('title', 'body', 'author', 'date', 'strip', 'strip_id_or_class', 'strip_image_src', 'single_page_link', 'single_page_link_in_feed', 'next_page_link', 'http_header') as $var) {
+			// append array elements for this config variable from $newconfig to this config
+			//$this->$var = $this->$var + $newconfig->$var;
+			$this->$var = array_unique(array_merge($this->$var, $newconfig->$var));
+		}
+		// check for single statement commands
+		// we do not overwrite existing non null values
+		foreach (array('tidy', 'prune', 'parser', 'autodetect_on_failure') as $var) {
+			if ($this->$var === null) $this->$var = $newconfig->$var;
+		}
+		// treat find_string and replace_string separately (don't apply array_unique) (thanks fabrizio!)
+		foreach (array('find_string', 'replace_string') as $var) {
+			// append array elements for this config variable from $newconfig to this config
+			//$this->$var = $this->$var + $newconfig->$var;
+			$this->$var = array_merge($this->$var, $newconfig->$var);
+		}
+	}
+	
+	// returns SiteConfig instance if an appropriate one is found, false otherwise
+	// if $exact_host_match is true, we will not look for wildcard config matches
+	// by default if host is 'test.example.org' we will look for and load '.example.org.txt' if it exists
+	public static function build($host, $exact_host_match=false) {
+		$host = strtolower($host);
+		if (substr($host, 0, 4) == 'www.') $host = substr($host, 4);
+		if (!$host || (strlen($host) > 200) || !preg_match(self::HOSTNAME_REGEX, ltrim($host, '.'))) return false;
+		// check for site configuration
+		$try = array($host);
+		// should we look for wildcard matches 
+		if (!$exact_host_match) {
+			$split = explode('.', $host);
+			if (count($split) > 1) {
+				array_shift($split);
+				$try[] = '.'.implode('.', $split);
+			}
+		}
+		
+		// look for site config file in primary folder
+		self::debug(". looking for site config for $host in primary folder");
+		foreach ($try as $h) {
+			if (array_key_exists($h, self::$config_cache)) {
+				self::debug("... site config for $h already loaded in this request");
+				return self::$config_cache[$h];
+			} elseif (self::$apc && ($sconfig = apc_fetch("sc.$h"))) {
+				self::debug("... site config for $h in APC cache");
+				return $sconfig;
+			} elseif (file_exists(self::$config_path."/$h.txt")) {
+				self::debug("... found site config ($h.txt)");
+				$file_primary = self::$config_path."/$h.txt";
+				$matched_name = $h;
+				break;
+			}
+		}
+		
+		// if we found site config, process it
+		if (isset($file_primary)) {
+			$config_lines = file($file_primary, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
+			if (!$config_lines || !is_array($config_lines)) return false;
+			$config = self::build_from_array($config_lines);
+			// if APC caching is available and enabled, mark this for cache
+			//$config->cache_in_apc = true;
+			$config->cache_key = $matched_name;
+			
+			// if autodetec on failure is off (on by default) we do not need to look
+			// in secondary folder
+			if (!$config->autodetect_on_failure()) {
+				self::debug('... autodetect on failure is disabled (no other site config files will be loaded)');
+				return $config;
+			}
+		}
+		
+		// look for site config file in secondary folder
+		if (isset(self::$config_path_fallback)) {
+			self::debug(". looking for site config for $host in secondary folder");
+			foreach ($try as $h) {
+				if (file_exists(self::$config_path_fallback."/$h.txt")) {
+					self::debug("... found site config in secondary folder ($h.txt)");
+					$file_secondary = self::$config_path_fallback."/$h.txt";
+					$matched_name = $h;
+					break;
+				}
+			}
+			if (!isset($file_secondary)) {
+				self::debug("... no site config match in secondary folder");
+			}
+		}
+		
+		// return false if no config file found
+		if (!isset($file_primary) && !isset($file_secondary)) {
+			self::debug("... no site config match for $host");
+			return false;
+		}
+		
+		// return primary config if secondary not found
+		if (!isset($file_secondary) && isset($config)) {
+			return $config;
+		}
+		
+		// process secondary config file
+		$config_lines = file($file_secondary, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
+		if (!$config_lines || !is_array($config_lines)) {
+			// failed to process secondary
+			if (isset($config)) {
+				// return primary config
+				return $config;
+			} else {
+				return false;
+			}
+		}
+		
+		// merge with primary and return
+		if (isset($config)) {
+			self::debug('. merging config files');
+			$config->append(self::build_from_array($config_lines));
+			return $config;
+		} else {
+			// return just secondary
+			$config = self::build_from_array($config_lines);
+			// if APC caching is available and enabled, mark this for cache
+			//$config->cache_in_apc = true;
+			$config->cache_key = $matched_name;
+			return $config;
+		}
+	}
+	
+	public static function build_from_array(array $lines) {
+		$config = new SiteConfig();
+		foreach ($lines as $line) {
+			$line = trim($line);
+			
+			// skip comments, empty lines
+			if ($line == '' || $line[0] == '#') continue;
+			
+			// get command
+			$command = explode(':', $line, 2);
+			// if there's no colon ':', skip this line
+			if (count($command) != 2) continue;
+			$val = trim($command[1]);
+			$command = trim($command[0]);
+			if ($command == '' || $val == '') continue;
+			
+			// check for commands where we accept multiple statements
+			if (in_array($command, array('title', 'body', 'author', 'date', 'strip', 'strip_id_or_class', 'strip_image_src', 'single_page_link', 'single_page_link_in_feed', 'next_page_link', 'http_header', 'test_url', 'find_string', 'replace_string'))) {
+				array_push($config->$command, $val);
+			// check for single statement commands that evaluate to true or false
+			} elseif (in_array($command, array('tidy', 'prune', 'autodetect_on_failure'))) {
+				$config->$command = ($val == 'yes');
+			// check for single statement commands stored as strings
+			} elseif (in_array($command, array('parser'))) {
+				$config->$command = $val;
+			// check for replace_string(find): replace
+			} elseif ((substr($command, -1) == ')') && preg_match('!^([a-z0-9_]+)\((.*?)\)$!i', $command, $match)) {
+				if (in_array($match[1], array('replace_string'))) {
+					$command = $match[1];
+					array_push($config->find_string, $match[2]);
+					array_push($config->$command, $val);
+				}
+			}
+		}
+		return $config;
+	}
+}
\ No newline at end of file
diff --git a/inc/3rdparty/libraries/feedwriter/FeedItem.php b/inc/3rdparty/libraries/feedwriter/FeedItem.php
old mode 100644
new mode 100755
index 54a56f22..40786598
--- a/inc/3rdparty/libraries/feedwriter/FeedItem.php
+++ b/inc/3rdparty/libraries/feedwriter/FeedItem.php
@@ -1,7 +1,7 @@
 <?php
  /**
  * Univarsel Feed Writer
- * 
+ *
  * FeedItem class - Used as feed element in FeedWriter class
  *
  * @package         UnivarselFeedWriter
@@ -12,20 +12,20 @@
  {
     private $elements = array();    //Collection of feed elements
     private $version;
-    
+
     /**
-    * Constructor 
-    * 
-    * @param    contant     (RSS1/RSS2/ATOM) RSS2 is default. 
-    */ 
+    * Constructor
+    *
+    * @param    contant     (RSS1/RSS2/ATOM) RSS2 is default.
+    */
     function __construct($version = RSS2)
-    {    
+    {
         $this->version = $version;
     }
 
     /**
     * Set element (overwrites existing elements with $elementName)
-    * 
+    *
     * @access   public
     * @param    srting  The tag name of an element
     * @param    srting  The content of tag
@@ -38,11 +38,11 @@
             unset($this->elements[$elementName]);
         }
         $this->addElement($elementName, $content, $attributes);
-    }    
-    
+    }
+
     /**
     * Add an element to elements array
-    * 
+    *
     * @access   public
     * @param    srting  The tag name of an element
     * @param    srting  The content of tag
@@ -61,11 +61,11 @@
         $this->elements[$elementName][$i]['content']    = $content;
         $this->elements[$elementName][$i]['attributes'] = $attributes;
     }
-    
+
     /**
-    * Set multiple feed elements from an array. 
+    * Set multiple feed elements from an array.
     * Elements which have attributes cannot be added by this method
-    * 
+    *
     * @access   public
     * @param    array   array of elements in 'tagName' => 'tagContent' format.
     * @return   void
@@ -73,15 +73,15 @@
     public function addElementArray($elementArray)
     {
         if(! is_array($elementArray)) return;
-        foreach ($elementArray as $elementName => $content) 
+        foreach ($elementArray as $elementName => $content)
         {
             $this->addElement($elementName, $content);
         }
     }
-    
+
     /**
     * Return the collection of elements in this feed item
-    * 
+    *
     * @access   public
     * @return   array
     */
@@ -89,68 +89,74 @@
     {
         return $this->elements;
     }
-    
+
     // Wrapper functions ------------------------------------------------------
-    
+
     /**
     * Set the 'dscription' element of feed item
-    * 
+    *
     * @access   public
     * @param    string  The content of 'description' element
     * @return   void
     */
-    public function setDescription($description) 
+    public function setDescription($description)
     {
-        $this->setElement('description', $description);
+        $tag = ($this->version == ATOM)? 'summary' : 'description';
+        $this->setElement($tag, $description);
     }
-    
+
     /**
     * @desc     Set the 'title' element of feed item
     * @access   public
     * @param    string  The content of 'title' element
     * @return   void
     */
-    public function setTitle($title) 
+    public function setTitle($title)
     {
-        $this->setElement('title', $title);      
+        $this->setElement('title', $title);
     }
-    
+
     /**
     * Set the 'date' element of feed item
-    * 
+    *
     * @access   public
     * @param    string  The content of 'date' element
     * @return   void
     */
-    public function setDate($date) 
+    public function setDate($date)
     {
         if(! is_numeric($date))
         {
             $date = strtotime($date);
         }
-      
-        if($this->version == RSS2) 
+
+        if($this->version == ATOM)
+        {
+        	$tag    = 'updated';
+        	$value  = date(DATE_ATOM, $date);
+        }
+        elseif($this->version == RSS2)
         {
-            $tag    = 'pubDate';
-            $value  = date(DATE_RSS, $date);
+        	$tag    = 'pubDate';
+        	$value  = date(DATE_RSS, $date);
         }
-        else                                
+        else
         {
-            $tag    = 'dc:date';
-            $value  = date("Y-m-d", $date);
+        	$tag    = 'dc:date';
+        	$value  = date("Y-m-d", $date);
         }
-        
-        $this->setElement($tag, $value);    
+
+        $this->setElement($tag, $value);
     }
-    
+
     /**
     * Set the 'link' element of feed item
-    * 
+    *
     * @access   public
     * @param    string  The content of 'link' element
     * @return   void
     */
-    public function setLink($link) 
+    public function setLink($link)
     {
         if($this->version == RSS2 || $this->version == RSS1)
         {
@@ -161,27 +167,27 @@
         {
             $this->setElement('link','',array('href'=>$link));
             $this->setElement('id', FeedWriter::uuid($link,'urn:uuid:'));
-        } 
-        
+        }
+
     }
 
     /**
     * Set the 'source' element of feed item
-    * 
+    *
     * @access   public
     * @param    string  The content of 'source' element
     * @return   void
     */
-    public function setSource($link) 
+    public function setSource($link)
     {
         $attributes = array('url'=>$link);
         $this->setElement('source', "wallabag",$attributes);
     }
-    
+
     /**
     * Set the 'encloser' element of feed item
     * For RSS 2.0 only
-    * 
+    *
     * @access   public
     * @param    string  The url attribute of encloser tag
     * @param    string  The length attribute of encloser tag
@@ -193,6 +199,6 @@
         $attributes = array('url'=>$url, 'length'=>$length, 'type'=>$type);
         $this->setElement('enclosure','',$attributes);
     }
-    
+
  } // end of class FeedItem
 ?>
\ No newline at end of file
diff --git a/inc/3rdparty/libraries/feedwriter/FeedWriter.php b/inc/3rdparty/libraries/feedwriter/FeedWriter.php
index d708e99b..77755690 100755
--- a/inc/3rdparty/libraries/feedwriter/FeedWriter.php
+++ b/inc/3rdparty/libraries/feedwriter/FeedWriter.php
@@ -97,15 +97,12 @@ define('JSONP', 3, true);
               header('X-content-type-options: nosniff');
           } elseif ($this->version == JSON) {
               header('Content-type: application/json; charset=UTF-8');
+              $this->json = new stdClass();
           } elseif ($this->version == JSONP) {
               header('Content-type: application/javascript; charset=UTF-8');
+              $this->json = new stdClass();
           }
         }
-      
-        if ($this->version == JSON || $this->version == JSONP) {
-          $this->json = new stdClass();
-        }
-      
 
         $this->printHead();
         $this->printChannels();
@@ -116,6 +113,11 @@ define('JSONP', 3, true);
         }
     }
 
+    public function &getItems()
+    {
+    	return $this->items;
+    }
+
     /**
     * Create a new FeedItem.
     *
@@ -199,7 +201,8 @@ define('JSONP', 3, true);
     */
     public function setDescription($description)
     {
-        $this->setChannelElement('description', $description);
+        $tag = ($this->version == ATOM)? 'subtitle' : 'description'; // ATOM feeds use the 'subtitle' tag here
+        $this->setChannelElement($tag, $description);
     }
 
     /**
@@ -244,7 +247,7 @@ define('JSONP', 3, true);
         {
             $out  = '<?xml version="1.0" encoding="utf-8"?>'."\n";
             if ($this->xsl) $out .= '<?xml-stylesheet type="text/xsl" href="'.htmlspecialchars($this->xsl).'"?>' . PHP_EOL;
-            $out .= '<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:media="http://search.yahoo.com/mrss/">' . PHP_EOL;
+            $out .= '<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:media="http://search.yahoo.com/mrss/">' . PHP_EOL;
             echo $out;
         }
         elseif ($this->version == JSON || $this->version == JSONP)
diff --git a/inc/3rdparty/libraries/html5/TreeBuilder.php b/inc/3rdparty/libraries/html5/TreeBuilder.php
index 2f5244f9..c4a48b21 100644
--- a/inc/3rdparty/libraries/html5/TreeBuilder.php
+++ b/inc/3rdparty/libraries/html5/TreeBuilder.php
@@ -134,6 +134,7 @@ class HTML5_TreeBuilder {
 
     // Namespaces for foreign content
     const NS_HTML   = null; // to prevent DOM from requiring NS on everything
+    const NS_XHTML  = 'http://www.w3.org/1999/xhtml';
     const NS_MATHML = 'http://www.w3.org/1998/Math/MathML';
     const NS_SVG    = 'http://www.w3.org/2000/svg';
     const NS_XLINK  = 'http://www.w3.org/1999/xlink';
@@ -3157,11 +3158,19 @@ class HTML5_TreeBuilder {
         }
 
     private function insertElement($token, $append = true) {
-        $el = $this->dom->createElementNS(self::NS_HTML, $token['name']);
+        //$el = $this->dom->createElementNS(self::NS_HTML, $token['name']);
+        $namespaceURI = strpos($token['name'], ':') ? self::NS_XHTML : self::NS_HTML;
+        $el = $this->dom->createElementNS($namespaceURI, $token['name']);
 
         if (!empty($token['attr'])) {
             foreach($token['attr'] as $attr) {
-                if(!$el->hasAttribute($attr['name'])) {
+
+				// mike@macgirvin.com 2011-11-17, check attribute name for
+				// validity (ignoring extenders and combiners) as illegal chars in names
+				// causes everything to abort
+				// (one name-start char followed by zero or more name chars, so 'x' or 'd' passes)
+ 				$valid = preg_match('/^[a-zA-Z\_\:]([\-a-zA-Z0-9\_\:\.]*$)/',$attr['name']);
+                if($attr['name'] && (!$el->hasAttribute($attr['name'])) && ($valid)) {
                     $el->setAttribute($attr['name'], $attr['value']);
                 }
             }
diff --git a/inc/3rdparty/libraries/humble-http-agent/CookieJar.php b/inc/3rdparty/libraries/humble-http-agent/CookieJar.php
index 83e94f14..e4d5f495 100644
--- a/inc/3rdparty/libraries/humble-http-agent/CookieJar.php
+++ b/inc/3rdparty/libraries/humble-http-agent/CookieJar.php
@@ -1,404 +1,403 @@
-<?php
-/**
- * Cookie Jar
- * 
- * PHP class for handling cookies, as defined by the Netscape spec: 
- * <http://curl.haxx.se/rfc/cookie_spec.html>
- *
- * This class should be used to handle cookies (storing cookies from HTTP response messages, and
- * sending out cookies in HTTP request messages). This has been adapted for FiveFilters.org 
- * from the original version used in HTTP Navigator. See http://www.keyvan.net/code/http-navigator/
- * 
- * This class is mainly based on Cookies.pm <http://search.cpan.org/author/GAAS/libwww-perl-5.65/
- * lib/HTTP/Cookies.pm> from the libwww-perl collection <http://www.linpro.no/lwp/>.
- * Unlike Cookies.pm, this class only supports the Netscape cookie spec, not RFC 2965.
- * 
- * @version 0.5
- * @date 2011-03-15
- * @see http://php.net/HttpRequestPool
- * @author Keyvan Minoukadeh
- * @copyright 2011 Keyvan Minoukadeh
- * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
- */
-
-class CookieJar
-{
-    /**
-    * Cookies - array containing all cookies.
-    *
-    * <pre>
-    * Cookies are stored like this:
-    *   [domain][path][name] = array
-    * where array is:
-    *   0 => value, 1 => secure, 2 => expires
-    * </pre>
-    * @var array
-    * @access private
-    */
-    public $cookies = array();
-	public $debug = false;
-
-    /**
-    * Constructor
-    */
-    function __construct() {
-    }
-
-	protected function debug($msg, $file=null, $line=null) {
-		if ($this->debug) {
-			$mem = round(memory_get_usage()/1024, 2);
-			$memPeak = round(memory_get_peak_usage()/1024, 2);
-			echo '* ',$msg;
-			if (isset($file, $line)) echo " ($file line $line)";
-			echo ' - mem used: ',$mem," (peak: $memPeak)\n";	
-			ob_flush();
-			flush();
-		}
-	}	
-	
-    /**
-    * Get matching cookies
-    *
-    * Only use this method if you cannot use add_cookie_header(), for example, if you want to use
-    * this cookie jar class without using the request class.
-    *
-    * @param array $param associative array containing 'domain', 'path', 'secure' keys
-    * @return string
-    * @see add_cookie_header()
-    */
-    public function getMatchingCookies($url)
-    {
-		if (($parts = @parse_url($url)) && isset($parts['scheme'], $parts['host'], $parts['path'])) {
-			$param['domain'] = $parts['host'];
-			$param['path'] = $parts['path'];
-			$param['secure'] = (strtolower($parts['scheme']) == 'https');
-			unset($parts);
-		} else {
-			return false;
-		}
-        // RFC 2965 notes:
-        //  If multiple cookies satisfy the criteria above, they are ordered in
-        //  the Cookie header such that those with more specific Path attributes
-        //  precede those with less specific.  Ordering with respect to other
-        //  attributes (e.g., Domain) is unspecified.
-        $domain = $param['domain'];
-        if (strpos($domain, '.') === false) $domain .= '.local';
-        $request_path = $param['path'];
-        if ($request_path == '') $request_path = '/';
-        $request_secure = $param['secure'];
-        $now = time();
-        $matched_cookies = array();
-        // domain - find matching domains
-        $this->debug('Finding matching domains for '.$domain, __FILE__, __LINE__);
-        while (strpos($domain, '.') !== false) {
-            if (isset($this->cookies[$domain])) {
-                $this->debug(' domain match found: '.$domain);
-                $cookies =& $this->cookies[$domain];
-            } else {
-                $domain = $this->_reduce_domain($domain);
-                continue;
-            }
-            // paths - find matching paths starting from most specific
-            $this->debug('  - Finding matching paths for '.$request_path);
-            $paths = array_keys($cookies);
-            usort($paths, array($this, '_cmp_length'));
-            foreach ($paths as $path) {
-                // continue to next cookie if request path does not path-match cookie path
-                if (!$this->_path_match($request_path, $path)) continue;
-                // loop through cookie names
-                $this->debug('     path match found: '.$path);
-                foreach ($cookies[$path] as $name => $values) {
-                    // if this cookie is secure but request isn't, continue to next cookie
-                    if ($values[1] && !$request_secure) continue;
-                    // if cookie is not a session cookie and has expired, continue to next cookie
-                    if (is_int($values[2]) && ($values[2] < $now)) continue;
-                    // cookie matches request
-                    $this->debug('      cookie match: '.$name.'='.$values[0]);
-                    $matched_cookies[] = $name.'='.$values[0];
-                }
-            }
-            $domain = $this->_reduce_domain($domain);
-        }
-        // return cookies
-        return implode('; ', $matched_cookies);
-    }
-
-    /**
-    * Parse Set-Cookie values.
-    *
-    * Only use this method if you cannot use extract_cookies(), for example, if you want to use
-    * this cookie jar class without using the response class.
-    *
-    * @param array $set_cookies array holding 1 or more "Set-Cookie" header values
-    * @param array $param associative array containing 'host', 'path' keys
-    * @return void
-    * @see extract_cookies()
-    */
-    public function storeCookies($url, $set_cookies)
-    {
-        if (count($set_cookies) == 0) return;
-		$param = @parse_url($url);
-		if (!is_array($param) || !isset($param['host'])) return;
-        $request_host = $param['host'];
-        if (strpos($request_host, '.') === false) $request_host .= '.local';
-        $request_path = @$param['path'];
-        if ($request_path == '') $request_path = '/';
-        //
-        // loop through set-cookie headers
-        //
-        foreach ($set_cookies as $set_cookie) {
-            $this->debug('Parsing: '.$set_cookie);
-            // temporary cookie store (before adding to jar)
-            $tmp_cookie = array();
-            $param = explode(';', $set_cookie);
-            // loop through params
-            for ($x=0; $x<count($param); $x++) {
-                $key_val = explode('=', $param[$x], 2);
-                if (count($key_val) != 2) {
-                    // if the first param isn't a name=value pair, continue to the next set-cookie
-                    // header
-                    if ($x == 0) continue 2;
-                    // check for secure flag
-                    if (strtolower(trim($key_val[0])) == 'secure') $tmp_cookie['secure'] = true;
-                    // continue to next param
-                    continue;
-                }
-                list($key, $val) = array_map('trim', $key_val);
-                // first name=value pair is the cookie name and value
-                // the name and value are stored under 'name' and 'value' to avoid conflicts
-                // with later parameters.
-                if ($x == 0) {
-                    $tmp_cookie = array('name'=>$key, 'value'=>$val);
-                    continue;
-                }
-                $key = strtolower($key);
-                if (in_array($key, array('expires', 'path', 'domain', 'secure'))) {
-                    $tmp_cookie[$key] = $val;
-                }
-            }
-            //
-            // set cookie
-            //
-            // check domain
-            if (isset($tmp_cookie['domain']) && ($tmp_cookie['domain'] != $request_host) &&
-                    ($tmp_cookie['domain'] != ".$request_host")) {
-                $domain = $tmp_cookie['domain'];
-                if ((strpos($domain, '.') === false) && ($domain != 'local')) {
-                    $this->debug(' - domain "'.$domain.'" has no dot and is not a local domain');
-                    continue;
-                }
-                if (preg_match('/\.[0-9]+$/', $domain)) {
-                    $this->debug(' - domain "'.$domain.'" appears to be an ip address');
-                    continue;
-                }
-                if (substr($domain, 0, 1) != '.') $domain = ".$domain";
-                if (!$this->_domain_match($request_host, $domain)) {
-                    $this->debug(' - request host "'.$request_host.'" does not domain-match "'.$domain.'"');
-                    continue;
-                }
-            } else {
-                // if domain is not specified in the set-cookie header, domain will default to
-                // the request host
-                $domain = $request_host;
-            }
-            // check path
-            if (isset($tmp_cookie['path']) && ($tmp_cookie['path'] != '')) {
-                $path = urldecode($tmp_cookie['path']);
-                if (!$this->_path_match($request_path, $path)) {
-                    $this->debug(' - request path "'.$request_path.'" does not path-match "'.$path.'"');
-                    continue;
-                }
-            } else {
-                $path = $request_path;
-                $path = substr($path, 0, strrpos($path, '/'));
-                if ($path == '') $path = '/';
-            }
-            // check if secure
-            $secure = (isset($tmp_cookie['secure'])) ? true : false;
-            // check expiry
-            if (isset($tmp_cookie['expires'])) {
-                if (($expires = strtotime($tmp_cookie['expires'])) < 0) {
-                    $expires = null;
-                }
-            } else {
-                $expires = null;
-            }
-            // set cookie
-            $this->set_cookie($domain, $path, $tmp_cookie['name'], $tmp_cookie['value'], $secure, $expires);
-        }
-    }
-	
-	// return array of set-cookie values extracted from HTTP response headers (string $h)
-	public function extractCookies($h) {
-        $x = 0;
-        $lines = 0;
-        $headers = array();
-        $last_match = false;
-		$h = explode("\n", $h);
-        foreach ($h as $line) {
-			$line = rtrim($line);
-            $lines++;
-
-            $trimmed_line = trim($line);
-            if (isset($line_last)) {
-                // check if we have \r\n\r\n (indicating the end of headers)
-                // some servers will not use CRLF (\r\n), so we make CR (\r) optional.
-                // if (preg_match('/\015?\012\015?\012/', $line_last.$line)) {
-                //     break;
-                // }
-                // As an alternative, we can check if the current trimmed line is empty
-                if ($trimmed_line == '') {
-                    break;
-                }
-
-                // check for continuation line...
-                // RFC 2616 Section 2.2 "Basic Rules":
-                // HTTP/1.1 header field values can be folded onto multiple lines if the
-                // continuation line begins with a space or horizontal tab. All linear
-                // white space, including folding, has the same semantics as SP. A
-                // recipient MAY replace any linear white space with a single SP before
-                // interpreting the field value or forwarding the message downstream.
-                if ($last_match && preg_match('/^\s+(.*)/', $line, $match)) {
-                    // append to previous header value
-                    $headers[$x-1] .= ' '.rtrim($match[1]);
-                    continue;
-                }
-            }
-            $line_last = $line;
-
-            // split header name and value
-            if (preg_match('/^Set-Cookie\s*:\s*(.*)/i', $line, $match)) {
-                $headers[$x++] = rtrim($match[1]);
-                $last_match = true;
-            } else {
-                $last_match = false;
-            }
-        }
-        return $headers;
-	}
-
-    /**
-    * Set Cookie
-    * @param string $domain
-    * @param string $path
-    * @param string $name cookie name
-    * @param string $value cookie value
-    * @param bool $secure
-    * @param int $expires expiry time (null if session cookie, <= 0 will delete cookie)
-    * @return void
-    */
-    function set_cookie($domain, $path, $name, $value, $secure=false, $expires=null)
-    {
-        if ($domain == '') return;
-        if ($path == '') return;
-        if ($name == '') return;
-        // check if cookie needs to go
-        if (isset($expires) && ($expires <= 0)) {
-            if (isset($this->cookies[$domain][$path][$name])) unset($this->cookies[$domain][$path][$name]);
-            return;
-        }
-        if ($value == '') return;
-        $this->cookies[$domain][$path][$name] = array($value, $secure, $expires);
-        return;
-    }
-
-    /**
-    * Clear cookies - [domain [,path [,name]]] - call method with no arguments to clear all cookies.
-    * @param string $domain
-    * @param string $path
-    * @param string $name
-    * @return void
-    */
-    function clear($domain=null, $path=null, $name=null)
-    {
-        if (!isset($domain)) {
-            $this->cookies = array();
-        } elseif (!isset($path)) {
-            if (isset($this->cookies[$domain])) unset($this->cookies[$domain]);
-        } elseif (!isset($name)) {
-            if (isset($this->cookies[$domain][$path])) unset($this->cookies[$domain][$path]);
-        } elseif (isset($name)) {
-            if (isset($this->cookies[$domain][$path][$name])) unset($this->cookies[$domain][$path][$name]);
-        }
-    }
-
-    /**
-    * Compare string length - used for sorting
-    * @access private
-    * @return int
-    */
-    function _cmp_length($a, $b)
-    {
-        $la = strlen($a); $lb = strlen($b);
-        if ($la == $lb) return 0;
-        return ($la > $lb) ? -1 : 1;
-    }
-
-    /**
-    * Reduce domain
-    * @param string $domain
-    * @return string
-    * @access private
-    */
-    function _reduce_domain($domain)
-    {
-        if ($domain == '') return '';
-        if (substr($domain, 0, 1) == '.') return substr($domain, 1);
-        return substr($domain, strpos($domain, '.'));
-    }
-
-    /**
-    * Path match - check if path1 path-matches path2
-    *
-    * From RFC 2965: 
-    *   <i>For two strings that represent paths, P1 and P2, P1 path-matches P2
-    *   if P2 is a prefix of P1 (including the case where P1 and P2 string-
-    *   compare equal).  Thus, the string /tec/waldo path-matches /tec.</i>
-    * @param string $path1
-    * @param string $path2
-    * @return bool
-    * @access private
-    */
-    function _path_match($path1, $path2)
-    {
-        return (substr($path1, 0, strlen($path2)) == $path2);
-    }
-
-    /**
-    * Domain match - check if domain1 domain-matches domain2
-    *
-    * A few extracts from RFC 2965: 
-    *  -  A Set-Cookie2 from request-host y.x.foo.com for Domain=.foo.com
-    *     would be rejected, because H is y.x and contains a dot.
-    *
-    *  -  A Set-Cookie2 from request-host x.foo.com for Domain=.foo.com
-    *     would be accepted.
-    *
-    *  -  A Set-Cookie2 with Domain=.com or Domain=.com., will always be
-    *     rejected, because there is no embedded dot.
-    *
-    *  -  A Set-Cookie2 from request-host example for Domain=.local will
-    *     be accepted, because the effective host name for the request-
-    *     host is example.local, and example.local domain-matches .local.
-    *
-    * I'm ignoring the first point for now (must check to see how other browsers handle
-    * this rule for Set-Cookie headers)
-    *
-    * @param string $domain1
-    * @param string $domain2
-    * @return bool
-    * @access private
-    */
-    function _domain_match($domain1, $domain2)
-    {
-        $domain1 = strtolower($domain1);
-        $domain2 = strtolower($domain2);
-        while (strpos($domain1, '.') !== false) {
-            if ($domain1 == $domain2) return true;
-            $domain1 = $this->_reduce_domain($domain1);
-            continue;
-        }
-        return false;
-    }
-}
-?>
\ No newline at end of file
+<?php
+/**
+ * Cookie Jar
+ * 
+ * PHP class for handling cookies, as defined by the Netscape spec: 
+ * <http://curl.haxx.se/rfc/cookie_spec.html>
+ *
+ * This class should be used to handle cookies (storing cookies from HTTP response messages, and
+ * sending out cookies in HTTP request messages). This has been adapted for FiveFilters.org 
+ * from the original version used in HTTP Navigator. See http://www.keyvan.net/code/http-navigator/
+ * 
+ * This class is mainly based on Cookies.pm <http://search.cpan.org/author/GAAS/libwww-perl-5.65/
+ * lib/HTTP/Cookies.pm> from the libwww-perl collection <http://www.linpro.no/lwp/>.
+ * Unlike Cookies.pm, this class only supports the Netscape cookie spec, not RFC 2965.
+ * 
+ * @version 0.5
+ * @date 2011-03-15
+ * @see http://php.net/HttpRequestPool
+ * @author Keyvan Minoukadeh
+ * @copyright 2011 Keyvan Minoukadeh
+ * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
+ */
+
+class CookieJar
+{
+    /**
+    * Cookies - array containing all cookies.
+    *
+    * <pre>
+    * Cookies are stored like this:
+    *   [domain][path][name] = array
+    * where array is:
+    *   0 => value, 1 => secure, 2 => expires
+    * </pre>
+    * @var array
+    * @access private
+    */
+    public $cookies = array();
+	public $debug = false;
+
+    /**
+    * Constructor
+    */
+    function __construct() { // intentionally empty: $cookies and $debug already have their declared defaults
+    }
+
+	protected function debug($msg, $file=null, $line=null) { // echoes one diagnostic line when $this->debug is true; otherwise does nothing
+		if ($this->debug) {
+			$mem = round(memory_get_usage()/1024, 2); // bytes / 1024, rounded to 2 decimals
+			$memPeak = round(memory_get_peak_usage()/1024, 2);
+			echo '* ',$msg;
+			if (isset($file, $line)) echo " ($file line $line)"; // location is appended only when both are given
+			echo ' - mem used: ',$mem," (peak: $memPeak)\n";	
+			ob_flush(); // flush PHP's output buffer first, then the system write buffer
+			flush();
+		}
+	}	
+	
+    /**
+    * Get matching cookies
+    *
+    * Only use this method if you cannot use add_cookie_header(), for example, if you want to use
+    * this cookie jar class without using the request class.
+    *
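+    * @param string $url absolute URL; its scheme, host and path are used to select cookies
+    * @return string|false "name=value; name2=value2" Cookie header value (may be empty), or false if $url has no scheme, host or path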
+    * @see add_cookie_header()
+    */
+    public function getMatchingCookies($url)
+    { // builds the Cookie header value ("name=value; ...") for $url from the cookies held in $this->cookies
+		if (($parts = @parse_url($url)) && isset($parts['scheme'], $parts['host'], $parts['path'])) {
+			$param['domain'] = $parts['host'];
+			$param['path'] = $parts['path'];
+			$param['secure'] = (strtolower($parts['scheme']) == 'https');
+			unset($parts);
+		} else {
+			return false; // URL could not be parsed, or it lacks a scheme, host or path
+		}
+        // RFC 2965 notes:
+        //  If multiple cookies satisfy the criteria above, they are ordered in
+        //  the Cookie header such that those with more specific Path attributes
+        //  precede those with less specific.  Ordering with respect to other
+        //  attributes (e.g., Domain) is unspecified.
+        $domain = $param['domain'];
+        if (strpos($domain, '.') === false) $domain .= '.local'; // dotless host names are looked up as "<host>.local"
+        $request_path = $param['path'];
+        if ($request_path == '') $request_path = '/';
+        $request_secure = $param['secure'];
+        $now = time();
+        $matched_cookies = array();
+        // domain - find matching domains
+        $this->debug('Finding matching domains for '.$domain, __FILE__, __LINE__);
+        while (strpos($domain, '.') !== false) {
+            if (isset($this->cookies[$domain])) {
+                $this->debug(' domain match found: '.$domain);
+                $cookies =& $this->cookies[$domain]; // the [path][name] map stored for this domain
+            } else {
+                $domain = $this->_reduce_domain($domain); // nothing stored under this key: try the next shorter domain
+                continue;
+            }
+            // paths - find matching paths starting from most specific
+            $this->debug('  - Finding matching paths for '.$request_path);
+            $paths = array_keys($cookies);
+            usort($paths, array($this, '_cmp_length')); // _cmp_length orders longer strings first
+            foreach ($paths as $path) {
+                // continue to next cookie if request path does not path-match cookie path
+                if (!$this->_path_match($request_path, $path)) continue;
+                // loop through cookie names
+                $this->debug('     path match found: '.$path);
+                foreach ($cookies[$path] as $name => $values) { // $values is array(value, secure, expires)
+                    // if this cookie is secure but request isn't, continue to next cookie
+                    if ($values[1] && !$request_secure) continue;
+                    // if cookie is not a session cookie and has expired, continue to next cookie
+                    if (is_int($values[2]) && ($values[2] < $now)) continue;
+                    // cookie matches request
+                    $this->debug('      cookie match: '.$name.'='.$values[0]);
+                    $matched_cookies[] = $name.'='.$values[0];
+                }
+            }
+            $domain = $this->_reduce_domain($domain); // keep walking up so cookies of shorter domains are collected too
+        }
+        // return cookies
+        return implode('; ', $matched_cookies); // empty string when nothing matched
+    }
+
+    /**
+    * Parse Set-Cookie values.
+    *
+    * Only use this method if you cannot use extract_cookies(), for example, if you want to use
+    * this cookie jar class without using the response class.
+    *
+    * @param string $url URL of the request that produced the response (supplies the request host and path)
+    * @param array $set_cookies array holding 1 or more "Set-Cookie" header values
+    * @return void
+    * @see extract_cookies()
+    */
+    public function storeCookies($url, $set_cookies)
+    { // parses each Set-Cookie value, validates it against the request URL and stores accepted cookies via set_cookie()
+        if (count($set_cookies) == 0) return;
+		$param = @parse_url($url);
+		if (!is_array($param) || !isset($param['host'])) return; // without a request host nothing can be validated
+        $request_host = $param['host'];
+        if (strpos($request_host, '.') === false) $request_host .= '.local'; // dotless hosts are treated as "<host>.local"
+        $request_path = @$param['path'];
+        if ($request_path == '') $request_path = '/';
+        //
+        // loop through set-cookie headers
+        //
+        foreach ($set_cookies as $set_cookie) {
+            $this->debug('Parsing: '.$set_cookie);
+            // temporary cookie store (before adding to jar)
+            $tmp_cookie = array();
+            $param = explode(';', $set_cookie); // $param is reused: from here on it holds the ';'-separated attributes
+            // loop through params
+            for ($x=0; $x<count($param); $x++) {
+                $key_val = explode('=', $param[$x], 2);
+                if (count($key_val) != 2) {
+                    // if the first param isn't a name=value pair, continue to the next set-cookie
+                    // header
+                    if ($x == 0) continue 2;
+                    // check for secure flag
+                    if (strtolower(trim($key_val[0])) == 'secure') $tmp_cookie['secure'] = true;
+                    // continue to next param
+                    continue;
+                }
+                list($key, $val) = array_map('trim', $key_val);
+                // first name=value pair is the cookie name and value
+                // the name and value are stored under 'name' and 'value' to avoid conflicts
+                // with later parameters.
+                if ($x == 0) {
+                    $tmp_cookie = array('name'=>$key, 'value'=>$val);
+                    continue;
+                }
+                $key = strtolower($key);
+                if (in_array($key, array('expires', 'path', 'domain', 'secure'))) {
+                    $tmp_cookie[$key] = $val;
+                }
+            }
+            //
+            // set cookie
+            //
+            // check domain
+            if (isset($tmp_cookie['domain']) && ($tmp_cookie['domain'] != $request_host) &&
+                    ($tmp_cookie['domain'] != ".$request_host")) {
+                $domain = $tmp_cookie['domain'];
+                if ((strpos($domain, '.') === false) && ($domain != 'local')) {
+                    $this->debug(' - domain "'.$domain.'" has no dot and is not a local domain');
+                    continue;
+                }
+                if (preg_match('/\.[0-9]+$/', $domain)) {
+                    $this->debug(' - domain "'.$domain.'" appears to be an ip address');
+                    continue;
+                }
+                if (substr($domain, 0, 1) != '.') $domain = ".$domain";
+                if (!$this->_domain_match($request_host, $domain)) {
+                    $this->debug(' - request host "'.$request_host.'" does not domain-match "'.$domain.'"');
+                    continue;
+                }
+            } else {
+                // if domain is not specified in the set-cookie header, domain will default to
+                // the request host
+                $domain = $request_host;
+            }
+            // check path
+            if (isset($tmp_cookie['path']) && ($tmp_cookie['path'] != '')) {
+                $path = urldecode($tmp_cookie['path']);
+                if (!$this->_path_match($request_path, $path)) {
+                    $this->debug(' - request path "'.$request_path.'" does not path-match "'.$path.'"');
+                    continue;
+                }
+            } else {
+                $path = $request_path;
+                $path = substr($path, 0, strrpos($path, '/')); // default path: request path up to (not including) its last '/'
+                if ($path == '') $path = '/';
+            }
+            // check if secure
+            $secure = (isset($tmp_cookie['secure'])) ? true : false;
+            // check expiry (strtotime() returns false for an unparsable date and a negative value for pre-1970 dates)
+            if (isset($tmp_cookie['expires'])) {
+                if (($expires = strtotime($tmp_cookie['expires'])) === false || $expires < 0) {
+                    $expires = null; // session cookie; passing false on would satisfy set_cookie()'s "<= 0" test and delete the cookie
+                }
+            } else {
+                $expires = null;
+            }
+            // set cookie
+            $this->set_cookie($domain, $path, $tmp_cookie['name'], $tmp_cookie['value'], $secure, $expires);
+        }
+    }
+	
+	// return array of set-cookie values extracted from HTTP response headers (string $h)
+	public function extractCookies($h) { // scans the header text line by line and collects every Set-Cookie value, stopping at the first blank line
+        $x = 0; // index of the next slot in $headers
+        $lines = 0; // incremented per line but not read anywhere in this method
+        $headers = array();
+        $last_match = false; // true while the previous header line was a Set-Cookie line
+		$h = explode("\n", $h);
+        foreach ($h as $line) {
+			$line = rtrim($line); // also removes the "\r" of CRLF line endings
+            $lines++;
+
+            $trimmed_line = trim($line);
+            if (isset($line_last)) { // false only for the very first line, which is therefore never treated as blank or folded
+                // check if we have \r\n\r\n (indicating the end of headers)
+                // some servers will not use CRLF (\r\n), so we make CR (\r) optional.
+                // if (preg_match('/\015?\012\015?\012/', $line_last.$line)) {
+                //     break;
+                // }
+                // As an alternative, we can check if the current trimmed line is empty
+                if ($trimmed_line == '') {
+                    break;
+                }
+
+                // check for continuation line...
+                // RFC 2616 Section 2.2 "Basic Rules":
+                // HTTP/1.1 header field values can be folded onto multiple lines if the
+                // continuation line begins with a space or horizontal tab. All linear
+                // white space, including folding, has the same semantics as SP. A
+                // recipient MAY replace any linear white space with a single SP before
+                // interpreting the field value or forwarding the message downstream.
+                if ($last_match && preg_match('/^\s+(.*)/', $line, $match)) {
+                    // append to previous header value
+                    $headers[$x-1] .= ' '.rtrim($match[1]);
+                    continue;
+                }
+            }
+            $line_last = $line;
+
+            // split header name and value
+            if (preg_match('/^Set-Cookie\s*:\s*(.*)/i', $line, $match)) { // header name is matched case-insensitively
+                $headers[$x++] = rtrim($match[1]);
+                $last_match = true;
+            } else {
+                $last_match = false;
+            }
+        }
+        return $headers;
+	}
+
+    /**
+    * Set Cookie
+    * @param string $domain
+    * @param string $path
+    * @param string $name cookie name
+    * @param string $value cookie value
+    * @param bool $secure
+    * @param int $expires expiry time (null if session cookie, <= 0 will delete cookie)
+    * @return void
+    */
+    function set_cookie($domain, $path, $name, $value, $secure=false, $expires=null)
+    { // stores, replaces or deletes one cookie in $this->cookies[$domain][$path][$name]
+        if ($domain == '') return; // domain, path and name are all required keys
+        if ($path == '') return;
+        if ($name == '') return;
+        // check if cookie needs to go
+        if (isset($expires) && ($expires <= 0)) { // a non-null expiry that is <= 0 means "delete"
+            if (isset($this->cookies[$domain][$path][$name])) unset($this->cookies[$domain][$path][$name]);
+            return;
+        }
+        if ($value == '') return; // an empty value is never stored (and does not remove an existing entry)
+        $this->cookies[$domain][$path][$name] = array($value, $secure, $expires); // 0 => value, 1 => secure, 2 => expires
+        return;
+    }
+
+    /**
+    * Clear cookies - [domain [,path [,name]]] - call method with no arguments to clear all cookies.
+    * @param string $domain
+    * @param string $path
+    * @param string $name
+    * @return void
+    */
+    function clear($domain=null, $path=null, $name=null)
+    { // removes cookies at the most specific level for which arguments were supplied
+        if (!isset($domain)) {
+            $this->cookies = array(); // no arguments: empty the whole jar
+        } elseif (!isset($path)) {
+            if (isset($this->cookies[$domain])) unset($this->cookies[$domain]); // everything stored for the domain
+        } elseif (!isset($name)) {
+            if (isset($this->cookies[$domain][$path])) unset($this->cookies[$domain][$path]); // everything under domain + path
+        } elseif (isset($name)) { // always true at this point, since the previous branch handled the unset case
+            if (isset($this->cookies[$domain][$path][$name])) unset($this->cookies[$domain][$path][$name]);
+        }
+    }
+
+    /**
+    * Compare string length - used for sorting
+    * @access private
+    * @return int
+    */
+    function _cmp_length($a, $b)
+    { // usort() comparator that orders strings by byte length, longest first
+        $la = strlen($a); $lb = strlen($b);
+        if ($la == $lb) return 0;
+        return ($la > $lb) ? -1 : 1; // -1 when $a is longer, so longer strings sort earlier
+    }
+
+    /**
+    * Reduce domain
+    * @param string $domain
+    * @return string
+    * @access private
+    */
+    function _reduce_domain($domain)
+    { // one step up the domain chain: "www.example.com" -> ".example.com" -> "example.com" -> ".com" -> "com"
+        if ($domain == '') return '';
+        if (substr($domain, 0, 1) == '.') return substr($domain, 1); // leading dot: just strip it
+        return substr($domain, strpos($domain, '.')); // otherwise drop the first label, keeping the dot; callers in this class only call this while $domain contains a dot
+    }
+
+    /**
+    * Path match - check if path1 path-matches path2
+    *
+    * From RFC 2965: 
+    *   <i>For two strings that represent paths, P1 and P2, P1 path-matches P2
+    *   if P2 is a prefix of P1 (including the case where P1 and P2 string-
+    *   compare equal).  Thus, the string /tec/waldo path-matches /tec.</i>
+    * @param string $path1
+    * @param string $path2
+    * @return bool
+    * @access private
+    */
+    function _path_match($path1, $path2)
+    { // true when $path2 is a plain string prefix of $path1 (no check that the prefix ends on a '/' boundary)
+        return (substr($path1, 0, strlen($path2)) == $path2);
+    }
+
+    /**
+    * Domain match - check if domain1 domain-matches domain2
+    *
+    * A few extracts from RFC 2965: 
+    *  -  A Set-Cookie2 from request-host y.x.foo.com for Domain=.foo.com
+    *     would be rejected, because H is y.x and contains a dot.
+    *
+    *  -  A Set-Cookie2 from request-host x.foo.com for Domain=.foo.com
+    *     would be accepted.
+    *
+    *  -  A Set-Cookie2 with Domain=.com or Domain=.com., will always be
+    *     rejected, because there is no embedded dot.
+    *
+    *  -  A Set-Cookie2 from request-host example for Domain=.local will
+    *     be accepted, because the effective host name for the request-
+    *     host is example.local, and example.local domain-matches .local.
+    *
+    * I'm ignoring the first point for now (must check to see how other browsers handle
+    * this rule for Set-Cookie headers)
+    *
+    * @param string $domain1
+    * @param string $domain2
+    * @return bool
+    * @access private
+    */
+    function _domain_match($domain1, $domain2)
+    { // true when $domain2 equals $domain1 or one of the shorter forms produced by repeatedly reducing $domain1
+        $domain1 = strtolower($domain1); // comparison is case-insensitive
+        $domain2 = strtolower($domain2);
+        while (strpos($domain1, '.') !== false) { // stops once $domain1 has been reduced to a dotless label
+            if ($domain1 == $domain2) return true;
+            $domain1 = $this->_reduce_domain($domain1);
+            continue; // no effect: already at the end of the loop body
+        }
+        return false;
+    }
+}
\ No newline at end of file
diff --git a/inc/3rdparty/libraries/humble-http-agent/HumbleHttpAgent.php b/inc/3rdparty/libraries/humble-http-agent/HumbleHttpAgent.php
index e4f1b3b3..963f0c05 100644
--- a/inc/3rdparty/libraries/humble-http-agent/HumbleHttpAgent.php
+++ b/inc/3rdparty/libraries/humble-http-agent/HumbleHttpAgent.php
@@ -1,779 +1,810 @@
-<?php
-/**
- * Humble HTTP Agent
- * 
- * This class is designed to take advantage of parallel HTTP requests
- * offered by PHP's PECL HTTP extension or the curl_multi_* functions. 
- * For environments which do not have these options, it reverts to standard sequential 
- * requests (using file_get_contents())
- * 
- * @version 1.1
- * @date 2012-08-20
- * @see http://php.net/HttpRequestPool
- * @author Keyvan Minoukadeh
- * @copyright 2011-2012 Keyvan Minoukadeh
- * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
- */
-
-class HumbleHttpAgent
-{
-	const METHOD_REQUEST_POOL = 1;
-	const METHOD_CURL_MULTI = 2;
-	const METHOD_FILE_GET_CONTENTS = 4;
-	//const UA_BROWSER = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0.1) Gecko/20100101 Firefox/4.0.1';
-	const UA_BROWSER = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.92 Safari/535.2';
-	const UA_PHP = 'PHP/5.2';
-	const REF_GOOGLE = 'http://www.google.co.uk/url?sa=t&source=web&cd=1';
-	
-	protected $requests = array();
-	protected $redirectQueue = array();
-	protected $requestOptions;
-	protected $maxParallelRequests = 5;
-	protected $cache = null; //TODO
-	protected $httpContext;
-	protected $minimiseMemoryUse = false; //TODO
-	protected $method;
-	protected $cookieJar;
-	public $debug = false;
-	public $debugVerbose = false;
-	public $rewriteHashbangFragment = true; // see http://code.google.com/web/ajaxcrawling/docs/specification.html
-	public $maxRedirects = 5;
-	public $userAgentMap = array();
-	public $rewriteUrls = array();
-	public $userAgentDefault;
-	public $referer;
-	//public $userAgent = 'Mozilla/5.0';
-	
-	// Prevent certain file/mime types
-	// HTTP responses which match these content types will
-	// be returned without body.
-	public $headerOnlyTypes = array();
-	// URLs ending with one of these extensions will
-	// prompt Humble HTTP Agent to send a HEAD request first
-	// to see if returned content type matches $headerOnlyTypes.
-	public $headerOnlyClues = array('pdf','mp3','zip','exe','gif','gzip','gz','jpeg','jpg','mpg','mpeg','png','ppt','mov');
-	// AJAX triggers to search for.
-	// for AJAX sites, e.g. Blogger with its dynamic views templates.
-	public $ajaxTriggers = array("<meta name='fragment' content='!'",'<meta name="fragment" content="!"',"<meta content='!' name='fragment'",'<meta content="!" name="fragment"');
-	
-	//TODO: set max file size
-	//TODO: normalise headers
-	
-	function __construct($requestOptions=null, $method=null) {
-		$this->userAgentDefault = self::UA_BROWSER;
-		$this->referer = self::REF_GOOGLE;
-		// set the request method
-		if (in_array($method, array(1,2,4))) {
-			$this->method = $method;
-		} else {
-			if (class_exists('HttpRequestPool')) {
-				$this->method = self::METHOD_REQUEST_POOL;
-			} elseif (function_exists('curl_multi_init')) {
-				$this->method = self::METHOD_CURL_MULTI;
-			} else {
-				$this->method = self::METHOD_FILE_GET_CONTENTS;
-			}
-		}
-		if ($this->method == self::METHOD_CURL_MULTI) {
-			require_once(dirname(__FILE__).'/RollingCurl.php');
-		}
-		// create cookie jar
-		$this->cookieJar = new CookieJar();
-		// set request options (redirect must be 0)
-		$this->requestOptions = array(
-			'timeout' => 15,
-			'redirect' => 0 // we handle redirects manually so we can rewrite the new hashbang URLs that are creeping up over the web
-			// TODO: test onprogress?
-		);
-		if (is_array($requestOptions)) {
-			$this->requestOptions = array_merge($this->requestOptions, $requestOptions);
-		}
-		$this->httpContext = array(
-			'http' => array(
-				'ignore_errors' => true,
-				'timeout' => $this->requestOptions['timeout'],
-				'max_redirects' => $this->requestOptions['redirect'],
-				'header' => "Accept: */*\r\n"
-				)
-			);
-	}
-	
-	protected function debug($msg) {
-		if ($this->debug) {
-			$mem = round(memory_get_usage()/1024, 2);
-			$memPeak = round(memory_get_peak_usage()/1024, 2);
-			echo '* ',$msg;
-			if ($this->debugVerbose) echo ' - mem used: ',$mem," (peak: $memPeak)";
-			echo "\n";
-			ob_flush();
-			flush();
-		}
-	}
-	
-	protected function getUserAgent($url, $asArray=false) {
-		$host = @parse_url($url, PHP_URL_HOST);
-		if (strtolower(substr($host, 0, 4)) == 'www.') {
-			$host = substr($host, 4);
-		}
-		if ($host) {
-			$try = array($host);
-			$split = explode('.', $host);
-			if (count($split) > 1) {
-				array_shift($split);
-				$try[] = '.'.implode('.', $split);
-			}
-			foreach ($try as $h) {
-				if (isset($this->userAgentMap[$h])) {
-					$ua = $this->userAgentMap[$h];
-					break;
-				}
-			}
-		}
-		if (!isset($ua)) $ua = $this->userAgentDefault;
-		if ($asArray) {
-			return array('User-Agent' => $ua);
-		} else {
-			return 'User-Agent: '.$ua;
-		}
-	}
-	
-	public function rewriteHashbangFragment($url) {
-		// return $url if there's no '#!'
-		if (strpos($url, '#!') === false) return $url;
-		// split $url and rewrite
-		// TODO: is SimplePie_IRI included?
-		$iri = new SimplePie_IRI($url);
-		$fragment = substr($iri->fragment, 1); // strip '!'
-		$iri->fragment = null;
-		if (isset($iri->query)) {
-			parse_str($iri->query, $query);
-		} else {
-			$query = array();
-		}
-		$query['_escaped_fragment_'] = (string)$fragment;
-		$iri->query = str_replace('%2F', '/', http_build_query($query)); // needed for some sites
-		return $iri->get_iri();
-	}
-	
-	public function getUglyURL($url, $html) {
-		if ($html == '') return false;
-		$found = false;
-		foreach ($this->ajaxTriggers as $string) {
-			if (stripos($html, $string)) {
-				$found = true;
-				break;
-			}
-		}
-		if (!$found) return false;
-		$iri = new SimplePie_IRI($url);
-		if (isset($iri->query)) {
-			parse_str($iri->query, $query);
-		} else {
-			$query = array();
-		}
-		$query['_escaped_fragment_'] = '';
-		$iri->query = str_replace('%2F', '/', http_build_query($query)); // needed for some sites
-		return $iri->get_iri();
-	}
-	
-	public function removeFragment($url) {
-		$pos = strpos($url, '#');
-		if ($pos === false) {
-			return $url;
-		} else {
-			return substr($url, 0, $pos);
-		}
-	}
-	
-	public function rewriteUrls($url) {
-		foreach ($this->rewriteUrls as $find => $action) {
-			if (strpos($url, $find) !== false) {
-				if (is_array($action)) {
-					return strtr($url, $action);
-				}
-			}
-		}
-		return $url;
-	}
-	
-	public function enableDebug($bool=true) {
-		$this->debug = (bool)$bool;
-	}
-	
-	public function minimiseMemoryUse($bool = true) {
-		$this->minimiseMemoryUse = $bool;
-	}
-	
-	public function setMaxParallelRequests($max) {
-		$this->maxParallelRequests = $max;
-	}
-	
-	public function validateUrl($url) {
-		$url = filter_var($url, FILTER_SANITIZE_URL);
-		$test = filter_var($url, FILTER_VALIDATE_URL, FILTER_FLAG_SCHEME_REQUIRED);
-		// deal with bug http://bugs.php.net/51192 (present in PHP 5.2.13 and PHP 5.3.2)
-		if ($test === false) {
-			$test = filter_var(strtr($url, '-', '_'), FILTER_VALIDATE_URL, FILTER_FLAG_SCHEME_REQUIRED);
-		}
-		if ($test !== false && $test !== null && preg_match('!^https?://!', $url)) {
-			return $url;
-		} else {
-			return false;
-		}
-	}
-	
-	public function fetchAll(array $urls) {
-		$this->fetchAllOnce($urls, $isRedirect=false);
-		$redirects = 0;
-		while (!empty($this->redirectQueue) && ++$redirects <= $this->maxRedirects) {
-			$this->debug("Following redirects #$redirects...");
-			$this->fetchAllOnce($this->redirectQueue, $isRedirect=true);
-		}
-	}
-	
-	// fetch all URLs without following redirects
-	public function fetchAllOnce(array $urls, $isRedirect=false) {
-		if (!$isRedirect) $urls = array_unique($urls);
-		if (empty($urls)) return;
-		
-		//////////////////////////////////////////////////////
-		// parallel (HttpRequestPool)
-		if ($this->method == self::METHOD_REQUEST_POOL) {
-			$this->debug('Starting parallel fetch (HttpRequestPool)');
-			try {
-				while (count($urls) > 0) {
-					$this->debug('Processing set of '.min($this->maxParallelRequests, count($urls)));
-					$subset = array_splice($urls, 0, $this->maxParallelRequests);
-					$pool = new HttpRequestPool();
-					foreach ($subset as $orig => $url) {
-						if (!$isRedirect) $orig = $url;
-						unset($this->redirectQueue[$orig]);
-						$this->debug("...$url");
-						if (!$isRedirect && isset($this->requests[$url])) {
-							$this->debug("......in memory");
-						/*
-						} elseif ($this->isCached($url)) {
-							$this->debug("......is cached");
-							if (!$this->minimiseMemoryUse) {
-								$this->requests[$url] = $this->getCached($url);
-							}
-						*/
-						} else {
-							$this->debug("......adding to pool");
-							$req_url = $this->rewriteUrls($url);
-							$req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url;
-							$req_url = $this->removeFragment($req_url);
-							if (!empty($this->headerOnlyTypes) && !isset($this->requests[$orig]['wrongGuess']) && $this->possibleUnsupportedType($req_url)) {
-								$_meth = HttpRequest::METH_HEAD;
-							} else {
-								$_meth = HttpRequest::METH_GET;
-								unset($this->requests[$orig]['wrongGuess']);
-							}
-							$httpRequest = new HttpRequest($req_url, $_meth, $this->requestOptions);
-							// send cookies, if we have any
-							if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
-								$this->debug("......sending cookies: $cookies");
-								$httpRequest->addHeaders(array('Cookie' => $cookies));
-							}
-							//$httpRequest->addHeaders(array('User-Agent' => $this->userAgent));
-							$httpRequest->addHeaders($this->getUserAgent($req_url, true));
-							// add referer for picky sites
-							$httpRequest->addheaders(array('Referer' => $this->referer));
-							$this->requests[$orig] = array('headers'=>null, 'body'=>null, 'httpRequest'=>$httpRequest);
-							$this->requests[$orig]['original_url'] = $orig;
-							$pool->attach($httpRequest);
-						}
-					}
-					// did we get anything into the pool?
-					if (count($pool) > 0) {
-						$this->debug('Sending request...');
-						try {
-							$pool->send();
-						} catch (HttpRequestPoolException $e) {
-							// do nothing
-						}
-						$this->debug('Received responses');
-						foreach($subset as $orig => $url) {
-							if (!$isRedirect) $orig = $url;
-							$request = $this->requests[$orig]['httpRequest'];
-							//$this->requests[$orig]['headers'] = $this->headersToString($request->getResponseHeader());
-							// getResponseHeader() doesn't return status line, so, for consistency...
-							$this->requests[$orig]['headers'] = substr($request->getRawResponseMessage(), 0, $request->getResponseInfo('header_size'));
-							// check content type
-							// TODO: use getResponseHeader('content-type') or getResponseInfo()
-							if ($this->headerOnlyType($this->requests[$orig]['headers'])) {
-								$this->requests[$orig]['body'] = '';
-								$_header_only_type = true;
-								$this->debug('Header only type returned');
-							} else {
-								$this->requests[$orig]['body'] = $request->getResponseBody();
-								$_header_only_type = false;
-							}
-							$this->requests[$orig]['effective_url'] = $request->getResponseInfo('effective_url');
-							$this->requests[$orig]['status_code'] = $status_code = $request->getResponseCode();
-							// is redirect?
-							if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && $request->getResponseHeader('location')) {
-								$redirectURL = $request->getResponseHeader('location');
-								if (!preg_match('!^https?://!i', $redirectURL)) {
-									$redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
-								}
-								if ($this->validateURL($redirectURL)) {
-									$this->debug('Redirect detected. Valid URL: '.$redirectURL);
-									// store any cookies
-									$cookies = $request->getResponseHeader('set-cookie');
-									if ($cookies && !is_array($cookies)) $cookies = array($cookies);
-									if ($cookies) $this->cookieJar->storeCookies($url, $cookies);
-									$this->redirectQueue[$orig] = $redirectURL;
-								} else {
-									$this->debug('Redirect detected. Invalid URL: '.$redirectURL);
-								}
-							} elseif (!$_header_only_type && $request->getMethod() === HttpRequest::METH_HEAD) {
-								// the response content-type did not match our 'header only' types, 
-								// but we'd issues a HEAD request because we assumed it would. So
-								// let's queue a proper GET request for this item...
-								$this->debug('Wrong guess at content-type, queing GET request');
-								$this->requests[$orig]['wrongGuess'] = true;
-								$this->redirectQueue[$orig] = $this->requests[$orig]['effective_url'];
-							} elseif (strpos($this->requests[$orig]['effective_url'], '_escaped_fragment_') === false) {
-								// check for <meta name='fragment' content='!'/>
-								// for AJAX sites, e.g. Blogger with its dynamic views templates.
-								// Based on Google's spec: https://developers.google.com/webmasters/ajax-crawling/docs/specification
-								if (isset($this->requests[$orig]['body'])) {
-									$redirectURL = $this->getUglyURL($this->requests[$orig]['effective_url'], substr($this->requests[$orig]['body'], 0, 4000));
-									if ($redirectURL) {
-										$this->debug('AJAX trigger (meta name="fragment" content="!") found. Queueing '.$redirectURL);
-										$this->redirectQueue[$orig] = $redirectURL;
-									}
-								}
-							}
-							//die($url.' -multi- '.$request->getResponseInfo('effective_url'));
-							$pool->detach($request);
-							unset($this->requests[$orig]['httpRequest'], $request);
-							/*
-							if ($this->minimiseMemoryUse) {
-								if ($this->cache($url)) {
-									unset($this->requests[$url]);
-								}
-							}
-							*/
-						}
-					}
-				}
-			} catch (HttpException $e) {
-				$this->debug($e);
-				return false;
-			}
-		}
-		
-		//////////////////////////////////////////////////////////
-		// parallel (curl_multi_*)
-		elseif ($this->method == self::METHOD_CURL_MULTI) {
-			$this->debug('Starting parallel fetch (curl_multi_*)');
-			while (count($urls) > 0) {
-				$this->debug('Processing set of '.min($this->maxParallelRequests, count($urls)));
-				$subset = array_splice($urls, 0, $this->maxParallelRequests);
-				$pool = new RollingCurl(array($this, 'handleCurlResponse'));
-				$pool->window_size = count($subset);		
-				
-				foreach ($subset as $orig => $url) {
-					if (!$isRedirect) $orig = $url;
-					unset($this->redirectQueue[$orig]);
-					$this->debug("...$url");
-					if (!$isRedirect && isset($this->requests[$url])) {
-						$this->debug("......in memory");
-					/*
-					} elseif ($this->isCached($url)) {
-						$this->debug("......is cached");
-						if (!$this->minimiseMemoryUse) {
-							$this->requests[$url] = $this->getCached($url);
-						}
-					*/
-					} else {
-						$this->debug("......adding to pool");
-						$req_url = $this->rewriteUrls($url);
-						$req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url;
-						$req_url = $this->removeFragment($req_url);
-						if (!empty($this->headerOnlyTypes) && !isset($this->requests[$orig]['wrongGuess']) && $this->possibleUnsupportedType($req_url)) {
-							$_meth = 'HEAD';
-						} else {
-							$_meth = 'GET';
-							unset($this->requests[$orig]['wrongGuess']);
-						}						
-						$headers = array();
-						//$headers[] = 'User-Agent: '.$this->userAgent;
-						$headers[] = $this->getUserAgent($req_url);
-						// add referer for picky sites
-						$headers[] = 'Referer: '.$this->referer;
-						// send cookies, if we have any
-						if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
-							$this->debug("......sending cookies: $cookies");
-							$headers[] = 'Cookie: '.$cookies;
-						}
-						$httpRequest = new RollingCurlRequest($req_url, $_meth, null, $headers, array(
-							CURLOPT_CONNECTTIMEOUT => $this->requestOptions['timeout'],
-							CURLOPT_TIMEOUT => $this->requestOptions['timeout']
-							));
-						$httpRequest->set_original_url($orig);
-						$this->requests[$orig] = array('headers'=>null, 'body'=>null, 'httpRequest'=>$httpRequest);
-						$this->requests[$orig]['original_url'] = $orig; // TODO: is this needed anymore?
-						$pool->add($httpRequest);
-					}
-				}
-				// did we get anything into the pool?
-				if (count($pool) > 0) {
-					$this->debug('Sending request...');
-					$pool->execute(); // this will call handleCurlResponse() and populate $this->requests[$orig]
-					$this->debug('Received responses');
-					foreach($subset as $orig => $url) {
-						if (!$isRedirect) $orig = $url;
-						// $this->requests[$orig]['headers']
-						// $this->requests[$orig]['body']
-						// $this->requests[$orig]['effective_url']
-						// check content type
-						if ($this->headerOnlyType($this->requests[$orig]['headers'])) {
-							$this->requests[$orig]['body'] = '';
-							$_header_only_type = true;
-							$this->debug('Header only type returned');
-						} else {
-							$_header_only_type = false;
-						}
-						$status_code = $this->requests[$orig]['status_code'];
-						if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) {
-							$redirectURL = $this->requests[$orig]['location'];
-							if (!preg_match('!^https?://!i', $redirectURL)) {
-								$redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
-							}
-							if ($this->validateURL($redirectURL)) {
-								$this->debug('Redirect detected. Valid URL: '.$redirectURL);
-								// store any cookies
-								$cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']);
-								if (!empty($cookies)) $this->cookieJar->storeCookies($url, $cookies);							
-								$this->redirectQueue[$orig] = $redirectURL;
-							} else {
-								$this->debug('Redirect detected. Invalid URL: '.$redirectURL);
-							}
-						} elseif (!$_header_only_type && $this->requests[$orig]['method'] == 'HEAD') {
-							// the response content-type did not match our 'header only' types, 
-							// but we'd issues a HEAD request because we assumed it would. So
-							// let's queue a proper GET request for this item...
-							$this->debug('Wrong guess at content-type, queing GET request');
-							$this->requests[$orig]['wrongGuess'] = true;
-							$this->redirectQueue[$orig] = $this->requests[$orig]['effective_url'];
-						} elseif (strpos($this->requests[$orig]['effective_url'], '_escaped_fragment_') === false) {
-							// check for <meta name='fragment' content='!'/>
-							// for AJAX sites, e.g. Blogger with its dynamic views templates.
-							// Based on Google's spec: https://developers.google.com/webmasters/ajax-crawling/docs/specification
-							if (isset($this->requests[$orig]['body'])) {
-								$redirectURL = $this->getUglyURL($this->requests[$orig]['effective_url'], substr($this->requests[$orig]['body'], 0, 4000));
-								if ($redirectURL) {
-									$this->debug('AJAX trigger (meta name="fragment" content="!") found. Queueing '.$redirectURL);
-									$this->redirectQueue[$orig] = $redirectURL;
-								}
-							}
-						}
-						// die($url.' -multi- '.$request->getResponseInfo('effective_url'));
-						unset($this->requests[$orig]['httpRequest'], $this->requests[$orig]['method']);
-					}
-				}
-			}
-		}
-
-		//////////////////////////////////////////////////////
-		// sequential (file_get_contents)
-		else {
-			$this->debug('Starting sequential fetch (file_get_contents)');
-			$this->debug('Processing set of '.count($urls));
-			foreach ($urls as $orig => $url) {
-				if (!$isRedirect) $orig = $url;
-				unset($this->redirectQueue[$orig]);
-				$this->debug("...$url");
-				if (!$isRedirect && isset($this->requests[$url])) {
-					$this->debug("......in memory");
-				/*
-				} elseif ($this->isCached($url)) {
-					$this->debug("......is cached");
-					if (!$this->minimiseMemoryUse) {
-						$this->requests[$url] = $this->getCached($url);
-					}
-				*/
-				} else {
-					$this->debug("Sending request for $url");
-					$this->requests[$orig]['original_url'] = $orig;
-					$req_url = $this->rewriteUrls($url);
-					$req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url;
-					$req_url = $this->removeFragment($req_url);
-					// send cookies, if we have any
-					$httpContext = $this->httpContext;
-					$httpContext['http']['header'] .= $this->getUserAgent($req_url)."\r\n";
-					// add referer for picky sites
-					$httpContext['http']['header'] .= 'Referer: '.$this->referer."\r\n";
-					if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
-						$this->debug("......sending cookies: $cookies");
-						$httpContext['http']['header'] .= 'Cookie: '.$cookies."\r\n";
-					}
-					if (false !== ($html = @file_get_contents($req_url, false, stream_context_create($httpContext)))) {
-						$this->debug('Received response');
-						// get status code
-						if (!isset($http_response_header[0]) || !preg_match('!^HTTP/\d+\.\d+\s+(\d+)!', trim($http_response_header[0]), $match)) {
-							$this->debug('Error: no status code found');
-							// TODO: handle error - no status code
-						} else {
-							$this->requests[$orig]['headers'] = $this->headersToString($http_response_header, false);
-							// check content type
-							if ($this->headerOnlyType($this->requests[$orig]['headers'])) {
-								$this->requests[$orig]['body'] = '';
-							} else {
-								$this->requests[$orig]['body'] = $html;
-							}
-							$this->requests[$orig]['effective_url'] = $req_url;
-							$this->requests[$orig]['status_code'] = $status_code = (int)$match[1];
-							unset($match);
-							// handle redirect
-							if (preg_match('/^Location:(.*?)$/mi', $this->requests[$orig]['headers'], $match)) {
-								$this->requests[$orig]['location'] =  trim($match[1]);
-							}
-							if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) {
-								$redirectURL = $this->requests[$orig]['location'];
-								if (!preg_match('!^https?://!i', $redirectURL)) {
-									$redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
-								}
-								if ($this->validateURL($redirectURL)) {
-									$this->debug('Redirect detected. Valid URL: '.$redirectURL);
-									// store any cookies
-									$cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']);
-									if (!empty($cookies)) $this->cookieJar->storeCookies($url, $cookies);
-									$this->redirectQueue[$orig] = $redirectURL;
-								} else {
-									$this->debug('Redirect detected. Invalid URL: '.$redirectURL);
-								}
-							} elseif (strpos($this->requests[$orig]['effective_url'], '_escaped_fragment_') === false) {
-								// check for <meta name='fragment' content='!'/>
-								// for AJAX sites, e.g. Blogger with its dynamic views templates.
-								// Based on Google's spec: https://developers.google.com/webmasters/ajax-crawling/docs/specification
-								if (isset($this->requests[$orig]['body'])) {
-									$redirectURL = $this->getUglyURL($this->requests[$orig]['effective_url'], substr($this->requests[$orig]['body'], 0, 4000));
-									if ($redirectURL) {
-										$this->debug('AJAX trigger (meta name="fragment" content="!") found. Queueing '.$redirectURL);
-										$this->redirectQueue[$orig] = $redirectURL;
-									}
-								}
-							}
-						}
-					} else {
-						$this->debug('Error retrieving URL');
-						//print_r($req_url);
-						//print_r($http_response_header);
-						//print_r($html);
-						
-						// TODO: handle error - failed to retrieve URL
-					}
-				}
-			}
-		}
-	}
-	
-	public function handleCurlResponse($response, $info, $request) {
-		$orig = $request->url_original;
-		$this->requests[$orig]['headers'] = substr($response, 0, $info['header_size']);
-		$this->requests[$orig]['body'] = substr($response, $info['header_size']);
-		$this->requests[$orig]['method'] = $request->method;
-		$this->requests[$orig]['effective_url'] = $info['url'];
-		$this->requests[$orig]['status_code'] = (int)$info['http_code'];
-		if (preg_match('/^Location:(.*?)$/mi', $this->requests[$orig]['headers'], $match)) {
-			$this->requests[$orig]['location'] =  trim($match[1]);
-		}
-	}
-	
-	protected function headersToString(array $headers, $associative=true) {
-		if (!$associative) {
-			return implode("\n", $headers);
-		} else {
-			$str = '';
-			foreach ($headers as $key => $val) {
-				if (is_array($val)) {
-					foreach ($val as $v) $str .= "$key: $v\n";
-				} else {
-					$str .= "$key: $val\n";
-				}
-			}
-			return rtrim($str);
-		}
-	}
-	
-	public function get($url, $remove=false, $gzdecode=true) {
-		$url = "$url";
-		if (isset($this->requests[$url]) && isset($this->requests[$url]['body'])) {
-			$this->debug("URL already fetched - in memory ($url, effective: {$this->requests[$url]['effective_url']})");
-			$response = $this->requests[$url];
-		/*
-		} elseif ($this->isCached($url)) {
-			$this->debug("URL already fetched - in disk cache ($url)");
-			$response = $this->getCached($url);
-			$this->requests[$url] = $response;
-		*/
-		} else {
-			$this->debug("Fetching URL ($url)");
-			$this->fetchAll(array($url));
-			if (isset($this->requests[$url]) && isset($this->requests[$url]['body'])) {
-				$response = $this->requests[$url];
-			} else {
-				$this->debug("Request failed");
-				$response = false;
-			}
-		}
-		/*
-		if ($this->minimiseMemoryUse && $response) {
-			$this->cache($url);
-			unset($this->requests[$url]);
-		}
-		*/
-		if ($remove && $response) unset($this->requests[$url]);
-		if ($gzdecode && stripos($response['headers'], 'Content-Encoding: gzip')) {
-			if ($html = gzdecode($response['body'])) {
-				$response['body'] = $html;
-			}
-		}
-		return $response;
-	}
-	
-	public function parallelSupport() {
-		return class_exists('HttpRequestPool') || function_exists('curl_multi_init');
-	}
-	
-	private function headerOnlyType($headers) {
-		if (preg_match('!^Content-Type:\s*(([a-z-]+)/([^;\r\n ]+))!im', $headers, $match)) {
-			// look for full mime type (e.g. image/jpeg) or just type (e.g. image)
-			$match[1] = strtolower(trim($match[1]));
-			$match[2] = strtolower(trim($match[2]));
-			foreach (array($match[1], $match[2]) as $mime) {
-				if (in_array($mime, $this->headerOnlyTypes)) return true;
-			}
-		}
-		return false;
-	}
-	
-	private function possibleUnsupportedType($url) {
-		$path = @parse_url($url, PHP_URL_PATH);
-		if ($path && strpos($path, '.') !== false) {
-			$ext = strtolower(trim(pathinfo($path, PATHINFO_EXTENSION)));
-			return in_array($ext, $this->headerOnlyClues);
-		}
-		return false;
-	}
-}
-
-// gzdecode from http://www.php.net/manual/en/function.gzdecode.php#82930
-if (!function_exists('gzdecode')) {
-	function gzdecode($data,&$filename='',&$error='',$maxlength=null) 
-	{
-		$len = strlen($data);
-		if ($len < 18 || strcmp(substr($data,0,2),"\x1f\x8b")) {
-			$error = "Not in GZIP format.";
-			return null;  // Not GZIP format (See RFC 1952)
-		}
-		$method = ord(substr($data,2,1));  // Compression method
-		$flags  = ord(substr($data,3,1));  // Flags
-		if ($flags & 31 != $flags) {
-			$error = "Reserved bits not allowed.";
-			return null;
-		}
-		// NOTE: $mtime may be negative (PHP integer limitations)
-		$mtime = unpack("V", substr($data,4,4));
-		$mtime = $mtime[1];
-		$xfl   = substr($data,8,1);
-		$os    = substr($data,8,1);
-		$headerlen = 10;
-		$extralen  = 0;
-		$extra     = "";
-		if ($flags & 4) {
-			// 2-byte length prefixed EXTRA data in header
-			if ($len - $headerlen - 2 < 8) {
-				return false;  // invalid
-			}
-			$extralen = unpack("v",substr($data,8,2));
-			$extralen = $extralen[1];
-			if ($len - $headerlen - 2 - $extralen < 8) {
-				return false;  // invalid
-			}
-			$extra = substr($data,10,$extralen);
-			$headerlen += 2 + $extralen;
-		}
-		$filenamelen = 0;
-		$filename = "";
-		if ($flags & 8) {
-			// C-style string
-			if ($len - $headerlen - 1 < 8) {
-				return false; // invalid
-			}
-			$filenamelen = strpos(substr($data,$headerlen),chr(0));
-			if ($filenamelen === false || $len - $headerlen - $filenamelen - 1 < 8) {
-				return false; // invalid
-			}
-			$filename = substr($data,$headerlen,$filenamelen);
-			$headerlen += $filenamelen + 1;
-		}
-		$commentlen = 0;
-		$comment = "";
-		if ($flags & 16) {
-			// C-style string COMMENT data in header
-			if ($len - $headerlen - 1 < 8) {
-				return false;    // invalid
-			}
-			$commentlen = strpos(substr($data,$headerlen),chr(0));
-			if ($commentlen === false || $len - $headerlen - $commentlen - 1 < 8) {
-				return false;    // Invalid header format
-			}
-			$comment = substr($data,$headerlen,$commentlen);
-			$headerlen += $commentlen + 1;
-		}
-		$headercrc = "";
-		if ($flags & 2) {
-			// 2-bytes (lowest order) of CRC32 on header present
-			if ($len - $headerlen - 2 < 8) {
-				return false;    // invalid
-			}
-			$calccrc = crc32(substr($data,0,$headerlen)) & 0xffff;
-			$headercrc = unpack("v", substr($data,$headerlen,2));
-			$headercrc = $headercrc[1];
-			if ($headercrc != $calccrc) {
-				$error = "Header checksum failed.";
-				return false;    // Bad header CRC
-			}
-			$headerlen += 2;
-		}
-		// GZIP FOOTER
-		$datacrc = unpack("V",substr($data,-8,4));
-		$datacrc = sprintf('%u',$datacrc[1] & 0xFFFFFFFF);
-		$isize = unpack("V",substr($data,-4));
-		$isize = $isize[1];
-		// decompression:
-		$bodylen = $len-$headerlen-8;
-		if ($bodylen < 1) {
-			// IMPLEMENTATION BUG!
-			return null;
-		}
-		$body = substr($data,$headerlen,$bodylen);
-		$data = "";
-		if ($bodylen > 0) {
-			switch ($method) {
-			case 8:
-				// Currently the only supported compression method:
-				$data = gzinflate($body,$maxlength);
-				break;
-			default:
-				$error = "Unknown compression method.";
-				return false;
-			}
-		}  // zero-byte body content is allowed
-		// Verifiy CRC32
-		$crc   = sprintf("%u",crc32($data));
-		$crcOK = $crc == $datacrc;
-		$lenOK = $isize == strlen($data);
-		if (!$lenOK || !$crcOK) {
-			$error = ( $lenOK ? '' : 'Length check FAILED. ') . ( $crcOK ? '' : 'Checksum FAILED.');
-			return false;
-		}
-		return $data;
-	}
-}
-?>
\ No newline at end of file
+<?php
+/**
+ * Humble HTTP Agent
+ * 
+ * This class is designed to take advantage of parallel HTTP requests
+ * offered by PHP's PECL HTTP extension or the curl_multi_* functions. 
+ * For environments which do not have these options, it reverts to standard sequential 
+ * requests (using file_get_contents())
+ * 
+ * @version 1.4
+ * @date 2013-05-10
+ * @see http://php.net/HttpRequestPool
+ * @author Keyvan Minoukadeh
+ * @copyright 2011-2013 Keyvan Minoukadeh
+ * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
+ */
+
+class HumbleHttpAgent
+{
+	const METHOD_REQUEST_POOL = 1;
+	const METHOD_CURL_MULTI = 2;
+	const METHOD_FILE_GET_CONTENTS = 4;
+	//const UA_BROWSER = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0.1) Gecko/20100101 Firefox/4.0.1';
+	const UA_BROWSER = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.92 Safari/535.2';
+	const UA_PHP = 'PHP/5.4';
+	const REF_GOOGLE = 'http://www.google.co.uk/url?sa=t&source=web&cd=1';
+	
+	protected $requests = array();
+	protected $redirectQueue = array();
+	protected $requestOptions;
+	protected $maxParallelRequests = 5;
+	protected $cache = null; //TODO
+	protected $httpContext;
+	protected $minimiseMemoryUse = false; //TODO
+	protected $method;
+	protected $cookieJar;
+	public $debug = false;
+	public $debugVerbose = false;
+	public $rewriteHashbangFragment = true; // see http://code.google.com/web/ajaxcrawling/docs/specification.html
+	public $maxRedirects = 5;
+	public $userAgentMap = array();
+	public $rewriteUrls = array();
+	public $userAgentDefault;
+	public $referer;
+	//public $userAgent = 'Mozilla/5.0';
+	
+	// Prevent certain file/mime types
+	// HTTP responses which match these content types will
+	// be returned without body.
+	public $headerOnlyTypes = array();
+	// URLs ending with one of these extensions will
+	// prompt Humble HTTP Agent to send a HEAD request first
+	// to see if returned content type matches $headerOnlyTypes.
+	public $headerOnlyClues = array('pdf','mp3','zip','exe','gif','gzip','gz','jpeg','jpg','mpg','mpeg','png','ppt','mov');
+	// AJAX triggers to search for.
+	// for AJAX sites, e.g. Blogger with its dynamic views templates.
+	public $ajaxTriggers = array("<meta name='fragment' content='!'",'<meta name="fragment" content="!"',"<meta content='!' name='fragment'",'<meta content="!" name="fragment"');
+	
+	//TODO: set max file size
+	//TODO: normalise headers
+	
+	function __construct($requestOptions=null, $method=null) { // $requestOptions: optional array merged over the defaults below; $method: one of the METHOD_* values, auto-detected otherwise
+		$this->userAgentDefault = self::UA_BROWSER;
+		$this->referer = self::REF_GOOGLE;
+		// set the request method: honour an explicit choice, else pick the first available transport
+		if (in_array($method, array(1,2,4))) { // 1, 2, 4 are the values of the METHOD_* class constants
+			$this->method = $method;
+		} else {
+			if (class_exists('HttpRequestPool')) {
+				$this->method = self::METHOD_REQUEST_POOL;
+			} elseif (function_exists('curl_multi_init')) {
+				$this->method = self::METHOD_CURL_MULTI;
+			} else {
+				$this->method = self::METHOD_FILE_GET_CONTENTS;
+			}
+		}
+		if ($this->method == self::METHOD_CURL_MULTI) {
+			require_once(dirname(__FILE__).'/RollingCurl.php'); // only loaded when the curl_multi transport is selected
+		}
+		// create cookie jar
+		$this->cookieJar = new CookieJar();
+		// set request options (redirect must be 0)
+		$this->requestOptions = array(
+			'timeout' => 15,
+			'connecttimeout' => 15,
+			'dns_cache_timeout' => 300,
+			'redirect' => 0 // we handle redirects manually so we can rewrite the new hashbang URLs that are creeping up over the web
+			// TODO: test onprogress?
+		);
+		if (is_array($requestOptions)) {
+			$this->requestOptions = array_merge($this->requestOptions, $requestOptions); // caller-supplied keys override the defaults
+		}
+		$this->httpContext = array( // 'http' context options derived from the merged request options
+			'http' => array(
+				'ignore_errors' => true,
+				'timeout' => $this->requestOptions['timeout'],
+				'max_redirects' => $this->requestOptions['redirect'],
+				'header' => "Accept: */*\r\n"
+				)
+			);
+	}
+	
+	protected function debug($msg) { // echo $msg as a "* ..." line, but only while $this->debug is enabled
+		if ($this->debug) {
+			$mem = round(memory_get_usage()/1024, 2);
+			$memPeak = round(memory_get_peak_usage()/1024, 2);
+			echo '* ',$msg;
+			if ($this->debugVerbose) echo ' - mem used: ',$mem," (peak: $memPeak)"; // memory figures are in KB
+			echo "\n";
+			if (ob_get_level() > 0) ob_flush(); // ob_flush() raises a notice when no output buffer is active
+			flush();
+		}
+	}
+	
+	protected function getUserAgent($url, $asArray=false) { // User-Agent for $url's host: array('User-Agent' => ...) if $asArray, else a "User-Agent: ..." header line
+		$host = @parse_url($url, PHP_URL_HOST);
+		if (strtolower(substr($host, 0, 4)) == 'www.') { // a leading "www." is ignored when looking up the map
+			$host = substr($host, 4);
+		}
+		if ($host) {
+			$try = array($host); // candidate keys for $this->userAgentMap, most specific first
+			$split = explode('.', $host);
+			if (count($split) > 1) {
+				array_shift($split);
+				$try[] = '.'.implode('.', $split); // host minus its first label, dot-prefixed (e.g. ".example.com")
+			}
+			foreach ($try as $h) {
+				if (isset($this->userAgentMap[$h])) {
+					$ua = $this->userAgentMap[$h];
+					break;
+				}
+			}
+		}
+		if (!isset($ua)) $ua = $this->userAgentDefault; // no map entry matched, or no host could be parsed
+		if ($asArray) {
+			return array('User-Agent' => $ua);
+		} else {
+			return 'User-Agent: '.$ua;
+		}
+	}
+	
+	public function rewriteHashbangFragment($url) { // move a "#!..." fragment into an _escaped_fragment_ query parameter
+		// return $url if there's no '#!'
+		if (strpos($url, '#!') === false) return $url;
+		// split $url and rewrite
+		// TODO: is SimplePie_IRI included?
+		$iri = new SimplePie_IRI($url);
+		$fragment = substr($iri->fragment, 1); // strip '!'
+		$iri->fragment = null;
+		if (isset($iri->query)) {
+			parse_str($iri->query, $query); // keep any query parameters the URL already has
+		} else {
+			$query = array();
+		}
+		$query['_escaped_fragment_'] = (string)$fragment;
+		$iri->query = str_replace('%2F', '/', http_build_query($query)); // needed for some sites
+		return $iri->get_iri();
+	}
+	
+	public function getRedirectURLfromHTML($url, $html) { // redirect target found in $html, or false when there is none
+		$redirect_url = $this->getMetaRefreshURL($url, $html); // a meta refresh takes precedence
+		if (!$redirect_url) {
+			$redirect_url = $this->getUglyURL($url, $html); // otherwise fall back to the AJAX-crawling trigger
+		}
+		return $redirect_url;
+	}
+	
+	public function getMetaRefreshURL($url, $html) { // target of a <meta http-equiv="refresh"> tag in $html, made absolute against $url; false if none
+		if ($html == '') return false;
+		// <meta HTTP-EQUIV="REFRESH" content="0; url=http://www.bernama.com/bernama/v6/newsindex.php?id=943513">
+		if (!preg_match('!<meta http-equiv=["\']?refresh["\']? content=["\']?[0-9];\s*url=["\']?([^"\'>]+)["\']*>!i', $html, $match)) { // NOTE(review): "[0-9];" only matches a single-digit delay
+			return false;
+		}
+		$redirect_url = $match[1];
+		if (preg_match('!^https?://!i', $redirect_url)) {
+			// already absolute
+			$this->debug('Meta refresh redirect found (http-equiv="refresh"), new URL: '.$redirect_url);
+			return $redirect_url;
+		}
+		// absolutize redirect URL
+		$base = new SimplePie_IRI($url);
+		// remove '//' in URL path (causes URLs not to resolve properly)
+		if (isset($base->path)) $base->path = preg_replace('!//+!', '/', $base->path);
+		if ($absolute = SimplePie_IRI::absolutize($base, $redirect_url)) {
+			$this->debug('Meta refresh redirect found (http-equiv="refresh"), new URL: '.$absolute);
+			return $absolute;
+		}
+		return false; // the relative target could not be resolved
+	}	
+	
+	public function getUglyURL($url, $html) { // $url with an empty _escaped_fragment_ parameter if $html contains an AJAX trigger, else false
+		if ($html == '') return false;
+		$found = false;
+		foreach ($this->ajaxTriggers as $string) {
+			if (stripos($html, $string) !== false) { // stripos() returns 0 for a match at the very start, so compare against false
+				$found = true;
+				break;
+			}
+		}
+		if (!$found) return false;
+		$iri = new SimplePie_IRI($url);
+		if (isset($iri->query)) {
+			parse_str($iri->query, $query); // keep any query parameters the URL already has
+		} else {
+			$query = array();
+		}
+		$query['_escaped_fragment_'] = '';
+		$iri->query = str_replace('%2F', '/', http_build_query($query)); // needed for some sites
+		$ugly_url = $iri->get_iri();
+		$this->debug('AJAX trigger (meta name="fragment" content="!") found, new URL: '.$ugly_url);
+		return $ugly_url;
+	}
+	
+	public function removeFragment($url) { // $url with everything from the first '#' onwards removed
+		$pos = strpos($url, '#');
+		if ($pos === false) {
+			return $url; // no fragment present
+		} else {
+			return substr($url, 0, $pos);
+		}
+	}
+	
+	public function rewriteUrls($url) { // apply the first matching rule in $this->rewriteUrls (substring => strtr() replacement map)
+		foreach ($this->rewriteUrls as $find => $action) {
+			if (strpos($url, $find) !== false) {
+				if (is_array($action)) { // a matching rule whose action is not an array is skipped and the search continues
+					return strtr($url, $action);
+				}
+			}
+		}
+		return $url; // no rule applied
+	}
+	
+	public function enableDebug($bool=true) { // switch the output of debug() on or off
+		$this->debug = (bool)$bool;
+	}
+	
+	public function minimiseMemoryUse($bool = true) { // stores the flag only; NOTE(review): its visible readers are all commented out - confirm it has any effect
+		$this->minimiseMemoryUse = $bool;
+	}
+	
+	public function setMaxParallelRequests($max) { // batch size used when the parallel fetchers split the URL list; not validated here
+		$this->maxParallelRequests = $max;
+	}
+	
+	public function validateUrl($url) { // returns the sanitised URL if it is a valid http(s) URL, false otherwise
+		$url = filter_var($url, FILTER_SANITIZE_URL);
+		$test = filter_var($url, FILTER_VALIDATE_URL); // a scheme is always required; FILTER_FLAG_SCHEME_REQUIRED no longer exists in PHP 8
+		// deal with bug http://bugs.php.net/51192 (present in PHP 5.2.13 and PHP 5.3.2)
+		if ($test === false) {
+			$test = filter_var(strtr($url, '-', '_'), FILTER_VALIDATE_URL); // retry with hyphens masked; only the verdict is used
+		}
+		if ($test !== false && $test !== null && preg_match('!^https?://!', $url)) {
+			return $url;
+		} else {
+			return false;
+		}
+	}
+	
+	public function fetchAll(array $urls) { // fetch $urls, then keep fetching queued redirects for at most $this->maxRedirects rounds
+		$this->fetchAllOnce($urls, $isRedirect=false);
+		$redirects = 0;
+		while (!empty($this->redirectQueue) && ++$redirects <= $this->maxRedirects) {
+			$this->debug("Following redirects #$redirects...");
+			$this->fetchAllOnce($this->redirectQueue, $isRedirect=true); // the queue maps original URL => URL to request next
+		}
+	}
+	
+	// fetch all URLs once: redirects are queued in $this->redirectQueue, not followed; returns false only if the HttpRequestPool branch catches an HttpException
+	public function fetchAllOnce(array $urls, $isRedirect=false) {
+		if (!$isRedirect) $urls = array_unique($urls); // redirect batches map original URL => target and are left untouched
+		if (empty($urls)) return;
+		
+		//////////////////////////////////////////////////////
+		// parallel (HttpRequestPool)
+		if ($this->method == self::METHOD_REQUEST_POOL) {
+			$this->debug('Starting parallel fetch (HttpRequestPool)');
+			try {
+				while (count($urls) > 0) {
+					$this->debug('Processing set of '.min($this->maxParallelRequests, count($urls)));
+					$subset = array_splice($urls, 0, $this->maxParallelRequests); // take the next batch off the front of $urls
+					$pool = new HttpRequestPool();
+					foreach ($subset as $orig => $url) {
+						if (!$isRedirect) $orig = $url; // for a fresh list the URL is its own key into $this->requests
+						unset($this->redirectQueue[$orig]); // this entry is being handled now
+						$this->debug("...$url");
+						if (!$isRedirect && isset($this->requests[$url])) {
+							$this->debug("......in memory");
+						/*
+						} elseif ($this->isCached($url)) {
+							$this->debug("......is cached");
+							if (!$this->minimiseMemoryUse) {
+								$this->requests[$url] = $this->getCached($url);
+							}
+						*/
+						} else {
+							$this->debug("......adding to pool");
+							$req_url = $this->rewriteUrls($url);
+							$req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url;
+							$req_url = $this->removeFragment($req_url);
+							if (!empty($this->headerOnlyTypes) && !isset($this->requests[$orig]['wrongGuess']) && $this->possibleUnsupportedType($req_url)) {
+								$_meth = HttpRequest::METH_HEAD;
+							} else {
+								$_meth = HttpRequest::METH_GET;
+								unset($this->requests[$orig]['wrongGuess']);
+							}
+							$httpRequest = new HttpRequest($req_url, $_meth, $this->requestOptions);
+							// send cookies, if we have any
+							if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
+								$this->debug("......sending cookies: $cookies");
+								$httpRequest->addHeaders(array('Cookie' => $cookies));
+							}
+							//$httpRequest->addHeaders(array('User-Agent' => $this->userAgent));
+							$httpRequest->addHeaders($this->getUserAgent($req_url, true));
+							// add referer for picky sites
+							$httpRequest->addheaders(array('Referer' => $this->referer));
+							$this->requests[$orig] = array('headers'=>null, 'body'=>null, 'httpRequest'=>$httpRequest);
+							$this->requests[$orig]['original_url'] = $orig;
+							$pool->attach($httpRequest);
+						}
+					}
+					// did we get anything into the pool?
+					if (count($pool) > 0) {
+						$this->debug('Sending request...');
+						try {
+							$pool->send();
+						} catch (HttpRequestPoolException $e) {
+							// do nothing
+						}
+						$this->debug('Received responses');
+						foreach($subset as $orig => $url) {
+							if (!$isRedirect) $orig = $url;
+							$request = $this->requests[$orig]['httpRequest']; // NOTE(review): 'httpRequest' is only set for URLs attached above, not for "in memory" ones - confirm mixed batches cannot reach here
+							//$this->requests[$orig]['headers'] = $this->headersToString($request->getResponseHeader());
+							// getResponseHeader() doesn't return status line, so, for consistency...
+							$this->requests[$orig]['headers'] = substr($request->getRawResponseMessage(), 0, $request->getResponseInfo('header_size'));
+							// check content type
+							// TODO: use getResponseHeader('content-type') or getResponseInfo()
+							if ($this->headerOnlyType($this->requests[$orig]['headers'])) {
+								$this->requests[$orig]['body'] = '';
+								$_header_only_type = true;
+								$this->debug('Header only type returned');
+							} else {
+								$this->requests[$orig]['body'] = $request->getResponseBody();
+								$_header_only_type = false;
+							}
+							$this->requests[$orig]['effective_url'] = $request->getResponseInfo('effective_url');
+							$this->requests[$orig]['status_code'] = $status_code = $request->getResponseCode();
+							// is redirect?
+							if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && $request->getResponseHeader('location')) {
+								$redirectURL = $request->getResponseHeader('location');
+								if (!preg_match('!^https?://!i', $redirectURL)) {
+									$redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
+								}
+								if ($this->validateURL($redirectURL)) {
+									$this->debug('Redirect detected. Valid URL: '.$redirectURL);
+									// store any cookies
+									$cookies = $request->getResponseHeader('set-cookie');
+									if ($cookies && !is_array($cookies)) $cookies = array($cookies);
+									if ($cookies) $this->cookieJar->storeCookies($url, $cookies);
+									$this->redirectQueue[$orig] = $redirectURL;
+								} else {
+									$this->debug('Redirect detected. Invalid URL: '.$redirectURL);
+								}
+							} elseif (!$_header_only_type && $request->getMethod() === HttpRequest::METH_HEAD) {
+								// the response content-type did not match our 'header only' types,
+								// but we'd issued a HEAD request because we assumed it would. So
+								// let's queue a proper GET request for this item...
+								$this->debug('Wrong guess at content-type, queing GET request');
+								$this->requests[$orig]['wrongGuess'] = true;
+								$this->redirectQueue[$orig] = $this->requests[$orig]['effective_url'];
+							} elseif (strpos($this->requests[$orig]['effective_url'], '_escaped_fragment_') === false) {
+								// check for <meta name='fragment' content='!'/>
+								// for AJAX sites, e.g. Blogger with its dynamic views templates.
+								// Based on Google's spec: https://developers.google.com/webmasters/ajax-crawling/docs/specification
+								if (isset($this->requests[$orig]['body'])) {
+									$redirectURL = $this->getRedirectURLfromHTML($this->requests[$orig]['effective_url'], substr($this->requests[$orig]['body'], 0, 4000));
+									if ($redirectURL) {
+										$this->redirectQueue[$orig] = $redirectURL;
+									}
+								}
+							}
+							//die($url.' -multi- '.$request->getResponseInfo('effective_url'));
+							$pool->detach($request);
+							unset($this->requests[$orig]['httpRequest'], $request);
+							/*
+							if ($this->minimiseMemoryUse) {
+								if ($this->cache($url)) {
+									unset($this->requests[$url]);
+								}
+							}
+							*/
+						}
+					}
+				}
+			} catch (HttpException $e) {
+				$this->debug($e);
+				return false;
+			}
+		}
+		
+		//////////////////////////////////////////////////////////
+		// parallel (curl_multi_*)
+		elseif ($this->method == self::METHOD_CURL_MULTI) {
+			$this->debug('Starting parallel fetch (curl_multi_*)');
+			while (count($urls) > 0) {
+				$this->debug('Processing set of '.min($this->maxParallelRequests, count($urls)));
+				$subset = array_splice($urls, 0, $this->maxParallelRequests);
+				$pool = new RollingCurl(array($this, 'handleCurlResponse'));
+				$pool->window_size = count($subset);		
+				
+				foreach ($subset as $orig => $url) {
+					if (!$isRedirect) $orig = $url;
+					unset($this->redirectQueue[$orig]);
+					$this->debug("...$url");
+					if (!$isRedirect && isset($this->requests[$url])) {
+						$this->debug("......in memory");
+					/*
+					} elseif ($this->isCached($url)) {
+						$this->debug("......is cached");
+						if (!$this->minimiseMemoryUse) {
+							$this->requests[$url] = $this->getCached($url);
+						}
+					*/
+					} else {
+						$this->debug("......adding to pool");
+						$req_url = $this->rewriteUrls($url);
+						$req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url;
+						$req_url = $this->removeFragment($req_url);
+						if (!empty($this->headerOnlyTypes) && !isset($this->requests[$orig]['wrongGuess']) && $this->possibleUnsupportedType($req_url)) {
+							$_meth = 'HEAD';
+						} else {
+							$_meth = 'GET';
+							unset($this->requests[$orig]['wrongGuess']);
+						}						
+						$headers = array();
+						//$headers[] = 'User-Agent: '.$this->userAgent;
+						$headers[] = $this->getUserAgent($req_url);
+						// add referer for picky sites
+						$headers[] = 'Referer: '.$this->referer;
+						// send cookies, if we have any
+						if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
+							$this->debug("......sending cookies: $cookies");
+							$headers[] = 'Cookie: '.$cookies;
+						}
+						$httpRequest = new RollingCurlRequest($req_url, $_meth, null, $headers, array(
+							CURLOPT_CONNECTTIMEOUT => $this->requestOptions['timeout'],
+							CURLOPT_TIMEOUT => $this->requestOptions['timeout']
+							));
+						$httpRequest->set_original_url($orig);
+						$this->requests[$orig] = array('headers'=>null, 'body'=>null, 'httpRequest'=>$httpRequest);
+						$this->requests[$orig]['original_url'] = $orig; // TODO: is this needed anymore?
+						$pool->add($httpRequest);
+					}
+				}
+				// did we get anything into the pool?
+				if (count($pool) > 0) {
+					$this->debug('Sending request...');
+					$pool->execute(); // this will call handleCurlResponse() and populate $this->requests[$orig]
+					$this->debug('Received responses');
+					foreach($subset as $orig => $url) {
+						if (!$isRedirect) $orig = $url;
+						// $this->requests[$orig]['headers']
+						// $this->requests[$orig]['body']
+						// $this->requests[$orig]['effective_url']
+						// check content type
+						if ($this->headerOnlyType($this->requests[$orig]['headers'])) {
+							$this->requests[$orig]['body'] = '';
+							$_header_only_type = true;
+							$this->debug('Header only type returned');
+						} else {
+							$_header_only_type = false;
+						}
+						$status_code = $this->requests[$orig]['status_code']; // NOTE(review): set by handleCurlResponse(); presumably missing if the callback never ran for this request - confirm
+						if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) {
+							$redirectURL = $this->requests[$orig]['location'];
+							if (!preg_match('!^https?://!i', $redirectURL)) {
+								$redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
+							}
+							if ($this->validateURL($redirectURL)) {
+								$this->debug('Redirect detected. Valid URL: '.$redirectURL);
+								// store any cookies
+								$cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']);
+								if (!empty($cookies)) $this->cookieJar->storeCookies($url, $cookies);							
+								$this->redirectQueue[$orig] = $redirectURL;
+							} else {
+								$this->debug('Redirect detected. Invalid URL: '.$redirectURL);
+							}
+						} elseif (!$_header_only_type && $this->requests[$orig]['method'] == 'HEAD') {
+							// the response content-type did not match our 'header only' types,
+							// but we'd issued a HEAD request because we assumed it would. So
+							// let's queue a proper GET request for this item...
+							$this->debug('Wrong guess at content-type, queing GET request');
+							$this->requests[$orig]['wrongGuess'] = true;
+							$this->redirectQueue[$orig] = $this->requests[$orig]['effective_url'];
+						} elseif (strpos($this->requests[$orig]['effective_url'], '_escaped_fragment_') === false) {
+							// check for <meta name='fragment' content='!'/>
+							// for AJAX sites, e.g. Blogger with its dynamic views templates.
+							// Based on Google's spec: https://developers.google.com/webmasters/ajax-crawling/docs/specification
+							if (isset($this->requests[$orig]['body'])) {
+								$redirectURL = $this->getRedirectURLfromHTML($this->requests[$orig]['effective_url'], substr($this->requests[$orig]['body'], 0, 4000));
+								if ($redirectURL) {
+									$this->redirectQueue[$orig] = $redirectURL;
+								}
+							}
+						}
+						// die($url.' -multi- '.$request->getResponseInfo('effective_url'));
+						unset($this->requests[$orig]['httpRequest'], $this->requests[$orig]['method']);
+					}
+				}
+			}
+		}
+
+		//////////////////////////////////////////////////////
+		// sequential (file_get_contents)
+		else {
+			$this->debug('Starting sequential fetch (file_get_contents)');
+			$this->debug('Processing set of '.count($urls));
+			foreach ($urls as $orig => $url) {
+				if (!$isRedirect) $orig = $url;
+				unset($this->redirectQueue[$orig]);
+				$this->debug("...$url");
+				if (!$isRedirect && isset($this->requests[$url])) {
+					$this->debug("......in memory");
+				/*
+				} elseif ($this->isCached($url)) {
+					$this->debug("......is cached");
+					if (!$this->minimiseMemoryUse) {
+						$this->requests[$url] = $this->getCached($url);
+					}
+				*/
+				} else {
+					$this->debug("Sending request for $url");
+					$this->requests[$orig]['original_url'] = $orig;
+					$req_url = $this->rewriteUrls($url);
+					$req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url;
+					$req_url = $this->removeFragment($req_url);
+					// send cookies, if we have any
+					$httpContext = $this->httpContext; // work on a copy so per-request headers do not accumulate on $this->httpContext
+					$httpContext['http']['header'] .= $this->getUserAgent($req_url)."\r\n";
+					// add referer for picky sites
+					$httpContext['http']['header'] .= 'Referer: '.$this->referer."\r\n";
+					if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
+						$this->debug("......sending cookies: $cookies");
+						$httpContext['http']['header'] .= 'Cookie: '.$cookies."\r\n";
+					}
+					if (false !== ($html = @file_get_contents($req_url, false, stream_context_create($httpContext)))) {
+						$this->debug('Received response');
+						// get status code
+						if (!isset($http_response_header[0]) || !preg_match('!^HTTP/\d+\.\d+\s+(\d+)!', trim($http_response_header[0]), $match)) {
+							$this->debug('Error: no status code found');
+							// TODO: handle error - no status code
+						} else {
+							$this->requests[$orig]['headers'] = $this->headersToString($http_response_header, false);
+							// check content type
+							if ($this->headerOnlyType($this->requests[$orig]['headers'])) {
+								$this->requests[$orig]['body'] = '';
+							} else {
+								$this->requests[$orig]['body'] = $html;
+							}
+							$this->requests[$orig]['effective_url'] = $req_url;
+							$this->requests[$orig]['status_code'] = $status_code = (int)$match[1];
+							unset($match);
+							// handle redirect
+							if (preg_match('/^Location:(.*?)$/mi', $this->requests[$orig]['headers'], $match)) {
+								$this->requests[$orig]['location'] =  trim($match[1]);
+							}
+							if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) {
+								$redirectURL = $this->requests[$orig]['location'];
+								if (!preg_match('!^https?://!i', $redirectURL)) {
+									$redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
+								}
+								if ($this->validateURL($redirectURL)) {
+									$this->debug('Redirect detected. Valid URL: '.$redirectURL);
+									// store any cookies
+									$cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']);
+									if (!empty($cookies)) $this->cookieJar->storeCookies($url, $cookies);
+									$this->redirectQueue[$orig] = $redirectURL;
+								} else {
+									$this->debug('Redirect detected. Invalid URL: '.$redirectURL);
+								}
+							} elseif (strpos($this->requests[$orig]['effective_url'], '_escaped_fragment_') === false) {
+								// check for <meta name='fragment' content='!'/>
+								// for AJAX sites, e.g. Blogger with its dynamic views templates.
+								// Based on Google's spec: https://developers.google.com/webmasters/ajax-crawling/docs/specification
+								if (isset($this->requests[$orig]['body'])) {
+									$redirectURL = $this->getRedirectURLfromHTML($this->requests[$orig]['effective_url'], substr($this->requests[$orig]['body'], 0, 4000));
+									if ($redirectURL) {
+										$this->redirectQueue[$orig] = $redirectURL;
+									}
+								}
+							}
+						}
+					} else {
+						$this->debug('Error retrieving URL');
+						//print_r($req_url);
+						//print_r($http_response_header);
+						//print_r($html);
+						
+						// TODO: handle error - failed to retrieve URL
+					}
+				}
+			}
+		}
+	}
+	
+	// RollingCurl callback: store the pieces of a finished response under the request's original URL.
+	public function handleCurlResponse($response, $info, $request) {
+		$entry =& $this->requests[$request->url_original];
+		$headerSize = $info['header_size'];
+		$entry['headers'] = substr($response, 0, $headerSize);
+		$entry['body'] = substr($response, $headerSize);
+		$entry['method'] = $request->method;
+		$entry['effective_url'] = $info['url'];
+		$entry['status_code'] = (int)$info['http_code'];
+		if (preg_match('/^Location:(.*?)$/mi', $entry['headers'], $match)) $entry['location'] =  trim($match[1]);
+	}
+	
+	// Flatten headers into a single newline-separated string.
+	// A non-associative list is joined as-is; otherwise every value of every
+	// key becomes its own "Key: value" line and the trailing newline is trimmed.
+	protected function headersToString(array $headers, $associative=true) {
+		if (!$associative) {
+			return implode("\n", $headers);
+		}
+		$str = '';
+		foreach ($headers as $key => $vals) {
+			foreach (is_array($vals) ? $vals : array($vals) as $val) {
+				$str .= "$key: $val\n";
+			}
+		}
+		return rtrim($str);
+	}
+	
+	// Return the response array for $url ('headers', 'body', 'effective_url',
+	// 'status_code', ...), fetching it first when it is not already in memory.
+	// Returns false if the request failed. $remove drops the in-memory copy
+	// afterwards; $gzdecode inflates bodies sent with "Content-Encoding: gzip".
+	public function get($url, $remove=false, $gzdecode=true) {
+		$url = "$url";
+		if (isset($this->requests[$url]) && isset($this->requests[$url]['body'])) {
+			$this->debug("URL already fetched - in memory ($url, effective: {$this->requests[$url]['effective_url']})");
+			$response = $this->requests[$url];
+		/*
+		} elseif ($this->isCached($url)) {
+			$this->debug("URL already fetched - in disk cache ($url)");
+			$response = $this->getCached($url);
+			$this->requests[$url] = $response;
+		*/
+		} else {
+			$this->debug("Fetching URL ($url)");
+			$this->fetchAll(array($url));
+			$response = isset($this->requests[$url]['body']) ? $this->requests[$url] : false;
+			if ($response === false) $this->debug("Request failed");
+		}
+		/*
+		if ($this->minimiseMemoryUse && $response) {
+			$this->cache($url);
+			unset($this->requests[$url]);
+		}
+		*/
+		if ($remove && $response) unset($this->requests[$url]);
+		// a failed request yields false, which must not be indexed like an array
+		if ($gzdecode && is_array($response) && isset($response['headers']) && stripos($response['headers'], 'Content-Encoding: gzip') !== false) {
+			$html = gzdecode($response['body']);
+			if ($html) $response['body'] = $html;
+		}
+		return $response;
+	}
+	
+	public function parallelSupport() {
+		return class_exists('HttpRequestPool') ? true : function_exists('curl_multi_init'); // either parallel backend is enough
+	}
+	
+	// True when the Content-Type found in $headers matches an entry of
+	// $this->headerOnlyTypes, either as full mime type (e.g. image/jpeg)
+	// or as bare top-level type (e.g. image).
+	private function headerOnlyType($headers) {
+		if (!preg_match('!^Content-Type:\s*(([a-z-]+)/([^;\r\n ]+))!im', $headers, $match)) {
+			return false;
+		}
+		$fullType = strtolower(trim($match[1]));
+		$baseType = strtolower(trim($match[2]));
+		return in_array($fullType, $this->headerOnlyTypes) || in_array($baseType, $this->headerOnlyTypes);
+	}
+	
+	// True when the URL path carries a file extension listed in
+	// $this->headerOnlyClues (consulted by fetchAllOnce() when choosing HEAD or GET).
+	private function possibleUnsupportedType($url) {
+		$path = @parse_url($url, PHP_URL_PATH);
+		if (!$path || strpos($path, '.') === false) return false;
+		$ext = pathinfo($path, PATHINFO_EXTENSION);
+		return in_array(strtolower(trim($ext)), $this->headerOnlyClues);
+	}
+}
+
+// gzdecode from http://www.php.net/manual/en/function.gzdecode.php#82930
+if (!function_exists('gzdecode')) {
+	/**
+	 * Decode a gzip (RFC 1952) stream; only defined when PHP has no gzdecode().
+	 * Verifies the optional header CRC16 and the CRC32/ISIZE footer before returning.
+	 *
+	 * @param string   $data      complete gzip stream (header, deflate body, 8-byte footer)
+	 * @param string   $filename  receives the file name stored in the header, or ''
+	 * @param string   $error     receives a message for the failures that set one
+	 * @param int|null $maxlength passed through to gzinflate() as its length limit
+	 *
+	 * @return string|false|null decoded data, or null/false for malformed or corrupt input
+	 */
+	function gzdecode($data,&$filename='',&$error='',$maxlength=null) 
+	{
+		$len = strlen($data);
+		if ($len < 18 || strcmp(substr($data,0,2),"\x1f\x8b")) {
+			$error = "Not in GZIP format.";
+			return null;  // Not GZIP format (See RFC 1952)
+		}
+		$method = ord(substr($data,2,1));  // Compression method
+		$flags  = ord(substr($data,3,1));  // Flags
+		// parentheses are required: in PHP != binds tighter than &
+		if (($flags & 31) != $flags) {
+			$error = "Reserved bits not allowed.";
+			return null;
+		}
+		$headerlen = 10;  // fixed part: magic, method, flags, mtime, xfl, os
+		if ($flags & 4) {
+			// 2-byte length prefixed EXTRA data in header
+			if ($len - $headerlen - 2 < 8) {
+				return false;  // invalid
+			}
+			// XLEN follows the fixed header directly (offset 10, not 8)
+			$extralen = unpack("v",substr($data,$headerlen,2));
+			$extralen = $extralen[1];
+			if ($len - $headerlen - 2 - $extralen < 8) {
+				return false;  // invalid
+			}
+			$headerlen += 2 + $extralen;
+		}
+		$filename = "";
+		if ($flags & 8) {
+			// C-style string
+			if ($len - $headerlen - 1 < 8) {
+				return false; // invalid
+			}
+			$filenamelen = strpos(substr($data,$headerlen),chr(0));
+			if ($filenamelen === false || $len - $headerlen - $filenamelen - 1 < 8) {
+				return false; // invalid
+			}
+			$filename = substr($data,$headerlen,$filenamelen);
+			$headerlen += $filenamelen + 1;
+		}
+		if ($flags & 16) {
+			// C-style string COMMENT data in header
+			if ($len - $headerlen - 1 < 8) {
+				return false;    // invalid
+			}
+			$commentlen = strpos(substr($data,$headerlen),chr(0));
+			if ($commentlen === false || $len - $headerlen - $commentlen - 1 < 8) {
+				return false;    // Invalid header format
+			}
+			$headerlen += $commentlen + 1;
+		}
+		if ($flags & 2) {
+			// 2-bytes (lowest order) of CRC32 on header present
+			if ($len - $headerlen - 2 < 8) {
+				return false;    // invalid
+			}
+			$calccrc = crc32(substr($data,0,$headerlen)) & 0xffff;
+			$headercrc = unpack("v", substr($data,$headerlen,2));
+			$headercrc = $headercrc[1];
+			if ($headercrc != $calccrc) {
+				$error = "Header checksum failed.";
+				return false;    // Bad header CRC
+			}
+			$headerlen += 2;
+		}
+		// GZIP FOOTER
+		$datacrc = unpack("V",substr($data,-8,4));
+		$datacrc = sprintf('%u',$datacrc[1] & 0xFFFFFFFF);
+		$isize = unpack("V",substr($data,-4));
+		$isize = $isize[1];
+		// decompression:
+		$bodylen = $len-$headerlen-8;
+		if ($bodylen < 1) {
+			// IMPLEMENTATION BUG!
+			return null;
+		}
+		$body = substr($data,$headerlen,$bodylen);
+		$data = "";
+		if ($bodylen > 0) {
+			switch ($method) {
+			case 8:
+				// Currently the only supported compression method:
+				$data = gzinflate($body,$maxlength);
+				break;
+			default:
+				$error = "Unknown compression method.";
+				return false;
+			}
+		}  // zero-byte body content is allowed
+		// Verify CRC32
+		$crc   = sprintf("%u",crc32($data));
+		$crcOK = $crc == $datacrc;
+		$lenOK = $isize == strlen($data);
+		if (!$lenOK || !$crcOK) {
+			$error = ( $lenOK ? '' : 'Length check FAILED. ') . ( $crcOK ? '' : 'Checksum FAILED.');
+			return false;
+		}
+		return $data;
+	}
+}
\ No newline at end of file
diff --git a/inc/3rdparty/libraries/humble-http-agent/SimplePie_HumbleHttpAgent.php b/inc/3rdparty/libraries/humble-http-agent/SimplePie_HumbleHttpAgent.php
index ecd46d5f..c524a1ee 100644
--- a/inc/3rdparty/libraries/humble-http-agent/SimplePie_HumbleHttpAgent.php
+++ b/inc/3rdparty/libraries/humble-http-agent/SimplePie_HumbleHttpAgent.php
@@ -1,79 +1,78 @@
-<?php
-/**
- * Humble HTTP Agent extension for SimplePie_File
- * 
- * This class is designed to extend and override SimplePie_File
- * in order to prevent duplicate HTTP requests being sent out.
- * The idea is to initialise an instance of Humble HTTP Agent
- * and attach it, to a static class variable, of this class.
- * SimplePie will then automatically initialise this class
- * 
- * @date 2011-02-28
- */
-
-class SimplePie_HumbleHttpAgent extends SimplePie_File
-{
-	protected static $agent;
-	var $url;
-	var $useragent;
-	var $success = true;
-	var $headers = array();
-	var $body;
-	var $status_code;
-	var $redirects = 0;
-	var $error;
-	var $method = SIMPLEPIE_FILE_SOURCE_NONE;
-
-	public static function set_agent(HumbleHttpAgent $agent) {
-		self::$agent = $agent;
-	}
-	
-	public function __construct($url, $timeout = 10, $redirects = 5, $headers = null, $useragent = null, $force_fsockopen = false) {
-		if (class_exists('idna_convert'))
-		{
-			$idn = new idna_convert();
-			$parsed = SimplePie_Misc::parse_url($url);
-			$url = SimplePie_Misc::compress_parse_url($parsed['scheme'], $idn->encode($parsed['authority']), $parsed['path'], $parsed['query'], $parsed['fragment']);
-		}
-		$this->url = $url;
-		$this->useragent = $useragent;
-		if (preg_match('/^http(s)?:\/\//i', $url))
-		{
-			if (!is_array($headers))
-			{
-				$headers = array();
-			}
-			$this->method = SIMPLEPIE_FILE_SOURCE_REMOTE | SIMPLEPIE_FILE_SOURCE_CURL;
-			$headers2 = array();
-			foreach ($headers as $key => $value) {
-				$headers2[] = "$key: $value";
-			}
-			//TODO: allow for HTTP headers
-			// curl_setopt($fp, CURLOPT_HTTPHEADER, $headers2);
-
-			$response = self::$agent->get($url);
-			
-			if ($response === false || !isset($response['status_code'])) {
-				$this->error = 'failed to fetch URL';
-				$this->success = false;
-			} else {
-				// The extra lines at the end are there to satisfy SimplePie's HTTP parser.
-				// The class expects a full HTTP message, whereas we're giving it only
-				// headers - the new lines indicate the start of the body.
-				$parser = new SimplePie_HTTP_Parser($response['headers']."\r\n\r\n");
-				if ($parser->parse()) {
-					$this->headers = $parser->headers;
-					//$this->body = $parser->body;
-					$this->body = $response['body'];
-					$this->status_code = $parser->status_code;
-				}
-			}
-		}
-		else
-		{
-			$this->error = 'invalid URL';
-			$this->success = false;
-		}
-	}
-}
-?>
\ No newline at end of file
+<?php
+/**
+ * Humble HTTP Agent extension for SimplePie_File
+ * 
+ * This class is designed to extend and override SimplePie_File
+ * in order to prevent duplicate HTTP requests being sent out.
+ * The idea is to initialise an instance of Humble HTTP Agent
+ * and attach it, to a static class variable, of this class.
+ * SimplePie will then automatically initialise this class
+ * 
+ * @date 2011-02-28
+ */
+
+class SimplePie_HumbleHttpAgent extends SimplePie_File
+{
+	// HumbleHttpAgent shared by all instances; set through set_agent()
+	protected static $agent;
+	public $url;
+	public $useragent;
+	public $success = true;
+	public $headers = array();
+	public $body;
+	public $status_code;
+	public $redirects = 0;
+	public $error;
+	public $method = SIMPLEPIE_FILE_SOURCE_NONE;
+
+	// Register the agent every later instance will fetch through.
+	public static function set_agent(HumbleHttpAgent $agent) {
+		self::$agent = $agent;
+	}
+
+	/**
+	 * Fetch $url through the shared agent and fill in the file properties.
+	 *
+	 * Only http(s) URLs are accepted; anything else sets $error/$success.
+	 * $timeout, $redirects and $force_fsockopen are not read by this body,
+	 * and $headers are collected but not sent yet (see TODO below).
+	 */
+	public function __construct($url, $timeout = 10, $redirects = 5, $headers = null, $useragent = null, $force_fsockopen = false) {
+		if (class_exists('idna_convert')) {
+			$idn = new idna_convert();
+			$parsed = SimplePie_Misc::parse_url($url);
+			$url = SimplePie_Misc::compress_parse_url($parsed['scheme'], $idn->encode($parsed['authority']), $parsed['path'], $parsed['query'], $parsed['fragment']);
+		}
+		$this->url = $url;
+		$this->useragent = $useragent;
+		if (!preg_match('/^http(s)?:\/\//i', $url)) {
+			$this->error = 'invalid URL';
+			$this->success = false;
+			return;
+		}
+
+		$this->method = SIMPLEPIE_FILE_SOURCE_REMOTE | SIMPLEPIE_FILE_SOURCE_CURL;
+		$headers2 = array();
+		foreach (is_array($headers) ? $headers : array() as $key => $value) {
+			$headers2[] = "$key: $value";
+		}
+		//TODO: allow for HTTP headers
+		// curl_setopt($fp, CURLOPT_HTTPHEADER, $headers2);
+
+		$response = self::$agent->get($url);
+		if ($response === false || !isset($response['status_code'])) {
+			$this->error = 'failed to fetch URL';
+			$this->success = false;
+			return;
+		}
+
+		// SimplePie's HTTP parser expects a full HTTP message, but only the
+		// headers are passed in - the extra blank line marks the body start.
+		$parser = new SimplePie_HTTP_Parser($response['headers']."\r\n\r\n");
+		if ($parser->parse()) {
+			$this->headers = $parser->headers;
+			$this->body = $response['body'];
+			$this->status_code = $parser->status_code;
+		}
+	}
+}
\ No newline at end of file
diff --git a/inc/3rdparty/libraries/language-detect/LanguageDetect.php b/inc/3rdparty/libraries/language-detect/LanguageDetect.php
index 09b11546..382d869c 100644
--- a/inc/3rdparty/libraries/language-detect/LanguageDetect.php
+++ b/inc/3rdparty/libraries/language-detect/LanguageDetect.php
@@ -6,23 +6,24 @@
  * Attempts to detect the language of a sample of text by correlating ranked
  * 3-gram frequencies to a table of 3-gram frequencies of known languages.
  *
- * Implements a version of a technique originally proposed by Cavnar & Trenkle 
- * (1994): "N-Gram-Based Text Categorization" 
+ * Implements a version of a technique originally proposed by Cavnar & Trenkle
+ * (1994): "N-Gram-Based Text Categorization"
  *
- * PHP versions 4 and 5
+ * PHP version 5
  *
- * @category   Text
- * @package    Text_LanguageDetect
- * @author     Nicholas Pisarro <infinityminusnine+pear@gmail.com>
- * @copyright  2005-2006 Nicholas Pisarro
- * @license    http://www.debian.org/misc/bsd.license BSD
- * @version    CVS: $Id: LanguageDetect.php,v 1.20 2008/07/01 02:09:15 taak Exp $
- * @link       http://pear.php.net/package/Text_LanguageDetect/
- * @link       http://langdetect.blogspot.com/
+ * @category  Text
+ * @package   Text_LanguageDetect
+ * @author    Nicholas Pisarro <infinityminusnine+pear@gmail.com>
+ * @copyright 2005-2006 Nicholas Pisarro
+ * @license   http://www.debian.org/misc/bsd.license BSD
+ * @version   SVN: $Id: LanguageDetect.php 322353 2012-01-16 08:41:43Z cweiske $
+ * @link      http://pear.php.net/package/Text_LanguageDetect/
+ * @link      http://langdetect.blogspot.com/
  */
 
-//require_once 'PEAR.php';
-require_once 'Parser.php';
+require_once 'LanguageDetect/Exception.php';
+require_once 'LanguageDetect/Parser.php';
+require_once 'LanguageDetect/ISO639.php';
 
 /**
  * Language detection class
@@ -41,9 +42,10 @@ require_once 'Parser.php';
  *
  * echo "Supported languages:\n";
  *
- * $langs = $l->getLanguages();
- * if (PEAR::isError($langs)) {
- *     die($langs->getMessage());
+ * try {
+ *     $langs = $l->getLanguages();
+ * } catch (Text_LanguageDetect_Exception $e) {
+ *     die($e->getMessage());
  * }
  *
  * sort($langs);
@@ -54,38 +56,38 @@ require_once 'Parser.php';
  * }
  * </code>
  *
- * @category   Text
- * @package    Text_LanguageDetect
- * @author     Nicholas Pisarro <infinityminusnine+pear@gmail.com>
- * @copyright  2005 Nicholas Pisarro
- * @license    http://www.debian.org/misc/bsd.license BSD
- * @version    Release: @package_version@
- * @todo       allow users to generate their own language models
+ * @category  Text
+ * @package   Text_LanguageDetect
+ * @author    Nicholas Pisarro <infinityminusnine+pear@gmail.com>
+ * @copyright 2005 Nicholas Pisarro
+ * @license   http://www.debian.org/misc/bsd.license BSD
+ * @version   Release: @package_version@
+ * @link      http://pear.php.net/package/Text_LanguageDetect/
+ * @todo      allow users to generate their own language models
  */
- 
 class Text_LanguageDetect
 {
-    /** 
+    /**
      * The filename that stores the trigram data for the detector
      *
-     * If this value starts with a slash (/) or a dot (.) the value of 
+     * If this value starts with a slash (/) or a dot (.) the value of
      * $this->_data_dir will be ignored
-     * 
+     *
      * @var      string
      * @access   private
      */
-    var $_db_filename = './lang.dat';
+    var $_db_filename = 'lang.dat';
 
     /**
      * The filename that stores the unicode block definitions
      *
-     * If this value starts with a slash (/) or a dot (.) the value of 
+     * If this value starts with a slash (/) or a dot (.) the value of
      * $this->_data_dir will be ignored
-     * 
+     *
      * @var string
      * @access private
      */
-    var $_unicode_db_filename = './unicode_blocks.dat';
+    var $_unicode_db_filename = 'unicode_blocks.dat';
 
     /**
      * The data directory
@@ -99,11 +101,8 @@ class Text_LanguageDetect
 
     /**
      * The trigram data for comparison
-     * 
-     * Will be loaded on start from $this->_db_filename
      *
-     * May be set to a PEAR_Error object if there is an error during its 
-     * initialization
+     * Will be loaded on start from $this->_db_filename
      *
      * @var      array
      * @access   private
@@ -120,7 +119,7 @@ class Text_LanguageDetect
 
     /**
      * The size of the trigram data arrays
-     * 
+     *
      * @var      int
      * @access   private
      */
@@ -140,7 +139,7 @@ class Text_LanguageDetect
 
     /**
      * Whether or not to simulate perl's Language::Guess exactly
-     * 
+     *
      * @access  private
      * @var     bool
      * @see     setPerlCompatible()
@@ -164,19 +163,25 @@ class Text_LanguageDetect
      */
     var $_clusters;
 
+    /**
+     * Which type of "language names" are accepted and returned:
+     *
+     * 0 - language name ("english")
+     * 2 - 2-letter ISO 639-1 code ("en")
+     * 3 - 3-letter ISO 639-2 code ("eng")
+     */
+    var $_name_mode = 0;
+
     /**
      * Constructor
      *
      * Will attempt to load the language database. If it fails, you will get
-     * a PEAR_Error object returned when you try to use detect()
-     *
+     * an exception.
      */
-    function Text_LanguageDetect($db=null, $unicode_db=null)
+    function __construct()
     {
-		if (isset($db)) $this->_db_filename = $db;
-		if (isset($unicode_db)) $this->_unicode_db_filename = $unicode_db;
-		
         $data = $this->_readdb($this->_db_filename);
+        $this->_checkTrigram($data['trigram']);
         $this->_lang_db = $data['trigram'];
 
         if (isset($data['trigram-unicodemap'])) {
@@ -186,29 +191,32 @@ class Text_LanguageDetect
         // Not yet implemented:
         if (isset($data['trigram-clusters'])) {
             $this->_clusters = $data['trigram-clusters'];
-        }		
+        }
     }
 
     /**
      * Returns the path to the location of the database
      *
-     * @access    private
-     * @return    string    expected path to the language model database
+     * @param string $fname File name to load
+     *
+     * @return string expected path to the language model database
+     * @access private
      */
     function _get_data_loc($fname)
     {
-        return $fname;
+        return dirname(__FILE__).'/'.$fname;
     }
 
     /**
      * Loads the language trigram database from filename
      *
      * Trigram datbase should be a serialize()'d array
-     * 
-     * @access    private
-     * @param     string      $fname   the filename where the data is stored
-     * @return    array                the language model data
-     * @throws    PEAR_Error
+     *
+     * @param string $fname the filename where the data is stored
+     *
+     * @return array the language model data
+     * @throws Text_LanguageDetect_Exception
+     * @access private
      */
     function _readdb($fname)
     {
@@ -217,79 +225,74 @@ class Text_LanguageDetect
 
         // input check
         if (!file_exists($fname)) {
-            throw new Exception('Language database does not exist.');
+            throw new Text_LanguageDetect_Exception(
+                'Language database does not exist: ' . $fname,
+                Text_LanguageDetect_Exception::DB_NOT_FOUND
+            );
         } elseif (!is_readable($fname)) {
-            throw new Exception('Language database is not readable.');
+            throw new Text_LanguageDetect_Exception(
+                'Language database is not readable: ' . $fname,
+                Text_LanguageDetect_Exception::DB_NOT_READABLE
+            );
         }
 
-        if (function_exists('file_get_contents')) {
-            return unserialize(file_get_contents($fname));
-        } else {
-            // if you don't have file_get_contents(), 
-            // then this is the next fastest way
-            ob_start();
-            readfile($fname);
-            $contents = ob_get_contents();
-            ob_end_clean();
-            return unserialize($contents);
-        }
+        return unserialize(file_get_contents($fname));
     }
 
 
     /**
      * Checks if this object is ready to detect languages
-     * 
-     * @access   private
-     * @param    mixed   &$err  error object to be returned by reference, if any
-     * @return   bool           true if no errors
+     *
+     * @param array $trigram Trigram data from database
+     *
+     * @return void
+     * @access private
      */
-    function _setup_ok(&$err)
+    function _checkTrigram($trigram)
     {
-        if (!is_array($this->_lang_db)) {
+        if (!is_array($trigram)) {
             if (ini_get('magic_quotes_runtime')) {
-                throw new Exception('Error loading database. Try turning magic_quotes_runtime off.');
-            } else {
-                throw new Exception('Language database is not an array.');
+                throw new Text_LanguageDetect_Exception(
+                    'Error loading database. Try turning magic_quotes_runtime off.',
+                    Text_LanguageDetect_Exception::MAGIC_QUOTES
+                );
             }
-            return false;
-
-        } elseif (empty($this->_lang_db)) {
-            throw new Exception('Language database has no elements.');
-            return false;
-
-        } else {
-            return true;
+            throw new Text_LanguageDetect_Exception(
+                'Language database is not an array.',
+                Text_LanguageDetect_Exception::DB_NOT_ARRAY
+            );
+        } elseif (empty($trigram)) {
+            throw new Text_LanguageDetect_Exception(
+                'Language database has no elements.',
+                Text_LanguageDetect_Exception::DB_EMPTY
+            );
         }
     }
 
     /**
      * Omits languages
      *
-     * Pass this function the name of or an array of names of 
+     * Pass this function the name of or an array of names of
      * languages that you don't want considered
      *
-     * If you're only expecting a limited set of languages, this can greatly 
+     * If you're only expecting a limited set of languages, this can greatly
      * speed up processing
      *
-     * @access   public
-     * @param    mixed  $omit_list      language name or array of names to omit
-     * @param    bool   $include_only   if true will include (rather than 
-     *                                  exclude) only those in the list
-     * @return   int                    number of languages successfully deleted
-     * @throws   PEAR_Error
+     * @param mixed $omit_list    language name or array of names to omit
+     * @param bool  $include_only if true will include (rather than
+     *                            exclude) only those in the list
+     *
+     * @return int number of languages successfully deleted
+     * @throws Text_LanguageDetect_Exception
      */
-    function omitLanguages($omit_list, $include_only = false)
+    public function omitLanguages($omit_list, $include_only = false)
     {
-
-        // setup check
-        if (!$this->_setup_ok($err)) {
-            return $err;
-        }
-
         $deleted = 0;
 
-        // deleting the given languages
+        $omit_list = $this->_convertFromNameMode($omit_list);
+
         if (!$include_only) {
+            // deleting the given languages
             if (!is_array($omit_list)) {
                 $omit_list = strtolower($omit_list); // case desensitize
                 if (isset($this->_lang_db[$omit_list])) {
@@ -301,12 +304,12 @@ class Text_LanguageDetect
                     if (isset($this->_lang_db[$omit_lang])) {
                         unset($this->_lang_db[$omit_lang]);
                         $deleted++;
-                    } 
+                    }
                 }
             }
 
-        // deleting all except the given languages
         } else {
+            // deleting all except the given languages
             if (!is_array($omit_list)) {
                 $omit_list = array($omit_list);
             }
@@ -327,7 +330,7 @@ class Text_LanguageDetect
         // reset the cluster cache if the number of languages changes
         // this will then have to be recalculated
         if (isset($this->_clusters) && $deleted > 0) {
-            unset($this->_clusters);
+            $this->_clusters = null;
         }
 
         return $deleted;
@@ -339,49 +342,40 @@ class Text_LanguageDetect
      *
      * @access public
      * @return int            the number of languages
-     * @throws PEAR_Error
+     * @throws   Text_LanguageDetect_Exception
      */
     function getLanguageCount()
     {
-        if (!$this->_setup_ok($err)) {
-            return $err;
-        } else {
-            return count($this->_lang_db);
-        }
+        return count($this->_lang_db);
     }
 
     /**
-     * Returns true if a given language exists
+     * Checks if the language with the given name exists in the database
      *
-     * If passed an array of names, will return true only if all exist
+     * @param mixed $lang Language name or array of language names
      *
-     * @access    public
-     * @param     mixed       $lang    language name or array of language names
-     * @return    bool                 true if language model exists
-     * @throws    PEAR_Error
+     * @return bool true if language model exists
      */
-    function languageExists($lang)
+    public function languageExists($lang)
     {
-        if (!$this->_setup_ok($err)) {
-            return $err;
-        } else {
-            // string
-            if (is_string($lang)) {
-                return isset($this->_lang_db[strtolower($lang)]);
-
-            // array
-            } elseif (is_array($lang)) {
-                foreach ($lang as $test_lang) {
-                    if (!isset($this->_lang_db[strtolower($test_lang)])) {
-                        return false;
-                    } 
-                }
-                return true;
+        $lang = $this->_convertFromNameMode($lang);
 
-            // other (error)
-            } else {
-                throw new Exception('Unknown type passed to languageExists()');
+        if (is_string($lang)) {
+            return isset($this->_lang_db[strtolower($lang)]);
+
+        } elseif (is_array($lang)) {
+            foreach ($lang as $test_lang) {
+                if (!isset($this->_lang_db[strtolower($test_lang)])) {
+                    return false;
+                }
             }
+            return true;
+
+        } else {
+            throw new Text_LanguageDetect_Exception(
+                'Unsupported parameter type passed to languageExists()',
+                Text_LanguageDetect_Exception::PARAM_TYPE
+            );
         }
     }
 
@@ -389,25 +383,24 @@ class Text_LanguageDetect
      * Returns the list of detectable languages
      *
      * @access public
-     * @return array        the names of the languages known to this object
-     * @throws PEAR_Error
+     * @return array        the names of the languages known to this object
+     * @throws   Text_LanguageDetect_Exception
      */
     function getLanguages()
     {
-        if (!$this->_setup_ok($err)) {
-            return $err;
-        } else {
-            return array_keys($this->_lang_db);
-        }
+        return $this->_convertToNameMode(
+            array_keys($this->_lang_db)
+        );
     }
 
     /**
      * Make this object behave like Language::Guess
-     * 
-     * @access    public
-     * @param     bool     $setting     false to turn off perl compatibility
+     *
+     * @param bool $setting false to turn off perl compatibility
+     *
+     * @return void
      */
-    function setPerlCompatible($setting = true)
+    public function setPerlCompatible($setting = true)
     {
         if (is_bool($setting)) { // input check
             $this->_perl_compatible = $setting;
@@ -421,6 +414,21 @@ class Text_LanguageDetect
 
     }
 
+    /**
+     * Sets how language names are accepted and returned.
+     *
+     * @param integer $name_mode One of the following modes:
+     *                           0 - language name ("english")
+     *                           2 - 2-letter ISO 639-1 code ("en")
+     *                           3 - 3-letter ISO 639-2 code ("eng")
+     *
+     * @return void
+     */
+    function setNameMode($name_mode)
+    {
+        $this->_name_mode = $name_mode;
+    }
+
     /**
      * Whether to use unicode block ranges in detection
      *
@@ -429,10 +437,11 @@ class Text_LanguageDetect
      * in languages that use latin scripts. In other cases it should speed up
      * detection noticeably.
      *
-     * @access  public
-     * @param   bool    $setting    false to turn off
+     * @param bool $setting false to turn off
+     *
+     * @return void
      */
-    function useUnicodeBlocks($setting = true)
+    public function useUnicodeBlocks($setting = true)
     {
         if (is_bool($setting)) {
             $this->_use_unicode_narrowing = $setting;
@@ -442,15 +451,15 @@ class Text_LanguageDetect
     /**
      * Converts a piece of text into trigrams
      *
-     * Superceded by the Text_LanguageDetect_Parser class 
+     * @param string $text text to convert
      *
-     * @access    private
-     * @param     string    $text    text to convert
-     * @return    array              array of trigram frequencies
+     * @return     array array of trigram frequencies
+     * @access     private
+     * @deprecated Superseded by the Text_LanguageDetect_Parser class
      */
     function _trigram($text)
     {
-        $s = new Text_LanguageDetect_Parser($text, $this->_db_filename, $this->_unicode_db_filename);
+        $s = new Text_LanguageDetect_Parser($text);
         $s->prepareTrigram();
         $s->prepareUnicode(false);
         $s->setPadStart(!$this->_perl_compatible);
@@ -463,11 +472,12 @@ class Text_LanguageDetect
      *
      * Thresholds (cuts off) the list at $this->_threshold
      *
-     * @access    protected
-     * @param     array     $arr     array of trgram 
-     * @return    array              ranks of trigrams
+     * @param array $arr array of trigram
+     *
+     * @return array ranks of trigrams
+     * @access protected
      */
-    function _arr_rank(&$arr)
+    function _arr_rank($arr)
     {
 
         // sorts alphabetically first as a standard way of breaking rank ties
@@ -494,14 +504,17 @@ class Text_LanguageDetect
 
     /**
      * Sorts an array by value breaking ties alphabetically
-     * 
-     * @access   private
-     * @param    array     &$arr     the array to sort
+     *
+     * @param array &$arr the array to sort
+     *
+     * @return void
+     * @access private
      */
     function _bub_sort(&$arr)
     {
         // should do the same as this perl statement:
-        // sort { $trigrams{$b} == $trigrams{$a} ?  $a cmp $b : $trigrams{$b} <=> $trigrams{$a} }
+        // sort { $trigrams{$b} == $trigrams{$a}
+        //   ?  $a cmp $b : $trigrams{$b} <=> $trigrams{$a} }
 
         // needs to sort by both key and value at once
         // using the key to break ties for the value
@@ -528,13 +541,14 @@ class Text_LanguageDetect
     /**
      * Sort function used by bubble sort
      *
-     * Callback function for usort(). 
+     * Callback function for usort().
      *
-     * @access   private
-     * @param    array        first param passed by usort()
-     * @param    array        second param passed by usort()
-     * @return   int          1 if $a is greater, -1 if not
-     * @see      _bub_sort()
+     * @param array $a first param passed by usort()
+     * @param array $b second param passed by usort()
+     *
+     * @return int 1 if $a is greater, -1 if not
+     * @see    _bub_sort()
+     * @access private
      */
     function _sort_func($a, $b)
     {
@@ -542,12 +556,12 @@ class Text_LanguageDetect
         list($a_key, $a_value) = $a;
         list($b_key, $b_value) = $b;
 
-        // if the values are the same, break ties using the key
         if ($a_value == $b_value) {
+            // if the values are the same, break ties using the key
             return strcmp($a_key, $b_key);
 
-        // if not, just sort normally
         } else {
+            // if not, just sort normally
             if ($a_value > $b_value) {
                 return -1;
             } else {
@@ -559,23 +573,24 @@ class Text_LanguageDetect
     }
 
     /**
-     * Calculates a linear rank-order distance statistic between two sets of 
+     * Calculates a linear rank-order distance statistic between two sets of
      * ranked trigrams
      *
-     * Sums the differences in rank for each trigram. If the trigram does not 
+     * Sums the differences in rank for each trigram. If the trigram does not
      * appear in both, consider it a difference of $this->_threshold.
      *
      * This distance measure was proposed by Cavnar & Trenkle (1994). Despite
      * its simplicity it has been shown to be highly accurate for language
      * identification tasks.
      *
-     * @access  private
-     * @param   array    $arr1  the reference set of trigram ranks
-     * @param   array    $arr2  the target set of trigram ranks
-     * @return  int             the sum of the differences between the ranks of
-     *                          the two trigram sets
+     * @param array $arr1 the reference set of trigram ranks
+     * @param array $arr2 the target set of trigram ranks
+     *
+     * @return int the sum of the differences between the ranks of
+     *             the two trigram sets
+     * @access private
      */
-    function _distance(&$arr1, &$arr2)
+    function _distance($arr1, $arr2)
     {
         $sumdist = 0;
 
@@ -598,14 +613,15 @@ class Text_LanguageDetect
 
     /**
      * Normalizes the score returned by _distance()
-     * 
+     *
      * Different if perl compatible or not
      *
-     * @access    private
-     * @param     int    $score          the score from _distance()
-     * @param     int    $base_count     the number of trigrams being considered
-     * @return    float                  the normalized score
-     * @see       _distance()
+     * @param int $score      the score from _distance()
+     * @param int $base_count the number of trigrams being considered
+     *
+     * @return float the normalized score
+     * @see    _distance()
+     * @access private
      */
     function _normalize_score($score, $base_count = null)
     {
@@ -630,29 +646,24 @@ class Text_LanguageDetect
      *
      * If perl compatible, the score is 300-0, 0 being most similar.
      * Otherwise, it's 0-1 with 1 being most similar.
-     * 
+     *
      * The $sample text should be at least a few sentences in length;
      * should be ascii-7 or utf8 encoded, if another and the mbstring extension
      * is present it will try to detect and convert. However, experience has
-     * shown that mb_detect_encoding() *does not work very well* with at least 
+     * shown that mb_detect_encoding() *does not work very well* with at least
      * some types of encoding.
      *
-     * @access  public
-     * @param   string  $sample a sample of text to compare.
-     * @param   int     $limit  if specified, return an array of the most likely
-     *                           $limit languages and their scores.
-     * @return  mixed       sorted array of language scores, blank array if no 
-     *                      useable text was found, or PEAR_Error if error 
-     *                      with the object setup
-     * @see     _distance()
-     * @throws  PEAR_Error
+     * @param string $sample a sample of text to compare.
+     * @param int    $limit  if specified, return an array of the most likely
+     *                       $limit languages and their scores.
+     *
+     * @return mixed sorted array of language scores, blank array if no
+     *               useable text was found
+     * @see    _distance()
+     * @throws Text_LanguageDetect_Exception
      */
-    function detect($sample, $limit = 0)
+    public function detect($sample, $limit = 0)
     {
-        if (!$this->_setup_ok($err)) {
-            return $err;
-        }
-
         // input check
         if (!Text_LanguageDetect_Parser::validateString($sample)) {
             return array();
@@ -660,36 +671,27 @@ class Text_LanguageDetect
 
         // check char encoding
         // (only if mbstring extension is compiled and PHP > 4.0.6)
-        if (function_exists('mb_detect_encoding') 
-            && function_exists('mb_convert_encoding')) {
-
+        if (function_exists('mb_detect_encoding')
+            && function_exists('mb_convert_encoding')
+        ) {
             // mb_detect_encoding isn't very reliable, to say the least
-            // detection should still work with a sufficient sample of ascii characters
+            // detection should still work with a sufficient sample
+            //  of ascii characters
             $encoding = mb_detect_encoding($sample);
 
             // mb_detect_encoding() will return FALSE if detection fails
             // don't attempt conversion if that's the case
-            if ($encoding != 'ASCII' && $encoding != 'UTF-8' && $encoding !== false) {
-            
-                if (function_exists('mb_list_encodings')) {
- 
-                    // verify the encoding exists in mb_list_encodings
-                    if (in_array($encoding, mb_list_encodings())) {
-                        $sample = mb_convert_encoding($sample, 'UTF-8', $encoding);
-                    }
-
-                    // if the previous condition failed:
-                    // somehow we detected an encoding that also we don't support
-
-                } else {
-                    // php 4 doesnt have mb_list_encodings()
-                    // so attempt with error suppression
-                    $sample = @mb_convert_encoding($sample, 'UTF-8', $encoding);
+            if ($encoding != 'ASCII' && $encoding != 'UTF-8'
+                && $encoding !== false
+            ) {
+                // verify the encoding exists in mb_list_encodings
+                if (in_array($encoding, mb_list_encodings())) {
+                    $sample = mb_convert_encoding($sample, 'UTF-8', $encoding);
                 }
             }
         }
 
-        $sample_obj = new Text_LanguageDetect_Parser($sample, $this->_db_filename, $this->_unicode_db_filename);
+        $sample_obj = new Text_LanguageDetect_Parser($sample);
         $sample_obj->prepareTrigram();
         if ($this->_use_unicode_narrowing) {
             $sample_obj->prepareUnicode();
@@ -713,7 +715,10 @@ class Text_LanguageDetect
             if (is_array($blocks)) {
                 $present_blocks = array_keys($blocks);
             } else {
-                throw new Exception('Error during block detection');
+                throw new Text_LanguageDetect_Exception(
+                    'Error during block detection',
+                    Text_LanguageDetect_Exception::BLOCK_DETECTION
+                );
             }
 
             $possible_langs = array();
@@ -731,30 +736,30 @@ class Text_LanguageDetect
             }
 
             // could also try an intersect operation rather than a union
-            // in other words, choose languages whose trigrams contain 
+            // in other words, choose languages whose trigrams contain
             // ALL of the unicode blocks found in this sample
             // would improve speed but would be completely thrown off by an
             // unexpected character, like an umlaut appearing in english text
 
             $possible_langs = array_intersect(
-                        array_keys($this->_lang_db),
-                        array_unique($possible_langs)
+                array_keys($this->_lang_db),
+                array_unique($possible_langs)
             );
 
-            // needs to intersect it with the keys of _lang_db in case 
+            // needs to intersect it with the keys of _lang_db in case
             // languages have been omitted
 
-        // or just try 'em all
         } else {
+            // or just try 'em all
             $possible_langs = array_keys($this->_lang_db);
         }
 
 
         foreach ($possible_langs as $lang) {
-            $scores[$lang] =
-                $this->_normalize_score(
-                        $this->_distance($this->_lang_db[$lang], $trigram_freqs),
-                        $trigram_count);
+            $scores[$lang] = $this->_normalize_score(
+                $this->_distance($this->_lang_db[$lang], $trigram_freqs),
+                $trigram_count
+            );
         }
 
         unset($sample_obj);
@@ -772,7 +777,6 @@ class Text_LanguageDetect
             $limited_scores = array();
 
             $i = 0;
-
             foreach ($scores as $key => $value) {
                 if ($i++ >= $limit) {
                     break;
@@ -781,9 +785,9 @@ class Text_LanguageDetect
                 $limited_scores[$key] = $value;
             }
 
-            return $limited_scores;
+            return $this->_convertToNameMode($limited_scores, true);
         } else {
-            return $scores;
+            return $this->_convertToNameMode($scores, true);
         }
     }
 
@@ -791,35 +795,33 @@ class Text_LanguageDetect
      * Returns only the most similar language to the text sample
      *
      * Calls $this->detect() and returns only the top result
-     * 
-     * @access   public
-     * @param    string    $sample    text to detect the language of
-     * @return   string               the name of the most likely language
-     *                                or null if no language is similar
-     * @see      detect()
-     * @throws   PEAR_Error
+     *
+     * @param string $sample text to detect the language of
+     *
+     * @return string the name of the most likely language
+     *                or null if no language is similar
+     * @see    detect()
+     * @throws Text_LanguageDetect_Exception
      */
-    function detectSimple($sample)
+    public function detectSimple($sample)
     {
         $scores = $this->detect($sample, 1);
 
         // if top language has the maximum possible score,
         // then the top score will have been picked at random
-        if (    !is_array($scores) 
-                || empty($scores) 
-                || current($scores) == $this->_max_score) {
-
+        if (!is_array($scores) || empty($scores)
+            || current($scores) == $this->_max_score
+        ) {
             return null;
-
         } else {
-            return ucfirst(key($scores));
+            return key($scores);
         }
     }
 
     /**
      * Returns an array containing the most similar language and a confidence
      * rating
-     * 
+     *
      * Confidence is a simple measure calculated from the similarity score
      * minus the similarity score from the next most similar language
      * divided by the highest possible score. Languages that have closely
@@ -827,46 +829,43 @@ class Text_LanguageDetect
      * confidence scores.
      *
      * The similarity score answers the question "How likely is the text the
-     * returned language regardless of the other languages considered?" The 
+     * returned language regardless of the other languages considered?" The
      * confidence score is one way of answering the question "how likely is the
      * text the detected language relative to the rest of the language model
      * set?"
      *
      * To see how similar languages are a priori, see languageSimilarity()
-     * 
-     * @access   public
-     * @param    string    $sample    text for which language will be detected
-     * @return   array     most similar language, score and confidence rating
-     *                     or null if no language is similar
-     * @see      detect()
-     * @throws   PEAR_Error
+     *
+     * @param string $sample text for which language will be detected
+     *
+     * @return array most similar language, score and confidence rating
+     *               or null if no language is similar
+     * @see    detect()
+     * @throws Text_LanguageDetect_Exception
      */
-    function detectConfidence($sample)
+    public function detectConfidence($sample)
     {
         $scores = $this->detect($sample, 2);
 
-        // if most similar language has the max score, it 
+        // if most similar language has the max score, it
         // will have been picked at random
-        if (    !is_array($scores) 
-                || empty($scores) 
-                || current($scores) == $this->_max_score) {
-
+        if (!is_array($scores) || empty($scores)
+            || current($scores) == $this->_max_score
+        ) {
             return null;
         }
 
-        $arr['language'] = ucfirst(key($scores));
+        $arr['language'] = key($scores);
         $arr['similarity'] = current($scores);
         if (next($scores) !== false) { // if false then no next element
             // the goal is to return a higher value if the distance between
             // the similarity of the first score and the second score is high
 
             if ($this->_perl_compatible) {
-
-                $arr['confidence'] =
-                    (current($scores) - $arr['similarity']) / $this->_max_score;
+                $arr['confidence'] = (current($scores) - $arr['similarity'])
+                    / $this->_max_score;
 
             } else {
-
                 $arr['confidence'] = $arr['similarity'] - current($scores);
 
             }
@@ -882,32 +881,26 @@ class Text_LanguageDetect
      * Returns the distribution of unicode blocks in a given utf8 string
      *
      * For the block name of a single char, use unicodeBlockName()
-     * 
-     * @access public
-     * @param string $str input string. Must be ascii or utf8
-     * @param bool $skip_symbols if true, skip ascii digits, symbols and 
-     *                           non-printing characters. Includes spaces,
-     *                           newlines and common punctutation characters.
+     *
+     * @param string $str          input string. Must be ascii or utf8
+     * @param bool   $skip_symbols if true, skip ascii digits, symbols and
+     *                             non-printing characters. Includes spaces,
+     *                             newlines and common punctuation characters.
+     *
      * @return array
-     * @throws PEAR_Error
+     * @throws Text_LanguageDetect_Exception
      */
-    function detectUnicodeBlocks($str, $skip_symbols)
+    public function detectUnicodeBlocks($str, $skip_symbols)
     {
-        // input check
-        if (!is_bool($skip_symbols)) {
-            throw new Exception('Second parameter must be boolean');
-        } 
-
-        if (!is_string($str)) {
-            throw new Exception('First parameter was not a string');
-        }
+        $skip_symbols = (bool)$skip_symbols;
+        $str          = (string)$str;
 
-        $sample_obj = new Text_LanguageDetect_Parser($str, $this->_db_filename, $this->_unicode_db_filename);
+        $sample_obj = new Text_LanguageDetect_Parser($str);
         $sample_obj->prepareUnicode();
         $sample_obj->prepareTrigram(false);
         $sample_obj->setUnicodeSkipSymbols($skip_symbols);
         $sample_obj->analyze();
-        $blocks =& $sample_obj->getUnicodeBlocks();
+        $blocks = $sample_obj->getUnicodeBlocks();
         unset($sample_obj);
         return $blocks;
     }
@@ -915,38 +908,37 @@ class Text_LanguageDetect
     /**
      * Returns the block name for a given unicode value
      *
-     * If passed a string, will assume it is being passed a UTF8-formatted 
+     * If passed a string, will assume it is being passed a UTF8-formatted
      * character and will automatically convert. Otherwise it will assume it
      * is being passed a numeric unicode value.
      *
      * Make sure input is of the correct type!
      *
-     * @access public
      * @param mixed $unicode unicode value or utf8 char
+     *
      * @return mixed the block name string or false if not found
-     * @throws PEAR_Error
+     * @throws Text_LanguageDetect_Exception
      */
-    function unicodeBlockName($unicode) {
+    public function unicodeBlockName($unicode)
+    {
         if (is_string($unicode)) {
             // assume it is being passed a utf8 char, so convert it
-
-            // input check
-            if ($this->utf8strlen($unicode) > 1) {
-                throw new Exception('Pass this function only a single char');
+            if (self::utf8strlen($unicode) > 1) {
+                throw new Text_LanguageDetect_Exception(
+                    'Pass a single char only to this method',
+                    Text_LanguageDetect_Exception::PARAM_TYPE
+                );
             }
-
             $unicode = $this->_utf8char2unicode($unicode);
 
-            if ($unicode == -1) {
-                throw new Exception('Malformatted char');
-            }
-
-        // input check
         } elseif (!is_int($unicode)) {
-            throw new Exception('Input must be of type string or int.');
+            throw new Text_LanguageDetect_Exception(
+                'Input must be of type string or int.',
+                Text_LanguageDetect_Exception::PARAM_TYPE
+            );
         }
 
-        $blocks =& $this->_read_unicode_block_db();
+        $blocks = $this->_read_unicode_block_db();
 
         $result = $this->_unicode_block_name($unicode, $blocks);
 
@@ -964,14 +956,17 @@ class Text_LanguageDetect
      * the public interface for this function, which does input checks which
      * this function omits for speed.
      *
-     * @access  protected
-     * @param   int     $unicode the unicode value
-     * @param   array   &$blocks the block database
-     * @param   int     $block_count the number of defined blocks in the database
-     * @see     unicodeBlockName()
+     * @param int   $unicode     the unicode value
+     * @param array $blocks      the block database
+     * @param int   $block_count the number of defined blocks in the database
+     *
+     * @return mixed Matching entry of $blocks, -1 if no block was found
+     * @see    unicodeBlockName()
+     * @access protected
      */
-    function _unicode_block_name($unicode, &$blocks, $block_count = -1) {
-        // for a reference, see 
+    function _unicode_block_name($unicode, $blocks, $block_count = -1)
+    {
+        // for a reference, see
         // http://www.unicode.org/Public/UNIDATA/Blocks.txt
 
         // assume that ascii characters are the most common
@@ -994,35 +989,36 @@ class Text_LanguageDetect
         while ($low <= $high) {
             $mid = floor(($low + $high) / 2);
 
-            // if it's lower than the lower bound
             if ($unicode < $blocks[$mid][0]) {
+                // if it's lower than the lower bound
                 $high = $mid - 1;
 
-            // if it's higher than the upper bound
             } elseif ($unicode > $blocks[$mid][1]) {
+                // if it's higher than the upper bound
                 $low = $mid + 1;
 
-            // found it
             } else {
+                // found it
                 return $blocks[$mid];
             }
         }
 
-        // failed to find the block 
+        // failed to find the block
         return -1;
 
-        // todo: differentiate when it's out of range or when it falls 
+        // todo: differentiate when it's out of range or when it falls
         //       into an unassigned range?
     }
 
     /**
      * Brings up the unicode block database
      *
-     * @access protected
      * @return array the database of unicode block definitions
-     * @throws PEAR_Error
+     * @throws Text_LanguageDetect_Exception
+     * @access protected
      */
-    function &_read_unicode_block_db() {
+    function _read_unicode_block_db()
+    {
         // since the unicode definitions are always going to be the same,
         // might as well share the memory for the db with all other instances
         // of this class
@@ -1037,29 +1033,27 @@ class Text_LanguageDetect
 
     /**
      * Calculate the similarities between the language models
-     * 
+     *
      * Use this function to see how similar languages are to each other.
      *
      * If passed 2 language names, will return just those languages compared.
      * If passed 1 language name, will return that language compared to
      * all others.
-     * If passed none, will return an array of every language model compared 
+     * If passed none, will return an array of every language model compared
      * to every other one.
      *
-     * @access  public
-     * @param   string   $lang1   the name of the first language to be compared
-     * @param   string   $lang2   the name of the second language to be compared
-     * @return  array    scores of every language compared
-     *                   or the score of just the provided languages
-     *                   or null if one of the supplied languages does not exist
-     * @throws  PEAR_Error
+     * @param string $lang1 the name of the first language to be compared
+     * @param string $lang2 the name of the second language to be compared
+     *
+     * @return array scores of every language compared
+     *               or the score of just the provided languages
+     *               or null if one of the supplied languages does not exist
+     * @throws Text_LanguageDetect_Exception
      */
-    function languageSimilarity($lang1 = null, $lang2 = null)
+    public function languageSimilarity($lang1 = null, $lang2 = null)
     {
-        if (!$this->_setup_ok($err)) {
-            return $err;
-        }
-
+        $lang1 = $this->_convertFromNameMode($lang1);
+        $lang2 = $this->_convertFromNameMode($lang2);
         if ($lang1 != null) {
             $lang1 = strtolower($lang1);
 
@@ -1069,12 +1063,8 @@ class Text_LanguageDetect
             }
 
             if ($lang2 != null) {
-
-                // can't only set the second param
-                if ($lang1 == null) {
-                    return null;
-                // check if language model exists
-                } elseif (!isset($this->_lang_db[$lang2])) {
+                if (!isset($this->_lang_db[$lang2])) {
+                    // check if language model exists
                     return null;
                 }
 
@@ -1088,14 +1078,15 @@ class Text_LanguageDetect
                     )
                 );
 
-
-            // compare just $lang1 to all languages
             } else {
+                // compare just $lang1 to all languages
                 $return_arr = array();
                 foreach ($this->_lang_db as $key => $value) {
-                    if ($key != $lang1) { // don't compare a language to itself
+                    if ($key != $lang1) {
+                        // don't compare a language to itself
                         $return_arr[$key] = $this->_normalize_score(
-                            $this->_distance($this->_lang_db[$lang1], $value));
+                            $this->_distance($this->_lang_db[$lang1], $value)
+                        );
                     }
                 }
                 asort($return_arr);
@@ -1104,30 +1095,27 @@ class Text_LanguageDetect
             }
 
 
-        // compare all languages to each other
         } else {
+            // compare all languages to each other
             $return_arr = array();
             foreach (array_keys($this->_lang_db) as $lang1) {
                 foreach (array_keys($this->_lang_db) as $lang2) {
-
                     // skip comparing languages to themselves
-                    if ($lang1 != $lang2) { 
-                    
-                        // don't re-calculate what's already been done
-                        if (isset($return_arr[$lang2][$lang1])) {
+                    if ($lang1 != $lang2) {
 
-                            $return_arr[$lang1][$lang2] =
-                                $return_arr[$lang2][$lang1];
+                        if (isset($return_arr[$lang2][$lang1])) {
+                            // don't re-calculate what's already been done
+                            $return_arr[$lang1][$lang2]
+                                = $return_arr[$lang2][$lang1];
 
-                        // calculate
                         } else {
-
-                            $return_arr[$lang1][$lang2] = 
-                                $this->_normalize_score(
-                                        $this->_distance(
-                                            $this->_lang_db[$lang1],
-                                            $this->_lang_db[$lang2]
-                                        )
+                            // calculate
+                            $return_arr[$lang1][$lang2]
+                                = $this->_normalize_score(
+                                    $this->_distance(
+                                        $this->_lang_db[$lang1],
+                                        $this->_lang_db[$lang2]
+                                    )
                                 );
 
                         }
@@ -1150,20 +1138,14 @@ class Text_LanguageDetect
      *
      * @access      public
      * @return      array language cluster data
-     * @throws      PEAR_Error
+     * @throws      Text_LanguageDetect_Exception
      * @see         languageSimilarity()
-     * @deprecated  this function will eventually be removed and placed into 
+     * @deprecated  this function will eventually be removed and placed into
      *              the model generation class
      */
     function clusterLanguages()
     {
         // todo: set the maximum number of clusters
-
-        // setup check
-        if (!$this->_setup_ok($err)) {
-            return $err;
-        }
-
         // return cached result, if any
         if (isset($this->_clusters)) {
             return $this->_clusters;
@@ -1177,7 +1159,10 @@ class Text_LanguageDetect
 
         foreach ($langs as $lang) {
             if (!isset($this->_lang_db[$lang])) {
-                throw new Exception("missing $lang!\n");
+                throw new Text_LanguageDetect_Exception(
+                    "missing $lang!",
+                    Text_LanguageDetect_Exception::UNKNOWN_LANGUAGE
+                );
             }
         }
 
@@ -1186,7 +1171,9 @@ class Text_LanguageDetect
             $langs[$lang1] = $lang1;
             unset($langs[$old_key]);
         }
-        
+
+        $result_data = $really_map = array();
+
         $i = 0;
         while (count($langs) > 2 && $i++ < 200) {
             $highest_score = -1;
@@ -1194,18 +1181,22 @@ class Text_LanguageDetect
             $highest_key2 = '';
             foreach ($langs as $lang1) {
                 foreach ($langs as $lang2) {
-                    if (    $lang1 != $lang2 
-                            && $arr[$lang1][$lang2] > $highest_score) {
+                    if ($lang1 != $lang2
+                        && $arr[$lang1][$lang2] > $highest_score
+                    ) {
                         $highest_score = $arr[$lang1][$lang2];
                         $highest_key1 = $lang1;
                         $highest_key2 = $lang2;
                     }
                 }
             }
-            
+
             if (!$highest_key1) {
                 // should not ever happen
-                throw new Exception("no highest key? (step: $i)");
+                throw new Text_LanguageDetect_Exception(
+                    "no highest key? (step: $i)",
+                    Text_LanguageDetect_Exception::NO_HIGHEST_KEY
+                );
             }
 
             if ($highest_score == 0) {
@@ -1217,7 +1208,7 @@ class Text_LanguageDetect
             $sum1 = array_sum($arr[$highest_key1]);
             $sum2 = array_sum($arr[$highest_key2]);
 
-            // use the score for the one that is most similar to the rest of 
+            // use the score for the one that is most similar to the rest of
             // the field as the score for the group
             // todo: could try averaging or "centroid" method instead
             // seems like that might make more sense
@@ -1248,7 +1239,7 @@ class Text_LanguageDetect
             $really_lang = $replaceme;
             while (isset($really_map[$really_lang])) {
                 $really_lang = $really_map[$really_lang];
-            } 
+            }
             $really_map[$newkey] = $really_lang;
 
 
@@ -1259,8 +1250,8 @@ class Text_LanguageDetect
                         $arr[$key1][$newkey] = $arr[$key1][$key2];
                         unset($arr[$key1][$key2]);
                         // replacing $arr[$key1][$key2] with $arr[$key1][$newkey]
-                    } 
-                    
+                    }
+
                     if ($key1 == $replaceme) {
                         $arr[$newkey][$key2] = $arr[$key1][$key2];
                         unset($arr[$key1][$key2]);
@@ -1273,7 +1264,7 @@ class Text_LanguageDetect
                     }
                 }
             }
-                        
+
 
             unset($langs[$highest_key1]);
             unset($langs[$highest_key2]);
@@ -1293,7 +1284,7 @@ class Text_LanguageDetect
         }
 
         $return_val = array(
-                'open_forks' => $langs, 
+                'open_forks' => $langs,
                         // the top level of clusters
                         // clusters that are mutually exclusive
                         // or specified by a specific maximum
@@ -1323,11 +1314,11 @@ class Text_LanguageDetect
      * use, and it may disappear or its functionality may change in future
      * releases without notice.
      *
-     * This compares the sample text to top the top level of clusters. If the 
+     * This compares the sample text to the top level of clusters. If the
      * sample is similar to the cluster it will drop down and compare it to the
      * languages in the cluster, and so on until it hits a leaf node.
      *
-     * this should find the language in considerably fewer compares 
+     * This should find the language in considerably fewer comparisons
      * (the equivalent of a binary search), however clusterLanguages() is costly
      * and the loss of accuracy from this technique is significant.
      *
@@ -1337,15 +1328,14 @@ class Text_LanguageDetect
      * was very large, however in such cases some method of Bayesian inference
      * might be more helpful.
      *
-     * @see     clusterLanguages()
-     * @access  public
-     * @param   string $str input string
-     * @return  array language scores (only those compared)
-     * @throws  PEAR_Error
+     * @param string $str input string
+     *
+     * @return array language scores (only those compared)
+     * @throws Text_LanguageDetect_Exception
+     * @see    clusterLanguages()
      */
-    function clusteredSearch($str)
+    public function clusteredSearch($str)
     {
-
         // input check
         if (!Text_LanguageDetect_Parser::validateString($str)) {
             return array();
@@ -1359,7 +1349,7 @@ class Text_LanguageDetect
         $dendogram_data  = $result['fork_data'];
         $dendogram_alias = $result['name_map'];
 
-        $sample_obj = new Text_LanguageDetect_Parser($str, $this->_db_filename, $this->_unicode_db_filename);
+        $sample_obj = new Text_LanguageDetect_Parser($str);
         $sample_obj->prepareTrigram();
         $sample_obj->setPadStart(!$this->_perl_compatible);
         $sample_obj->analyze();
@@ -1372,7 +1362,7 @@ class Text_LanguageDetect
         }
 
         $i = 0; // counts the number of steps
-        
+
         foreach ($dendogram_start as $lang) {
             if (isset($dendogram_alias[$lang])) {
                 $lang_key = $dendogram_alias[$lang];
@@ -1382,7 +1372,8 @@ class Text_LanguageDetect
 
             $scores[$lang] = $this->_normalize_score(
                 $this->_distance($this->_lang_db[$lang_key], $sample_result),
-                $sample_count);
+                $sample_count
+            );
 
             $i++;
         }
@@ -1411,7 +1402,8 @@ class Text_LanguageDetect
 
                 $scores[$lang] = $this->_normalize_score(
                     $this->_distance($this->_lang_db[$lang_key], $sample_result),
-                    $sample_count);
+                    $sample_count
+                );
 
                 //todo: does not need to do same comparison again
             }
@@ -1428,8 +1420,8 @@ class Text_LanguageDetect
 
             $diff = $scores[$cur_key] - $scores[$loser_key];
 
-            // $cur_key ({$dendogram_alias[$cur_key]}) wins 
-            // over $loser_key ({$dendogram_alias[$loser_key]}) 
+            // $cur_key ({$dendogram_alias[$cur_key]}) wins
+            // over $loser_key ({$dendogram_alias[$loser_key]})
             // with a difference of $diff
         }
 
@@ -1439,9 +1431,9 @@ class Text_LanguageDetect
         // which paths the algorithm decided to take along the tree
 
         // but sometimes the last item is only the second highest
-        if (   ($this->_perl_compatible  && (end($scores) > prev($scores)))
-            || (!$this->_perl_compatible && (end($scores) < prev($scores)))) {
-
+        if (($this->_perl_compatible  && (end($scores) > prev($scores)))
+            || (!$this->_perl_compatible && (end($scores) < prev($scores)))
+        ) {
             $real_last_score = current($scores);
             $real_last_key = key($scores);
 
@@ -1449,7 +1441,7 @@ class Text_LanguageDetect
             unset($scores[$real_last_key]);
             $scores[$real_last_key] = $real_last_score;
         }
-            
+
 
         if (!$this->_perl_compatible) {
             $scores = array_reverse($scores, true);
@@ -1464,12 +1456,11 @@ class Text_LanguageDetect
      *
      * Returns the numbers of characters (not bytes) in a utf8 string
      *
-     * @static
-     * @access  public
-     * @param   string $str string to get the length of
-     * @return  int         number of chars
+     * @param string $str string to get the length of
+     *
+     * @return int number of chars
      */
-    function utf8strlen($str)
+    public static function utf8strlen($str)
     {
         // utf8_decode() will convert unknown chars to '?', which is actually
         // ideal for counting.
@@ -1482,53 +1473,45 @@ class Text_LanguageDetect
     /**
      * Returns the unicode value of a utf8 char
      *
-     * @access  protected
-     * @param   string $char a utf8 (possibly multi-byte) char
-     * @return  int          unicode value or -1 if malformatted
+     * @param string $char a utf8 (possibly multi-byte) char
+     *
+     * @return int unicode value
+     * @access protected
+     * @link   http://en.wikipedia.org/wiki/UTF-8
      */
-    function _utf8char2unicode($char) {
-
+    function _utf8char2unicode($char)
+    {
         // strlen() here will actually get the binary length of a single char
         switch (strlen($char)) {
-
-            // for a reference, see http://en.wikipedia.org/wiki/UTF-8
-
-            case 1:
-                // normal ASCII-7 byte
-                // 0xxxxxxx -->  0xxxxxxx
-                return ord($char{0});
-
-            case 2:
-                // 2 byte unicode
-                // 110zzzzx 10xxxxxx --> 00000zzz zxxxxxxx
-                $z = (ord($char{0}) & 0x000001F) << 6;
-                $x = (ord($char{1}) & 0x0000003F);
-
-                return ($z | $x);
-
-            case 3:
-                // 3 byte unicode
-                // 1110zzzz 10zxxxxx 10xxxxxx --> zzzzzxxx xxxxxxxx 
-                $z =  (ord($char{0}) & 0x0000000F) << 12;
-                $x1 = (ord($char{1}) & 0x0000003F) << 6;
-                $x2 = (ord($char{2}) & 0x0000003F);
-
-                return ($z | $x1 | $x2);
-
-            case 4:
-                // 4 byte unicode
-                // 11110zzz 10zzxxxx 10xxxxxx 10xxxxxx -->
-                // 000zzzzz xxxxxxxx xxxxxxxx
-                $z1 = (ord($char{0}) & 0x00000007) << 18;
-                $z2 = (ord($char{1}) & 0x0000003F) << 12;
-                $x1 = (ord($char{2}) & 0x0000003F) << 6;
-                $x2 = (ord($char{3}) & 0x0000003F);
-
-                return ($z1 | $z2 | $x1 | $x2);
-
-            default:
-                // error: malformatted char?
-                return -1;
+        case 1:
+            // normal ASCII-7 byte
+            // 0xxxxxxx -->  0xxxxxxx
+            return ord($char{0});
+
+        case 2:
+            // 2 byte unicode
+            // 110zzzzx 10xxxxxx --> 00000zzz zxxxxxxx
+            $z = (ord($char{0}) & 0x000001F) << 6;
+            $x = (ord($char{1}) & 0x0000003F);
+            return ($z | $x);
+
+        case 3:
+            // 3 byte unicode
+            // 1110zzzz 10zxxxxx 10xxxxxx --> zzzzzxxx xxxxxxxx
+            $z =  (ord($char{0}) & 0x0000000F) << 12;
+            $x1 = (ord($char{1}) & 0x0000003F) << 6;
+            $x2 = (ord($char{2}) & 0x0000003F);
+            return ($z | $x1 | $x2);
+
+        case 4:
+            // 4 byte unicode
+            // 11110zzz 10zzxxxx 10xxxxxx 10xxxxxx -->
+            // 000zzzzz xxxxxxxx xxxxxxxx
+            $z1 = (ord($char{0}) & 0x00000007) << 18;
+            $z2 = (ord($char{1}) & 0x0000003F) << 12;
+            $x1 = (ord($char{2}) & 0x0000003F) << 6;
+            $x2 = (ord($char{3}) & 0x0000003F);
+            return ($z1 | $z2 | $x1 | $x2);
         }
     }
 
@@ -1536,18 +1519,18 @@ class Text_LanguageDetect
      * utf8-safe fast character iterator
      *
      * Will get the next character starting from $counter, which will then be
-     * incremented. If a multi-byte char the bytes will be concatenated and 
+     * incremented. If a multi-byte char the bytes will be concatenated and
      * $counter will be incremeted by the number of bytes in the char.
      *
-     * @access  private
-     * @param   string  &$str        the string being iterated over
-     * @param   int     &$counter    the iterator, will increment by reference
-     * @param   bool    $special_convert whether to do special conversions
-     * @return  char    the next (possibly multi-byte) char from $counter
+     * @param string $str             the string being iterated over
+     * @param int    &$counter        the iterator, will increment by reference
+     * @param bool   $special_convert whether to do special conversions
+     *
+     * @return char the next (possibly multi-byte) char from $counter
+     * @access private
      */
-    function _next_char(&$str, &$counter, $special_convert = false)
+    static function _next_char($str, &$counter, $special_convert = false)
     {
-
         $char = $str{$counter++};
         $ord = ord($char);
 
@@ -1556,7 +1539,6 @@ class Text_LanguageDetect
 
         // normal ascii one byte char
         if ($ord <= 127) {
-
             // special conversions needed for this package
             // (that only apply to regular ascii characters)
             // lower case, and convert all non-alphanumeric characters
@@ -1571,8 +1553,8 @@ class Text_LanguageDetect
 
             return $char;
 
-        // multi-byte chars
         } elseif ($ord >> 5 == 6) { // two-byte char
+            // multi-byte chars
             $nextchar = $str{$counter++}; // get next byte
 
             // lower-casing of non-ascii characters is still incomplete
@@ -1582,27 +1564,27 @@ class Text_LanguageDetect
                 if ($ord == 195) {
                     $nextord = ord($nextchar);
                     $nextord_adj = $nextord + 64;
-                    // for a reference, see 
+                    // for a reference, see
                     // http://www.ramsch.org/martin/uni/fmi-hp/iso8859-1.html
 
                     // &Agrave; - &THORN; but not &times;
-                    if (    $nextord_adj >= 192
-                            && $nextord_adj <= 222 
-                            && $nextord_adj != 215) {
-
-                        $nextchar = chr($nextord + 32); 
+                    if ($nextord_adj >= 192
+                        && $nextord_adj <= 222
+                        && $nextord_adj != 215
+                    ) {
+                        $nextchar = chr($nextord + 32);
                     }
 
-                // lower case cyrillic alphabet
                 } elseif ($ord == 208) {
+                    // lower case cyrillic alphabet
                     $nextord = ord($nextchar);
                     // if A - Pe
                     if ($nextord >= 144 && $nextord <= 159) {
                         // lower case
                         $nextchar = chr($nextord + 32);
 
-                    // if Er - Ya
                     } elseif ($nextord >= 160 && $nextord <= 175) {
+                        // if Er - Ya
                         // lower case
                         $char = chr(209); // == $ord++
                         $nextchar = chr($nextord - 32);
@@ -1611,12 +1593,11 @@ class Text_LanguageDetect
             }
 
             // tag on next byte
-            return $char . $nextchar; 
-
+            return $char . $nextchar;
         } elseif ($ord >> 4  == 14) { // three-byte char
-            
+
             // tag on next 2 bytes
-            return $char . $str{$counter++} . $str{$counter++}; 
+            return $char . $str{$counter++} . $str{$counter++};
 
         } elseif ($ord >> 3 == 30) { // four-byte char
 
@@ -1628,8 +1609,85 @@ class Text_LanguageDetect
         }
     }
 
-}
+    /**
+     * Converts a language input parameter from the configured mode
+     * to the language name that is used internally.
+     *
+     * Works for strings and arrays.
+     *
+     * @param string|array $lang       A language description ("english"/"en"/"eng")
+     * @param boolean      $convertKey If $lang is an array, true converts the
+     *                                 keys instead of the values to the name.
+     *
+     * @return string|array Language name
+     */
+    function _convertFromNameMode($lang, $convertKey = false)
+    {
+        if ($this->_name_mode == 0) {
+            return $lang;
+        }
+
+        if ($this->_name_mode == 2) {
+            $method = 'code2ToName';
+        } else {
+            $method = 'code3ToName';
+        }
+
+        if (is_string($lang)) {
+            return (string)Text_LanguageDetect_ISO639::$method($lang);
+        }
+
+        $newlang = array();
+        foreach ($lang as $key => $val) {
+            if ($convertKey) {
+                $newkey = (string)Text_LanguageDetect_ISO639::$method($key);
+                $newlang[$newkey] = $val;
+            } else {
+                $newlang[$key] = (string)Text_LanguageDetect_ISO639::$method($val);
+            }
+        }
+        return $newlang;
+    }
 
-/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
+    /**
+     * Converts a language output parameter from the language name that is
+     * used internally to the configured mode.
+     *
+     * Works for strings and arrays.
+     *
+     * @param string|array $lang       Internal language name(s), e.g. "english"
+     * @param boolean      $convertKey If $lang is an array, true converts the
+     *                                 keys instead of the values.
+     *
+     * @return string|array Language name or ISO 639 code, per the name mode
+     */
+    function _convertToNameMode($lang, $convertKey = false)
+    {
+        if ($this->_name_mode == 0) {
+            return $lang;
+        }
+
+        if ($this->_name_mode == 2) {
+            $method = 'nameToCode2';
+        } else {
+            $method = 'nameToCode3';
+        }
+
+        if (is_string($lang)) {
+            return Text_LanguageDetect_ISO639::$method($lang);
+        }
+
+        $newlang = array();
+        foreach ($lang as $key => $val) {
+            if ($convertKey) {
+                $newkey = Text_LanguageDetect_ISO639::$method($key);
+                $newlang[$newkey] = $val;
+            } else {
+                $newlang[$key] = Text_LanguageDetect_ISO639::$method($val);
+            }
+        }
+        return $newlang;
+    }
+}
 
-?>
+/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
\ No newline at end of file
diff --git a/inc/3rdparty/libraries/readability/Readability.php b/inc/3rdparty/libraries/readability/Readability.php
index 2e8991cc..d0f09d74 100644
--- a/inc/3rdparty/libraries/readability/Readability.php
+++ b/inc/3rdparty/libraries/readability/Readability.php
@@ -1,1138 +1,1138 @@
-<?php
-/** 
-* Arc90's Readability ported to PHP for FiveFilters.org
-* Based on readability.js version 1.7.1 (without multi-page support)
-* Updated to allow HTML5 parsing with html5lib
-* Updated with lightClean mode to preserve more images and youtube/vimeo/viddler embeds
-* ------------------------------------------------------
-* Original URL: http://lab.arc90.com/experiments/readability/js/readability.js
-* Arc90's project URL: http://lab.arc90.com/experiments/readability/
-* JS Source: http://code.google.com/p/arc90labs-readability
-* Ported by: Keyvan Minoukadeh, http://www.keyvan.net
-* More information: http://fivefilters.org/content-only/
-* License: Apache License, Version 2.0
-* Requires: PHP5
-* Date: 2012-09-19
-* 
-* Differences between the PHP port and the original
-* ------------------------------------------------------
-* Arc90's Readability is designed to run in the browser. It works on the DOM 
-* tree (the parsed HTML) after the page's CSS styles have been applied and 
-* Javascript code executed. This PHP port does not run inside a browser. 
-* We use PHP's ability to parse HTML to build our DOM tree, but we cannot 
-* rely on CSS or Javascript support. As such, the results will not always 
-* match Arc90's Readability. (For example, if a web page contains CSS style 
-* rules or Javascript code which hide certain HTML elements from display, 
-* Arc90's Readability will dismiss those from consideration but our PHP port, 
-* unable to understand CSS or Javascript, will not know any better.)
-* 
-* Another significant difference is that the aim of Arc90's Readability is 
-* to re-present the main content block of a given web page so users can 
-* read it more easily in their browsers. Correct identification, clean up, 
-* and separation of the content block is only a part of this process. 
-* This PHP port is only concerned with this part, it does not include code 
-* that relates to presentation in the browser - Arc90 already do 
-* that extremely well, and for PDF output there's FiveFilters.org's 
-* PDF Newspaper: http://fivefilters.org/pdf-newspaper/.
-* 
-* Finally, this class contains methods that might be useful for developers 
-* working on HTML document fragments. So without deviating too much from 
-* the original code (which I don't want to do because it makes debugging 
-* and updating more difficult), I've tried to make it a little more 
-* developer friendly. You should be able to use the methods here on 
-* existing DOMElement objects without passing an entire HTML document to 
-* be parsed.
-*/
-
-// This class allows us to do JavaScript like assignements to innerHTML
-require_once(dirname(__FILE__).'/JSLikeHTMLElement.php');
-
-// Alternative usage (for testing only!)
-// uncomment the lines below and call Readability.php in your browser 
-// passing it the URL of the page you'd like content from, e.g.:
-// Readability.php?url=http://medialens.org/alerts/09/090615_the_guardian_climate.php
-
-/*
-if (!isset($_GET['url']) || $_GET['url'] == '') {
-	die('Please pass a URL to the script. E.g. Readability.php?url=bla.com/story.html');
-}
-$url = $_GET['url'];
-if (!preg_match('!^https?://!i', $url)) $url = 'http://'.$url;
-$html = file_get_contents($url);
-$r = new Readability($html, $url);
-$r->init();
-echo $r->articleContent->innerHTML;
-*/
-
-class Readability
-{
-	public $version = '1.7.1-without-multi-page';
-	public $convertLinksToFootnotes = false;
-	public $revertForcedParagraphElements = true;
-	public $articleTitle;
-	public $articleContent;
-	public $dom;
-	public $url = null; // optional - URL where HTML was retrieved
-	public $debug = false;
-	public $lightClean = true; // preserves more content (experimental) added 2012-09-19
-	protected $body = null; // 
-	protected $bodyCache = null; // Cache the body HTML in case we need to re-use it later
-	protected $flags = 7; // 1 | 2 | 4;   // Start with all flags set.
-	protected $success = false; // indicates whether we were able to extract or not
-	
-	/**
-	* All of the regular expressions in use within readability.
-	* Defined up here so we don't instantiate them repeatedly in loops.
-	**/
-	public $regexps = array(
-		'unlikelyCandidates' => '/combx|comment|community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup/i',
-		'okMaybeItsACandidate' => '/and|article|body|column|main|shadow/i',
-		'positive' => '/article|body|content|entry|hentry|main|page|attachment|pagination|post|text|blog|story/i',
-		'negative' => '/combx|comment|com-|contact|foot|footer|_nav|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget/i',
-		'divToPElements' => '/<(a|blockquote|dl|div|img|ol|p|pre|table|ul)/i',
-		'replaceBrs' => '/(<br[^>]*>[ \n\r\t]*){2,}/i',
-		'replaceFonts' => '/<(\/?)font[^>]*>/i',
-		// 'trimRe' => '/^\s+|\s+$/g', // PHP has trim()
-		'normalize' => '/\s{2,}/',
-		'killBreaks' => '/(<br\s*\/?>(\s|&nbsp;?)*){1,}/',
-		'video' => '!//(player\.|www\.)?(youtube|vimeo|viddler)\.com!i',
-		'skipFootnoteLink' => '/^\s*(\[?[a-z0-9]{1,2}\]?|^|edit|citation needed)\s*$/i'
-	);	
-	
-	/* constants */
-	const FLAG_STRIP_UNLIKELYS = 1;
-	const FLAG_WEIGHT_CLASSES = 2;
-	const FLAG_CLEAN_CONDITIONALLY = 4;
-	
-	/**
-	* Create instance of Readability
-	* @param string UTF-8 encoded string
-	* @param string (optional) URL associated with HTML (used for footnotes)
-	* @param string which parser to use for turning raw HTML into a DOMDocument (either 'libxml' or 'html5lib')
-	*/	
-	function __construct($html, $url=null, $parser='libxml')
-	{
-		$this->url = $url;
-		/* Turn all double br's into p's */
-		$html = preg_replace($this->regexps['replaceBrs'], '</p><p>', $html);
-		$html = preg_replace($this->regexps['replaceFonts'], '<$1span>', $html);
-		$html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
-		if (trim($html) == '') $html = '<html></html>';
-		if ($parser=='html5lib' && ($this->dom = HTML5_Parser::parse($html))) {
-			// all good
-		} else {
-			$this->dom = new DOMDocument();
-			$this->dom->preserveWhiteSpace = false;
-			@$this->dom->loadHTML($html);
-		}
-		$this->dom->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
-	}
-
-	/**
-	* Get article title element
-	* @return DOMElement
-	*/
-	public function getTitle() {
-		return $this->articleTitle;
-	}
-	
-	/**
-	* Get article content element
-	* @return DOMElement
-	*/
-	public function getContent() {
-		return $this->articleContent;
-	}	
-	
-	/**
-	* Runs readability.
-	* 
-	* Workflow:
-	*  1. Prep the document by removing script tags, css, etc.
-	*  2. Build readability's DOM tree.
-	*  3. Grab the article content from the current dom tree.
-	*  4. Replace the current DOM tree with the new one.
-	*  5. Read peacefully.
-	*
-	* @return boolean true if we found content, false otherwise
-	**/
-	public function init()
-	{
-		if (!isset($this->dom->documentElement)) return false;
-		$this->removeScripts($this->dom);
-		//die($this->getInnerHTML($this->dom->documentElement));
-		
-		// Assume successful outcome
-		$this->success = true;
-
-		$bodyElems = $this->dom->getElementsByTagName('body');
-		if ($bodyElems->length > 0) {
-			if ($this->bodyCache == null) {
-				$this->bodyCache = $bodyElems->item(0)->innerHTML;
-			}
-			if ($this->body == null) {
-				$this->body = $bodyElems->item(0);
-			}
-		}
-
-		$this->prepDocument();
-		
-		//die($this->dom->documentElement->parentNode->nodeType);
-		//$this->setInnerHTML($this->dom->documentElement, $this->getInnerHTML($this->dom->documentElement));
-		//die($this->getInnerHTML($this->dom->documentElement));
-
-		/* Build readability's DOM tree */
-		$overlay        = $this->dom->createElement('div');
-		$innerDiv       = $this->dom->createElement('div');
-		$articleTitle   = $this->getArticleTitle();
-		$articleContent = $this->grabArticle();
-
-		if (!$articleContent) {
-			$this->success = false;
-			$articleContent = $this->dom->createElement('div');
-			$articleContent->setAttribute('id', 'readability-content');
-			$articleContent->innerHTML = '<p>Sorry, Readability was unable to parse this page for content.</p>';		
-		}
-		
-		$overlay->setAttribute('id', 'readOverlay');
-		$innerDiv->setAttribute('id', 'readInner');
-
-		/* Glue the structure of our document together. */
-		$innerDiv->appendChild($articleTitle);
-		$innerDiv->appendChild($articleContent);
-		$overlay->appendChild($innerDiv);
-		
-		/* Clear the old HTML, insert the new content. */
-		$this->body->innerHTML = '';
-		$this->body->appendChild($overlay);
-		//document.body.insertBefore(overlay, document.body.firstChild);
-		$this->body->removeAttribute('style');
-
-		$this->postProcessContent($articleContent);
-		
-		// Set title and content instance variables
-		$this->articleTitle = $articleTitle;
-		$this->articleContent = $articleContent;
-		
-		return $this->success;
-	}
-	
-	/**
-	* Debug
-	*/
-	protected function dbg($msg) {
-		if ($this->debug) echo '* ',$msg, "\n";
-	}
-	
-	/**
-	* Run any post-process modifications to article content as necessary.
-	*
-	* @param DOMElement
-	* @return void
-	*/
-	public function postProcessContent($articleContent) {
-		if ($this->convertLinksToFootnotes && !preg_match('/wikipedia\.org/', @$this->url)) { 
-			$this->addFootnotes($articleContent);
-		}
-	}
-	
-	/**
-	* Get the article title as an H1.
-	*
-	* @return DOMElement
-	*/
-	protected function getArticleTitle() {
-		$curTitle = '';
-		$origTitle = '';
-
-		try {
-			$curTitle = $origTitle = $this->getInnerText($this->dom->getElementsByTagName('title')->item(0));
-		} catch(Exception $e) {}
-		
-		if (preg_match('/ [\|\-] /', $curTitle))
-		{
-			$curTitle = preg_replace('/(.*)[\|\-] .*/i', '$1', $origTitle);
-			
-			if (count(explode(' ', $curTitle)) < 3) {
-				$curTitle = preg_replace('/[^\|\-]*[\|\-](.*)/i', '$1', $origTitle);
-			}
-		}
-		else if (strpos($curTitle, ': ') !== false)
-		{
-			$curTitle = preg_replace('/.*:(.*)/i', '$1', $origTitle);
-
-			if (count(explode(' ', $curTitle)) < 3) {
-				$curTitle = preg_replace('/[^:]*[:](.*)/i','$1', $origTitle);
-			}
-		}
-		else if(strlen($curTitle) > 150 || strlen($curTitle) < 15)
-		{
-			$hOnes = $this->dom->getElementsByTagName('h1');
-			if($hOnes->length == 1)
-			{
-				$curTitle = $this->getInnerText($hOnes->item(0));
-			}
-		}
-
-		$curTitle = trim($curTitle);
-
-		if (count(explode(' ', $curTitle)) <= 4) {
-			$curTitle = $origTitle;
-		}
-		
-		$articleTitle = $this->dom->createElement('h1');
-		$articleTitle->innerHTML = $curTitle;
-		
-		return $articleTitle;
-	}
-	
-	/**
-	* Prepare the HTML document for readability to scrape it.
-	* This includes things like stripping javascript, CSS, and handling terrible markup.
-	* 
-	* @return void
-	**/
-	protected function prepDocument() {
-		/**
-		* In some cases a body element can't be found (if the HTML is totally hosed for example)
-		* so we create a new body node and append it to the document.
-		*/
-		if ($this->body == null)
-		{
-			$this->body = $this->dom->createElement('body');
-			$this->dom->documentElement->appendChild($this->body);
-		}
-		$this->body->setAttribute('id', 'readabilityBody');
-
-		/* Remove all style tags in head */
-		$styleTags = $this->dom->getElementsByTagName('style');
-		for ($i = $styleTags->length-1; $i >= 0; $i--)
-		{
-			$styleTags->item($i)->parentNode->removeChild($styleTags->item($i));
-		}
-
-		/* Turn all double br's into p's */
-		/* Note, this is pretty costly as far as processing goes. Maybe optimize later. */
-		//document.body.innerHTML = document.body.innerHTML.replace(readability.regexps.replaceBrs, '</p><p>').replace(readability.regexps.replaceFonts, '<$1span>');
-		// We do this in the constructor for PHP as that's when we have raw HTML - before parsing it into a DOM tree.
-		// Manipulating innerHTML as it's done in JS is not possible in PHP.
-	}
-
-	/**
-	* For easier reading, convert this document to have footnotes at the bottom rather than inline links.
-	* @see http://www.roughtype.com/archives/2010/05/experiments_in.php
-	*
-	* @return void
-	**/
-	public function addFootnotes($articleContent) {
-		$footnotesWrapper = $this->dom->createElement('div');
-		$footnotesWrapper->setAttribute('id', 'readability-footnotes');
-		$footnotesWrapper->innerHTML = '<h3>References</h3>';
-		
-		$articleFootnotes = $this->dom->createElement('ol');
-		$articleFootnotes->setAttribute('id', 'readability-footnotes-list');
-		$footnotesWrapper->appendChild($articleFootnotes);
-		
-		$articleLinks = $articleContent->getElementsByTagName('a');
-		
-		$linkCount = 0;
-		for ($i = 0; $i < $articleLinks->length; $i++)
-		{
-			$articleLink  = $articleLinks->item($i);
-			$footnoteLink = $articleLink->cloneNode(true);
-			$refLink      = $this->dom->createElement('a');
-			$footnote     = $this->dom->createElement('li');
-			$linkDomain   = @parse_url($footnoteLink->getAttribute('href'), PHP_URL_HOST);
-			if (!$linkDomain && isset($this->url)) $linkDomain = @parse_url($this->url, PHP_URL_HOST);
-			//linkDomain   = footnoteLink.host ? footnoteLink.host : document.location.host,
-			$linkText     = $this->getInnerText($articleLink);
-			
-			if ((strpos($articleLink->getAttribute('class'), 'readability-DoNotFootnote') !== false) || preg_match($this->regexps['skipFootnoteLink'], $linkText)) {
-				continue;
-			}
-			
-			$linkCount++;
-
-			/** Add a superscript reference after the article link */
-			$refLink->setAttribute('href', '#readabilityFootnoteLink-' . $linkCount);
-			$refLink->innerHTML = '<small><sup>[' . $linkCount . ']</sup></small>';
-			$refLink->setAttribute('class', 'readability-DoNotFootnote');
-			$refLink->setAttribute('style', 'color: inherit;');
-			
-			//TODO: does this work or should we use DOMNode.isSameNode()?
-			if ($articleLink->parentNode->lastChild == $articleLink) {
-				$articleLink->parentNode->appendChild($refLink);
-			} else {
-				$articleLink->parentNode->insertBefore($refLink, $articleLink->nextSibling);
-			}
-
-			$articleLink->setAttribute('style', 'color: inherit; text-decoration: none;');
-			$articleLink->setAttribute('name', 'readabilityLink-' . $linkCount);
-
-			$footnote->innerHTML = '<small><sup><a href="#readabilityLink-' . $linkCount . '" title="Jump to Link in Article">^</a></sup></small> ';
-
-			$footnoteLink->innerHTML = ($footnoteLink->getAttribute('title') != '' ? $footnoteLink->getAttribute('title') : $linkText);
-			$footnoteLink->setAttribute('name', 'readabilityFootnoteLink-' . $linkCount);
-			
-			$footnote->appendChild($footnoteLink);
-			if ($linkDomain) $footnote->innerHTML = $footnote->innerHTML . '<small> (' . $linkDomain . ')</small>';
-			
-			$articleFootnotes->appendChild($footnote);
-		}
-
-		if ($linkCount > 0) {
-			$articleContent->appendChild($footnotesWrapper);           
-		}
-	}
-
-	/**
-	* Reverts P elements with class 'readability-styled'
-	* to text nodes - which is what they were before.
-	*
-	* @param DOMElement
-	* @return void
-	*/
-	function revertReadabilityStyledElements($articleContent) {
-		$xpath = new DOMXPath($articleContent->ownerDocument);
-		$elems = $xpath->query('.//p[@class="readability-styled"]', $articleContent);
-		//$elems = $articleContent->getElementsByTagName('p');
-		for ($i = $elems->length-1; $i >= 0; $i--) {
-			$e = $elems->item($i);
-			$e->parentNode->replaceChild($articleContent->ownerDocument->createTextNode($e->textContent), $e);
-			//if ($e->hasAttribute('class') && $e->getAttribute('class') == 'readability-styled') {
-			//	$e->parentNode->replaceChild($this->dom->createTextNode($e->textContent), $e);
-			//}
-		}
-	}
-	
-	/**
-	* Prepare the article node for display. Clean out any inline styles,
-	* iframes, forms, strip extraneous <p> tags, etc.
-	*
-	* @param DOMElement
-	* @return void
-	*/
-	function prepArticle($articleContent) {
-		$this->cleanStyles($articleContent);
-		$this->killBreaks($articleContent);
-		if ($this->revertForcedParagraphElements) {
-			$this->revertReadabilityStyledElements($articleContent);
-		}
-
-		/* Clean out junk from the article content */
-		$this->cleanConditionally($articleContent, 'form');
-		$this->clean($articleContent, 'object');
-		$this->clean($articleContent, 'h1');
-
-		/**
-		* If there is only one h2, they are probably using it
-		* as a header and not a subheader, so remove it since we already have a header.
-		***/
-		if (!$this->lightClean && ($articleContent->getElementsByTagName('h2')->length == 1)) {
-			$this->clean($articleContent, 'h2'); 
-		}
-		$this->clean($articleContent, 'iframe');
-
-		$this->cleanHeaders($articleContent);
-
-		/* Do these last as the previous stuff may have removed junk that will affect these */
-		$this->cleanConditionally($articleContent, 'table');
-		$this->cleanConditionally($articleContent, 'ul');
-		$this->cleanConditionally($articleContent, 'div');
-
-		/* Remove extra paragraphs */
-		$articleParagraphs = $articleContent->getElementsByTagName('p');
-		for ($i = $articleParagraphs->length-1; $i >= 0; $i--)
-		{
-			$imgCount    = $articleParagraphs->item($i)->getElementsByTagName('img')->length;
-			$embedCount  = $articleParagraphs->item($i)->getElementsByTagName('embed')->length;
-			$objectCount = $articleParagraphs->item($i)->getElementsByTagName('object')->length;
-			$iframeCount = $articleParagraphs->item($i)->getElementsByTagName('iframe')->length;
-			
-			if ($imgCount === 0 && $embedCount === 0 && $objectCount === 0 && $iframeCount === 0 && $this->getInnerText($articleParagraphs->item($i), false) == '')
-			{
-				$articleParagraphs->item($i)->parentNode->removeChild($articleParagraphs->item($i));
-			}
-		}
-
-		try {
-			$articleContent->innerHTML = preg_replace('/<br[^>]*>\s*<p/i', '<p', $articleContent->innerHTML);
-			//articleContent.innerHTML = articleContent.innerHTML.replace(/<br[^>]*>\s*<p/gi, '<p');      
-		}
-		catch (Exception $e) {
-			$this->dbg("Cleaning innerHTML of breaks failed. This is an IE strict-block-elements bug. Ignoring.: " . $e);
-		}
-	}
-	
-	/**
-	* Initialize a node with the readability object. Also checks the
-	* className/id for special names to add to its score.
-	*
-	* @param Element
-	* @return void
-	**/
-	protected function initializeNode($node) {
-		$readability = $this->dom->createAttribute('readability');
-		$readability->value = 0; // this is our contentScore
-		$node->setAttributeNode($readability);		         
-
-		switch (strtoupper($node->tagName)) { // unsure if strtoupper is needed, but using it just in case
-			case 'DIV':
-				$readability->value += 5;
-				break;
-
-			case 'PRE':
-			case 'TD':
-			case 'BLOCKQUOTE':
-				$readability->value += 3;
-				break;
-				
-			case 'ADDRESS':
-			case 'OL':
-			case 'UL':
-			case 'DL':
-			case 'DD':
-			case 'DT':
-			case 'LI':
-			case 'FORM':
-				$readability->value -= 3;
-				break;
-
-			case 'H1':
-			case 'H2':
-			case 'H3':
-			case 'H4':
-			case 'H5':
-			case 'H6':
-			case 'TH':
-				$readability->value -= 5;
-				break;
-		}
-		$readability->value += $this->getClassWeight($node);
-	}
-	
-	/***
-	* grabArticle - Using a variety of metrics (content score, classname, element types), find the content that is
-	*               most likely to be the stuff a user wants to read. Then return it wrapped up in a div.
-	*
-	* @return DOMElement
-	**/
-	protected function grabArticle($page=null) {
-		$stripUnlikelyCandidates = $this->flagIsActive(self::FLAG_STRIP_UNLIKELYS);
-		if (!$page) $page = $this->dom;
-		$allElements = $page->getElementsByTagName('*');
-		/**
-		* First, node prepping. Trash nodes that look cruddy (like ones with the class name "comment", etc), and turn divs
-		* into P tags where they have been used inappropriately (as in, where they contain no other block level elements.)
-		*
-		* Note: Assignment from index for performance. See http://www.peachpit.com/articles/article.aspx?p=31567&seqNum=5
-		* TODO: Shouldn't this be a reverse traversal?
-		**/
-		$node = null;
-		$nodesToScore = array();
-		for ($nodeIndex = 0; ($node = $allElements->item($nodeIndex)); $nodeIndex++) {
-		//for ($nodeIndex=$targetList->length-1; $nodeIndex >= 0; $nodeIndex--) {
-			//$node = $targetList->item($nodeIndex);
-			$tagName = strtoupper($node->tagName);
-			/* Remove unlikely candidates */
-			if ($stripUnlikelyCandidates) {
-				$unlikelyMatchString = $node->getAttribute('class') . $node->getAttribute('id');
-				if (
-					preg_match($this->regexps['unlikelyCandidates'], $unlikelyMatchString) &&
-					!preg_match($this->regexps['okMaybeItsACandidate'], $unlikelyMatchString) &&
-					$tagName != 'BODY'
-				)
-				{
-					$this->dbg('Removing unlikely candidate - ' . $unlikelyMatchString);
-					//$nodesToRemove[] = $node;
-					$node->parentNode->removeChild($node);
-					$nodeIndex--;
-					continue;
-				}               
-			}
-
-			if ($tagName == 'P' || $tagName == 'TD' || $tagName == 'PRE') {
-				$nodesToScore[] = $node;
-			}
-
-			/* Turn all divs that don't have children block level elements into p's */
-			if ($tagName == 'DIV') {
-				if (!preg_match($this->regexps['divToPElements'], $node->innerHTML)) {
-					//$this->dbg('Altering div to p');
-					$newNode = $this->dom->createElement('p');
-					try {
-						$newNode->innerHTML = $node->innerHTML;
-						//$nodesToReplace[] = array('new'=>$newNode, 'old'=>$node);
-						$node->parentNode->replaceChild($newNode, $node);
-						$nodeIndex--;
-						$nodesToScore[] = $node; // or $newNode?
-					}
-					catch(Exception $e) {
-						$this->dbg('Could not alter div to p, reverting back to div.: ' . $e);
-					}
-				}
-				else
-				{
-					/* EXPERIMENTAL */
-					// TODO: change these p elements back to text nodes after processing
-					for ($i = 0, $il = $node->childNodes->length; $i < $il; $i++) {
-						$childNode = $node->childNodes->item($i);
-						if ($childNode->nodeType == 3) { // XML_TEXT_NODE
-							//$this->dbg('replacing text node with a p tag with the same content.');
-							$p = $this->dom->createElement('p');
-							$p->innerHTML = $childNode->nodeValue;
-							$p->setAttribute('style', 'display: inline;');
-							$p->setAttribute('class', 'readability-styled');
-							$childNode->parentNode->replaceChild($p, $childNode);
-						}
-					}
-				}
-			}
-		}
-		
-		/**
-		* Loop through all paragraphs, and assign a score to them based on how content-y they look.
-		* Then add their score to their parent node.
-		*
-		* A score is determined by things like number of commas, class names, etc. Maybe eventually link density.
-		**/
-		$candidates = array();
-		for ($pt=0; $pt < count($nodesToScore); $pt++) {
-			$parentNode      = $nodesToScore[$pt]->parentNode;
-			// $grandParentNode = $parentNode ? $parentNode->parentNode : null;
-			$grandParentNode = !$parentNode ? null : (($parentNode->parentNode instanceof DOMElement) ? $parentNode->parentNode : null);
-			$innerText       = $this->getInnerText($nodesToScore[$pt]);
-
-			if (!$parentNode || !isset($parentNode->tagName)) {
-				continue;
-			}
-
-			/* If this paragraph is less than 25 characters, don't even count it. */
-			if(strlen($innerText) < 25) {
-				continue;
-			}
-
-			/* Initialize readability data for the parent. */
-			if (!$parentNode->hasAttribute('readability')) 
-			{
-				$this->initializeNode($parentNode);
-				$candidates[] = $parentNode;
-			}
-
-			/* Initialize readability data for the grandparent. */
-			if ($grandParentNode && !$grandParentNode->hasAttribute('readability') && isset($grandParentNode->tagName))
-			{
-				$this->initializeNode($grandParentNode);
-				$candidates[] = $grandParentNode;
-			}
-
-			$contentScore = 0;
-
-			/* Add a point for the paragraph itself as a base. */
-			$contentScore++;
-
-			/* Add points for any commas within this paragraph */
-			$contentScore += count(explode(',', $innerText));
-			
-			/* For every 100 characters in this paragraph, add another point. Up to 3 points. */
-			$contentScore += min(floor(strlen($innerText) / 100), 3);
-			
-			/* Add the score to the parent. The grandparent gets half. */
-			$parentNode->getAttributeNode('readability')->value += $contentScore;
-
-			if ($grandParentNode) {
-				$grandParentNode->getAttributeNode('readability')->value += $contentScore/2;             
-			}
-		}
-
-		/**
-		* After we've calculated scores, loop through all of the possible candidate nodes we found
-		* and find the one with the highest score.
-		**/
-		$topCandidate = null;
-		for ($c=0, $cl=count($candidates); $c < $cl; $c++)
-		{
-			/**
-			* Scale the final candidates score based on link density. Good content should have a
-			* relatively small link density (5% or less) and be mostly unaffected by this operation.
-			**/
-			$readability = $candidates[$c]->getAttributeNode('readability');
-			$readability->value = $readability->value * (1-$this->getLinkDensity($candidates[$c]));
-
-			$this->dbg('Candidate: ' . $candidates[$c]->tagName . ' (' . $candidates[$c]->getAttribute('class') . ':' . $candidates[$c]->getAttribute('id') . ') with score ' . $readability->value);
-
-			if (!$topCandidate || $readability->value > (int)$topCandidate->getAttribute('readability')) {
-				$topCandidate = $candidates[$c];
-			}
-		}
-
-		/**
-		* If we still have no top candidate, just use the body as a last resort.
-		* We also have to copy the body node so it is something we can modify.
-		**/
-		if ($topCandidate === null || strtoupper($topCandidate->tagName) == 'BODY')
-		{
-			$topCandidate = $this->dom->createElement('div');
-			if ($page instanceof DOMDocument) {
-				if (!isset($page->documentElement)) {
-					// we don't have a body either? what a mess! :)
-				} else {
-					$topCandidate->innerHTML = $page->documentElement->innerHTML;
-					$page->documentElement->innerHTML = '';
-					$page->documentElement->appendChild($topCandidate);
-				}
-			} else {
-				$topCandidate->innerHTML = $page->innerHTML;
-				$page->innerHTML = '';
-				$page->appendChild($topCandidate);
-			}
-			$this->initializeNode($topCandidate);
-		}
-
-		/**
-		* Now that we have the top candidate, look through its siblings for content that might also be related.
-		* Things like preambles, content split by ads that we removed, etc.
-		**/
-		$articleContent        = $this->dom->createElement('div');
-		$articleContent->setAttribute('id', 'readability-content');
-		$siblingScoreThreshold = max(10, ((int)$topCandidate->getAttribute('readability')) * 0.2);
-		$siblingNodes          = $topCandidate->parentNode->childNodes;
-		if (!isset($siblingNodes)) {
-			$siblingNodes = new stdClass;
-			$siblingNodes->length = 0;
-		}
-
-		for ($s=0, $sl=$siblingNodes->length; $s < $sl; $s++)
-		{
-			$siblingNode = $siblingNodes->item($s);
-			$append      = false;
-
-			$this->dbg('Looking at sibling node: ' . $siblingNode->nodeName . (($siblingNode->nodeType === XML_ELEMENT_NODE && $siblingNode->hasAttribute('readability')) ? (' with score ' . $siblingNode->getAttribute('readability')) : ''));
-
-			//dbg('Sibling has score ' . ($siblingNode->readability ? siblingNode.readability.contentScore : 'Unknown'));
-
-			if ($siblingNode === $topCandidate)
-			// or if ($siblingNode->isSameNode($topCandidate))
-			{
-				$append = true;
-			}
-
-			$contentBonus = 0;
-			/* Give a bonus if sibling nodes and top candidates have the example same classname */
-			if ($siblingNode->nodeType === XML_ELEMENT_NODE && $siblingNode->getAttribute('class') == $topCandidate->getAttribute('class') && $topCandidate->getAttribute('class') != '') {
-				$contentBonus += ((int)$topCandidate->getAttribute('readability')) * 0.2;
-			}
-
-			if ($siblingNode->nodeType === XML_ELEMENT_NODE && $siblingNode->hasAttribute('readability') && (((int)$siblingNode->getAttribute('readability')) + $contentBonus) >= $siblingScoreThreshold)
-			{
-				$append = true;
-			}
-			
-			if (strtoupper($siblingNode->nodeName) == 'P') {
-				$linkDensity = $this->getLinkDensity($siblingNode);
-				$nodeContent = $this->getInnerText($siblingNode);
-				$nodeLength  = strlen($nodeContent);
-				
-				if ($nodeLength > 80 && $linkDensity < 0.25)
-				{
-					$append = true;
-				}
-				else if ($nodeLength < 80 && $linkDensity === 0 && preg_match('/\.( |$)/', $nodeContent))
-				{
-					$append = true;
-				}
-			}
-
-			if ($append)
-			{
-				$this->dbg('Appending node: ' . $siblingNode->nodeName);
-
-				$nodeToAppend = null;
-				$sibNodeName = strtoupper($siblingNode->nodeName);
-				if ($sibNodeName != 'DIV' && $sibNodeName != 'P') {
-					/* We have a node that isn't a common block level element, like a form or td tag. Turn it into a div so it doesn't get filtered out later by accident. */
-					
-					$this->dbg('Altering siblingNode of ' . $sibNodeName . ' to div.');
-					$nodeToAppend = $this->dom->createElement('div');
-					try {
-						$nodeToAppend->setAttribute('id', $siblingNode->getAttribute('id'));
-						$nodeToAppend->innerHTML = $siblingNode->innerHTML;
-					}
-					catch(Exception $e)
-					{
-						$this->dbg('Could not alter siblingNode to div, reverting back to original.');
-						$nodeToAppend = $siblingNode;
-						$s--;
-						$sl--;
-					}
-				} else {
-					$nodeToAppend = $siblingNode;
-					$s--;
-					$sl--;
-				}
-				
-				/* To ensure a node does not interfere with readability styles, remove its classnames */
-				$nodeToAppend->removeAttribute('class');
-
-				/* Append sibling and subtract from our list because it removes the node when you append to another node */
-				$articleContent->appendChild($nodeToAppend);
-			}
-		}
-
-		/**
-		* So we have all of the content that we need. Now we clean it up for presentation.
-		**/
-		$this->prepArticle($articleContent);
-
-		/**
-		* Now that we've gone through the full algorithm, check to see if we got any meaningful content.
-		* If we didn't, we may need to re-run grabArticle with different flags set. This gives us a higher
-		* likelihood of finding the content, and the sieve approach gives us a higher likelihood of
-		* finding the -right- content.
-		**/
-		if (strlen($this->getInnerText($articleContent, false)) < 250)
-		{
-			// TODO: find out why element disappears sometimes, e.g. for this URL http://www.businessinsider.com/6-hedge-fund-etfs-for-average-investors-2011-7
-			// in the meantime, we check and create an empty element if it's not there.
-			if (!isset($this->body->childNodes)) $this->body = $this->dom->createElement('body');
-			$this->body->innerHTML = $this->bodyCache;
-			
-			if ($this->flagIsActive(self::FLAG_STRIP_UNLIKELYS)) {
-				$this->removeFlag(self::FLAG_STRIP_UNLIKELYS);
-				return $this->grabArticle($this->body);
-			}
-			else if ($this->flagIsActive(self::FLAG_WEIGHT_CLASSES)) {
-				$this->removeFlag(self::FLAG_WEIGHT_CLASSES);
-				return $this->grabArticle($this->body);              
-			}
-			else if ($this->flagIsActive(self::FLAG_CLEAN_CONDITIONALLY)) {
-				$this->removeFlag(self::FLAG_CLEAN_CONDITIONALLY);
-				return $this->grabArticle($this->body);
-			}
-			else {
-				return false;
-			}
-		}
-		return $articleContent;
-	}
-	
-	/**
-	* Remove script tags from document
-	*
-	* @param DOMElement
-	* @return void
-	*/
-	public function removeScripts($doc) {
-		$scripts = $doc->getElementsByTagName('script');
-		for($i = $scripts->length-1; $i >= 0; $i--)
-		{
-			$scripts->item($i)->parentNode->removeChild($scripts->item($i));
-		}
-	}
-	
-	/**
-	* Get the inner text of a node.
-	* This also strips out any excess whitespace to be found.
-	*
-	* @param DOMElement $
-	* @param boolean $normalizeSpaces (default: true)
-	* @return string
-	**/
-	public function getInnerText($e, $normalizeSpaces=true) {
-		$textContent = '';
-
-		if (!isset($e->textContent) || $e->textContent == '') {
-			return '';
-		}
-
-		$textContent = trim($e->textContent);
-
-		if ($normalizeSpaces) {
-			return preg_replace($this->regexps['normalize'], ' ', $textContent);
-		} else {
-			return $textContent;
-		}
-	}
-
-	/**
-	* Get the number of times a string $s appears in the node $e.
-	*
-	* @param DOMElement $e
-	* @param string - what to count. Default is ","
-	* @return number (integer)
-	**/
-	public function getCharCount($e, $s=',') {
-		return substr_count($this->getInnerText($e), $s);
-	}
-
-	/**
-	* Remove the style attribute on every $e and under.
-	*
-	* @param DOMElement $e
-	* @return void
-	*/
-	public function cleanStyles($e) {
-		if (!is_object($e)) return;
-		$elems = $e->getElementsByTagName('*');
-		foreach ($elems as $elem) {
-			$elem->removeAttribute('style');
-		}
-	}
-	
-	/**
-	* Get the density of links as a percentage of the content
-	* This is the amount of text that is inside a link divided by the total text in the node.
-	* 
-	* @param DOMElement $e
-	* @return number (float)
-	*/
-	public function getLinkDensity($e) {
-		$links      = $e->getElementsByTagName('a');
-		$textLength = strlen($this->getInnerText($e));
-		$linkLength = 0;
-		for ($i=0, $il=$links->length; $i < $il; $i++)
-		{
-			$linkLength += strlen($this->getInnerText($links->item($i)));
-		}
-		if ($textLength > 0) {
-			return $linkLength / $textLength;
-		} else {
-			return 0;
-		}
-	}
-	
-	/**
-	* Get an elements class/id weight. Uses regular expressions to tell if this 
-	* element looks good or bad.
-	*
-	* @param DOMElement $e
-	* @return number (Integer)
-	*/
-	public function getClassWeight($e) {
-		if(!$this->flagIsActive(self::FLAG_WEIGHT_CLASSES)) {
-			return 0;
-		}
-
-		$weight = 0;
-
-		/* Look for a special classname */
-		if ($e->hasAttribute('class') && $e->getAttribute('class') != '')
-		{
-			if (preg_match($this->regexps['negative'], $e->getAttribute('class'))) {
-				$weight -= 25;
-			}
-			if (preg_match($this->regexps['positive'], $e->getAttribute('class'))) {
-				$weight += 25;
-			}
-		}
-
-		/* Look for a special ID */
-		if ($e->hasAttribute('id') && $e->getAttribute('id') != '')
-		{
-			if (preg_match($this->regexps['negative'], $e->getAttribute('id'))) {
-				$weight -= 25;
-			}
-			if (preg_match($this->regexps['positive'], $e->getAttribute('id'))) {
-				$weight += 25;
-			}
-		}
-		return $weight;
-	}
-
-	/**
-	* Remove extraneous break tags from a node.
-	*
-	* @param DOMElement $node
-	* @return void
-	*/
-	public function killBreaks($node) {
-		$html = $node->innerHTML;
-		$html = preg_replace($this->regexps['killBreaks'], '<br />', $html);
-		$node->innerHTML = $html;
-	}
-
-	/**
-	* Clean a node of all elements of type "tag".
-	* (Unless it's a youtube/vimeo video. People love movies.)
-	*
-	* Updated 2012-09-18 to preserve youtube/vimeo iframes
-	*
-	* @param DOMElement $e
-	* @param string $tag
-	* @return void
-	*/
-	public function clean($e, $tag) {
-		$targetList = $e->getElementsByTagName($tag);
-		$isEmbed = ($tag == 'iframe' || $tag == 'object' || $tag == 'embed');
-		
-		for ($y=$targetList->length-1; $y >= 0; $y--) {
-			/* Allow youtube and vimeo videos through as people usually want to see those. */
-			if ($isEmbed) {
-				$attributeValues = '';
-				for ($i=0, $il=$targetList->item($y)->attributes->length; $i < $il; $i++) {
-					$attributeValues .= $targetList->item($y)->attributes->item($i)->value . '|'; // DOMAttr? (TODO: test)
-				}
-				
-				/* First, check the elements attributes to see if any of them contain youtube or vimeo */
-				if (preg_match($this->regexps['video'], $attributeValues)) {
-					continue;
-				}
-
-				/* Then check the elements inside this element for the same. */
-				if (preg_match($this->regexps['video'], $targetList->item($y)->innerHTML)) {
-					continue;
-				}
-			}
-			$targetList->item($y)->parentNode->removeChild($targetList->item($y));
-		}
-	}
-	
-	/**
-	* Clean an element of all tags of type "tag" if they look fishy.
-	* "Fishy" is an algorithm based on content length, classnames, 
-	* link density, number of images & embeds, etc.
-	*
-	* @param DOMElement $e
-	* @param string $tag
-	* @return void
-	*/
-	public function cleanConditionally($e, $tag) {
-		if (!$this->flagIsActive(self::FLAG_CLEAN_CONDITIONALLY)) {
-			return;
-		}
-
-		$tagsList = $e->getElementsByTagName($tag);
-		$curTagsLength = $tagsList->length;
-
-		/**
-		* Gather counts for other typical elements embedded within.
-		* Traverse backwards so we can remove nodes at the same time without effecting the traversal.
-		*
-		* TODO: Consider taking into account original contentScore here.
-		*/
-		for ($i=$curTagsLength-1; $i >= 0; $i--) {
-			$weight = $this->getClassWeight($tagsList->item($i));
-			$contentScore = ($tagsList->item($i)->hasAttribute('readability')) ? (int)$tagsList->item($i)->getAttribute('readability') : 0;
-			
-			$this->dbg('Cleaning Conditionally ' . $tagsList->item($i)->tagName . ' (' . $tagsList->item($i)->getAttribute('class') . ':' . $tagsList->item($i)->getAttribute('id') . ')' . (($tagsList->item($i)->hasAttribute('readability')) ? (' with score ' . $tagsList->item($i)->getAttribute('readability')) : ''));
-
-			if ($weight + $contentScore < 0) {
-				$tagsList->item($i)->parentNode->removeChild($tagsList->item($i));
-			}
-			else if ( $this->getCharCount($tagsList->item($i), ',') < 10) {
-				/**
-				* If there are not very many commas, and the number of
-				* non-paragraph elements is more than paragraphs or other ominous signs, remove the element.
-				**/
-				$p      = $tagsList->item($i)->getElementsByTagName('p')->length;
-				$img    = $tagsList->item($i)->getElementsByTagName('img')->length;
-				$li     = $tagsList->item($i)->getElementsByTagName('li')->length-100;
-				$input  = $tagsList->item($i)->getElementsByTagName('input')->length;
-				$a 		= $tagsList->item($i)->getElementsByTagName('a')->length;
-
-				$embedCount = 0;
-				$embeds = $tagsList->item($i)->getElementsByTagName('embed');
-				for ($ei=0, $il=$embeds->length; $ei < $il; $ei++) {
-					if (preg_match($this->regexps['video'], $embeds->item($ei)->getAttribute('src'))) {
-						$embedCount++; 
-					}
-				}
-				$embeds = $tagsList->item($i)->getElementsByTagName('iframe');
-				for ($ei=0, $il=$embeds->length; $ei < $il; $ei++) {
-					if (preg_match($this->regexps['video'], $embeds->item($ei)->getAttribute('src'))) {
-						$embedCount++; 
-					}
-				}
-
-				$linkDensity   = $this->getLinkDensity($tagsList->item($i));
-				$contentLength = strlen($this->getInnerText($tagsList->item($i)));
-				$toRemove      = false;
-
-				if ($this->lightClean) {
-					$this->dbg('Light clean...');
-					if ( ($img > $p) && ($img > 4) ) {
-						$this->dbg(' more than 4 images and more image elements than paragraph elements');
-						$toRemove = true;
-					} else if ($li > $p && $tag != 'ul' && $tag != 'ol') {
-						$this->dbg(' too many <li> elements, and parent is not <ul> or <ol>');
-						$toRemove = true;
-					} else if ( $input > floor($p/3) ) {
-						$this->dbg(' too many <input> elements');
-						$toRemove = true; 
-					} else if ($contentLength < 25 && ($embedCount === 0 && ($img === 0 || $img > 2))) {
-						$this->dbg(' content length less than 25 chars, 0 embeds and either 0 images or more than 2 images');
-						$toRemove = true;
-					} else if($weight < 25 && $linkDensity > 0.2) {
-						$this->dbg(' weight smaller than 25 and link density above 0.2');
-						$toRemove = true;
-					} else if($a > 2 && ($weight >= 25 && $linkDensity > 0.5)) {
-						$this->dbg(' more than 2 links and weight above 25 but link density greater than 0.5');
-						$toRemove = true;
-					} else if($embedCount > 3) {
-						$this->dbg(' more than 3 embeds');
-						$toRemove = true;
-					}
-				} else {
-					$this->dbg('Standard clean...');
-					if ( $img > $p ) {
-						$this->dbg(' more image elements than paragraph elements');
-						$toRemove = true;
-					} else if ($li > $p && $tag != 'ul' && $tag != 'ol') {
-						$this->dbg(' too many <li> elements, and parent is not <ul> or <ol>');
-						$toRemove = true;
-					} else if ( $input > floor($p/3) ) {
-						$this->dbg(' too many <input> elements');
-						$toRemove = true; 
-					} else if ($contentLength < 25 && ($img === 0 || $img > 2) ) {
-						$this->dbg(' content length less than 25 chars and 0 images, or more than 2 images');
-						$toRemove = true;
-					} else if($weight < 25 && $linkDensity > 0.2) {
-						$this->dbg(' weight smaller than 25 and link density above 0.2');
-						$toRemove = true;
-					} else if($weight >= 25 && $linkDensity > 0.5) {
-						$this->dbg(' weight above 25 but link density greater than 0.5');
-						$toRemove = true;
-					} else if(($embedCount == 1 && $contentLength < 75) || $embedCount > 1) {
-						$this->dbg(' 1 embed and content length smaller than 75 chars, or more than one embed');
-						$toRemove = true;
-					}
-				}
-
-				if ($toRemove) {
-					//$this->dbg('Removing: '.$tagsList->item($i)->innerHTML);
-					$tagsList->item($i)->parentNode->removeChild($tagsList->item($i));
-				}
-			}
-		}
-	}
-
-	/**
-	* Clean out spurious headers from an Element. Checks things like classnames and link density.
-	*
-	* @param DOMElement $e
-	* @return void
-	*/
-	public function cleanHeaders($e) {
-		for ($headerIndex = 1; $headerIndex < 3; $headerIndex++) {
-			$headers = $e->getElementsByTagName('h' . $headerIndex);
-			for ($i=$headers->length-1; $i >=0; $i--) {
-				if ($this->getClassWeight($headers->item($i)) < 0 || $this->getLinkDensity($headers->item($i)) > 0.33) {
-					$headers->item($i)->parentNode->removeChild($headers->item($i));
-				}
-			}
-		}
-	}
-
-	public function flagIsActive($flag) {
-		return ($this->flags & $flag) > 0;
-	}
-	
-	public function addFlag($flag) {
-		$this->flags = $this->flags | $flag;
-	}
-	
-	public function removeFlag($flag) {
-		$this->flags = $this->flags & ~$flag;
-	}
-}
+<?php
+/** 
+* Arc90's Readability ported to PHP for FiveFilters.org
+* Based on readability.js version 1.7.1 (without multi-page support)
+* Updated to allow HTML5 parsing with html5lib
+* Updated with lightClean mode to preserve more images and youtube/vimeo/viddler embeds
+* ------------------------------------------------------
+* Original URL: http://lab.arc90.com/experiments/readability/js/readability.js
+* Arc90's project URL: http://lab.arc90.com/experiments/readability/
+* JS Source: http://code.google.com/p/arc90labs-readability
+* Ported by: Keyvan Minoukadeh, http://www.keyvan.net
+* More information: http://fivefilters.org/content-only/
+* License: Apache License, Version 2.0
+* Requires: PHP5
+* Date: 2012-09-19
+* 
+* Differences between the PHP port and the original
+* ------------------------------------------------------
+* Arc90's Readability is designed to run in the browser. It works on the DOM 
+* tree (the parsed HTML) after the page's CSS styles have been applied and 
+* Javascript code executed. This PHP port does not run inside a browser. 
+* We use PHP's ability to parse HTML to build our DOM tree, but we cannot 
+* rely on CSS or Javascript support. As such, the results will not always 
+* match Arc90's Readability. (For example, if a web page contains CSS style 
+* rules or Javascript code which hide certain HTML elements from display, 
+* Arc90's Readability will dismiss those from consideration but our PHP port, 
+* unable to understand CSS or Javascript, will not know any better.)
+* 
+* Another significant difference is that the aim of Arc90's Readability is 
+* to re-present the main content block of a given web page so users can 
+* read it more easily in their browsers. Correct identification, clean up, 
+* and separation of the content block is only a part of this process. 
+* This PHP port is only concerned with this part, it does not include code 
+* that relates to presentation in the browser - Arc90 already do 
+* that extremely well, and for PDF output there's FiveFilters.org's 
+* PDF Newspaper: http://fivefilters.org/pdf-newspaper/.
+* 
+* Finally, this class contains methods that might be useful for developers 
+* working on HTML document fragments. So without deviating too much from 
+* the original code (which I don't want to do because it makes debugging 
+* and updating more difficult), I've tried to make it a little more 
+* developer friendly. You should be able to use the methods here on 
+* existing DOMElement objects without passing an entire HTML document to 
+* be parsed.
+*/
+
+// This class allows us to do JavaScript-like assignments to innerHTML
+require_once(dirname(__FILE__).'/JSLikeHTMLElement.php');
+
+// Alternative usage (for testing only!)
+// uncomment the lines below and call Readability.php in your browser 
+// passing it the URL of the page you'd like content from, e.g.:
+// Readability.php?url=http://medialens.org/alerts/09/090615_the_guardian_climate.php
+
+/*
+if (!isset($_GET['url']) || $_GET['url'] == '') {
+	die('Please pass a URL to the script. E.g. Readability.php?url=bla.com/story.html');
+}
+$url = $_GET['url'];
+if (!preg_match('!^https?://!i', $url)) $url = 'http://'.$url;
+$html = file_get_contents($url);
+$r = new Readability($html, $url);
+$r->init();
+echo $r->articleContent->innerHTML;
+*/
+
+class Readability
+{
+	public $version = '1.7.1-without-multi-page';
+	public $convertLinksToFootnotes = false;
+	public $revertForcedParagraphElements = true;
+	public $articleTitle;
+	public $articleContent;
+	public $dom;
+	public $url = null; // optional - URL where HTML was retrieved
+	public $debug = false;
+	public $lightClean = true; // preserves more content (experimental) added 2012-09-19
+	protected $body = null; // 
+	protected $bodyCache = null; // Cache the body HTML in case we need to re-use it later
+	protected $flags = 7; // 1 | 2 | 4;   // Start with all flags set.
+	protected $success = false; // indicates whether we were able to extract or not
+	
+	/**
+	* All of the regular expressions in use within readability.
+	* Defined up here so we don't instantiate them repeatedly in loops.
+	**/
+	public $regexps = array(
+		'unlikelyCandidates' => '/combx|comment|community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup/i',
+		'okMaybeItsACandidate' => '/and|article|body|column|main|shadow/i',
+		'positive' => '/article|body|content|entry|hentry|main|page|attachment|pagination|post|text|blog|story/i',
+		'negative' => '/combx|comment|com-|contact|foot|footer|_nav|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget/i',
+		'divToPElements' => '/<(a|blockquote|dl|div|img|ol|p|pre|table|ul)/i',
+		'replaceBrs' => '/(<br[^>]*>[ \n\r\t]*){2,}/i',
+		'replaceFonts' => '/<(\/?)font[^>]*>/i',
+		// 'trimRe' => '/^\s+|\s+$/g', // PHP has trim()
+		'normalize' => '/\s{2,}/',
+		'killBreaks' => '/(<br\s*\/?>(\s|&nbsp;?)*){1,}/',
+		'video' => '!//(player\.|www\.)?(youtube|vimeo|viddler)\.com!i',
+		'skipFootnoteLink' => '/^\s*(\[?[a-z0-9]{1,2}\]?|^|edit|citation needed)\s*$/i'
+	);	
+	
+	/* constants */
+	const FLAG_STRIP_UNLIKELYS = 1;
+	const FLAG_WEIGHT_CLASSES = 2;
+	const FLAG_CLEAN_CONDITIONALLY = 4;
+	
+	/**
+	* Create instance of Readability
+	* @param string $html UTF-8 encoded HTML string
+	* @param string $url (optional) URL associated with HTML (used for footnotes)
+	* @param string $parser which parser to use for turning raw HTML into a DOMDocument (either 'libxml' or 'html5lib')
+	*/	
+	function __construct($html, $url=null, $parser='libxml')
+	{
+		$this->url = $url;
+		/* Turn all double br's into p's */
+		$html = preg_replace($this->regexps['replaceBrs'], '</p><p>', $html);
+		$html = preg_replace($this->regexps['replaceFonts'], '<$1span>', $html);
+		$html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
+		if (trim($html) == '') $html = '<html></html>';
+		if ($parser=='html5lib' && ($this->dom = HTML5_Parser::parse($html))) {
+			// all good
+		} else {
+			$this->dom = new DOMDocument();
+			$this->dom->preserveWhiteSpace = false;
+			@$this->dom->loadHTML($html);
+		}
+		$this->dom->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
+	}
+
+	/**
+	* Get article title element
+	* @return DOMElement
+	*/
+	public function getTitle() {
+		return $this->articleTitle;
+	}
+	
+	/**
+	* Get article content element
+	* @return DOMElement
+	*/
+	public function getContent() {
+		return $this->articleContent;
+	}	
+	
+	/**
+	* Runs readability.
+	* 
+	* Workflow:
+	*  1. Prep the document by removing script tags, css, etc.
+	*  2. Build readability's DOM tree.
+	*  3. Grab the article content from the current dom tree.
+	*  4. Replace the current DOM tree with the new one.
+	*  5. Read peacefully.
+	*
+	* @return boolean true if we found content, false otherwise
+	**/
+	public function init()
+	{
+		if (!isset($this->dom->documentElement)) return false;
+		$this->removeScripts($this->dom);
+		//die($this->getInnerHTML($this->dom->documentElement));
+		
+		// Assume successful outcome
+		$this->success = true;
+
+		$bodyElems = $this->dom->getElementsByTagName('body');
+		if ($bodyElems->length > 0) {
+			if ($this->bodyCache == null) {
+				$this->bodyCache = $bodyElems->item(0)->innerHTML;
+			}
+			if ($this->body == null) {
+				$this->body = $bodyElems->item(0);
+			}
+		}
+
+		$this->prepDocument();
+		
+		//die($this->dom->documentElement->parentNode->nodeType);
+		//$this->setInnerHTML($this->dom->documentElement, $this->getInnerHTML($this->dom->documentElement));
+		//die($this->getInnerHTML($this->dom->documentElement));
+
+		/* Build readability's DOM tree */
+		$overlay        = $this->dom->createElement('div');
+		$innerDiv       = $this->dom->createElement('div');
+		$articleTitle   = $this->getArticleTitle();
+		$articleContent = $this->grabArticle();
+
+		if (!$articleContent) {
+			$this->success = false;
+			$articleContent = $this->dom->createElement('div');
+			$articleContent->setAttribute('id', 'readability-content');
+			$articleContent->innerHTML = '<p>Sorry, Readability was unable to parse this page for content.</p>';		
+		}
+		
+		$overlay->setAttribute('id', 'readOverlay');
+		$innerDiv->setAttribute('id', 'readInner');
+
+		/* Glue the structure of our document together. */
+		$innerDiv->appendChild($articleTitle);
+		$innerDiv->appendChild($articleContent);
+		$overlay->appendChild($innerDiv);
+		
+		/* Clear the old HTML, insert the new content. */
+		$this->body->innerHTML = '';
+		$this->body->appendChild($overlay);
+		//document.body.insertBefore(overlay, document.body.firstChild);
+		$this->body->removeAttribute('style');
+
+		$this->postProcessContent($articleContent);
+		
+		// Set title and content instance variables
+		$this->articleTitle = $articleTitle;
+		$this->articleContent = $articleContent;
+		
+		return $this->success;
+	}
+	
+	/**
+	* Print a debug message, but only when $this->debug is enabled.
+	*/
+	protected function dbg($msg) {
+		if ($this->debug) echo '* ',$msg, "\n";
+	}
+	
+	/**
+	* Run any post-process modifications to article content as necessary.
+	*
+	* @param DOMElement $articleContent
+	* @return void
+	*/
+	public function postProcessContent($articleContent) {
+		if ($this->convertLinksToFootnotes && !preg_match('/wikipedia\.org/', @$this->url)) { 
+			$this->addFootnotes($articleContent);
+		}
+	}
+	
+	/**
+	* Get the article title as an H1.
+	*
+	* @return DOMElement
+	*/
+	protected function getArticleTitle() {
+		$curTitle = '';
+		$origTitle = '';
+
+		try {
+			$curTitle = $origTitle = $this->getInnerText($this->dom->getElementsByTagName('title')->item(0));
+		} catch(Exception $e) {}
+		
+		if (preg_match('/ [\|\-] /', $curTitle))
+		{
+			$curTitle = preg_replace('/(.*)[\|\-] .*/i', '$1', $origTitle);
+			
+			if (count(explode(' ', $curTitle)) < 3) {
+				$curTitle = preg_replace('/[^\|\-]*[\|\-](.*)/i', '$1', $origTitle);
+			}
+		}
+		else if (strpos($curTitle, ': ') !== false)
+		{
+			$curTitle = preg_replace('/.*:(.*)/i', '$1', $origTitle);
+
+			if (count(explode(' ', $curTitle)) < 3) {
+				$curTitle = preg_replace('/[^:]*[:](.*)/i','$1', $origTitle);
+			}
+		}
+		else if(strlen($curTitle) > 150 || strlen($curTitle) < 15)
+		{
+			$hOnes = $this->dom->getElementsByTagName('h1');
+			if($hOnes->length == 1)
+			{
+				$curTitle = $this->getInnerText($hOnes->item(0));
+			}
+		}
+
+		$curTitle = trim($curTitle);
+
+		if (count(explode(' ', $curTitle)) <= 4) {
+			$curTitle = $origTitle;
+		}
+		
+		$articleTitle = $this->dom->createElement('h1');
+		$articleTitle->innerHTML = $curTitle;
+		
+		return $articleTitle;
+	}
+	
+	/**
+	* Prepare the HTML document for readability to scrape it.
+	* This includes things like stripping javascript, CSS, and handling terrible markup.
+	* 
+	* @return void
+	**/
+	protected function prepDocument() {
+		/**
+		* In some cases a body element can't be found (if the HTML is totally hosed for example)
+		* so we create a new body node and append it to the document.
+		*/
+		if ($this->body == null)
+		{
+			$this->body = $this->dom->createElement('body');
+			$this->dom->documentElement->appendChild($this->body);
+		}
+		$this->body->setAttribute('id', 'readabilityBody');
+
+		/* Remove all style tags in head */
+		$styleTags = $this->dom->getElementsByTagName('style');
+		for ($i = $styleTags->length-1; $i >= 0; $i--)
+		{
+			$styleTags->item($i)->parentNode->removeChild($styleTags->item($i));
+		}
+
+		/* Turn all double br's into p's */
+		/* Note, this is pretty costly as far as processing goes. Maybe optimize later. */
+		//document.body.innerHTML = document.body.innerHTML.replace(readability.regexps.replaceBrs, '</p><p>').replace(readability.regexps.replaceFonts, '<$1span>');
+		// We do this in the constructor for PHP as that's when we have raw HTML - before parsing it into a DOM tree.
+		// Manipulating innerHTML as it's done in JS is not possible in PHP.
+	}
+
+	/**
+	* For easier reading, convert this document to have footnotes at the bottom rather than inline links.
+	* @see http://www.roughtype.com/archives/2010/05/experiments_in.php
+	*
+	* @return void
+	**/
+	public function addFootnotes($articleContent) {
+		$footnotesWrapper = $this->dom->createElement('div');
+		$footnotesWrapper->setAttribute('id', 'readability-footnotes');
+		$footnotesWrapper->innerHTML = '<h3>References</h3>';
+		
+		$articleFootnotes = $this->dom->createElement('ol');
+		$articleFootnotes->setAttribute('id', 'readability-footnotes-list');
+		$footnotesWrapper->appendChild($articleFootnotes);
+		
+		$articleLinks = $articleContent->getElementsByTagName('a');
+		
+		$linkCount = 0;
+		for ($i = 0; $i < $articleLinks->length; $i++)
+		{
+			$articleLink  = $articleLinks->item($i);
+			$footnoteLink = $articleLink->cloneNode(true);
+			$refLink      = $this->dom->createElement('a');
+			$footnote     = $this->dom->createElement('li');
+			$linkDomain   = @parse_url($footnoteLink->getAttribute('href'), PHP_URL_HOST);
+			if (!$linkDomain && isset($this->url)) $linkDomain = @parse_url($this->url, PHP_URL_HOST);
+			//linkDomain   = footnoteLink.host ? footnoteLink.host : document.location.host,
+			$linkText     = $this->getInnerText($articleLink);
+			
+			if ((strpos($articleLink->getAttribute('class'), 'readability-DoNotFootnote') !== false) || preg_match($this->regexps['skipFootnoteLink'], $linkText)) {
+				continue;
+			}
+			
+			$linkCount++;
+
+			/** Add a superscript reference after the article link */
+			$refLink->setAttribute('href', '#readabilityFootnoteLink-' . $linkCount);
+			$refLink->innerHTML = '<small><sup>[' . $linkCount . ']</sup></small>';
+			$refLink->setAttribute('class', 'readability-DoNotFootnote');
+			$refLink->setAttribute('style', 'color: inherit;');
+			
+			//TODO: does this work or should we use DOMNode.isSameNode()?
+			if ($articleLink->parentNode->lastChild == $articleLink) {
+				$articleLink->parentNode->appendChild($refLink);
+			} else {
+				$articleLink->parentNode->insertBefore($refLink, $articleLink->nextSibling);
+			}
+
+			$articleLink->setAttribute('style', 'color: inherit; text-decoration: none;');
+			$articleLink->setAttribute('name', 'readabilityLink-' . $linkCount);
+
+			$footnote->innerHTML = '<small><sup><a href="#readabilityLink-' . $linkCount . '" title="Jump to Link in Article">^</a></sup></small> ';
+
+			$footnoteLink->innerHTML = ($footnoteLink->getAttribute('title') != '' ? $footnoteLink->getAttribute('title') : $linkText);
+			$footnoteLink->setAttribute('name', 'readabilityFootnoteLink-' . $linkCount);
+			
+			$footnote->appendChild($footnoteLink);
+			if ($linkDomain) $footnote->innerHTML = $footnote->innerHTML . '<small> (' . $linkDomain . ')</small>';
+			
+			$articleFootnotes->appendChild($footnote);
+		}
+
+		if ($linkCount > 0) {
+			$articleContent->appendChild($footnotesWrapper);           
+		}
+	}
+
+	/**
+	* Reverts P elements with class 'readability-styled'
+	* to text nodes - which is what they were before.
+	*
+	* @param DOMElement $articleContent
+	* @return void
+	*/
+	function revertReadabilityStyledElements($articleContent) {
+		$xpath = new DOMXPath($articleContent->ownerDocument);
+		$elems = $xpath->query('.//p[@class="readability-styled"]', $articleContent);
+		//$elems = $articleContent->getElementsByTagName('p');
+		for ($i = $elems->length-1; $i >= 0; $i--) {
+			$e = $elems->item($i);
+			$e->parentNode->replaceChild($articleContent->ownerDocument->createTextNode($e->textContent), $e);
+			//if ($e->hasAttribute('class') && $e->getAttribute('class') == 'readability-styled') {
+			//	$e->parentNode->replaceChild($this->dom->createTextNode($e->textContent), $e);
+			//}
+		}
+	}
+	
+	/**
+	* Prepare the article node for display. Clean out any inline styles,
+	* iframes, forms, strip extraneous <p> tags, etc.
+	*
+	* @param DOMElement $articleContent
+	* @return void
+	*/
+	function prepArticle($articleContent) {
+		$this->cleanStyles($articleContent);
+		$this->killBreaks($articleContent);
+		if ($this->revertForcedParagraphElements) {
+			$this->revertReadabilityStyledElements($articleContent);
+		}
+
+		/* Clean out junk from the article content */
+		$this->cleanConditionally($articleContent, 'form');
+		$this->clean($articleContent, 'object');
+		$this->clean($articleContent, 'h1');
+
+		/**
+		* If there is only one h2, they are probably using it
+		* as a header and not a subheader, so remove it since we already have a header.
+		***/
+		if (!$this->lightClean && ($articleContent->getElementsByTagName('h2')->length == 1)) {
+			$this->clean($articleContent, 'h2'); 
+		}
+		$this->clean($articleContent, 'iframe');
+
+		$this->cleanHeaders($articleContent);
+
+		/* Do these last as the previous stuff may have removed junk that will affect these */
+		$this->cleanConditionally($articleContent, 'table');
+		$this->cleanConditionally($articleContent, 'ul');
+		$this->cleanConditionally($articleContent, 'div');
+
+		/* Remove extra paragraphs */
+		$articleParagraphs = $articleContent->getElementsByTagName('p');
+		for ($i = $articleParagraphs->length-1; $i >= 0; $i--)
+		{
+			$imgCount    = $articleParagraphs->item($i)->getElementsByTagName('img')->length;
+			$embedCount  = $articleParagraphs->item($i)->getElementsByTagName('embed')->length;
+			$objectCount = $articleParagraphs->item($i)->getElementsByTagName('object')->length;
+			$iframeCount = $articleParagraphs->item($i)->getElementsByTagName('iframe')->length;
+			
+			if ($imgCount === 0 && $embedCount === 0 && $objectCount === 0 && $iframeCount === 0 && $this->getInnerText($articleParagraphs->item($i), false) == '')
+			{
+				$articleParagraphs->item($i)->parentNode->removeChild($articleParagraphs->item($i));
+			}
+		}
+
+		try {
+			$articleContent->innerHTML = preg_replace('/<br[^>]*>\s*<p/i', '<p', $articleContent->innerHTML);
+			//articleContent.innerHTML = articleContent.innerHTML.replace(/<br[^>]*>\s*<p/gi, '<p');      
+		}
+		catch (Exception $e) {
+			$this->dbg("Cleaning innerHTML of breaks failed. This is an IE strict-block-elements bug. Ignoring.: " . $e);
+		}
+	}
+	
+	/**
+	* Initialize a node with the readability object. Also checks the
+	* className/id for special names to add to its score.
+	*
+	* @param DOMElement $node
+	* @return void
+	**/
+	protected function initializeNode($node) {
+		$readability = $this->dom->createAttribute('readability');
+		$readability->value = 0; // this is our contentScore
+		$node->setAttributeNode($readability);		         
+
+		switch (strtoupper($node->tagName)) { // unsure if strtoupper is needed, but using it just in case
+			case 'DIV':
+				$readability->value += 5;
+				break;
+
+			case 'PRE':
+			case 'TD':
+			case 'BLOCKQUOTE':
+				$readability->value += 3;
+				break;
+				
+			case 'ADDRESS':
+			case 'OL':
+			case 'UL':
+			case 'DL':
+			case 'DD':
+			case 'DT':
+			case 'LI':
+			case 'FORM':
+				$readability->value -= 3;
+				break;
+
+			case 'H1':
+			case 'H2':
+			case 'H3':
+			case 'H4':
+			case 'H5':
+			case 'H6':
+			case 'TH':
+				$readability->value -= 5;
+				break;
+		}
+		$readability->value += $this->getClassWeight($node);
+	}
+	
+	/***
+	* grabArticle - Using a variety of metrics (content score, classname, element types), find the content that is
+	*               most likely to be the stuff a user wants to read. Then return it wrapped up in a div.
+	*
+	* @return DOMElement
+	**/
+	protected function grabArticle($page=null) {
+		$stripUnlikelyCandidates = $this->flagIsActive(self::FLAG_STRIP_UNLIKELYS);
+		if (!$page) $page = $this->dom; // no root node passed: work on the whole document
+		$allElements = $page->getElementsByTagName('*');
+		/**
+		* First, node prepping. Trash nodes that look cruddy (like ones with the class name "comment", etc), and turn divs
+		* into P tags where they have been used inappropriately (as in, where they contain no other block level elements.)
+		*
+		* Note: Assignment from index for performance. See http://www.peachpit.com/articles/article.aspx?p=31567&seqNum=5
+		* TODO: Shouldn't this be a reverse traversal?
+		**/
+		$node = null;
+		$nodesToScore = array();
+		for ($nodeIndex = 0; ($node = $allElements->item($nodeIndex)); $nodeIndex++) {
+			// Forward traversal: after the current node is removed or replaced below, $nodeIndex is
+			// stepped back so that the next iteration examines the same position again.
+			$tagName = strtoupper($node->tagName);
+			/* Remove unlikely candidates */
+			if ($stripUnlikelyCandidates) {
+				$unlikelyMatchString = $node->getAttribute('class') . $node->getAttribute('id');
+				if (
+					preg_match($this->regexps['unlikelyCandidates'], $unlikelyMatchString) &&
+					!preg_match($this->regexps['okMaybeItsACandidate'], $unlikelyMatchString) &&
+					$tagName != 'BODY'
+				)
+				{
+					$this->dbg('Removing unlikely candidate - ' . $unlikelyMatchString);
+					// Detach the node from the tree right away.
+					$node->parentNode->removeChild($node);
+					$nodeIndex--;
+					continue;
+				}               
+			}
+
+			if ($tagName == 'P' || $tagName == 'TD' || $tagName == 'PRE') {
+				$nodesToScore[] = $node;
+			}
+
+			/* Turn all divs that don't have children block level elements into p's */
+			if ($tagName == 'DIV') {
+				if (!preg_match($this->regexps['divToPElements'], $node->innerHTML)) {
+					// No 'divToPElements' match in the markup: rebuild the div as a <p>.
+					$newNode = $this->dom->createElement('p');
+					try {
+						$newNode->innerHTML = $node->innerHTML;
+						// Swap the new <p> into the tree in place of the div.
+						$node->parentNode->replaceChild($newNode, $node);
+						$nodeIndex--;
+						$nodesToScore[] = $node; // NOTE(review): $node is detached here, so the scoring loop skips it (no parentNode)
+					}
+					catch(Exception $e) {
+						$this->dbg('Could not alter div to p, reverting back to div.: ' . $e);
+					}
+				}
+				else
+				{
+					/* EXPERIMENTAL */
+					// TODO: change these p elements back to text nodes after processing
+					for ($i = 0, $il = $node->childNodes->length; $i < $il; $i++) {
+						$childNode = $node->childNodes->item($i);
+						if ($childNode->nodeType == 3) { // XML_TEXT_NODE
+							// Wrap the text node's content in a <p class="readability-styled"> displayed inline.
+							$p = $this->dom->createElement('p');
+							$p->innerHTML = $childNode->nodeValue;
+							$p->setAttribute('style', 'display: inline;');
+							$p->setAttribute('class', 'readability-styled');
+							$childNode->parentNode->replaceChild($p, $childNode);
+						}
+					}
+				}
+			}
+		}
+		
+		/**
+		* Loop through all paragraphs, and assign a score to them based on how content-y they look.
+		* Then add their score to their parent node.
+		*
+		* A score is determined by things like number of commas, class names, etc. Maybe eventually link density.
+		**/
+		$candidates = array();
+		for ($pt=0; $pt < count($nodesToScore); $pt++) {
+			$parentNode      = $nodesToScore[$pt]->parentNode;
+			// Only a DOMElement grandparent takes part in scoring; anything else is treated as absent.
+			$grandParentNode = !$parentNode ? null : (($parentNode->parentNode instanceof DOMElement) ? $parentNode->parentNode : null);
+			$innerText       = $this->getInnerText($nodesToScore[$pt]);
+
+			if (!$parentNode || !isset($parentNode->tagName)) {
+				continue;
+			}
+
+			/* If this paragraph is less than 25 characters (bytes, as measured by strlen), don't even count it. */
+			if(strlen($innerText) < 25) {
+				continue;
+			}
+
+			/* Initialize readability data for the parent. */
+			if (!$parentNode->hasAttribute('readability')) 
+			{
+				$this->initializeNode($parentNode);
+				$candidates[] = $parentNode;
+			}
+
+			/* Initialize readability data for the grandparent. */
+			if ($grandParentNode && !$grandParentNode->hasAttribute('readability') && isset($grandParentNode->tagName))
+			{
+				$this->initializeNode($grandParentNode);
+				$candidates[] = $grandParentNode;
+			}
+
+			$contentScore = 0;
+
+			/* Add a point for the paragraph itself as a base. */
+			$contentScore++;
+
+			/* Add points for any commas within this paragraph (explode() yields commas + 1 pieces, so one extra point is added) */
+			$contentScore += count(explode(',', $innerText));
+			
+			/* For every 100 characters in this paragraph, add another point. Up to 3 points. */
+			$contentScore += min(floor(strlen($innerText) / 100), 3);
+			
+			/* Add the score to the parent. The grandparent gets half. */
+			$parentNode->getAttributeNode('readability')->value += $contentScore;
+
+			if ($grandParentNode) {
+				$grandParentNode->getAttributeNode('readability')->value += $contentScore/2;             
+			}
+		}
+
+		/**
+		* After we've calculated scores, loop through all of the possible candidate nodes we found
+		* and find the one with the highest score.
+		**/
+		$topCandidate = null;
+		for ($c=0, $cl=count($candidates); $c < $cl; $c++)
+		{
+			/**
+			* Scale the final candidates score based on link density. Good content should have a
+			* relatively small link density (5% or less) and be mostly unaffected by this operation.
+			**/
+			$readability = $candidates[$c]->getAttributeNode('readability');
+			$readability->value = $readability->value * (1-$this->getLinkDensity($candidates[$c]));
+
+			$this->dbg('Candidate: ' . $candidates[$c]->tagName . ' (' . $candidates[$c]->getAttribute('class') . ':' . $candidates[$c]->getAttribute('id') . ') with score ' . $readability->value);
+
+			if (!$topCandidate || $readability->value > (int)$topCandidate->getAttribute('readability')) {
+				$topCandidate = $candidates[$c];
+			}
+		}
+
+		/**
+		* If we still have no top candidate (or the top candidate is the body itself), fall back to a fresh div
+		* that receives the page's markup, so that we have something we can modify.
+		**/
+		if ($topCandidate === null || strtoupper($topCandidate->tagName) == 'BODY')
+		{
+			$topCandidate = $this->dom->createElement('div');
+			if ($page instanceof DOMDocument) {
+				if (!isset($page->documentElement)) {
+					// we don't have a body either? what a mess! :)
+				} else {
+					$topCandidate->innerHTML = $page->documentElement->innerHTML;
+					$page->documentElement->innerHTML = '';
+					$page->documentElement->appendChild($topCandidate);
+				}
+			} else {
+				$topCandidate->innerHTML = $page->innerHTML;
+				$page->innerHTML = '';
+				$page->appendChild($topCandidate);
+			}
+			$this->initializeNode($topCandidate);
+		}
+
+		/**
+		* Now that we have the top candidate, look through its siblings for content that might also be related.
+		* Things like preambles, content split by ads that we removed, etc.
+		**/
+		$articleContent        = $this->dom->createElement('div');
+		$articleContent->setAttribute('id', 'readability-content');
+		$siblingScoreThreshold = max(10, ((int)$topCandidate->getAttribute('readability')) * 0.2);
+		$siblingNodes          = $topCandidate->parentNode->childNodes;
+		if (!isset($siblingNodes)) {
+			$siblingNodes = new stdClass;
+			$siblingNodes->length = 0;
+		}
+
+		for ($s=0, $sl=$siblingNodes->length; $s < $sl; $s++)
+		{
+			$siblingNode = $siblingNodes->item($s);
+			$append      = false;
+
+			$this->dbg('Looking at sibling node: ' . $siblingNode->nodeName . (($siblingNode->nodeType === XML_ELEMENT_NODE && $siblingNode->hasAttribute('readability')) ? (' with score ' . $siblingNode->getAttribute('readability')) : ''));
+
+			// The checks below decide whether this sibling is appended to the article.
+
+			if ($siblingNode === $topCandidate)
+			// or if ($siblingNode->isSameNode($topCandidate))
+			{
+				$append = true;
+			}
+
+			$contentBonus = 0;
+			/* Give a bonus if sibling nodes and top candidates have the exact same classname */
+			if ($siblingNode->nodeType === XML_ELEMENT_NODE && $siblingNode->getAttribute('class') == $topCandidate->getAttribute('class') && $topCandidate->getAttribute('class') != '') {
+				$contentBonus += ((int)$topCandidate->getAttribute('readability')) * 0.2;
+			}
+
+			if ($siblingNode->nodeType === XML_ELEMENT_NODE && $siblingNode->hasAttribute('readability') && (((int)$siblingNode->getAttribute('readability')) + $contentBonus) >= $siblingScoreThreshold)
+			{
+				$append = true;
+			}
+			
+			if (strtoupper($siblingNode->nodeName) == 'P') {
+				$linkDensity = $this->getLinkDensity($siblingNode);
+				$nodeContent = $this->getInnerText($siblingNode);
+				$nodeLength  = strlen($nodeContent);
+				
+				if ($nodeLength > 80 && $linkDensity < 0.25)
+				{
+					$append = true;
+				}
+				else if ($nodeLength < 80 && $linkDensity === 0 && preg_match('/\.( |$)/', $nodeContent))
+				{
+					$append = true;
+				}
+			}
+
+			if ($append)
+			{
+				$this->dbg('Appending node: ' . $siblingNode->nodeName);
+
+				$nodeToAppend = null;
+				$sibNodeName = strtoupper($siblingNode->nodeName);
+				if ($sibNodeName != 'DIV' && $sibNodeName != 'P') {
+					/* We have a node that isn't a common block level element, like a form or td tag. Turn it into a div so it doesn't get filtered out later by accident. */
+					
+					$this->dbg('Altering siblingNode of ' . $sibNodeName . ' to div.');
+					$nodeToAppend = $this->dom->createElement('div');
+					try {
+						$nodeToAppend->setAttribute('id', $siblingNode->getAttribute('id'));
+						$nodeToAppend->innerHTML = $siblingNode->innerHTML;
+					}
+					catch(Exception $e)
+					{
+						$this->dbg('Could not alter siblingNode to div, reverting back to original.');
+						$nodeToAppend = $siblingNode;
+						$s--;
+						$sl--;
+					}
+				} else {
+					$nodeToAppend = $siblingNode;
+					$s--;
+					$sl--;
+				}
+				
+				/* To ensure a node does not interfere with readability styles, remove its classnames */
+				$nodeToAppend->removeAttribute('class');
+
+				/* Append sibling; $s and $sl were decremented above because appending moves the original node out of the sibling list */
+				$articleContent->appendChild($nodeToAppend);
+			}
+		}
+
+		/**
+		* So we have all of the content that we need. Now we clean it up for presentation.
+		**/
+		$this->prepArticle($articleContent);
+
+		/**
+		* Now that we've gone through the full algorithm, check to see if we got any meaningful content.
+		* If we didn't, we may need to re-run grabArticle with different flags set. This gives us a higher
+		* likelihood of finding the content, and the sieve approach gives us a higher likelihood of
+		* finding the -right- content.
+		**/
+		if (strlen($this->getInnerText($articleContent, false)) < 250)
+		{
+			// TODO: find out why element disappears sometimes, e.g. for this URL http://www.businessinsider.com/6-hedge-fund-etfs-for-average-investors-2011-7
+			// in the meantime, we check and create an empty element if it's not there.
+			if (!isset($this->body->childNodes)) $this->body = $this->dom->createElement('body');
+			$this->body->innerHTML = $this->bodyCache;
+			
+			if ($this->flagIsActive(self::FLAG_STRIP_UNLIKELYS)) {
+				$this->removeFlag(self::FLAG_STRIP_UNLIKELYS);
+				return $this->grabArticle($this->body);
+			}
+			else if ($this->flagIsActive(self::FLAG_WEIGHT_CLASSES)) {
+				$this->removeFlag(self::FLAG_WEIGHT_CLASSES);
+				return $this->grabArticle($this->body);              
+			}
+			else if ($this->flagIsActive(self::FLAG_CLEAN_CONDITIONALLY)) {
+				$this->removeFlag(self::FLAG_CLEAN_CONDITIONALLY);
+				return $this->grabArticle($this->body);
+			}
+			else {
+				return false;
+			}
+		}
+		return $articleContent;
+	}
+	
+	/**
+	* Delete every <script> element found beneath the given node.
+	*
+	* @param DOMElement $doc node (or document) to purge of script tags
+	* @return void
+	*/
+	public function removeScripts($doc) {
+		$scriptTags = $doc->getElementsByTagName('script');
+		for ($pos = $scriptTags->length - 1; $pos >= 0; $pos--) { // back to front, so removals never shift pending indexes
+			$doomed = $scriptTags->item($pos);
+			$doomed->parentNode->removeChild($doomed);
+		}
+	}
+	
+	/**
+	* Return the trimmed text content of a node.
+	* Unless $normalizeSpaces is false, every match of the 'normalize' pattern is replaced by a single space.
+	*
+	* @param DOMElement $e node to read; anything without text content yields ''
+	* @param boolean $normalizeSpaces (default: true)
+	* @return string
+	**/
+	public function getInnerText($e, $normalizeSpaces=true) {
+		// Missing or empty text content: nothing to trim or normalise.
+		if (!isset($e->textContent) || $e->textContent == '') {
+			return '';
+		}
+
+		$trimmed = trim($e->textContent);
+		if (!$normalizeSpaces) {
+			// Caller asked for the text as-is (apart from trimming).
+			return $trimmed;
+		}
+
+		// Replace every 'normalize' match with one space.
+		return preg_replace($this->regexps['normalize'], ' ', $trimmed);
+	}
+
+	/**
+	* Count how many times the string $s occurs in the inner text of node $e (as returned by getInnerText).
+	* @param DOMElement $e node whose text is searched
+	* @param string $s what to count. Default is ","
+	* @return number (integer) occurrences as reported by substr_count
+	**/
+	public function getCharCount($e, $s=',') {
+		$haystack = $this->getInnerText($e);
+		return substr_count($haystack, $s);
+	}
+
+	/**
+	* Strip the style attribute from every element nested under $e.
+	* $e itself is left untouched; non-object input is silently ignored.
+	* @param DOMElement $e
+	* @return void
+	*/
+	public function cleanStyles($e) {
+		if (is_object($e)) {
+			foreach ($e->getElementsByTagName('*') as $descendant) {
+				$descendant->removeAttribute('style');
+			}
+		}
+	}
+	
+	/**
+	* Get the link density of a node: the length of the text found inside <a>
+	* descendants divided by the length of all text in the node.
+	* Lengths are byte counts (strlen) of the inner text returned by getInnerText.
+	*
+	* @param DOMElement $e
+	* @return number (int|float) the ratio, or 0 when the node has no text
+	*/
+	public function getLinkDensity($e) {
+		$anchors = $e->getElementsByTagName('a');
+		$totalChars = strlen($this->getInnerText($e));
+
+		// Sum up the text length of every anchor below $e.
+		$anchorChars = 0;
+		foreach ($anchors as $anchor) {
+			$anchorChars += strlen($this->getInnerText($anchor));
+		}
+
+		// Guard against division by zero for nodes without any text.
+		return ($totalChars > 0) ? $anchorChars / $totalChars : 0;
+	}
+	
+	/**
+	* Get an element's class/id weight. Uses regular expressions to tell if this
+	* element looks good or bad.
+	* The class and the id attribute are judged independently: each one subtracts 25
+	* when it matches the 'negative' pattern and adds 25 when it matches the 'positive' one.
+	*
+	* @param DOMElement $e
+	* @return number (Integer) always 0 while FLAG_WEIGHT_CLASSES is inactive
+	*/
+	public function getClassWeight($e) {
+		$weight = 0;
+
+		// Class weighting can be switched off as a whole.
+		if (!$this->flagIsActive(self::FLAG_WEIGHT_CLASSES)) {
+			return $weight;
+		}
+
+		foreach (array('class', 'id') as $attrName) {
+			// Absent or empty attributes carry no signal.
+			if (!$e->hasAttribute($attrName)) {
+				continue;
+			}
+			$attrValue = $e->getAttribute($attrName);
+			if ($attrValue == '') {
+				continue;
+			}
+
+			if (preg_match($this->regexps['negative'], $attrValue)) {
+				$weight -= 25;
+			}
+			if (preg_match($this->regexps['positive'], $attrValue)) {
+				$weight += 25;
+			}
+		}
+
+		return $weight;
+	}
+
+	/**
+	* Remove extraneous break tags from a node: every match of the 'killBreaks'
+	* pattern in the node's markup is replaced by a single <br />.
+	*
+	* @param DOMElement $node element exposing the innerHTML property
+	* @return void
+	*/
+	public function killBreaks($node) {
+		$collapsed = preg_replace($this->regexps['killBreaks'], '<br />', $node->innerHTML);
+		$node->innerHTML = $collapsed;
+	}
+
+	/**
+	* Remove every descendant of $e with tag name $tag.
+	* For iframe, object and embed tags an element is kept when its combined
+	* attribute values or its inner markup match the 'video' pattern
+	* (youtube/vimeo videos, according to the original notes).
+	*
+	* @param DOMElement $e
+	* @param string $tag
+	* @return void
+	*/
+	public function clean($e, $tag) {
+		$matches = $e->getElementsByTagName($tag);
+		$isEmbed = in_array($tag, array('iframe', 'object', 'embed'));
+
+		// Iterate backwards: removing a node must not disturb the indexes still to be visited.
+		for ($pos = $matches->length - 1; $pos >= 0; $pos--) {
+			$target = $matches->item($pos);
+
+			if ($isEmbed) {
+				// Glue all attribute values together, each one followed by '|'.
+				$attributeValues = '';
+				foreach ($target->attributes as $attribute) {
+					$attributeValues .= $attribute->value . '|';
+				}
+
+				// Spare the element when its attributes or its markup match the 'video' pattern.
+				if (preg_match($this->regexps['video'], $attributeValues)
+					|| preg_match($this->regexps['video'], $target->innerHTML)) {
+					continue;
+				}
+			}
+
+			$target->parentNode->removeChild($target);
+		}
+	}
+	
+	/**
+	* Clean an element of all tags of type "tag" if they look fishy.
+	* "Fishy" is an algorithm based on content length, classnames,
+	* link density, number of images & embeds, etc.
+	* Does nothing unless FLAG_CLEAN_CONDITIONALLY is active.
+	* @param DOMElement $e
+	* @param string $tag
+	* @return void
+	*/
+	public function cleanConditionally($e, $tag) {
+		if (!$this->flagIsActive(self::FLAG_CLEAN_CONDITIONALLY)) {
+			return;
+		}
+
+		$tagsList = $e->getElementsByTagName($tag);
+
+		/**
+		* Gather counts for other typical elements embedded within.
+		* Traverse backwards so we can remove nodes at the same time without affecting the traversal.
+		* TODO: Consider taking into account original contentScore here.
+		*/
+		for ($i = $tagsList->length - 1; $i >= 0; $i--) {
+			$current = $tagsList->item($i);
+			$weight = $this->getClassWeight($current);
+			$hasScore = $current->hasAttribute('readability');
+			$contentScore = $hasScore ? (int)$current->getAttribute('readability') : 0;
+
+			$this->dbg('Cleaning Conditionally ' . $current->tagName . ' (' . $current->getAttribute('class') . ':' . $current->getAttribute('id') . ')' . ($hasScore ? (' with score ' . $current->getAttribute('readability')) : ''));
+
+			if ($weight + $contentScore < 0) {
+				// Negative overall score: drop the element outright.
+				$current->parentNode->removeChild($current);
+				continue;
+			}
+			if ($this->getCharCount($current, ',') >= 10) {
+				// Ten or more commas: the element is kept without further checks.
+				continue;
+			}
+
+			/**
+			* If there are not very many commas, and the number of
+			* non-paragraph elements is more than paragraphs or other ominous signs, remove the element.
+			**/
+			$pCount     = $current->getElementsByTagName('p')->length;
+			$imgCount   = $current->getElementsByTagName('img')->length;
+			$liCount    = $current->getElementsByTagName('li')->length-100;
+			$inputCount = $current->getElementsByTagName('input')->length;
+			$linkCount  = $current->getElementsByTagName('a')->length;
+
+			// Only embed/iframe elements whose src matches the 'video' pattern are counted.
+			$embedCount = 0;
+			foreach (array('embed', 'iframe') as $embedTag) {
+				foreach ($current->getElementsByTagName($embedTag) as $embed) {
+					if (preg_match($this->regexps['video'], $embed->getAttribute('src'))) {
+						$embedCount++;
+					}
+				}
+			}
+
+			$linkDensity   = $this->getLinkDensity($current);
+			$contentLength = strlen($this->getInnerText($current));
+			$toRemove      = false;
+
+			if ($this->lightClean) {
+				$this->dbg('Light clean...');
+				if ( ($imgCount > $pCount) && ($imgCount > 4) ) {
+					$this->dbg(' more than 4 images and more image elements than paragraph elements');
+					$toRemove = true;
+				} elseif ($liCount > $pCount && $tag != 'ul' && $tag != 'ol') {
+					$this->dbg(' too many <li> elements, and parent is not <ul> or <ol>');
+					$toRemove = true;
+				} elseif ( $inputCount > floor($pCount/3) ) {
+					$this->dbg(' too many <input> elements');
+					$toRemove = true;
+				} elseif ($contentLength < 10 && ($embedCount === 0 && ($imgCount === 0 || $imgCount > 2))) {
+					$this->dbg(' content length less than 10 chars, 0 embeds and either 0 images or more than 2 images');
+					$toRemove = true;
+				} elseif ($weight < 25 && $linkDensity > 0.2) {
+					$this->dbg(' weight smaller than 25 and link density above 0.2');
+					$toRemove = true;
+				} elseif ($linkCount > 2 && ($weight >= 25 && $linkDensity > 0.5)) {
+					$this->dbg(' more than 2 links and weight above 25 but link density greater than 0.5');
+					$toRemove = true;
+				} elseif ($embedCount > 3) {
+					$this->dbg(' more than 3 embeds');
+					$toRemove = true;
+				}
+			} else {
+				$this->dbg('Standard clean...');
+				if ( $imgCount > $pCount ) {
+					$this->dbg(' more image elements than paragraph elements');
+					$toRemove = true;
+				} elseif ($liCount > $pCount && $tag != 'ul' && $tag != 'ol') {
+					$this->dbg(' too many <li> elements, and parent is not <ul> or <ol>');
+					$toRemove = true;
+				} elseif ( $inputCount > floor($pCount/3) ) {
+					$this->dbg(' too many <input> elements');
+					$toRemove = true;
+				} elseif ($contentLength < 25 && ($imgCount === 0 || $imgCount > 2) ) {
+					$this->dbg(' content length less than 25 chars and 0 images, or more than 2 images');
+					$toRemove = true;
+				} elseif ($weight < 25 && $linkDensity > 0.2) {
+					$this->dbg(' weight smaller than 25 and link density above 0.2');
+					$toRemove = true;
+				} elseif ($weight >= 25 && $linkDensity > 0.5) {
+					$this->dbg(' weight above 25 but link density greater than 0.5');
+					$toRemove = true;
+				} elseif (($embedCount == 1 && $contentLength < 75) || $embedCount > 1) {
+					$this->dbg(' 1 embed and content length smaller than 75 chars, or more than one embed');
+					$toRemove = true;
+				}
+			}
+
+			if ($toRemove) {
+				$current->parentNode->removeChild($current);
+			}
+		}
+	}
+
+	/**
+	* Clean out spurious h1 and h2 headers from an element: a header is removed
+	* when its class weight is negative or its link density exceeds 0.33.
+	* @param DOMElement $e
+	* @return void
+	*/
+	public function cleanHeaders($e) {
+		foreach (array('h1', 'h2') as $headerTag) {
+			$headers = $e->getElementsByTagName($headerTag);
+			for ($pos = $headers->length - 1; $pos >= 0; $pos--) {
+				$header = $headers->item($pos);
+				$spurious = $this->getClassWeight($header) < 0 || $this->getLinkDensity($header) > 0.33;
+				if ($spurious) $header->parentNode->removeChild($header);
+			}
+		}
+	}
+
+	public function flagIsActive($flag) { // true when masking $this->flags with $flag gives a positive value
+		return 0 < ($this->flags & $flag);
+	}
+	
+	public function addFlag($flag) { // switch on every bit that is set in $flag
+		$this->flags |= $flag;
+	}
+	
+	public function removeFlag($flag) { // clear every bit that is set in $flag
+		$this->flags &= ~$flag;
+	}
+}
 ?>
\ No newline at end of file
diff --git a/inc/3rdparty/makefulltextfeed.php b/inc/3rdparty/makefulltextfeed.php
index 4faad6d9..7a56be8c 100755
--- a/inc/3rdparty/makefulltextfeed.php
+++ b/inc/3rdparty/makefulltextfeed.php
@@ -3,8 +3,8 @@
 // Author: Keyvan Minoukadeh
 // Copyright (c) 2013 Keyvan Minoukadeh
 // License: AGPLv3
-// Version: 3.1
-// Date: 2013-03-05
+// Version: 3.2
+// Date: 2013-05-13
 // More info: http://fivefilters.org/content-only/
 // Help: http://help.fivefilters.org
 
@@ -25,12 +25,8 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 // Usage
 // -----
-// Request this file passing it your feed in the querystring: makefulltextfeed.php?url=mysite.org
-// The following options can be passed in the querystring:
-// * URL: url=[feed or website url] (required, should be URL-encoded - in php: urlencode($url))
-// * URL points to HTML (not feed): html=true (optional, by default it's automatically detected)
-// * API key: key=[api key] (optional, refer to config.php)
-// * Max entries to process: max=[max number of items] (optional)
+// Request this file passing it a web page or feed URL in the querystring: makefulltextfeed.php?url=example.org/article
+// For more request parameters, see http://help.fivefilters.org/customer/portal/articles/226660-usage
 
 error_reporting(E_ALL ^ E_NOTICE);
 ini_set("display_errors", 1);
@@ -76,8 +72,8 @@ header('X-Robots-Tag: noindex, nofollow');
 ////////////////////////////////
 // Check if service is enabled
 ////////////////////////////////
-if (!$options->enabled) { 
-	die('The full-text RSS service is currently disabled'); 
+if (!$options->enabled) {
+	die('The full-text RSS service is currently disabled');
 }
 
 ////////////////////////////////
@@ -121,8 +117,8 @@ $options->smart_cache = $options->smart_cache && function_exists('apc_inc');
 ////////////////////////////////
 // Check for feed URL
 ////////////////////////////////
-if (!isset($_GET['url'])) { 
-	die('No URL supplied'); 
+if (!isset($_GET['url'])) {
+	die('No URL supplied');
 }
 $url = trim($_GET['url']);
 if (strtolower(substr($url, 0, 7)) == 'feed://') {
@@ -161,10 +157,12 @@ if (isset($_GET['key']) && ($key_index = array_search($_GET['key'], $options->ap
 	if (isset($_GET['links'])) $redirect .= '&links='.urlencode($_GET['links']);
 	if (isset($_GET['exc'])) $redirect .= '&exc='.urlencode($_GET['exc']);
 	if (isset($_GET['format'])) $redirect .= '&format='.urlencode($_GET['format']);
-	if (isset($_GET['callback'])) $redirect .= '&callback='.urlencode($_GET['callback']);	
+	if (isset($_GET['callback'])) $redirect .= '&callback='.urlencode($_GET['callback']);
 	if (isset($_GET['l'])) $redirect .= '&l='.urlencode($_GET['l']);
 	if (isset($_GET['xss'])) $redirect .= '&xss';
 	if (isset($_GET['use_extracted_title'])) $redirect .= '&use_extracted_title';
+	if (isset($_GET['content'])) $redirect .= '&content='.urlencode($_GET['content']);
+	if (isset($_GET['summary'])) $redirect .= '&summary='.urlencode($_GET['summary']);
 	if (isset($_GET['debug'])) $redirect .= '&debug';
 	if ($debug_mode) {
 		debug('Redirecting to hide access key, follow URL below to continue');
@@ -177,7 +175,7 @@ if (isset($_GET['key']) && ($key_index = array_search($_GET['key'], $options->ap
 
 ///////////////////////////////////////////////
 // Set timezone.
-// Prevents warnings, but needs more testing - 
+// Prevents warnings, but needs more testing -
 // perhaps if timezone is set in php.ini we
 // don't need to set it at all...
 ///////////////////////////////////////////////
@@ -199,7 +197,7 @@ if (isset($_GET['key']) && isset($_GET['hash']) && isset($options->api_keys[(int
 }
 $key_index = ($valid_key) ? (int)$_GET['key'] : 0;
 if (!$valid_key && $options->key_required) {
-	die('A valid key must be supplied'); 
+	die('A valid key must be supplied');
 }
 if (!$valid_key && isset($_GET['key']) && $_GET['key'] != '') {
 	die('The entered key is invalid');
@@ -250,6 +248,28 @@ if ($options->favour_feed_titles == 'user') {
 	$favour_feed_titles = $options->favour_feed_titles;
 }
 
+///////////////////////////////////////////////
+// Include full content in output?
+///////////////////////////////////////////////
+if ($options->content === 'user') {
+	if (isset($_GET['content']) && $_GET['content'] === '0') {
+		$options->content = false;
+	} else {
+		$options->content = true;
+	}
+}
+
+///////////////////////////////////////////////
+// Include summaries in output?
+///////////////////////////////////////////////
+if ($options->summary === 'user') {
+	if (isset($_GET['summary']) && $_GET['summary'] === '1') {
+		$options->summary = true;
+	} else {
+		$options->summary = false;
+	}
+}
+
 ///////////////////////////////////////////////
 // Exclude items if extraction fails
 ///////////////////////////////////////////////
@@ -272,15 +292,6 @@ if ($options->detect_language === 'user') {
 	$detect_language = $options->detect_language;
 }
 
-if ($detect_language >= 2) {
-	$language_codes = array('albanian' => 'sq','arabic' => 'ar','azeri' => 'az','bengali' => 'bn','bulgarian' => 'bg',
-	'cebuano' => 'ceb', // ISO 639-2
-	'croatian' => 'hr','czech' => 'cs','danish' => 'da','dutch' => 'nl','english' => 'en','estonian' => 'et','farsi' => 'fa','finnish' => 'fi','french' => 'fr','german' => 'de','hausa' => 'ha',
-	'hawaiian' => 'haw', // ISO 639-2 
-	'hindi' => 'hi','hungarian' => 'hu','icelandic' => 'is','indonesian' => 'id','italian' => 'it','kazakh' => 'kk','kyrgyz' => 'ky','latin' => 'la','latvian' => 'lv','lithuanian' => 'lt','macedonian' => 'mk','mongolian' => 'mn','nepali' => 'ne','norwegian' => 'no','pashto' => 'ps',
-	'pidgin' => 'cpe', // ISO 639-2  
-	'polish' => 'pl','portuguese' => 'pt','romanian' => 'ro','russian' => 'ru','serbian' => 'sr','slovak' => 'sk','slovene' => 'sl','somali' => 'so','spanish' => 'es','swahili' => 'sw','swedish' => 'sv','tagalog' => 'tl','turkish' => 'tr','ukrainian' => 'uk','urdu' => 'ur','uzbek' => 'uz','vietnamese' => 'vi','welsh' => 'cy');
-}
 $use_cld = extension_loaded('cld') && (version_compare(PHP_VERSION, '5.3.0') >= 0);
 
 /////////////////////////////////////
@@ -330,7 +341,7 @@ if ($options->cors) header('Access-Control-Allow-Origin: *');
 //////////////////////////////////
 if ($options->caching) {
 	debug('Caching is enabled...');
-	$cache_id = md5($max.$url.$valid_key.$links.$favour_feed_titles.$xss_filter.$exclude_on_fail.$format.$detect_language.(int)isset($_GET['pubsub']));
+	$cache_id = md5($max.$url.(int)$valid_key.$links.(int)$favour_feed_titles.(int)$options->content.(int)$options->summary.(int)$xss_filter.(int)$exclude_on_fail.$format.$detect_language.(int)isset($_GET['pubsub']));
 	$check_cache = true;
 	if ($options->apc && $options->smart_cache) {
 		apc_add("cache.$cache_id", 0, 10*60);
@@ -468,7 +479,7 @@ if ($img_url = $feed->get_image_url()) {
 ////////////////////////////////////////////
 // Loop through feed items
 ////////////////////////////////////////////
-$items = $feed->get_items(0, $max);	
+$items = $feed->get_items(0, $max);
 // Request all feed items in parallel (if supported)
 $urls_sanitized = array();
 $urls = array();
@@ -550,24 +561,43 @@ foreach ($items as $key => $item) {
 			$is_single_page = false;
 			if ($single_page_response = getSinglePage($item, $html, $effective_url)) {
 				$is_single_page = true;
-				$html = $single_page_response['body'];
-				// remove strange things
-				$html = str_replace('</[>', '', $html);	
-				$html = convert_to_utf8($html, $single_page_response['headers']);
 				$effective_url = $single_page_response['effective_url'];
-				debug("Retrieved single-page view from $effective_url");
+				// check if action defined for returned Content-Type
+				$mime_info = get_mime_action_info($single_page_response['headers']);
+				if (isset($mime_info['action'])) {
+					if ($mime_info['action'] == 'exclude') {
+						continue; // skip this feed item entry
+					} elseif ($mime_info['action'] == 'link') {
+						if ($mime_info['type'] == 'image') {
+							$html = "<a href=\"$effective_url\"><img src=\"$effective_url\" alt=\"{$mime_info['name']}\" /></a>";
+						} else {
+							$html = "<a href=\"$effective_url\">Download {$mime_info['name']}</a>";
+						}
+						$extracted_title = $mime_info['name'];
+						$do_content_extraction = false;
+					}
+				}
+				if ($do_content_extraction) {
+					$html = $single_page_response['body'];
+					// remove strange things
+					$html = str_replace('</[>', '', $html);
+					$html = convert_to_utf8($html, $single_page_response['headers']);
+					debug("Retrieved single-page view from $effective_url");
+				}
 				unset($single_page_response);
 			}
+		}
+		if ($do_content_extraction) {
 			debug('--------');
 			debug('Attempting to extract content');
 			$extract_result = $extractor->process($html, $effective_url);
 			$readability = $extractor->readability;
-			$content_block = ($extract_result) ? $extractor->getContent() : null;			
+			$content_block = ($extract_result) ? $extractor->getContent() : null;
 			$extracted_title = ($extract_result) ? $extractor->getTitle() : '';
 			// Deal with multi-page articles
 			//die('Next: '.$extractor->getNextPageUrl());
 			$is_multi_page = (!$is_single_page && $extract_result && $extractor->getNextPageUrl());
-			if ($options->multipage && $is_multi_page) {
+			if ($options->multipage && $is_multi_page && $options->content) {
 				debug('--------');
 				debug('Attempting to process multi-page article');
 				$multi_page_urls = array();
@@ -580,7 +610,7 @@ foreach ($items as $key => $item) {
 						// check it's not what we have already!
 						if (!in_array($next_page_url, $multi_page_urls)) {
 							// it's not, so let's attempt to fetch it
-							$multi_page_urls[] = $next_page_url;						
+							$multi_page_urls[] = $next_page_url;
 							$_prev_ref = $http->referer;
 							if (($response = $http->get($next_page_url, true)) && $response['status_code'] < 300) {
 								// make sure mime type is not something with a different action associated
@@ -605,13 +635,15 @@ foreach ($items as $key => $item) {
 				// did we successfully deal with this multi-page article?
 				if (empty($multi_page_content)) {
 					debug('Failed to extract all parts of multi-page article, so not going to include them');
-					$multi_page_content[] = $readability->dom->createElement('p')->innerHTML = '<em>This article appears to continue on subsequent pages which we could not extract</em>';
+					$_page = $readability->dom->createElement('p');
+					$_page->innerHTML = '<em>This article appears to continue on subsequent pages which we could not extract</em>';
+					$multi_page_content[] = $_page;
 				}
 				foreach ($multi_page_content as $_page) {
 					$_page = $content_block->ownerDocument->importNode($_page, true);
 					$content_block->appendChild($_page);
 				}
-				unset($multi_page_urls, $multi_page_content, $page_mime_info, $next_page_url);
+				unset($multi_page_urls, $multi_page_content, $page_mime_info, $next_page_url, $_page);
 			}
 		}
 		// use extracted title for both feed and item title if we're using single-item dummy feed
@@ -658,7 +690,7 @@ foreach ($items as $key => $item) {
 			} else {
 				$html = $content_block->ownerDocument->saveXML($content_block); // essentially outerHTML
 			}
-			unset($content_block);
+			//unset($content_block); // not unset here: $content_block is read again further down when the summary is built
 			// post-processing cleanup
 			$html = preg_replace('!<p>[\s\h\v]*</p>!u', '', $html);
 			if ($links == 'remove') {
@@ -671,130 +703,155 @@ foreach ($items as $key => $item) {
 		}
 	}
 
-		if ($valid_key && isset($_GET['pubsub'])) { // used only on fivefilters.org at the moment
-			$newitem->addElement('guid', 'http://fivefilters.org/content-only/redirect.php?url='.urlencode($item->get_permalink()), array('isPermaLink'=>'false'));
+	if ($valid_key && isset($_GET['pubsub'])) { // used only on fivefilters.org at the moment
+		$newitem->addElement('guid', 'http://fivefilters.org/content-only/redirect.php?url='.urlencode($item->get_permalink()), array('isPermaLink'=>'false'));
+	} else {
+		$newitem->addElement('guid', $item->get_permalink(), array('isPermaLink'=>'true'));
+	}
+	// filter xss?
+	if ($xss_filter) {
+		debug('Filtering HTML to remove XSS');
+		$html = htmLawed::hl($html, array('safe'=>1, 'deny_attribute'=>'style', 'comment'=>1, 'cdata'=>1));
+	}
+
+	// add content
+	if ($options->summary === true) {
+		// get summary
+		$summary = '';
+		if (!$do_content_extraction) {
+			$summary = $html;
 		} else {
-			$newitem->addElement('guid', $item->get_permalink(), array('isPermaLink'=>'true'));
-		}
-		// filter xss?
-		if ($xss_filter) {
-			debug('Filtering HTML to remove XSS');
-			$html = htmLawed::hl($html, array('safe'=>1, 'deny_attribute'=>'style', 'comment'=>1, 'cdata'=>1));
-		}
-		$newitem->setDescription($html);
-		
-		// set date
-		if ((int)$item->get_date('U') > 0) {
-			$newitem->setDate((int)$item->get_date('U'));
-		} elseif ($extractor->getDate()) {
-			$newitem->setDate($extractor->getDate());
-		}
-		
-		// add authors
-		if ($authors = $item->get_authors()) {
-			foreach ($authors as $author) {
-				// for some feeds, SimplePie stores author's name as email, e.g. http://feeds.feedburner.com/nymag/intel
-				if ($author->get_name() !== null) {
-					$newitem->addElement('dc:creator', $author->get_name());
-				} elseif ($author->get_email() !== null) {
-					$newitem->addElement('dc:creator', $author->get_email());
+			// Try to get first few paragraphs
+			if (isset($content_block) && ($content_block instanceof DOMElement)) {
+				$_paras = $content_block->getElementsByTagName('p');
+				foreach ($_paras as $_para) {
+					$summary .= preg_replace("/[\n\r\t ]+/", ' ', $_para->textContent).' ';
+					if (strlen($summary) > 200) break;
 				}
+			} else {
+				$summary = $html;
 			}
-		} elseif ($authors = $extractor->getAuthors()) {
-			//TODO: make sure the list size is reasonable
-			foreach ($authors as $author) {
-				// TODO: xpath often selects authors from other articles linked from the page.
-				// for now choose first item
-				$newitem->addElement('dc:creator', $author);
-				break;
+		}
+		unset($_paras, $_para);
+		$summary = get_excerpt($summary);
+		$newitem->setDescription($summary);
+		if ($options->content) $newitem->setElement('content:encoded', $html);
+	} else {
+		if ($options->content) $newitem->setDescription($html);
+	}
+
+	// set date
+	if ((int)$item->get_date('U') > 0) {
+		$newitem->setDate((int)$item->get_date('U'));
+	} elseif ($extractor->getDate()) {
+		$newitem->setDate($extractor->getDate());
+	}
+
+	// add authors
+	if ($authors = $item->get_authors()) {
+		foreach ($authors as $author) {
+			// for some feeds, SimplePie stores author's name as email, e.g. http://feeds.feedburner.com/nymag/intel
+			if ($author->get_name() !== null) {
+				$newitem->addElement('dc:creator', $author->get_name());
+			} elseif ($author->get_email() !== null) {
+				$newitem->addElement('dc:creator', $author->get_email());
 			}
 		}
-		
-		// add language
-		if ($detect_language) {
-			$language = $extractor->getLanguage();
-			if (!$language) $language = $feed->get_language();
-			if (($detect_language == 3 || (!$language && $detect_language == 2)) && $text_sample) {
-				try {
-					if ($use_cld) {
-						// Use PHP-CLD extension
-						$php_cld = 'CLD\detect'; // in quotes to prevent PHP 5.2 parse error
-						$res = $php_cld($text_sample);
-						if (is_array($res) && count($res) > 0) {
-							$language = $res[0]['code'];
-						}	
-					} else {
-						//die('what');
-						// Use PEAR's Text_LanguageDetect
-						if (!isset($l))	{
-							$l = new Text_LanguageDetect('libraries/language-detect/lang.dat', 'libraries/language-detect/unicode_blocks.dat');
-						}
-						$l_result = $l->detect($text_sample, 1);
-						if (count($l_result) > 0) {
-							$language = $language_codes[key($l_result)];
-						}
+	} elseif ($authors = $extractor->getAuthors()) {
+		//TODO: make sure the list size is reasonable
+		foreach ($authors as $author) {
+			// TODO: xpath often selects authors from other articles linked from the page.
+			// for now choose first item
+			$newitem->addElement('dc:creator', $author);
+			break;
+		}
+	}
+
+	// add language
+	if ($detect_language) {
+		$language = $extractor->getLanguage();
+		if (!$language) $language = $feed->get_language();
+		if (($detect_language == 3 || (!$language && $detect_language == 2)) && $text_sample) {
+			try {
+				if ($use_cld) {
+					// Use PHP-CLD extension
+					$php_cld = 'CLD\detect'; // in quotes to prevent PHP 5.2 parse error
+					$res = $php_cld($text_sample);
+					if (is_array($res) && count($res) > 0) {
+						$language = $res[0]['code'];
+					}
+				} else {
+					//die('what');
+					// Use PEAR's Text_LanguageDetect
+					if (!isset($l))	{
+					  $l = new Text_LanguageDetect();
+					  $l->setNameMode(2); // return ISO 639-1 codes (e.g. "en")
+					}
+					$l_result = $l->detect($text_sample, 1);
+					if (count($l_result) > 0) {
+						$language = key($l_result);
 					}
-				} catch (Exception $e) {
-					//die('error: '.$e);	
-					// do nothing
 				}
-			}
-			if ($language && (strlen($language) < 7)) {	
-				$newitem->addElement('dc:language', $language);
+			} catch (Exception $e) {
+				//die('error: '.$e);
+				// do nothing
 			}
 		}
-		
-		// add MIME type (if it appeared in our exclusions lists)
-		if (isset($mime_info['mime'])) $newitem->addElement('dc:format', $mime_info['mime']);
-		// add effective URL (URL after redirects)
-		if (isset($effective_url)) {
-			//TODO: ensure $effective_url is valid witout - sometimes it causes problems, e.g.
-			//http://www.siasat.pk/forum/showthread.php?108883-Pakistan-Chowk-by-Rana-Mubashir-�-25th-March-2012-Special-Program-from-Liari-(Karachi)
-			//temporary measure: use utf8_encode()
-			$newitem->addElement('dc:identifier', remove_url_cruft(utf8_encode($effective_url)));
-		} else {
-			$newitem->addElement('dc:identifier', remove_url_cruft($item->get_permalink()));
+		if ($language && (strlen($language) < 7)) {
+			$newitem->addElement('dc:language', $language);
 		}
-		
-		// add categories
-		if ($categories = $item->get_categories()) {
-			foreach ($categories as $category) {
-				if ($category->get_label() !== null) {
-					$newitem->addElement('category', $category->get_label());
-				}
+	}
+
+	// add MIME type (if it appeared in our exclusions lists)
+	if (isset($mime_info['mime'])) $newitem->addElement('dc:format', $mime_info['mime']);
+	// add effective URL (URL after redirects)
+	if (isset($effective_url)) {
+		//TODO: ensure $effective_url is valid without - sometimes it causes problems, e.g.
+		//http://www.siasat.pk/forum/showthread.php?108883-Pakistan-Chowk-by-Rana-Mubashir-�-25th-March-2012-Special-Program-from-Liari-(Karachi)
+		//temporary measure: use utf8_encode()
+		$newitem->addElement('dc:identifier', remove_url_cruft(utf8_encode($effective_url)));
+	} else {
+		$newitem->addElement('dc:identifier', remove_url_cruft($item->get_permalink()));
+	}
+
+	// add categories
+	if ($categories = $item->get_categories()) {
+		foreach ($categories as $category) {
+			if ($category->get_label() !== null) {
+				$newitem->addElement('category', $category->get_label());
 			}
 		}
-		
-		// check for enclosures
-		if ($options->keep_enclosures) {
-			if ($enclosures = $item->get_enclosures()) {
-				foreach ($enclosures as $enclosure) {
-					// thumbnails
-					foreach ((array)$enclosure->get_thumbnails() as $thumbnail) {
-						$newitem->addElement('media:thumbnail', '', array('url'=>$thumbnail));
-					}
-					if (!$enclosure->get_link()) continue;
-					$enc = array();
-					// Media RSS spec ($enc): http://search.yahoo.com/mrss
-					// SimplePie methods ($enclosure): http://simplepie.org/wiki/reference/start#methods4
-					$enc['url'] = $enclosure->get_link();
-					if ($enclosure->get_length()) $enc['fileSize'] = $enclosure->get_length();
-					if ($enclosure->get_type()) $enc['type'] = $enclosure->get_type();
-					if ($enclosure->get_medium()) $enc['medium'] = $enclosure->get_medium();
-					if ($enclosure->get_expression()) $enc['expression'] = $enclosure->get_expression();
-					if ($enclosure->get_bitrate()) $enc['bitrate'] = $enclosure->get_bitrate();
-					if ($enclosure->get_framerate()) $enc['framerate'] = $enclosure->get_framerate();
-					if ($enclosure->get_sampling_rate()) $enc['samplingrate'] = $enclosure->get_sampling_rate();
-					if ($enclosure->get_channels()) $enc['channels'] = $enclosure->get_channels();
-					if ($enclosure->get_duration()) $enc['duration'] = $enclosure->get_duration();
-					if ($enclosure->get_height()) $enc['height'] = $enclosure->get_height();
-					if ($enclosure->get_width()) $enc['width'] = $enclosure->get_width();
-					if ($enclosure->get_language()) $enc['lang'] = $enclosure->get_language();
-					$newitem->addElement('media:content', '', $enc);
+	}
+
+	// check for enclosures
+	if ($options->keep_enclosures) {
+		if ($enclosures = $item->get_enclosures()) {
+			foreach ($enclosures as $enclosure) {
+				// thumbnails
+				foreach ((array)$enclosure->get_thumbnails() as $thumbnail) {
+					$newitem->addElement('media:thumbnail', '', array('url'=>$thumbnail));
 				}
+				if (!$enclosure->get_link()) continue;
+				$enc = array();
+				// Media RSS spec ($enc): http://search.yahoo.com/mrss
+				// SimplePie methods ($enclosure): http://simplepie.org/wiki/reference/start#methods4
+				$enc['url'] = $enclosure->get_link();
+				if ($enclosure->get_length()) $enc['fileSize'] = $enclosure->get_length();
+				if ($enclosure->get_type()) $enc['type'] = $enclosure->get_type();
+				if ($enclosure->get_medium()) $enc['medium'] = $enclosure->get_medium();
+				if ($enclosure->get_expression()) $enc['expression'] = $enclosure->get_expression();
+				if ($enclosure->get_bitrate()) $enc['bitrate'] = $enclosure->get_bitrate();
+				if ($enclosure->get_framerate()) $enc['framerate'] = $enclosure->get_framerate();
+				if ($enclosure->get_sampling_rate()) $enc['samplingrate'] = $enclosure->get_sampling_rate();
+				if ($enclosure->get_channels()) $enc['channels'] = $enclosure->get_channels();
+				if ($enclosure->get_duration()) $enc['duration'] = $enclosure->get_duration();
+				if ($enclosure->get_height()) $enc['height'] = $enclosure->get_height();
+				if ($enclosure->get_width()) $enc['width'] = $enclosure->get_width();
+				if ($enclosure->get_language()) $enc['lang'] = $enclosure->get_language();
+				$newitem->addElement('media:content', '', $enc);
 			}
 		}
-	/* } */
+	}
 	$output->addItem($newitem);
 	unset($html);
 	$item_count++;
diff --git a/inc/3rdparty/makefulltextfeedHelpers.php b/inc/3rdparty/makefulltextfeedHelpers.php
index 1c11b8f6..4e985372 100755
--- a/inc/3rdparty/makefulltextfeedHelpers.php
+++ b/inc/3rdparty/makefulltextfeedHelpers.php
@@ -66,6 +66,38 @@ class DummySingleItem {
 // HELPER FUNCTIONS
 ///////////////////////////////
 
+// Adapted from WordPress
+// http://core.trac.wordpress.org/browser/tags/3.5.1/wp-includes/formatting.php#L2173
+function get_excerpt($text, $num_words=55, $more=null) {
+	if (null === $more) $more = '&hellip;';
+	$text = strip_tags($text);
+	//TODO: Check if word count is based on single characters (East Asian characters)
+	/*
+	if (1==2) {
+  	$text = trim(preg_replace("/[\n\r\t ]+/", ' ', $text), ' ');
+  	preg_match_all('/./u', $text, $words_array);
+  	$words_array = array_slice($words_array[0], 0, $num_words + 1);
+  	$sep = '';
+	} else {
+  	$words_array = preg_split("/[\n\r\t ]+/", $text, $num_words + 1, PREG_SPLIT_NO_EMPTY);
+  	$sep = ' ';
+	}
+	*/
+	$words_array = preg_split("/[\n\r\t ]+/", $text, $num_words + 1, PREG_SPLIT_NO_EMPTY);
+	$sep = ' ';
+	if (count($words_array) > $num_words) {
+		array_pop($words_array);
+		$text = implode($sep, $words_array);
+		$text = $text.$more;
+	} else {
+		$text = implode($sep, $words_array);
+	}
+	// trim whitespace at beginning or end of string
+	// See: http://stackoverflow.com/questions/4166896/trim-unicode-whitespace-in-php-5-2
+	$text = preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $text);
+	return $text;
+}
+
 function url_allowed($url) {
 	global $options;
 	if (!empty($options->allowed_urls)) {
@@ -165,14 +197,6 @@ function convert_to_utf8($html, $header=null)
 			if (strtolower($encoding) != 'utf-8') {
 				debug('Converting to UTF-8');
 				$html = SimplePie_Misc::change_encoding($html, $encoding, 'utf-8');
-				/*
-				if (function_exists('iconv')) {
-					// iconv appears to handle certain character encodings better than mb_convert_encoding
-					$html = iconv($encoding, 'utf-8', $html);
-				} else {
-					$html = mb_convert_encoding($html, 'utf-8', $encoding);
-				}
-				*/
 			}
 		}
 	}
@@ -196,7 +220,7 @@ function makeAbsolute($base, $elem) {
 }
 function makeAbsoluteAttr($base, $e, $attr) {
 	if ($e->hasAttribute($attr)) {
-		// Trim leading and trailing white space. I don't really like this but 
+		// Trim leading and trailing white space. I don't really like this but
 		// unfortunately it does appear on some sites. e.g.  <img src=" /path/to/image.jpg" />
 		$url = trim(str_replace('%20', ' ', $e->getAttribute($attr)));
 		$url = str_replace(' ', '%20', $url);
diff --git a/inc/3rdparty/site_config/index.php b/inc/3rdparty/site_config/index.php
index a1b767fd..76ca8b3c 100644
--- a/inc/3rdparty/site_config/index.php
+++ b/inc/3rdparty/site_config/index.php
@@ -1,3 +1,2 @@
-<?php
-// this is here to prevent directory listing over the web
-?>
\ No newline at end of file
+<?php
+// this is here to prevent directory listing over the web
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/version.txt b/inc/3rdparty/site_config/standard/version.txt
index bf0d87ab..eaf01ebd 100644
--- a/inc/3rdparty/site_config/standard/version.txt
+++ b/inc/3rdparty/site_config/standard/version.txt
@@ -1 +1 @@
-4
\ No newline at end of file
+2013-05-12T22:53:07Z
\ No newline at end of file
-- 
cgit v1.2.3


From d18ff7d9565f982bc15c5930123992d44614e1e2 Mon Sep 17 00:00:00 2001
From: Maryana Rozhankivska <mariroz@mr.lviv.ua>
Date: Fri, 23 May 2014 19:25:48 +0300
Subject: two small unimportant forgotten changes to 3.2 version of
 full-text-rss, issue #694

---
 inc/3rdparty/libraries/language-detect/Parser.php | 354 ----------------------
 inc/3rdparty/site_config/custom/index.php         |   3 +
 2 files changed, 3 insertions(+), 354 deletions(-)
 delete mode 100644 inc/3rdparty/libraries/language-detect/Parser.php
 create mode 100644 inc/3rdparty/site_config/custom/index.php

diff --git a/inc/3rdparty/libraries/language-detect/Parser.php b/inc/3rdparty/libraries/language-detect/Parser.php
deleted file mode 100644
index 7f15fa98..00000000
--- a/inc/3rdparty/libraries/language-detect/Parser.php
+++ /dev/null
@@ -1,354 +0,0 @@
-<?php
-
-/**
- * This class represents a text sample to be parsed.
- *
- * @category    Text
- * @package     Text_LanguageDetect
- * @author      Nicholas Pisarro
- * @copyright   2006
- * @license     BSD
- * @version     CVS: $Id: Parser.php,v 1.5 2006/03/11 05:45:05 taak Exp $
- * @link        http://pear.php.net/package/Text_LanguageDetect/
- * @link        http://langdetect.blogspot.com/
- */
-
-/**
- * This class represents a text sample to be parsed.
- *
- * This separates the analysis of a text sample from the primary LanguageDetect
- * class. After a new profile has been built, the data can be retrieved using
- * the accessor functions.
- *
- * This class is intended to be used by the Text_LanguageDetect class, not 
- * end-users.
- *
- * @category    Text
- * @package     Text_LanguageDetect
- * @author      Nicholas Pisarro
- * @copyright   2006
- * @license     BSD
- * @version     release: 0.2.3
- */
-class Text_LanguageDetect_Parser extends Text_LanguageDetect
-{
-    /**
-     * the piece of text being parsed
-     *
-     * @access  private
-     * @var     string
-     */
-    var $_string;
-
-    /**
-     * stores the trigram frequencies of the sample
-     *
-     * @access  private
-     * @var     string
-     */
-    var $_trigrams = array();
-
-    /**
-     * stores the trigram ranks of the sample
-     *
-     * @access  private
-     * @var     array
-     */
-    var $_trigram_ranks = array();
-
-    /**
-     * stores the unicode blocks of the sample
-     *
-     * @access  private
-     * @var     array
-     */
-    var $_unicode_blocks = array();
-    
-    /**
-     * Whether the parser should compile the unicode ranges
-     * 
-     * @access  private
-     * @var     bool
-     */
-    var $_compile_unicode = false;
-
-    /**
-     * Whether the parser should compile trigrams
-     *
-     * @access  private
-     * @var     bool
-     */
-    var $_compile_trigram = false;
-
-    /**
-     * Whether the trigram parser should pad the beginning of the string
-     *
-     * @access  private
-     * @var     bool
-     */
-    var $_trigram_pad_start = false;
-
-    /**
-     * Whether the unicode parser should skip non-alphabetical ascii chars
-     *
-     * @access  private
-     * @var     bool
-     */
-    var $_unicode_skip_symbols = true;
-
-    /**
-     * Constructor
-     *
-     * @access  private
-     * @param   string  $string     string to be parsed
-     */
-    function Text_LanguageDetect_Parser($string, $db=null, $unicode_db=null) {
-		if (isset($db)) $this->_db_filename = $db;
-		if (isset($unicode_db)) $this->_unicode_db_filename = $unicode_db;	
-        $this->_string = $string;
-    }
-
-    /**
-     * Returns true if a string is suitable for parsing
-     *
-     * @static
-     * @access  public
-     * @param   string  $str    input string to test
-     * @return  bool            true if acceptable, false if not
-     */
-    function validateString($str) {
-        if (!empty($str) && strlen($str) > 3 && preg_match('/\S/', $str)) {
-            return true;
-        } else {
-            return false;
-        }
-    }
-
-    /**
-     * turn on/off trigram counting
-     *
-     * @access  public
-     * @param   bool    $bool true for on, false for off
-     */
-    function prepareTrigram($bool = true)
-    {
-        $this->_compile_trigram = $bool;
-    }
-
-    /**
-     * turn on/off unicode block counting
-     *
-     * @access  public
-     * @param   bool    $bool true for on, false for off
-     */
-    function prepareUnicode($bool = true)
-    {
-        $this->_compile_unicode = $bool;
-    }
-
-    /**
-     * turn on/off padding the beginning of the sample string
-     *
-     * @access  public
-     * @param   bool    $bool true for on, false for off
-     */
-    function setPadStart($bool = true)
-    {
-        $this->_trigram_pad_start = $bool;
-    }
-
-    /**
-     * Should the unicode block counter skip non-alphabetical ascii chars?
-     *
-     * @access  public
-     * @param   bool    $bool true for on, false for off
-     */
-    function setUnicodeSkipSymbols($bool = true)
-    {
-        $this->_unicode_skip_symbols = $bool;
-    }
-
-    /**
-     * Returns the trigram ranks for the text sample
-     *
-     * @access  public
-     * @return  array    trigram ranks in the text sample
-     */
-    function &getTrigramRanks()
-    {
-        return $this->_trigram_ranks;
-    }
-
-    /**
-     * Return the trigram freqency table
-     *
-     * only used in testing to make sure the parser is working
-     *
-     * @access  public
-     * @return  array    trigram freqencies in the text sample
-     */
-    function &getTrigramFreqs()
-    {
-        return $this->_trigram;
-    }
-
-    /**
-     * returns the array of unicode blocks
-     *
-     * @access  public
-     * @return  array   unicode blocks in the text sample
-     */
-    function &getUnicodeBlocks()
-    {
-        return $this->_unicode_blocks;
-    }
-
-    /**
-     * Executes the parsing operation
-     * 
-     * Be sure to call the set*() functions to set options and the 
-     * prepare*() functions first to tell it what kind of data to compute
-     *
-     * Afterwards the get*() functions can be used to access the compiled
-     * information.
-     *
-     * @access public
-     */
-    function analyze()
-    {
-        $len = strlen($this->_string);
-        $byte_counter = 0;
-
-
-        // unicode startup
-        if ($this->_compile_unicode) {
-            $blocks =& $this->_read_unicode_block_db();
-
-            $block_count = count($blocks);
-
-            $skipped_count = 0;
-            $unicode_chars = array();
-        }
-
-        // trigram startup
-        if ($this->_compile_trigram) {
-            // initialize them as blank so the parser will skip the first two
-            // (since it skips trigrams with more than  2 contiguous spaces)
-            $a = ' ';
-            $b = ' ';
-
-            // kludge
-            // if it finds a valid trigram to start and the start pad option is
-            // off, then set a variable that will be used to reduce this
-            // trigram after parsing has finished
-            if (!$this->_trigram_pad_start) {
-                $a = $this->_next_char($this->_string, $byte_counter, true);
-
-                if ($a != ' ') {
-                    $b = $this->_next_char($this->_string, $byte_counter, true);
-                    $dropone = " $a$b";
-                }
-
-                $byte_counter = 0;
-                $a = ' ';
-                $b = ' ';
-            }
-        }
-
-        while ($byte_counter < $len) {
-            $char = $this->_next_char($this->_string, $byte_counter, true);
-
-
-            // language trigram detection
-            if ($this->_compile_trigram) {
-                if (!($b == ' ' && ($a == ' ' || $char == ' '))) {
-                    if (!isset($this->_trigram[$a . $b . $char])) {
-                       $this->_trigram[$a . $b . $char] = 1;
-                    } else {
-                       $this->_trigram[$a . $b . $char]++;
-                    }
-                }
-
-                $a = $b;
-                $b = $char;
-            }
-
-            // unicode block detection
-            if ($this->_compile_unicode) {
-                if ($this->_unicode_skip_symbols
-                        && strlen($char) == 1
-                        && ($char < 'A' || $char > 'z'
-                        || ($char > 'Z' && $char < 'a'))
-                        && $char != "'") {  // does not skip the apostrophe
-                                            // since it's included in the language
-                                            // models
-
-                    $skipped_count++;
-                    continue;
-                }
-
-                // build an array of all the characters
-                if (isset($unicode_chars[$char])) {
-                    $unicode_chars[$char]++;
-                } else {
-                    $unicode_chars[$char] = 1;
-                }
-            }
-
-            // todo: add byte detection here
-        }
-
-        // unicode cleanup
-        if ($this->_compile_unicode) {
-            foreach ($unicode_chars as $utf8_char => $count) {
-                $search_result = $this->_unicode_block_name(
-                        $this->_utf8char2unicode($utf8_char), $blocks, $block_count);
-
-                if ($search_result != -1) {
-                    $block_name = $search_result[2];
-                } else {
-                    $block_name = '[Malformatted]';
-                }
-
-                if (isset($this->_unicode_blocks[$block_name])) {
-                    $this->_unicode_blocks[$block_name] += $count;
-                } else {
-                    $this->_unicode_blocks[$block_name] = $count;
-                }
-            }
-        }
-
-
-        // trigram cleanup
-        if ($this->_compile_trigram) {
-            // pad the end
-            if ($b != ' ') {
-                if (!isset($this->_trigram["$a$b "])) {
-                    $this->_trigram["$a$b "] = 1;
-                } else {
-                    $this->_trigram["$a$b "]++;
-                }
-            }
-
-            // perl compatibility; Language::Guess does not pad the beginning
-            // kludge
-            if (isset($dropone)) {
-                if ($this->_trigram[$dropone] == 1) {
-                    unset($this->_trigram[$dropone]);
-                } else {
-                    $this->_trigram[$dropone]--;
-                }
-            }
-
-            if (!empty($this->_trigram)) {
-                $this->_trigram_ranks = $this->_arr_rank($this->_trigram);
-            } else {
-                $this->_trigram_ranks = array();
-            }
-        }
-    }
-}
-
-/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
-
-?>
diff --git a/inc/3rdparty/site_config/custom/index.php b/inc/3rdparty/site_config/custom/index.php
new file mode 100644
index 00000000..a3d5f739
--- /dev/null
+++ b/inc/3rdparty/site_config/custom/index.php
@@ -0,0 +1,3 @@
+<?php
+// this is here to prevent directory listing over the web
+?>
\ No newline at end of file
-- 
cgit v1.2.3


From a50583fb97615f4c26cc84ee95d62f867a84b4e6 Mon Sep 17 00:00:00 2001
From: Maryana Rozhankivska <mariroz@mr.lviv.ua>
Date: Fri, 23 May 2014 19:27:17 +0300
Subject: last 3 important changes to 3.2 version of full-text-rss, issue #694

---
 .../language-detect/LanguageDetect/Exception.php   |  57 ++++
 .../language-detect/LanguageDetect/ISO639.php      | 339 ++++++++++++++++++++
 .../language-detect/LanguageDetect/Parser.php      | 347 +++++++++++++++++++++
 3 files changed, 743 insertions(+)
 create mode 100644 inc/3rdparty/libraries/language-detect/LanguageDetect/Exception.php
 create mode 100644 inc/3rdparty/libraries/language-detect/LanguageDetect/ISO639.php
 create mode 100644 inc/3rdparty/libraries/language-detect/LanguageDetect/Parser.php

diff --git a/inc/3rdparty/libraries/language-detect/LanguageDetect/Exception.php b/inc/3rdparty/libraries/language-detect/LanguageDetect/Exception.php
new file mode 100644
index 00000000..196d994f
--- /dev/null
+++ b/inc/3rdparty/libraries/language-detect/LanguageDetect/Exception.php
@@ -0,0 +1,57 @@
+<?php
+class Text_LanguageDetect_Exception extends Exception
+{
+    /**
+     * Database file could not be found
+     */
+    const DB_NOT_FOUND = 10;
+
+    /**
+     * Database file found, but not readable
+     */
+    const DB_NOT_READABLE = 11;
+
+    /**
+     * Database file is empty
+     */
+    const DB_EMPTY = 12;
+
+    /**
+     * Database contents is not a PHP array
+     */
+    const DB_NOT_ARRAY = 13;
+
+    /**
+     * Magic quotes are activated
+     */
+    const MAGIC_QUOTES = 14;
+
+
+    /**
+     * Parameter of invalid type passed to method
+     */
+    const PARAM_TYPE = 20;
+
+    /**
+     * Character in parameter is invalid
+     */
+    const INVALID_CHAR = 21;
+
+
+    /**
+     * Language is not in the database
+     */
+    const UNKNOWN_LANGUAGE = 30;
+
+
+    /**
+     * Error during block detection
+     */
+    const BLOCK_DETECTION = 40;
+
+
+    /**
+     * Error while clustering languages
+     */
+    const NO_HIGHEST_KEY = 50;
+}
diff --git a/inc/3rdparty/libraries/language-detect/LanguageDetect/ISO639.php b/inc/3rdparty/libraries/language-detect/LanguageDetect/ISO639.php
new file mode 100644
index 00000000..05b0590d
--- /dev/null
+++ b/inc/3rdparty/libraries/language-detect/LanguageDetect/ISO639.php
@@ -0,0 +1,339 @@
+<?php
+/**
+ * Part of Text_LanguageDetect
+ *
+ * PHP version 5
+ *
+ * @category  Text
+ * @package   Text_LanguageDetect
+ * @author    Christian Weiske <cweiske@php.net>
+ * @copyright 2011 Christian Weiske <cweiske@php.net>
+ * @license   http://www.debian.org/misc/bsd.license BSD
+ * @version   SVN: $Id$
+ * @link      http://pear.php.net/package/Text_LanguageDetect/
+ */
+
+/**
+ * Provides a mapping between the languages from lang.dat and the
+ * ISO 639-1 and ISO-639-2 codes.
+ *
+ * Note that this class contains only languages that exist in lang.dat.
+ *
+ * @category  Text
+ * @package   Text_LanguageDetect
+ * @author    Christian Weiske <cweiske@php.net>
+ * @copyright 2011 Christian Weiske <cweiske@php.net>
+ * @license   http://www.debian.org/misc/bsd.license BSD
+ * @link      http://www.loc.gov/standards/iso639-2/php/code_list.php
+ */
+class Text_LanguageDetect_ISO639
+{
+    /**
+     * Maps all language names from the language database to the
+     * ISO 639-1 2-letter language code.
+     *
+     * NULL indicates that there is no 2-letter code.
+     *
+     * @var array
+     */
+    public static $nameToCode2 = array(
+        'albanian'   => 'sq',
+        'arabic'     => 'ar',
+        'azeri'      => 'az',
+        'bengali'    => 'bn',
+        'bulgarian'  => 'bg',
+        'cebuano'    => null,
+        'croatian'   => 'hr',
+        'czech'      => 'cs',
+        'danish'     => 'da',
+        'dutch'      => 'nl',
+        'english'    => 'en',
+        'estonian'   => 'et',
+        'farsi'      => 'fa',
+        'finnish'    => 'fi',
+        'french'     => 'fr',
+        'german'     => 'de',
+        'hausa'      => 'ha',
+        'hawaiian'   => null,
+        'hindi'      => 'hi',
+        'hungarian'  => 'hu',
+        'icelandic'  => 'is',
+        'indonesian' => 'id',
+        'italian'    => 'it',
+        'kazakh'     => 'kk',
+        'kyrgyz'     => 'ky',
+        'latin'      => 'la',
+        'latvian'    => 'lv',
+        'lithuanian' => 'lt',
+        'macedonian' => 'mk',
+        'mongolian'  => 'mn',
+        'nepali'     => 'ne',
+        'norwegian'  => 'no',
+        'pashto'     => 'ps',
+        'pidgin'     => null,
+        'polish'     => 'pl',
+        'portuguese' => 'pt',
+        'romanian'   => 'ro',
+        'russian'    => 'ru',
+        'serbian'    => 'sr',
+        'slovak'     => 'sk',
+        'slovene'    => 'sl',
+        'somali'     => 'so',
+        'spanish'    => 'es',
+        'swahili'    => 'sw',
+        'swedish'    => 'sv',
+        'tagalog'    => 'tl',
+        'turkish'    => 'tr',
+        'ukrainian'  => 'uk',
+        'urdu'       => 'ur',
+        'uzbek'      => 'uz',
+        'vietnamese' => 'vi',
+        'welsh'      => 'cy',
+    );
+
+    /**
+     * Maps all language names from the language database to the
+     * ISO 639-2 3-letter language code.
+     *
+     * @var array
+     */
+    public static $nameToCode3 = array(
+        'albanian'   => 'sqi',
+        'arabic'     => 'ara',
+        'azeri'      => 'aze',
+        'bengali'    => 'ben',
+        'bulgarian'  => 'bul',
+        'cebuano'    => 'ceb',
+        'croatian'   => 'hrv',
+        'czech'      => 'ces',
+        'danish'     => 'dan',
+        'dutch'      => 'nld',
+        'english'    => 'eng',
+        'estonian'   => 'est',
+        'farsi'      => 'fas',
+        'finnish'    => 'fin',
+        'french'     => 'fra',
+        'german'     => 'deu',
+        'hausa'      => 'hau',
+        'hawaiian'   => 'haw',
+        'hindi'      => 'hin',
+        'hungarian'  => 'hun',
+        'icelandic'  => 'isl',
+        'indonesian' => 'ind',
+        'italian'    => 'ita',
+        'kazakh'     => 'kaz',
+        'kyrgyz'     => 'kir',
+        'latin'      => 'lat',
+        'latvian'    => 'lav',
+        'lithuanian' => 'lit',
+        'macedonian' => 'mkd',
+        'mongolian'  => 'mon',
+        'nepali'     => 'nep',
+        'norwegian'  => 'nor',
+        'pashto'     => 'pus',
+        'pidgin'     => 'crp',
+        'polish'     => 'pol',
+        'portuguese' => 'por',
+        'romanian'   => 'ron',
+        'russian'    => 'rus',
+        'serbian'    => 'srp',
+        'slovak'     => 'slk',
+        'slovene'    => 'slv',
+        'somali'     => 'som',
+        'spanish'    => 'spa',
+        'swahili'    => 'swa',
+        'swedish'    => 'swe',
+        'tagalog'    => 'tgl',
+        'turkish'    => 'tur',
+        'ukrainian'  => 'ukr',
+        'urdu'       => 'urd',
+        'uzbek'      => 'uzb',
+        'vietnamese' => 'vie',
+        'welsh'      => 'cym',
+    );
+
+    /**
+     * Maps ISO 639-1 2-letter language codes to the language names
+     * in the language database
+     *
+     * Not all languages have a 2 letter code, so some are missing
+     *
+     * @var array
+     */
+    public static $code2ToName = array(
+        'ar' => 'arabic',
+        'az' => 'azeri',
+        'bg' => 'bulgarian',
+        'bn' => 'bengali',
+        'cs' => 'czech',
+        'cy' => 'welsh',
+        'da' => 'danish',
+        'de' => 'german',
+        'en' => 'english',
+        'es' => 'spanish',
+        'et' => 'estonian',
+        'fa' => 'farsi',
+        'fi' => 'finnish',
+        'fr' => 'french',
+        'ha' => 'hausa',
+        'hi' => 'hindi',
+        'hr' => 'croatian',
+        'hu' => 'hungarian',
+        'id' => 'indonesian',
+        'is' => 'icelandic',
+        'it' => 'italian',
+        'kk' => 'kazakh',
+        'ky' => 'kyrgyz',
+        'la' => 'latin',
+        'lt' => 'lithuanian',
+        'lv' => 'latvian',
+        'mk' => 'macedonian',
+        'mn' => 'mongolian',
+        'ne' => 'nepali',
+        'nl' => 'dutch',
+        'no' => 'norwegian',
+        'pl' => 'polish',
+        'ps' => 'pashto',
+        'pt' => 'portuguese',
+        'ro' => 'romanian',
+        'ru' => 'russian',
+        'sk' => 'slovak',
+        'sl' => 'slovene',
+        'so' => 'somali',
+        'sq' => 'albanian',
+        'sr' => 'serbian',
+        'sv' => 'swedish',
+        'sw' => 'swahili',
+        'tl' => 'tagalog',
+        'tr' => 'turkish',
+        'uk' => 'ukrainian',
+        'ur' => 'urdu',
+        'uz' => 'uzbek',
+        'vi' => 'vietnamese',
+    );
+
+    /**
+     * Maps ISO 639-2 3-letter language codes to the language names
+     * in the language database; must stay the inverse of $nameToCode3.
+     *
+     * @var array
+     */
+    public static $code3ToName = array(
+        'ara' => 'arabic',
+        'aze' => 'azeri',
+        'ben' => 'bengali',
+        'bul' => 'bulgarian',
+        'ceb' => 'cebuano',
+        'ces' => 'czech',
+        'crp' => 'pidgin',
+        'cym' => 'welsh',
+        'dan' => 'danish',
+        'deu' => 'german',
+        'eng' => 'english',
+        'est' => 'estonian',
+        'fas' => 'farsi',
+        'fin' => 'finnish',
+        'fra' => 'french',
+        'hau' => 'hausa',
+        'haw' => 'hawaiian',
+        'hin' => 'hindi',
+        'hrv' => 'croatian',
+        'hun' => 'hungarian',
+        'ind' => 'indonesian',
+        'isl' => 'icelandic',
+        'ita' => 'italian',
+        'kaz' => 'kazakh',
+        'kir' => 'kyrgyz',
+        'lat' => 'latin',
+        'lav' => 'latvian',
+        'lit' => 'lithuanian',
+        'mkd' => 'macedonian',
+        'mon' => 'mongolian',
+        'nep' => 'nepali',
+        'nld' => 'dutch',
+        'nor' => 'norwegian',
+        'pol' => 'polish',
+        'por' => 'portuguese',
+        'pus' => 'pashto',
+        'ron' => 'romanian',
+        'rus' => 'russian',
+        'slk' => 'slovak',
+        'slv' => 'slovene',
+        'som' => 'somali',
+        'spa' => 'spanish',
+        'sqi' => 'albanian',
+        'srp' => 'serbian',
+        'swa' => 'swahili',
+        'swe' => 'swedish',
+        'tgl' => 'tagalog',
+        'tur' => 'turkish',
+        'ukr' => 'ukrainian',
+        'urd' => 'urdu',
+        'uzb' => 'uzbek',
+        'vie' => 'vietnamese',
+    );
+
+    /**
+     * Returns the 2-letter ISO 639-1 code for the given language name.
+     *
+     * @param string $lang English language name like "swedish"
+     *
+     * @return string Two-letter language code (e.g. "sv") or NULL if not found
+     */
+    public static function nameToCode2($lang)
+    {
+        $lang = strtolower($lang);
+        if (!isset(self::$nameToCode2[$lang])) {
+            return null;
+        }
+        return self::$nameToCode2[$lang];
+    }
+
+    /**
+     * Returns the 3-letter ISO 639-2 code for the given language name.
+     *
+     * @param string $lang English language name like "swedish"
+     *
+     * @return string Three-letter language code (e.g. "swe") or NULL if not found
+     */
+    public static function nameToCode3($lang)
+    {
+        $lang = strtolower($lang);
+        if (!isset(self::$nameToCode3[$lang])) {
+            return null;
+        }
+        return self::$nameToCode3[$lang];
+    }
+
+    /**
+     * Returns the language name for the given 2-letter ISO 639-1 code.
+     *
+     * @param string $code Two-letter language code (e.g. "sv"), any letter case
+     *
+     * @return string English language name like "swedish" or NULL if not found
+     */
+    public static function code2ToName($code)
+    {
+        $code = strtolower($code); // table keys are lowercase
+        if (!isset(self::$code2ToName[$code])) {
+            return null;
+        }
+        return self::$code2ToName[$code];
+    }
+
+    /**
+     * Returns the language name for the given 3-letter ISO 639-2 code.
+     *
+     * @param string $code Three-letter language code (e.g. "swe"), any letter case
+     *
+     * @return string English language name like "swedish" or NULL if not found
+     */
+    public static function code3ToName($code)
+    {
+        $code = strtolower($code); // table keys are lowercase
+        if (!isset(self::$code3ToName[$code])) {
+            return null;
+        }
+        return self::$code3ToName[$code];
+    }
+}
\ No newline at end of file
diff --git a/inc/3rdparty/libraries/language-detect/LanguageDetect/Parser.php b/inc/3rdparty/libraries/language-detect/LanguageDetect/Parser.php
new file mode 100644
index 00000000..fb0e1e20
--- /dev/null
+++ b/inc/3rdparty/libraries/language-detect/LanguageDetect/Parser.php
@@ -0,0 +1,347 @@
+<?php
+
+/**
+ * This class represents a text sample to be parsed.
+ *
+ * @category    Text
+ * @package     Text_LanguageDetect
+ * @author      Nicholas Pisarro
+ * @copyright   2006
+ * @license     BSD
+ * @version     CVS: $Id: Parser.php 322327 2012-01-15 17:55:59Z cweiske $
+ * @link        http://pear.php.net/package/Text_LanguageDetect/
+ * @link        http://langdetect.blogspot.com/
+ */
+
+/**
+ * This class represents a text sample to be parsed.
+ *
+ * This separates the analysis of a text sample from the primary LanguageDetect
+ * class. After a new profile has been built, the data can be retrieved using
+ * the accessor functions.
+ *
+ * This class is intended to be used by the Text_LanguageDetect class, not 
+ * end-users.
+ *
+ * @category    Text
+ * @package     Text_LanguageDetect
+ * @author      Nicholas Pisarro
+ * @copyright   2006
+ * @license     BSD
+ * @version     release: 0.3.0
+ */
+class Text_LanguageDetect_Parser extends Text_LanguageDetect
+{
+    /**
+     * the piece of text being parsed
+     *
+     * @access  private
+     * @var     string
+     */
+    var $_string;
+
+    /**
+     * stores the trigram frequencies of the sample (trigram => count)
+     *
+     * @access  private
+     * @var     array
+     */
+    var $_trigram = array();
+
+    /**
+     * stores the trigram ranks of the sample
+     *
+     * @access  private
+     * @var     array
+     */
+    var $_trigram_ranks = array();
+
+    /**
+     * stores the unicode blocks of the sample
+     *
+     * @access  private
+     * @var     array
+     */
+    var $_unicode_blocks = array();
+    
+    /**
+     * Whether the parser should compile the unicode ranges
+     * 
+     * @access  private
+     * @var     bool
+     */
+    var $_compile_unicode = false;
+
+    /**
+     * Whether the parser should compile trigrams
+     *
+     * @access  private
+     * @var     bool
+     */
+    var $_compile_trigram = false;
+
+    /**
+     * Whether the trigram parser should pad the beginning of the string
+     *
+     * @access  private
+     * @var     bool
+     */
+    var $_trigram_pad_start = false;
+
+    /**
+     * Whether the unicode parser should skip non-alphabetical ascii chars
+     *
+     * @access  private
+     * @var     bool
+     */
+    var $_unicode_skip_symbols = true;
+
+    /**
+     * Constructor; stores the sample, parsing happens later in analyze()
+     *
+     * @access  private
+     * @param   string  $string     string to be parsed
+     */
+    function __construct($string) {
+        $this->_string = $string;
+    }
+
+    /**
+     * Returns true if a string is suitable for parsing
+     *
+     * @param   string  $str    input string to test
+     * @return  bool            true if acceptable, false if not
+     */
+    public static function validateString($str) {
+        if (!empty($str) && strlen($str) > 3 && preg_match('/\S/', $str)) {
+            return true;
+        } else {
+            return false;
+        }
+    }
+
+    /**
+     * turn on/off trigram counting
+     *
+     * @access  public
+     * @param   bool    $bool true for on, false for off
+     */
+    function prepareTrigram($bool = true)
+    {
+        $this->_compile_trigram = $bool;
+    }
+
+    /**
+     * turn on/off unicode block counting
+     *
+     * @access  public
+     * @param   bool    $bool true for on, false for off
+     */
+    function prepareUnicode($bool = true)
+    {
+        $this->_compile_unicode = $bool;
+    }
+
+    /**
+     * turn on/off padding the beginning of the sample string
+     *
+     * @access  public
+     * @param   bool    $bool true for on, false for off
+     */
+    function setPadStart($bool = true)
+    {
+        $this->_trigram_pad_start = $bool;
+    }
+
+    /**
+     * Should the unicode block counter skip non-alphabetical ascii chars?
+     *
+     * @access  public
+     * @param   bool    $bool true for on, false for off
+     */
+    function setUnicodeSkipSymbols($bool = true)
+    {
+        $this->_unicode_skip_symbols = $bool;
+    }
+
+    /**
+     * Returns the trigram ranks for the text sample
+     *
+     * @access  public
+     * @return  array    trigram ranks in the text sample
+     */
+    function &getTrigramRanks()
+    {
+        return $this->_trigram_ranks;
+    }
+
+    /**
+     * Return the trigram frequency table
+     *
+     * only used in testing to make sure the parser is working
+     *
+     * @access  public
+     * @return  array    trigram frequencies in the text sample
+     */
+    function &getTrigramFreqs()
+    {
+        return $this->_trigram;
+    }
+
+    /**
+     * returns the array of unicode blocks
+     *
+     * @access  public
+     * @return  array   unicode blocks in the text sample
+     */
+    function &getUnicodeBlocks()
+    {
+        return $this->_unicode_blocks;
+    }
+
+    /**
+     * Executes the parsing operation
+     * 
+     * Be sure to call the set*() functions to set options and the 
+     * prepare*() functions first to tell it what kind of data to compute
+     *
+     * Afterwards the get*() functions can be used to access the compiled
+     * information.
+     *
+     * @access public
+     */
+    function analyze()
+    {
+        $len = strlen($this->_string);
+        $byte_counter = 0;
+
+
+        // unicode startup
+        if ($this->_compile_unicode) {
+            $blocks = $this->_read_unicode_block_db();
+            $block_count = count($blocks);
+
+            $skipped_count = 0;
+            $unicode_chars = array();
+        }
+
+        // trigram startup
+        if ($this->_compile_trigram) {
+            // initialize them as blank so the parser will skip the first two
+            // (since it skips trigrams with more than  2 contiguous spaces)
+            $a = ' ';
+            $b = ' ';
+
+            // kludge
+            // if it finds a valid trigram to start and the start pad option is
+            // off, then set a variable that will be used to reduce this
+            // trigram after parsing has finished
+            if (!$this->_trigram_pad_start) {
+                $a = $this->_next_char($this->_string, $byte_counter, true);
+
+                if ($a != ' ') {
+                    $b = $this->_next_char($this->_string, $byte_counter, true);
+                    $dropone = " $a$b";
+                }
+
+                $byte_counter = 0;
+                $a = ' ';
+                $b = ' ';
+            }
+        }
+
+        while ($byte_counter < $len) {
+            $char = $this->_next_char($this->_string, $byte_counter, true);
+
+
+            // language trigram detection
+            if ($this->_compile_trigram) {
+                if (!($b == ' ' && ($a == ' ' || $char == ' '))) {
+                    if (!isset($this->_trigram[$a . $b . $char])) {
+                       $this->_trigram[$a . $b . $char] = 1;
+                    } else {
+                       $this->_trigram[$a . $b . $char]++;
+                    }
+                }
+
+                $a = $b;
+                $b = $char;
+            }
+
+            // unicode block detection
+            if ($this->_compile_unicode) {
+                if ($this->_unicode_skip_symbols
+                        && strlen($char) == 1
+                        && ($char < 'A' || $char > 'z'
+                        || ($char > 'Z' && $char < 'a'))
+                        && $char != "'") {  // does not skip the apostrophe
+                                            // since it's included in the language
+                                            // models
+
+                    $skipped_count++;
+                    continue;
+                }
+
+                // build an array of all the characters
+                if (isset($unicode_chars[$char])) {
+                    $unicode_chars[$char]++;
+                } else {
+                    $unicode_chars[$char] = 1;
+                }
+            }
+
+            // todo: add byte detection here
+        }
+
+        // unicode cleanup
+        if ($this->_compile_unicode) {
+            foreach ($unicode_chars as $utf8_char => $count) {
+                $search_result = $this->_unicode_block_name(
+                        $this->_utf8char2unicode($utf8_char), $blocks, $block_count);
+
+                if ($search_result != -1) {
+                    $block_name = $search_result[2];
+                } else {
+                    $block_name = '[Malformatted]';
+                }
+
+                if (isset($this->_unicode_blocks[$block_name])) {
+                    $this->_unicode_blocks[$block_name] += $count;
+                } else {
+                    $this->_unicode_blocks[$block_name] = $count;
+                }
+            }
+        }
+
+
+        // trigram cleanup
+        if ($this->_compile_trigram) {
+            // pad the end
+            if ($b != ' ') {
+                if (!isset($this->_trigram["$a$b "])) {
+                    $this->_trigram["$a$b "] = 1;
+                } else {
+                    $this->_trigram["$a$b "]++;
+                }
+            }
+
+            // perl compatibility; Language::Guess does not pad the beginning
+            // kludge
+            if (isset($dropone)) {
+                if ($this->_trigram[$dropone] == 1) {
+                    unset($this->_trigram[$dropone]);
+                } else {
+                    $this->_trigram[$dropone]--;
+                }
+            }
+
+            if (!empty($this->_trigram)) {
+                $this->_trigram_ranks = $this->_arr_rank($this->_trigram);
+            } else {
+                $this->_trigram_ranks = array();
+            }
+        }
+    }
+}
+
+/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
\ No newline at end of file
-- 
cgit v1.2.3


From 1fce49fac70f6a2b97cdf9697f8918644125a190 Mon Sep 17 00:00:00 2001
From: Maryana Rozhankivska <mariroz@mr.lviv.ua>
Date: Fri, 23 May 2014 19:29:01 +0300
Subject: full-text rss config file for ted.com, issue #676

---
 inc/3rdparty/site_config/custom/ted.com.txt | 11 +++++++++++
 1 file changed, 11 insertions(+)
 create mode 100755 inc/3rdparty/site_config/custom/ted.com.txt

diff --git a/inc/3rdparty/site_config/custom/ted.com.txt b/inc/3rdparty/site_config/custom/ted.com.txt
new file mode 100755
index 00000000..4940d2bc
--- /dev/null
+++ b/inc/3rdparty/site_config/custom/ted.com.txt
@@ -0,0 +1,11 @@
+title: //title
+body: //div[@class='talk-article__body talk-transcript__body'] | //div[@class='media__image media__image--thumb talk-link__image']
+
+strip_id_or_class: talk-transcript__para__time
+
+single_page_link: //a[@id='hero-transcript-link']
+
+#prune: no
+tidy: no
+
+test_url: http://www.ted.com/talks/andrew_solomon_how_the_worst_moments_in_our_lives_make_us_who_we_are
-- 
cgit v1.2.3


From a342945b617c44748a4ca6ec583326c43c9d7319 Mon Sep 17 00:00:00 2001
From: Maryana Rozhankivska <mariroz@mr.lviv.ua>
Date: Sat, 24 May 2014 11:08:39 +0300
Subject: fix of search form popup in default theme

---
 themes/default/_search-form.twig | 14 --------------
 1 file changed, 14 deletions(-)
 mode change 100644 => 100755 themes/default/_search-form.twig

diff --git a/themes/default/_search-form.twig b/themes/default/_search-form.twig
old mode 100644
new mode 100755
index 74f420d0..33bea20d
--- a/themes/default/_search-form.twig
+++ b/themes/default/_search-form.twig
@@ -7,17 +7,3 @@
     </p>
 </form>
 </div>
-<script type="text/javascript">
-    $(document).ready(function() {
-
-        $("#search-form").hide();
-
-        $("#search").click(function(){
-            $("#search-form").toggle();
-            $("#search").toggleClass("current");
-            $("#search-arrow").toggleClass("arrow-down");
-        });
-
-
-    });
-</script>
\ No newline at end of file
-- 
cgit v1.2.3


From 009669360d46645d8f84f444fe0f6895811f51a3 Mon Sep 17 00:00:00 2001
From: Maryana Rozhankivska <mariroz@mr.lviv.ua>
Date: Sat, 24 May 2014 11:21:43 +0300
Subject: fix of onmouseover displaying of wallabag a link in wallabag form in
 default theme

---
 themes/default/css/style.css | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/themes/default/css/style.css b/themes/default/css/style.css
index 11a8ea1c..e58ef81a 100755
--- a/themes/default/css/style.css
+++ b/themes/default/css/style.css
@@ -384,8 +384,8 @@ a#bagit-form-close {
   background-color: #000;
   color: #fff;
   padding: 0 4px 1px 3px;
-  font-weight: bold;
-  font-size: 0.7em;
+  font-weight: bold; 
+  font-size: 0.7em; 
   border-radius: 4px;
 }
 .add-to-wallabag-link-after:hover, .add-to-wallabag-link-after:active {
@@ -394,6 +394,23 @@ a#bagit-form-close {
 .add-to-wallabag-link-after:visited {
   color: #999;
 }
+a.add-to-wallabag-link-after {
+    visibility: hidden;
+    position: absolute;
+    opacity: 0;
+    transition-duration: 2s;
+    transition-timing-function: ease-out;
+}
+#article article a:hover + a.add-to-wallabag-link-after, a.add-to-wallabag-link-after:hover {
+    opacity: 1;
+    visibility: visible;
+    transition-duration: .3s;
+    transition-timing-function: ease-in;
+}
+a.add-to-wallabag-link-after:after {
+     content: "w";
+}
+
 
 #add-link-result {
   display: inline;
-- 
cgit v1.2.3


From 0b9bb8cb7868f24137c5d8b85c39cc88ea877411 Mon Sep 17 00:00:00 2001
From: Maryana Rozhankivska <mariroz@mr.lviv.ua>
Date: Mon, 26 May 2014 14:29:18 +0300
Subject: add dailymotion videos, issue #708

---
 inc/3rdparty/site_config/custom/dailymotion.com.txt | 12 ++++++++++++
 inc/poche/Poche.class.php                           | 11 ++++++-----
 2 files changed, 18 insertions(+), 5 deletions(-)
 create mode 100755 inc/3rdparty/site_config/custom/dailymotion.com.txt

diff --git a/inc/3rdparty/site_config/custom/dailymotion.com.txt b/inc/3rdparty/site_config/custom/dailymotion.com.txt
new file mode 100755
index 00000000..0cad808f
--- /dev/null
+++ b/inc/3rdparty/site_config/custom/dailymotion.com.txt
@@ -0,0 +1,12 @@
+title: //title
+body: //iframe
+
+replace_string(<![CDATA[): _
+replace_string(]]>): _
+
+single_page_link: //link[@type='application/xml+oembed']
+
+prune: no
+tidy: no
+
+test_url: http://www.dailymotion.com/video/x1vk5oh_before-they-were-on-game-of-thrones_people
diff --git a/inc/poche/Poche.class.php b/inc/poche/Poche.class.php
index 1b69cd61..37cf66a3 100755
--- a/inc/poche/Poche.class.php
+++ b/inc/poche/Poche.class.php
@@ -1142,11 +1142,12 @@ class Poche
      * return new purifier object with actual config
      */
     protected function getPurifier() {
-      $config = HTMLPurifier_Config::createDefault();
-      $config->set('Cache.SerializerPath', CACHE);
-      $config->set('HTML.SafeIframe', true);
-      $config->set('URI.SafeIframeRegexp', '%^(https?:)?//(www\.youtube(?:-nocookie)?\.com/embed/|player\.vimeo\.com/video/)%'); //allow YouTube and Vimeo$purifier = new HTMLPurifier($config);
-
+      $config = HTMLPurifier_Config::createDefault();
+      $config->set('Cache.SerializerPath', CACHE);
+      $config->set('HTML.SafeIframe', true);
+      //allow YouTube, Vimeo and dailymotion videos
+      $config->set('URI.SafeIframeRegexp', '%^(https?:)?//(www\.youtube(?:-nocookie)?\.com/embed/|player\.vimeo\.com/video/|www\.dailymotion\.com/embed/video/)%');
+
       return new HTMLPurifier($config);
     }
     
-- 
cgit v1.2.3


From 3dc8d84229ed0f3ccd40b44420ed6e818a6edea9 Mon Sep 17 00:00:00 2001
From: Maryana Rozhankivska <mariroz@mr.lviv.ua>
Date: Thu, 29 May 2014 16:35:00 +0300
Subject: fix of uninitialized object warning, issue #710

---
 inc/3rdparty/libraries/feedwriter/FeedWriter.php | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/inc/3rdparty/libraries/feedwriter/FeedWriter.php b/inc/3rdparty/libraries/feedwriter/FeedWriter.php
index 77755690..aa064afb 100755
--- a/inc/3rdparty/libraries/feedwriter/FeedWriter.php
+++ b/inc/3rdparty/libraries/feedwriter/FeedWriter.php
@@ -97,13 +97,16 @@ define('JSONP', 3, true);
               header('X-content-type-options: nosniff');
           } elseif ($this->version == JSON) {
               header('Content-type: application/json; charset=UTF-8');
-              $this->json = new stdClass();
           } elseif ($this->version == JSONP) {
               header('Content-type: application/javascript; charset=UTF-8');
-              $this->json = new stdClass();
           }
         }
 
+        if ($this->version == JSON || $this->version == JSONP) {
+          $this->json = new stdClass();
+        }
+
+
         $this->printHead();
         $this->printChannels();
         $this->printItems();
@@ -202,7 +205,7 @@ define('JSONP', 3, true);
     public function setDescription($description)
     {
         $tag = ($this->version == ATOM)? 'subtitle' : 'description';
-        $this->setChannelElement($tag, $desciption);
+        $this->setChannelElement($tag, $description);
     }
 
     /**
-- 
cgit v1.2.3


From 79024eb004bfb9de77ec60d648315888e70033ad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolas=20L=C5=93uillet?= <nicolas@loeuillet.org>
Date: Thu, 29 May 2014 18:32:55 +0200
Subject: fix #344 FQDN with non-standard ports broken

---
 inc/poche/Tools.class.php | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/inc/poche/Tools.class.php b/inc/poche/Tools.class.php
index 7f064020..8073a3fe 100755
--- a/inc/poche/Tools.class.php
+++ b/inc/poche/Tools.class.php
@@ -60,6 +60,10 @@ class Tools
         }
 
         $host = (isset($_SERVER['HTTP_X_FORWARDED_HOST']) ? $_SERVER['HTTP_X_FORWARDED_HOST'] : (isset($_SERVER['HTTP_HOST']) ? $_SERVER['HTTP_HOST'] : $_SERVER['SERVER_NAME']));
+
+        if (strpos($host, ':') !== false) {
+            $serverport = '';
+        }
         
         return 'http' . ($https ? 's' : '') . '://'
             . $host . $serverport . $scriptname;
-- 
cgit v1.2.3


From 8038b38802769031e050c753fc0a388a2276629e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolas=20L=C5=93uillet?= <nicolas@loeuillet.org>
Date: Thu, 29 May 2014 18:52:34 +0200
Subject: 1.7, premium version :)

---
 index.php | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/index.php b/index.php
index 79838ed9..472e0fab 100755
--- a/index.php
+++ b/index.php
@@ -8,7 +8,7 @@
  * @license    http://www.wtfpl.net/ see COPYING file
  */
 
-define ('POCHE', '1.6.1');
+define ('POCHE', '1.7.0');
 require 'check_setup.php';
 require_once 'inc/poche/global.inc.php';
 
-- 
cgit v1.2.3


From cbc75befb5bdf368bec15f47413bd7669273a181 Mon Sep 17 00:00:00 2001
From: Maryana Rozhankivska <mariroz@mr.lviv.ua>
Date: Fri, 30 May 2014 17:14:53 +0300
Subject: small xss vulnerability and translation ability fix

---
 inc/poche/Poche.class.php | 41 ++++++++++++++++++++---------------------
 1 file changed, 20 insertions(+), 21 deletions(-)

diff --git a/inc/poche/Poche.class.php b/inc/poche/Poche.class.php
index 37cf66a3..b0c0adf8 100755
--- a/inc/poche/Poche.class.php
+++ b/inc/poche/Poche.class.php
@@ -1083,11 +1083,10 @@ class Poche
         $config = $this->store->getConfigUser($user_id);
 
         if ($config == null) {
-            die(_('User with this id (' . $user_id . ') does not exist.'));
+            die(sprintf(_('User with this id (%d) does not exist.'), $user_id)); // translate the template first, then insert the id
         }
 
-        if (!in_array($type, $allowed_types) ||
-            $token != $config['token']) {
+        if (!in_array($type, $allowed_types) || $token != $config['token']) {
             die(_('Uh, there is a problem while generating feeds.'));
         }
         // Check the token
@@ -1150,12 +1149,12 @@ class Poche
 
       return new HTMLPurifier($config);
     }
-    
+
     /**
      * handle epub
      */
     public function createEpub() {
-       
+
         switch ($_GET['method']) {
             case 'id':
                 $entryID = filter_var($_GET['id'],FILTER_SANITIZE_NUMBER_INT);
@@ -1191,7 +1190,7 @@ class Poche
                 break;
             case 'default':
                 die(_('Uh, there is a problem while generating epub.'));
-            
+
         }
 
         $content_start =
@@ -1204,11 +1203,11 @@ class Poche
         . "<body>\n";
 
         $bookEnd = "</body>\n</html>\n";
-        
+
         $log = new Logger("wallabag", TRUE);
         $fileDir = CACHE;
 
-        
+
         $book = new EPub(EPub::BOOK_VERSION_EPUB3);
         $log->logLine("new EPub()");
         $log->logLine("EPub class version: " . EPub::VERSION);
@@ -1216,7 +1215,7 @@ class Poche
         $log->logLine("Zip version: " . Zip::VERSION);
         $log->logLine("getCurrentServerURL: " . $book->getCurrentServerURL());
         $log->logLine("getCurrentPageURL..: " . $book->getCurrentPageURL());
-        
+
         $book->setTitle(_('wallabag\'s articles'));
         $book->setIdentifier("http://$_SERVER[HTTP_HOST]", EPub::IDENTIFIER_URI); // Could also be the ISBN number, prefered for published books, or a UUID.
         //$book->setLanguage("en"); // Not needed, but included for the example, Language is mandatory, but EPub defaults to "en". Use RFC3066 Language codes, such as "en", "da", "fr" etc.
@@ -1226,39 +1225,39 @@ class Poche
         $book->setDate(time()); // Strictly not needed as the book date defaults to time().
         //$book->setRights("Copyright and licence information specific for the book."); // As this is generated, this _could_ contain the name or licence information of the user who purchased the book, if needed. If this is used that way, the identifier must also be made unique for the book.
         $book->setSourceURL("http://$_SERVER[HTTP_HOST]");
-        
+
         $book->addDublinCoreMetadata(DublinCore::CONTRIBUTOR, "PHP");
         $book->addDublinCoreMetadata(DublinCore::CONTRIBUTOR, "wallabag");
-        
+
         $cssData = "body {\n margin-left: .5em;\n margin-right: .5em;\n text-align: justify;\n}\n\np {\n font-family: serif;\n font-size: 10pt;\n text-align: justify;\n text-indent: 1em;\n margin-top: 0px;\n margin-bottom: 1ex;\n}\n\nh1, h2 {\n font-family: sans-serif;\n font-style: italic;\n text-align: center;\n background-color: #6b879c;\n color: white;\n width: 100%;\n}\n\nh1 {\n margin-bottom: 2px;\n}\n\nh2 {\n margin-top: -2px;\n margin-bottom: 2px;\n}\n";
-        
+
         $log->logLine("Add Cover");
-        
+
         $fullTitle = "<h1> " . $bookTitle . "</h1>\n";
-        
+
         $book->setCoverImage("Cover.png", file_get_contents("themes/baggy/img/apple-touch-icon-152.png"), "image/png", $fullTitle);
-        
+
         $cover = $content_start . '<div style="text-align:center;"><p>' . _('Produced by wallabag with PHPePub') . '</p><p>'. _('Please open <a href="https://github.com/wallabag/wallabag/issues" >an issue</a> if you have trouble with the display of this E-Book on your device.') . '</p></div>' . $bookEnd;
-        
+
         //$book->addChapter("Table of Contents", "TOC.xhtml", NULL, false, EPub::EXTERNAL_REF_IGNORE);
         $book->addChapter("Notices", "Cover2.html", $cover);
-        
+
         $book->buildTOC();
-        
+
         foreach ($entries as $entry) { //set tags as subjects
             $tags = $this->store->retrieveTagsByEntry($entry['id']);
             foreach ($tags as $tag) {
                 $book->setSubject($tag['value']);
             }
-            
+
             $log->logLine("Set up parameters");
-            
+
             $chapter = $content_start . $entry['content'] . $bookEnd;
             $book->addChapter($entry['title'], htmlspecialchars($entry['title']) . ".html", $chapter, true, EPub::EXTERNAL_REF_ADD);
             $log->logLine("Added chapter " . $entry['title']);
         }
 
-        if (DEBUG_POCHE) { 
+        if (DEBUG_POCHE) {
             $epuplog = $book->getLog();
             $book->addChapter("Log", "Log.html", $content_start . $log->getLog() . "\n</pre>" . $bookEnd); // log generation
         }
-- 
cgit v1.2.3


From 30bd273580a326db1fcc7263e1f52948672f9848 Mon Sep 17 00:00:00 2001
From: Maryana Rozhankivska <mariroz@mr.lviv.ua>
Date: Fri, 30 May 2014 17:17:34 +0300
Subject: small xss vulnerability and translation ability fix

---
 inc/poche/Poche.class.php | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/inc/poche/Poche.class.php b/inc/poche/Poche.class.php
index b0c0adf8..3d1337f3 100755
--- a/inc/poche/Poche.class.php
+++ b/inc/poche/Poche.class.php
@@ -1083,7 +1083,7 @@ class Poche
         $config = $this->store->getConfigUser($user_id);
 
         if ($config == null) {
-            die(_( sprintf('User with this id (%d) does not exist.', $user_id) ));
+            die(sprintf(_('User with this id (%d) does not exist.'), $user_id));
         }
 
         if (!in_array($type, $allowed_types) || $token != $config['token']) {
-- 
cgit v1.2.3


From 1d14e653156399ef4759df3c0f39cebc5ee0102e Mon Sep 17 00:00:00 2001
From: Maryana Rozhankivska <mariroz@mr.lviv.ua>
Date: Sun, 1 Jun 2014 19:49:22 +0300
Subject: fix of undefined ATOM constant warning in full-text-rss, will fix
 ios-app issue #14

---
 inc/3rdparty/libraries/feedwriter/FeedWriter.php | 1 +
 1 file changed, 1 insertion(+)

diff --git a/inc/3rdparty/libraries/feedwriter/FeedWriter.php b/inc/3rdparty/libraries/feedwriter/FeedWriter.php
index aa064afb..9446cddf 100755
--- a/inc/3rdparty/libraries/feedwriter/FeedWriter.php
+++ b/inc/3rdparty/libraries/feedwriter/FeedWriter.php
@@ -2,6 +2,7 @@
 define('RSS2', 1, true);
 define('JSON', 2, true);
 define('JSONP', 3, true);
+define('ATOM', 4, true);
 
  /**
  * Univarsel Feed Writer class
-- 
cgit v1.2.3


From 752cd4a8ef7bbc8ebd6c481ed890e0d8e46819a8 Mon Sep 17 00:00:00 2001
From: Maryana Rozhankivska <mariroz@mr.lviv.ua>
Date: Mon, 2 Jun 2014 18:00:09 +0300
Subject: error reporting level set to E_ALL & ~E_NOTICE by default, can be
 overridden in config

---
 inc/3rdparty/makefulltextfeed.php | 2 +-
 inc/poche/Tools.class.php         | 4 +---
 inc/poche/config.inc.default.php  | 4 ++++
 index.php                         | 5 +++++
 4 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/inc/3rdparty/makefulltextfeed.php b/inc/3rdparty/makefulltextfeed.php
index 7a56be8c..62c050ec 100755
--- a/inc/3rdparty/makefulltextfeed.php
+++ b/inc/3rdparty/makefulltextfeed.php
@@ -28,7 +28,7 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>.
 // Request this file passing it a web page or feed URL in the querystring: makefulltextfeed.php?url=example.org/article
 // For more request parameters, see http://help.fivefilters.org/customer/portal/articles/226660-usage
 
-error_reporting(E_ALL ^ E_NOTICE);
+//error_reporting(E_ALL ^ E_NOTICE);
 ini_set("display_errors", 1);
 @set_time_limit(120);
 
diff --git a/inc/poche/Tools.class.php b/inc/poche/Tools.class.php
index 8073a3fe..1ef875c9 100755
--- a/inc/poche/Tools.class.php
+++ b/inc/poche/Tools.class.php
@@ -18,8 +18,6 @@ class Tools
             die(_('Oops, it seems you don\'t have PHP 5.'));
         }
 
-        error_reporting(E_ALL);
-
         function stripslashesDeep($value) {
             return is_array($value)
                 ? array_map('stripslashesDeep', $value)
@@ -64,7 +62,7 @@ class Tools
         if (strpos($host, ':') !== false) {
             $serverport = '';
         }
-        
+
         return 'http' . ($https ? 's' : '') . '://'
             . $host . $serverport . $scriptname;
     }
diff --git a/inc/poche/config.inc.default.php b/inc/poche/config.inc.default.php
index ffcd205d..95f727c6 100755
--- a/inc/poche/config.inc.default.php
+++ b/inc/poche/config.inc.default.php
@@ -30,6 +30,10 @@
 
 @define ('MODE_DEMO', FALSE);
 @define ('DEBUG_POCHE', FALSE);
+
+// Default error reporting level for the application. Developers should override it in their config.inc.php (e.g. set it to E_ALL).
+@define ('ERROR_REPORTING', E_ALL & ~E_NOTICE);
+
 @define ('DOWNLOAD_PICTURES', FALSE); # This can slow down the process of adding articles
 @define ('REGENERATE_PICTURES_QUALITY', 75);
 @define ('CONVERT_LINKS_FOOTNOTES', FALSE);
diff --git a/index.php b/index.php
index 472e0fab..c134b103 100755
--- a/index.php
+++ b/index.php
@@ -12,6 +12,11 @@ define ('POCHE', '1.7.0');
 require 'check_setup.php';
 require_once 'inc/poche/global.inc.php';
 
+# Set error reporting level
+if (defined('ERROR_REPORTING')) {
+	error_reporting(ERROR_REPORTING);
+}
+
 # Start session
 Session::$sessionName = 'poche';
 Session::init();
-- 
cgit v1.2.3