From: Nicolas Lœuillet Date: Tue, 15 Jul 2014 09:49:24 +0000 (+0200) Subject: Merge pull request #761 from wallabag/dev X-Git-Tag: 1.7.1 X-Git-Url: https://git.immae.eu/?a=commitdiff_plain;h=0f6273cdb8c77436593782d42f271fddc7a7875d;hp=a9f5e572dde4f986a498d2fbe92a38a1b22f9595;p=github%2Fwallabag%2Fwallabag.git Merge pull request #761 from wallabag/dev 1.7.1 --- diff --git a/inc/3rdparty/libraries/PHPePub/EPub.php b/inc/3rdparty/libraries/PHPePub/EPub.php index f1f41bd5..d9b990b7 100644 --- a/inc/3rdparty/libraries/PHPePub/EPub.php +++ b/inc/3rdparty/libraries/PHPePub/EPub.php @@ -41,6 +41,8 @@ class EPub { private $bookVersion = EPub::BOOK_VERSION_EPUB2; + private $debugInside = FALSE; + public $maxImageWidth = 768; public $maxImageHeight = 1024; @@ -132,10 +134,14 @@ class EPub { * * @return void */ - function __construct($bookVersion = EPub::BOOK_VERSION_EPUB2, $languageCode = "en", $writingDirection = EPub::DIRECTION_LEFT_TO_RIGHT) { + function __construct($bookVersion = EPub::BOOK_VERSION_EPUB2, $debugInside = FALSE, $languageCode = "en", $writingDirection = EPub::DIRECTION_LEFT_TO_RIGHT) { include_once("Zip.php"); include_once("Logger.php"); + if (!$debugInside) { + error_reporting(E_ERROR | E_PARSE); + } + $this->bookVersion = $bookVersion; $this->writingDirection = $writingDirection; $this->languageCode = $languageCode; diff --git a/inc/3rdparty/libraries/feedwriter/FeedWriter.php b/inc/3rdparty/libraries/feedwriter/FeedWriter.php index aa064afb..9446cddf 100755 --- a/inc/3rdparty/libraries/feedwriter/FeedWriter.php +++ b/inc/3rdparty/libraries/feedwriter/FeedWriter.php @@ -2,6 +2,7 @@ define('RSS2', 1, true); define('JSON', 2, true); define('JSONP', 3, true); +define('ATOM', 4, true); /** * Univarsel Feed Writer class diff --git a/inc/3rdparty/libraries/readability/Readability.php b/inc/3rdparty/libraries/readability/Readability.php old mode 100644 new mode 100755 index d0f09d74..4fa3ba63 --- a/inc/3rdparty/libraries/readability/Readability.php +++ 
b/inc/3rdparty/libraries/readability/Readability.php @@ -679,6 +679,7 @@ class Readability } else { $topCandidate->innerHTML = $page->documentElement->innerHTML; $page->documentElement->innerHTML = ''; + $this->reinitBody(); $page->documentElement->appendChild($topCandidate); } } else { @@ -794,8 +795,7 @@ class Readability { // TODO: find out why element disappears sometimes, e.g. for this URL http://www.businessinsider.com/6-hedge-fund-etfs-for-average-investors-2011-7 // in the meantime, we check and create an empty element if it's not there. - if (!isset($this->body->childNodes)) $this->body = $this->dom->createElement('body'); - $this->body->innerHTML = $this->bodyCache; + $this->reinitBody(); if ($this->flagIsActive(self::FLAG_STRIP_UNLIKELYS)) { $this->removeFlag(self::FLAG_STRIP_UNLIKELYS); @@ -1134,5 +1134,18 @@ class Readability public function removeFlag($flag) { $this->flags = $this->flags & ~$flag; } + + /** + * Will recreate previously deleted body property + * + * @return void + */ + protected function reinitBody() { + if (!isset($this->body->childNodes)) { + $this->body = $this->dom->createElement('body'); + $this->body->innerHTML = $this->bodyCache; + } + } + } ?> \ No newline at end of file diff --git a/inc/3rdparty/makefulltextfeed.php b/inc/3rdparty/makefulltextfeed.php index 7a56be8c..a081f88b 100755 --- a/inc/3rdparty/makefulltextfeed.php +++ b/inc/3rdparty/makefulltextfeed.php @@ -28,7 +28,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. 
// Request this file passing it a web page or feed URL in the querystring: makefulltextfeed.php?url=example.org/article // For more request parameters, see http://help.fivefilters.org/customer/portal/articles/226660-usage -error_reporting(E_ALL ^ E_NOTICE); +//error_reporting(E_ALL ^ E_NOTICE); ini_set("display_errors", 1); @set_time_limit(120); @@ -671,7 +671,11 @@ foreach ($items as $key => $item) { $html .= $item->get_description(); } else { $readability->clean($content_block, 'select'); - if ($options->rewrite_relative_urls) makeAbsolute($effective_url, $content_block); + // get base URL + $base_url = get_base_url($readability->dom); + if (!$base_url) $base_url = $effective_url; + // rewrite URLs + if ($options->rewrite_relative_urls) makeAbsolute($base_url, $content_block); // footnotes if (($links == 'footnotes') && (strpos($effective_url, 'wikipedia.org') === false)) { $readability->addFootnotes($content_block); diff --git a/inc/3rdparty/makefulltextfeedHelpers.php b/inc/3rdparty/makefulltextfeedHelpers.php index 4e985372..ac872ab8 100755 --- a/inc/3rdparty/makefulltextfeedHelpers.php +++ b/inc/3rdparty/makefulltextfeedHelpers.php @@ -377,3 +377,13 @@ function debug($msg) { flush(); } } + +function get_base_url($dom) { + $xpath = new DOMXPath($dom); + $base_url = @$xpath->evaluate('string(//head/base/@href)', $dom); + if ($base_url !== '') { + return $base_url; + } else { + return false; + } +} diff --git a/inc/3rdparty/simple_html_dom.php b/inc/3rdparty/simple_html_dom.php old mode 100644 new mode 100755 index 43b94e57..9b73b105 --- a/inc/3rdparty/simple_html_dom.php +++ b/inc/3rdparty/simple_html_dom.php @@ -34,7 +34,7 @@ * @author S.C. 
Chen * @author John Schlick * @author Rus Carroll - * @version 1.5 ($Rev: 202 $) + * @version 1.5 ($Rev: 210 $) * @package PlaceLocalInclude * @subpackage simple_html_dom */ @@ -269,7 +269,10 @@ class simple_html_dom_node { return $this->children; } - if (isset($this->children[$idx])) return $this->children[$idx]; + if (isset($this->children[$idx])) + { + return $this->children[$idx]; + } return null; } @@ -330,14 +333,14 @@ class simple_html_dom_node function find_ancestor_tag($tag) { global $debug_object; - if (is_object($debug_object)) { $debug_object->debugLogEntry(1); } + if (is_object($debug_object)) { $debug_object->debug_log_entry(1); } // Start by including ourselves in the comparison. $returnDom = $this; while (!is_null($returnDom)) { - if (is_object($debug_object)) { $debug_object->debugLog(2, "Current tag is: " . $returnDom->tag); } + if (is_object($debug_object)) { $debug_object->debug_log(2, "Current tag is: " . $returnDom->tag); } if ($returnDom->tag == $tag) { @@ -374,7 +377,7 @@ class simple_html_dom_node $text = " with text: " . $this->text; } } - $debug_object->debugLog(1, 'Innertext of tag: ' . $this->tag . $text); + $debug_object->debug_log(1, 'Innertext of tag: ' . $this->tag . $text); } if ($this->tag==='root') return $this->innertext(); @@ -532,7 +535,9 @@ class simple_html_dom_node foreach ($head as $k=>$v) { if (!isset($found_keys[$k])) + { $found_keys[$k] = 1; + } } } @@ -554,7 +559,7 @@ class simple_html_dom_node protected function seek($selector, &$ret, $lowercase=false) { global $debug_object; - if (is_object($debug_object)) { $debug_object->debugLogEntry(1); } + if (is_object($debug_object)) { $debug_object->debug_log_entry(1); } list($tag, $key, $val, $exp, $no_key) = $selector; @@ -615,7 +620,7 @@ class simple_html_dom_node // this is a normal search, we want the value of that attribute of the tag. $nodeKeyValue = $node->attr[$key]; } - if (is_object($debug_object)) {$debug_object->debugLog(2, "testing node: " . $node->tag . 
" for attribute: " . $key . $exp . $val . " where nodes value is: " . $nodeKeyValue);} + if (is_object($debug_object)) {$debug_object->debug_log(2, "testing node: " . $node->tag . " for attribute: " . $key . $exp . $val . " where nodes value is: " . $nodeKeyValue);} //PaperG - If lowercase is set, do a case insensitive test of the value of the selector. if ($lowercase) { @@ -623,7 +628,7 @@ class simple_html_dom_node } else { $check = $this->match($exp, $val, $nodeKeyValue); } - if (is_object($debug_object)) {$debug_object->debugLog(2, "after match: " . ($check ? "true" : "false"));} + if (is_object($debug_object)) {$debug_object->debug_log(2, "after match: " . ($check ? "true" : "false"));} // handle multiple class if (!$check && strcasecmp($key, 'class')===0) { @@ -645,12 +650,12 @@ class simple_html_dom_node unset($node); } // It's passed by reference so this is actually what this function returns. - if (is_object($debug_object)) {$debug_object->debugLog(1, "EXIT - ret: ", $ret);} + if (is_object($debug_object)) {$debug_object->debug_log(1, "EXIT - ret: ", $ret);} } protected function match($exp, $pattern, $value) { global $debug_object; - if (is_object($debug_object)) {$debug_object->debugLogEntry(1);} + if (is_object($debug_object)) {$debug_object->debug_log_entry(1);} switch ($exp) { case '=': @@ -672,7 +677,7 @@ class simple_html_dom_node protected function parse_selector($selector_string) { global $debug_object; - if (is_object($debug_object)) {$debug_object->debugLogEntry(1);} + if (is_object($debug_object)) {$debug_object->debug_log_entry(1);} // pattern of CSS selectors, modified from mootools // Paperg: Add the colon to the attrbute, so that it properly finds like google does. 
@@ -683,7 +688,7 @@ class simple_html_dom_node // $pattern = "/([\w-:\*]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is"; $pattern = "/([\w-:\*]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-:]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is"; preg_match_all($pattern, trim($selector_string).' ', $matches, PREG_SET_ORDER); - if (is_object($debug_object)) {$debug_object->debugLog(2, "Matches Array: ", $matches);} + if (is_object($debug_object)) {$debug_object->debug_log(2, "Matches Array: ", $matches);} $selectors = array(); $result = array(); @@ -718,12 +723,14 @@ class simple_html_dom_node return $selectors; } - function __get($name) { + function __get($name) + { if (isset($this->attr[$name])) { return $this->convert_text($this->attr[$name]); } - switch ($name) { + switch ($name) + { case 'outertext': return $this->outertext(); case 'innertext': return $this->innertext(); case 'plaintext': return $this->text(); @@ -732,22 +739,30 @@ class simple_html_dom_node } } - function __set($name, $value) { - switch ($name) { + function __set($name, $value) + { + global $debug_object; + if (is_object($debug_object)) {$debug_object->debug_log_entry(1);} + + switch ($name) + { case 'outertext': return $this->_[HDOM_INFO_OUTER] = $value; case 'innertext': if (isset($this->_[HDOM_INFO_TEXT])) return $this->_[HDOM_INFO_TEXT] = $value; return $this->_[HDOM_INFO_INNER] = $value; } - if (!isset($this->attr[$name])) { + if (!isset($this->attr[$name])) + { $this->_[HDOM_INFO_SPACE][] = array(' ', '', ''); $this->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_DOUBLE; } $this->attr[$name] = $value; } - function __isset($name) { - switch ($name) { + function __isset($name) + { + switch ($name) + { case 'outertext': return true; case 'innertext': return true; case 'plaintext': return true; @@ -765,7 +780,7 @@ class simple_html_dom_node function convert_text($text) { global $debug_object; - if (is_object($debug_object)) {$debug_object->debugLogEntry(1);} + if 
(is_object($debug_object)) {$debug_object->debug_log_entry(1);} $converted_text = $text; @@ -777,7 +792,7 @@ class simple_html_dom_node $sourceCharset = strtoupper($this->dom->_charset); $targetCharset = strtoupper($this->dom->_target_charset); } - if (is_object($debug_object)) {$debug_object->debugLog(3, "source charset: " . $sourceCharset . " target charaset: " . $targetCharset);} + if (is_object($debug_object)) {$debug_object->debug_log(3, "source charset: " . $sourceCharset . " target charaset: " . $targetCharset);} if (!empty($sourceCharset) && !empty($targetCharset) && (strcasecmp($sourceCharset, $targetCharset) != 0)) { @@ -1045,10 +1060,10 @@ class simple_html_dom // prepare $this->prepare($str, $lowercase, $stripRN, $defaultBRText, $defaultSpanText); - // strip out comments - $this->remove_noise("'<!--(.*?)-->'is"); // strip out cdata $this->remove_noise("'<!\[CDATA\[(.*?)\]\]>'is", true); + // strip out comments + $this->remove_noise("'<!--(.*?)-->'is"); // Per sourceforge http://sourceforge.net/tracker/?func=detail&aid=2949097&group_id=218559&atid=1044037 // Script tags removal now preceeds style tag removal. // strip out