From aa126ba458a02e8b1e43b15fc28f550ee72a9619 Mon Sep 17 00:00:00 2001 From: Maryana Rozhankivska Date: Wed, 25 Jun 2014 19:34:14 +0300 Subject: fix of issue #718: Error parsing file imported from Pocket #718 --- inc/3rdparty/simple_html_dom.php | 105 ++++++++++++++++++++++++--------------- 1 file changed, 65 insertions(+), 40 deletions(-) mode change 100644 => 100755 inc/3rdparty/simple_html_dom.php (limited to 'inc') diff --git a/inc/3rdparty/simple_html_dom.php b/inc/3rdparty/simple_html_dom.php old mode 100644 new mode 100755 index 43b94e57..9b73b105 --- a/inc/3rdparty/simple_html_dom.php +++ b/inc/3rdparty/simple_html_dom.php @@ -34,7 +34,7 @@ * @author S.C. Chen * @author John Schlick * @author Rus Carroll - * @version 1.5 ($Rev: 202 $) + * @version 1.5 ($Rev: 210 $) * @package PlaceLocalInclude * @subpackage simple_html_dom */ @@ -269,7 +269,10 @@ class simple_html_dom_node { return $this->children; } - if (isset($this->children[$idx])) return $this->children[$idx]; + if (isset($this->children[$idx])) + { + return $this->children[$idx]; + } return null; } @@ -330,14 +333,14 @@ class simple_html_dom_node function find_ancestor_tag($tag) { global $debug_object; - if (is_object($debug_object)) { $debug_object->debugLogEntry(1); } + if (is_object($debug_object)) { $debug_object->debug_log_entry(1); } // Start by including ourselves in the comparison. $returnDom = $this; while (!is_null($returnDom)) { - if (is_object($debug_object)) { $debug_object->debugLog(2, "Current tag is: " . $returnDom->tag); } + if (is_object($debug_object)) { $debug_object->debug_log(2, "Current tag is: " . $returnDom->tag); } if ($returnDom->tag == $tag) { @@ -374,7 +377,7 @@ class simple_html_dom_node $text = " with text: " . $this->text; } } - $debug_object->debugLog(1, 'Innertext of tag: ' . $this->tag . $text); + $debug_object->debug_log(1, 'Innertext of tag: ' . $this->tag . $text); } if ($this->tag==='root') return $this->innertext(); @@ -532,7 +535,9 @@ class simple_html_dom_node foreach ($head as $k=>$v) { if (!isset($found_keys[$k])) + { $found_keys[$k] = 1; + } } } @@ -554,7 +559,7 @@ class simple_html_dom_node protected function seek($selector, &$ret, $lowercase=false) { global $debug_object; - if (is_object($debug_object)) { $debug_object->debugLogEntry(1); } + if (is_object($debug_object)) { $debug_object->debug_log_entry(1); } list($tag, $key, $val, $exp, $no_key) = $selector; @@ -615,7 +620,7 @@ class simple_html_dom_node // this is a normal search, we want the value of that attribute of the tag. $nodeKeyValue = $node->attr[$key]; } - if (is_object($debug_object)) {$debug_object->debugLog(2, "testing node: " . $node->tag . " for attribute: " . $key . $exp . $val . " where nodes value is: " . $nodeKeyValue);} + if (is_object($debug_object)) {$debug_object->debug_log(2, "testing node: " . $node->tag . " for attribute: " . $key . $exp . $val . " where nodes value is: " . $nodeKeyValue);} //PaperG - If lowercase is set, do a case insensitive test of the value of the selector. if ($lowercase) { @@ -623,7 +628,7 @@ class simple_html_dom_node } else { $check = $this->match($exp, $val, $nodeKeyValue); } - if (is_object($debug_object)) {$debug_object->debugLog(2, "after match: " . ($check ? "true" : "false"));} + if (is_object($debug_object)) {$debug_object->debug_log(2, "after match: " . ($check ? "true" : "false"));} // handle multiple class if (!$check && strcasecmp($key, 'class')===0) { @@ -645,12 +650,12 @@ class simple_html_dom_node unset($node); } // It's passed by reference so this is actually what this function returns. - if (is_object($debug_object)) {$debug_object->debugLog(1, "EXIT - ret: ", $ret);} + if (is_object($debug_object)) {$debug_object->debug_log(1, "EXIT - ret: ", $ret);} } protected function match($exp, $pattern, $value) { global $debug_object; - if (is_object($debug_object)) {$debug_object->debugLogEntry(1);} + if (is_object($debug_object)) {$debug_object->debug_log_entry(1);} switch ($exp) { case '=': @@ -672,7 +677,7 @@ class simple_html_dom_node protected function parse_selector($selector_string) { global $debug_object; - if (is_object($debug_object)) {$debug_object->debugLogEntry(1);} + if (is_object($debug_object)) {$debug_object->debug_log_entry(1);} // pattern of CSS selectors, modified from mootools // Paperg: Add the colon to the attrbute, so that it properly finds like google does. @@ -683,7 +688,7 @@ class simple_html_dom_node // $pattern = "/([\w-:\*]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is"; $pattern = "/([\w-:\*]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-:]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is"; preg_match_all($pattern, trim($selector_string).' ', $matches, PREG_SET_ORDER); - if (is_object($debug_object)) {$debug_object->debugLog(2, "Matches Array: ", $matches);} + if (is_object($debug_object)) {$debug_object->debug_log(2, "Matches Array: ", $matches);} $selectors = array(); $result = array(); @@ -718,12 +723,14 @@ class simple_html_dom_node return $selectors; } - function __get($name) { + function __get($name) + { if (isset($this->attr[$name])) { return $this->convert_text($this->attr[$name]); } - switch ($name) { + switch ($name) + { case 'outertext': return $this->outertext(); case 'innertext': return $this->innertext(); case 'plaintext': return $this->text(); @@ -732,22 +739,30 @@ class simple_html_dom_node } } - function __set($name, $value) { - switch ($name) { + function __set($name, $value) + { + global $debug_object; + if (is_object($debug_object)) {$debug_object->debug_log_entry(1);} + + switch ($name) + { case 'outertext': return $this->_[HDOM_INFO_OUTER] = $value; case 'innertext': if (isset($this->_[HDOM_INFO_TEXT])) return $this->_[HDOM_INFO_TEXT] = $value; return $this->_[HDOM_INFO_INNER] = $value; } - if (!isset($this->attr[$name])) { + if (!isset($this->attr[$name])) + { $this->_[HDOM_INFO_SPACE][] = array(' ', '', ''); $this->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_DOUBLE; } $this->attr[$name] = $value; } - function __isset($name) { - switch ($name) { + function __isset($name) + { + switch ($name) + { case 'outertext': return true; case 'innertext': return true; case 'plaintext': return true; @@ -765,7 +780,7 @@ class simple_html_dom_node function convert_text($text) { global $debug_object; - if (is_object($debug_object)) {$debug_object->debugLogEntry(1);} + if (is_object($debug_object)) {$debug_object->debug_log_entry(1);} $converted_text = $text; @@ -777,7 +792,7 @@ class simple_html_dom_node $sourceCharset = strtoupper($this->dom->_charset); $targetCharset = strtoupper($this->dom->_target_charset); } - if (is_object($debug_object)) {$debug_object->debugLog(3, "source charset: " . $sourceCharset . " target charaset: " . $targetCharset);} + if (is_object($debug_object)) {$debug_object->debug_log(3, "source charset: " . $sourceCharset . " target charaset: " . $targetCharset);} if (!empty($sourceCharset) && !empty($targetCharset) && (strcasecmp($sourceCharset, $targetCharset) != 0)) { @@ -1045,10 +1060,10 @@ class simple_html_dom // prepare $this->prepare($str, $lowercase, $stripRN, $defaultBRText, $defaultSpanText); - // strip out comments - $this->remove_noise("''is"); // strip out cdata $this->remove_noise("''is", true); + // strip out comments + $this->remove_noise("''is"); // Per sourceforge http://sourceforge.net/tracker/?func=detail&aid=2949097&group_id=218559&atid=1044037 // Script tags removal now preceeds style tag removal. // strip out