diff options
author | tcitworld <tcit@tcit.fr> | 2014-06-25 18:54:39 +0200 |
---|---|---|
committer | tcitworld <tcit@tcit.fr> | 2014-06-25 18:54:39 +0200 |
commit | 69213014d1fb8f05cffe7bf83467d938a282f29b (patch) | |
tree | aed7dc63fed6c360fdcdfccf0fa8d74bb77d1048 /inc/3rdparty/simple_html_dom.php | |
parent | c9563378eaa061a339ca5c0aa9e4a45f98e50c9f (diff) | |
parent | aa126ba458a02e8b1e43b15fc28f550ee72a9619 (diff) | |
download | wallabag-69213014d1fb8f05cffe7bf83467d938a282f29b.tar.gz wallabag-69213014d1fb8f05cffe7bf83467d938a282f29b.tar.zst wallabag-69213014d1fb8f05cffe7bf83467d938a282f29b.zip |
Merge pull request #736 from mariroz/dev
fix of issue #718: Error parsing file imported from Pocket #718
Diffstat (limited to 'inc/3rdparty/simple_html_dom.php')
-rwxr-xr-x[-rw-r--r--] | inc/3rdparty/simple_html_dom.php | 105 |
1 files changed, 65 insertions, 40 deletions
diff --git a/inc/3rdparty/simple_html_dom.php b/inc/3rdparty/simple_html_dom.php index 43b94e57..9b73b105 100644..100755 --- a/inc/3rdparty/simple_html_dom.php +++ b/inc/3rdparty/simple_html_dom.php | |||
@@ -34,7 +34,7 @@ | |||
34 | * @author S.C. Chen <me578022@gmail.com> | 34 | * @author S.C. Chen <me578022@gmail.com> |
35 | * @author John Schlick | 35 | * @author John Schlick |
36 | * @author Rus Carroll | 36 | * @author Rus Carroll |
37 | * @version 1.5 ($Rev: 202 $) | 37 | * @version 1.5 ($Rev: 210 $) |
38 | * @package PlaceLocalInclude | 38 | * @package PlaceLocalInclude |
39 | * @subpackage simple_html_dom | 39 | * @subpackage simple_html_dom |
40 | */ | 40 | */ |
@@ -269,7 +269,10 @@ class simple_html_dom_node | |||
269 | { | 269 | { |
270 | return $this->children; | 270 | return $this->children; |
271 | } | 271 | } |
272 | if (isset($this->children[$idx])) return $this->children[$idx]; | 272 | if (isset($this->children[$idx])) |
273 | { | ||
274 | return $this->children[$idx]; | ||
275 | } | ||
273 | return null; | 276 | return null; |
274 | } | 277 | } |
275 | 278 | ||
@@ -330,14 +333,14 @@ class simple_html_dom_node | |||
330 | function find_ancestor_tag($tag) | 333 | function find_ancestor_tag($tag) |
331 | { | 334 | { |
332 | global $debug_object; | 335 | global $debug_object; |
333 | if (is_object($debug_object)) { $debug_object->debugLogEntry(1); } | 336 | if (is_object($debug_object)) { $debug_object->debug_log_entry(1); } |
334 | 337 | ||
335 | // Start by including ourselves in the comparison. | 338 | // Start by including ourselves in the comparison. |
336 | $returnDom = $this; | 339 | $returnDom = $this; |
337 | 340 | ||
338 | while (!is_null($returnDom)) | 341 | while (!is_null($returnDom)) |
339 | { | 342 | { |
340 | if (is_object($debug_object)) { $debug_object->debugLog(2, "Current tag is: " . $returnDom->tag); } | 343 | if (is_object($debug_object)) { $debug_object->debug_log(2, "Current tag is: " . $returnDom->tag); } |
341 | 344 | ||
342 | if ($returnDom->tag == $tag) | 345 | if ($returnDom->tag == $tag) |
343 | { | 346 | { |
@@ -374,7 +377,7 @@ class simple_html_dom_node | |||
374 | $text = " with text: " . $this->text; | 377 | $text = " with text: " . $this->text; |
375 | } | 378 | } |
376 | } | 379 | } |
377 | $debug_object->debugLog(1, 'Innertext of tag: ' . $this->tag . $text); | 380 | $debug_object->debug_log(1, 'Innertext of tag: ' . $this->tag . $text); |
378 | } | 381 | } |
379 | 382 | ||
380 | if ($this->tag==='root') return $this->innertext(); | 383 | if ($this->tag==='root') return $this->innertext(); |
@@ -532,7 +535,9 @@ class simple_html_dom_node | |||
532 | foreach ($head as $k=>$v) | 535 | foreach ($head as $k=>$v) |
533 | { | 536 | { |
534 | if (!isset($found_keys[$k])) | 537 | if (!isset($found_keys[$k])) |
538 | { | ||
535 | $found_keys[$k] = 1; | 539 | $found_keys[$k] = 1; |
540 | } | ||
536 | } | 541 | } |
537 | } | 542 | } |
538 | 543 | ||
@@ -554,7 +559,7 @@ class simple_html_dom_node | |||
554 | protected function seek($selector, &$ret, $lowercase=false) | 559 | protected function seek($selector, &$ret, $lowercase=false) |
555 | { | 560 | { |
556 | global $debug_object; | 561 | global $debug_object; |
557 | if (is_object($debug_object)) { $debug_object->debugLogEntry(1); } | 562 | if (is_object($debug_object)) { $debug_object->debug_log_entry(1); } |
558 | 563 | ||
559 | list($tag, $key, $val, $exp, $no_key) = $selector; | 564 | list($tag, $key, $val, $exp, $no_key) = $selector; |
560 | 565 | ||
@@ -615,7 +620,7 @@ class simple_html_dom_node | |||
615 | // this is a normal search, we want the value of that attribute of the tag. | 620 | // this is a normal search, we want the value of that attribute of the tag. |
616 | $nodeKeyValue = $node->attr[$key]; | 621 | $nodeKeyValue = $node->attr[$key]; |
617 | } | 622 | } |
618 | if (is_object($debug_object)) {$debug_object->debugLog(2, "testing node: " . $node->tag . " for attribute: " . $key . $exp . $val . " where nodes value is: " . $nodeKeyValue);} | 623 | if (is_object($debug_object)) {$debug_object->debug_log(2, "testing node: " . $node->tag . " for attribute: " . $key . $exp . $val . " where nodes value is: " . $nodeKeyValue);} |
619 | 624 | ||
620 | //PaperG - If lowercase is set, do a case insensitive test of the value of the selector. | 625 | //PaperG - If lowercase is set, do a case insensitive test of the value of the selector. |
621 | if ($lowercase) { | 626 | if ($lowercase) { |
@@ -623,7 +628,7 @@ class simple_html_dom_node | |||
623 | } else { | 628 | } else { |
624 | $check = $this->match($exp, $val, $nodeKeyValue); | 629 | $check = $this->match($exp, $val, $nodeKeyValue); |
625 | } | 630 | } |
626 | if (is_object($debug_object)) {$debug_object->debugLog(2, "after match: " . ($check ? "true" : "false"));} | 631 | if (is_object($debug_object)) {$debug_object->debug_log(2, "after match: " . ($check ? "true" : "false"));} |
627 | 632 | ||
628 | // handle multiple class | 633 | // handle multiple class |
629 | if (!$check && strcasecmp($key, 'class')===0) { | 634 | if (!$check && strcasecmp($key, 'class')===0) { |
@@ -645,12 +650,12 @@ class simple_html_dom_node | |||
645 | unset($node); | 650 | unset($node); |
646 | } | 651 | } |
647 | // It's passed by reference so this is actually what this function returns. | 652 | // It's passed by reference so this is actually what this function returns. |
648 | if (is_object($debug_object)) {$debug_object->debugLog(1, "EXIT - ret: ", $ret);} | 653 | if (is_object($debug_object)) {$debug_object->debug_log(1, "EXIT - ret: ", $ret);} |
649 | } | 654 | } |
650 | 655 | ||
651 | protected function match($exp, $pattern, $value) { | 656 | protected function match($exp, $pattern, $value) { |
652 | global $debug_object; | 657 | global $debug_object; |
653 | if (is_object($debug_object)) {$debug_object->debugLogEntry(1);} | 658 | if (is_object($debug_object)) {$debug_object->debug_log_entry(1);} |
654 | 659 | ||
655 | switch ($exp) { | 660 | switch ($exp) { |
656 | case '=': | 661 | case '=': |
@@ -672,7 +677,7 @@ class simple_html_dom_node | |||
672 | 677 | ||
673 | protected function parse_selector($selector_string) { | 678 | protected function parse_selector($selector_string) { |
674 | global $debug_object; | 679 | global $debug_object; |
675 | if (is_object($debug_object)) {$debug_object->debugLogEntry(1);} | 680 | if (is_object($debug_object)) {$debug_object->debug_log_entry(1);} |
676 | 681 | ||
677 | // pattern of CSS selectors, modified from mootools | 682 | // pattern of CSS selectors, modified from mootools |
678 | // Paperg: Add the colon to the attrbute, so that it properly finds <tag attr:ibute="something" > like google does. | 683 | // Paperg: Add the colon to the attrbute, so that it properly finds <tag attr:ibute="something" > like google does. |
@@ -683,7 +688,7 @@ class simple_html_dom_node | |||
683 | // $pattern = "/([\w-:\*]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is"; | 688 | // $pattern = "/([\w-:\*]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is"; |
684 | $pattern = "/([\w-:\*]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-:]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is"; | 689 | $pattern = "/([\w-:\*]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-:]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is"; |
685 | preg_match_all($pattern, trim($selector_string).' ', $matches, PREG_SET_ORDER); | 690 | preg_match_all($pattern, trim($selector_string).' ', $matches, PREG_SET_ORDER); |
686 | if (is_object($debug_object)) {$debug_object->debugLog(2, "Matches Array: ", $matches);} | 691 | if (is_object($debug_object)) {$debug_object->debug_log(2, "Matches Array: ", $matches);} |
687 | 692 | ||
688 | $selectors = array(); | 693 | $selectors = array(); |
689 | $result = array(); | 694 | $result = array(); |
@@ -718,12 +723,14 @@ class simple_html_dom_node | |||
718 | return $selectors; | 723 | return $selectors; |
719 | } | 724 | } |
720 | 725 | ||
721 | function __get($name) { | 726 | function __get($name) |
727 | { | ||
722 | if (isset($this->attr[$name])) | 728 | if (isset($this->attr[$name])) |
723 | { | 729 | { |
724 | return $this->convert_text($this->attr[$name]); | 730 | return $this->convert_text($this->attr[$name]); |
725 | } | 731 | } |
726 | switch ($name) { | 732 | switch ($name) |
733 | { | ||
727 | case 'outertext': return $this->outertext(); | 734 | case 'outertext': return $this->outertext(); |
728 | case 'innertext': return $this->innertext(); | 735 | case 'innertext': return $this->innertext(); |
729 | case 'plaintext': return $this->text(); | 736 | case 'plaintext': return $this->text(); |
@@ -732,22 +739,30 @@ class simple_html_dom_node | |||
732 | } | 739 | } |
733 | } | 740 | } |
734 | 741 | ||
735 | function __set($name, $value) { | 742 | function __set($name, $value) |
736 | switch ($name) { | 743 | { |
744 | global $debug_object; | ||
745 | if (is_object($debug_object)) {$debug_object->debug_log_entry(1);} | ||
746 | |||
747 | switch ($name) | ||
748 | { | ||
737 | case 'outertext': return $this->_[HDOM_INFO_OUTER] = $value; | 749 | case 'outertext': return $this->_[HDOM_INFO_OUTER] = $value; |
738 | case 'innertext': | 750 | case 'innertext': |
739 | if (isset($this->_[HDOM_INFO_TEXT])) return $this->_[HDOM_INFO_TEXT] = $value; | 751 | if (isset($this->_[HDOM_INFO_TEXT])) return $this->_[HDOM_INFO_TEXT] = $value; |
740 | return $this->_[HDOM_INFO_INNER] = $value; | 752 | return $this->_[HDOM_INFO_INNER] = $value; |
741 | } | 753 | } |
742 | if (!isset($this->attr[$name])) { | 754 | if (!isset($this->attr[$name])) |
755 | { | ||
743 | $this->_[HDOM_INFO_SPACE][] = array(' ', '', ''); | 756 | $this->_[HDOM_INFO_SPACE][] = array(' ', '', ''); |
744 | $this->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_DOUBLE; | 757 | $this->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_DOUBLE; |
745 | } | 758 | } |
746 | $this->attr[$name] = $value; | 759 | $this->attr[$name] = $value; |
747 | } | 760 | } |
748 | 761 | ||
749 | function __isset($name) { | 762 | function __isset($name) |
750 | switch ($name) { | 763 | { |
764 | switch ($name) | ||
765 | { | ||
751 | case 'outertext': return true; | 766 | case 'outertext': return true; |
752 | case 'innertext': return true; | 767 | case 'innertext': return true; |
753 | case 'plaintext': return true; | 768 | case 'plaintext': return true; |
@@ -765,7 +780,7 @@ class simple_html_dom_node | |||
765 | function convert_text($text) | 780 | function convert_text($text) |
766 | { | 781 | { |
767 | global $debug_object; | 782 | global $debug_object; |
768 | if (is_object($debug_object)) {$debug_object->debugLogEntry(1);} | 783 | if (is_object($debug_object)) {$debug_object->debug_log_entry(1);} |
769 | 784 | ||
770 | $converted_text = $text; | 785 | $converted_text = $text; |
771 | 786 | ||
@@ -777,7 +792,7 @@ class simple_html_dom_node | |||
777 | $sourceCharset = strtoupper($this->dom->_charset); | 792 | $sourceCharset = strtoupper($this->dom->_charset); |
778 | $targetCharset = strtoupper($this->dom->_target_charset); | 793 | $targetCharset = strtoupper($this->dom->_target_charset); |
779 | } | 794 | } |
780 | if (is_object($debug_object)) {$debug_object->debugLog(3, "source charset: " . $sourceCharset . " target charaset: " . $targetCharset);} | 795 | if (is_object($debug_object)) {$debug_object->debug_log(3, "source charset: " . $sourceCharset . " target charaset: " . $targetCharset);} |
781 | 796 | ||
782 | if (!empty($sourceCharset) && !empty($targetCharset) && (strcasecmp($sourceCharset, $targetCharset) != 0)) | 797 | if (!empty($sourceCharset) && !empty($targetCharset) && (strcasecmp($sourceCharset, $targetCharset) != 0)) |
783 | { | 798 | { |
@@ -1045,10 +1060,10 @@ class simple_html_dom | |||
1045 | 1060 | ||
1046 | // prepare | 1061 | // prepare |
1047 | $this->prepare($str, $lowercase, $stripRN, $defaultBRText, $defaultSpanText); | 1062 | $this->prepare($str, $lowercase, $stripRN, $defaultBRText, $defaultSpanText); |
1048 | // strip out comments | ||
1049 | $this->remove_noise("'<!--(.*?)-->'is"); | ||
1050 | // strip out cdata | 1063 | // strip out cdata |
1051 | $this->remove_noise("'<!\[CDATA\[(.*?)\]\]>'is", true); | 1064 | $this->remove_noise("'<!\[CDATA\[(.*?)\]\]>'is", true); |
1065 | // strip out comments | ||
1066 | $this->remove_noise("'<!--(.*?)-->'is"); | ||
1052 | // Per sourceforge http://sourceforge.net/tracker/?func=detail&aid=2949097&group_id=218559&atid=1044037 | 1067 | // Per sourceforge http://sourceforge.net/tracker/?func=detail&aid=2949097&group_id=218559&atid=1044037 |
1053 | // Script tags removal now preceeds style tag removal. | 1068 | // Script tags removal now preceeds style tag removal. |
1054 | // strip out <script> tags | 1069 | // strip out <script> tags |
@@ -1078,10 +1093,15 @@ class simple_html_dom | |||
1078 | // load html from file | 1093 | // load html from file |
1079 | function load_file() | 1094 | function load_file() |
1080 | { | 1095 | { |
1096 | //external error: NOT related to dom loading | ||
1097 | $extError=error_get_last(); | ||
1098 | |||
1081 | $args = func_get_args(); | 1099 | $args = func_get_args(); |
1082 | $this->load(call_user_func_array('file_get_contents', $args), true); | 1100 | $this->load(call_user_func_array('file_get_contents', $args), true); |
1101 | |||
1083 | // Throw an error if we can't properly load the dom. | 1102 | // Throw an error if we can't properly load the dom. |
1084 | if (($error=error_get_last())!==null) { | 1103 | $error=error_get_last(); |
1104 | if ($error!==$extError) { | ||
1085 | $this->clear(); | 1105 | $this->clear(); |
1086 | return false; | 1106 | return false; |
1087 | } | 1107 | } |
@@ -1198,22 +1218,22 @@ class simple_html_dom | |||
1198 | if ($success) | 1218 | if ($success) |
1199 | { | 1219 | { |
1200 | $charset = $matches[1]; | 1220 | $charset = $matches[1]; |
1201 | if (is_object($debug_object)) {$debug_object->debugLog(2, 'header content-type found charset of: ' . $charset);} | 1221 | if (is_object($debug_object)) {$debug_object->debug_log(2, 'header content-type found charset of: ' . $charset);} |
1202 | } | 1222 | } |
1203 | 1223 | ||
1204 | } | 1224 | } |
1205 | 1225 | ||
1206 | if (empty($charset)) | 1226 | if (empty($charset)) |
1207 | { | 1227 | { |
1208 | $el = $this->root->find('meta[http-equiv=Content-Type]',0); | 1228 | $el = $this->root->find('meta[http-equiv=Content-Type]',0, true); |
1209 | if (!empty($el)) | 1229 | if (!empty($el)) |
1210 | { | 1230 | { |
1211 | $fullvalue = $el->content; | 1231 | $fullvalue = $el->content; |
1212 | if (is_object($debug_object)) {$debug_object->debugLog(2, 'meta content-type tag found' . $fullvalue);} | 1232 | if (is_object($debug_object)) {$debug_object->debug_log(2, 'meta content-type tag found' . $fullvalue);} |
1213 | 1233 | ||
1214 | if (!empty($fullvalue)) | 1234 | if (!empty($fullvalue)) |
1215 | { | 1235 | { |
1216 | $success = preg_match('/charset=(.+)/', $fullvalue, $matches); | 1236 | $success = preg_match('/charset=(.+)/i', $fullvalue, $matches); |
1217 | if ($success) | 1237 | if ($success) |
1218 | { | 1238 | { |
1219 | $charset = $matches[1]; | 1239 | $charset = $matches[1]; |
@@ -1221,7 +1241,7 @@ class simple_html_dom | |||
1221 | else | 1241 | else |
1222 | { | 1242 | { |
1223 | // If there is a meta tag, and they don't specify the character set, research says that it's typically ISO-8859-1 | 1243 | // If there is a meta tag, and they don't specify the character set, research says that it's typically ISO-8859-1 |
1224 | if (is_object($debug_object)) {$debug_object->debugLog(2, 'meta content-type tag couldn\'t be parsed. using iso-8859 default.');} | 1244 | if (is_object($debug_object)) {$debug_object->debug_log(2, 'meta content-type tag couldn\'t be parsed. using iso-8859 default.');} |
1225 | $charset = 'ISO-8859-1'; | 1245 | $charset = 'ISO-8859-1'; |
1226 | } | 1246 | } |
1227 | } | 1247 | } |
@@ -1231,14 +1251,19 @@ class simple_html_dom | |||
1231 | // If we couldn't find a charset above, then lets try to detect one based on the text we got... | 1251 | // If we couldn't find a charset above, then lets try to detect one based on the text we got... |
1232 | if (empty($charset)) | 1252 | if (empty($charset)) |
1233 | { | 1253 | { |
1234 | // Have php try to detect the encoding from the text given to us. | 1254 | // Use this in case mb_detect_charset isn't installed/loaded on this machine. |
1235 | $charset = mb_detect_encoding($this->root->plaintext . "ascii", $encoding_list = array( "UTF-8", "CP1252" ) ); | 1255 | $charset = false; |
1236 | if (is_object($debug_object)) {$debug_object->debugLog(2, 'mb_detect found: ' . $charset);} | 1256 | if (function_exists('mb_detect_encoding')) |
1257 | { | ||
1258 | // Have php try to detect the encoding from the text given to us. | ||
1259 | $charset = mb_detect_encoding($this->root->plaintext . "ascii", $encoding_list = array( "UTF-8", "CP1252" ) ); | ||
1260 | if (is_object($debug_object)) {$debug_object->debug_log(2, 'mb_detect found: ' . $charset);} | ||
1261 | } | ||
1237 | 1262 | ||
1238 | // and if this doesn't work... then we need to just wrongheadedly assume it's UTF-8 so that we can move on - cause this will usually give us most of what we need... | 1263 | // and if this doesn't work... then we need to just wrongheadedly assume it's UTF-8 so that we can move on - cause this will usually give us most of what we need... |
1239 | if ($charset === false) | 1264 | if ($charset === false) |
1240 | { | 1265 | { |
1241 | if (is_object($debug_object)) {$debug_object->debugLog(2, 'since mb_detect failed - using default of utf-8');} | 1266 | if (is_object($debug_object)) {$debug_object->debug_log(2, 'since mb_detect failed - using default of utf-8');} |
1242 | $charset = 'UTF-8'; | 1267 | $charset = 'UTF-8'; |
1243 | } | 1268 | } |
1244 | } | 1269 | } |
@@ -1246,11 +1271,11 @@ class simple_html_dom | |||
1246 | // Since CP1252 is a superset, if we get one of it's subsets, we want it instead. | 1271 | // Since CP1252 is a superset, if we get one of it's subsets, we want it instead. |
1247 | if ((strtolower($charset) == strtolower('ISO-8859-1')) || (strtolower($charset) == strtolower('Latin1')) || (strtolower($charset) == strtolower('Latin-1'))) | 1272 | if ((strtolower($charset) == strtolower('ISO-8859-1')) || (strtolower($charset) == strtolower('Latin1')) || (strtolower($charset) == strtolower('Latin-1'))) |
1248 | { | 1273 | { |
1249 | if (is_object($debug_object)) {$debug_object->debugLog(2, 'replacing ' . $charset . ' with CP1252 as its a superset');} | 1274 | if (is_object($debug_object)) {$debug_object->debug_log(2, 'replacing ' . $charset . ' with CP1252 as its a superset');} |
1250 | $charset = 'CP1252'; | 1275 | $charset = 'CP1252'; |
1251 | } | 1276 | } |
1252 | 1277 | ||
1253 | if (is_object($debug_object)) {$debug_object->debugLog(1, 'EXIT - ' . $charset);} | 1278 | if (is_object($debug_object)) {$debug_object->debug_log(1, 'EXIT - ' . $charset);} |
1254 | 1279 | ||
1255 | return $this->_charset = $charset; | 1280 | return $this->_charset = $charset; |
1256 | } | 1281 | } |
@@ -1616,14 +1641,14 @@ class simple_html_dom | |||
1616 | protected function remove_noise($pattern, $remove_tag=false) | 1641 | protected function remove_noise($pattern, $remove_tag=false) |
1617 | { | 1642 | { |
1618 | global $debug_object; | 1643 | global $debug_object; |
1619 | if (is_object($debug_object)) { $debug_object->debugLogEntry(1); } | 1644 | if (is_object($debug_object)) { $debug_object->debug_log_entry(1); } |
1620 | 1645 | ||
1621 | $count = preg_match_all($pattern, $this->doc, $matches, PREG_SET_ORDER|PREG_OFFSET_CAPTURE); | 1646 | $count = preg_match_all($pattern, $this->doc, $matches, PREG_SET_ORDER|PREG_OFFSET_CAPTURE); |
1622 | 1647 | ||
1623 | for ($i=$count-1; $i>-1; --$i) | 1648 | for ($i=$count-1; $i>-1; --$i) |
1624 | { | 1649 | { |
1625 | $key = '___noise___'.sprintf('% 5d', count($this->noise)+1000); | 1650 | $key = '___noise___'.sprintf('% 5d', count($this->noise)+1000); |
1626 | if (is_object($debug_object)) { $debug_object->debugLog(2, 'key is: ' . $key); } | 1651 | if (is_object($debug_object)) { $debug_object->debug_log(2, 'key is: ' . $key); } |
1627 | $idx = ($remove_tag) ? 0 : 1; | 1652 | $idx = ($remove_tag) ? 0 : 1; |
1628 | $this->noise[$key] = $matches[$i][$idx][0]; | 1653 | $this->noise[$key] = $matches[$i][$idx][0]; |
1629 | $this->doc = substr_replace($this->doc, $key, $matches[$i][$idx][1], strlen($matches[$i][$idx][0])); | 1654 | $this->doc = substr_replace($this->doc, $key, $matches[$i][$idx][1], strlen($matches[$i][$idx][0])); |
@@ -1641,7 +1666,7 @@ class simple_html_dom | |||
1641 | function restore_noise($text) | 1666 | function restore_noise($text) |
1642 | { | 1667 | { |
1643 | global $debug_object; | 1668 | global $debug_object; |
1644 | if (is_object($debug_object)) { $debug_object->debugLogEntry(1); } | 1669 | if (is_object($debug_object)) { $debug_object->debug_log_entry(1); } |
1645 | 1670 | ||
1646 | while (($pos=strpos($text, '___noise___'))!==false) | 1671 | while (($pos=strpos($text, '___noise___'))!==false) |
1647 | { | 1672 | { |
@@ -1649,7 +1674,7 @@ class simple_html_dom | |||
1649 | if (strlen($text) > $pos+15) | 1674 | if (strlen($text) > $pos+15) |
1650 | { | 1675 | { |
1651 | $key = '___noise___'.$text[$pos+11].$text[$pos+12].$text[$pos+13].$text[$pos+14].$text[$pos+15]; | 1676 | $key = '___noise___'.$text[$pos+11].$text[$pos+12].$text[$pos+13].$text[$pos+14].$text[$pos+15]; |
1652 | if (is_object($debug_object)) { $debug_object->debugLog(2, 'located key of: ' . $key); } | 1677 | if (is_object($debug_object)) { $debug_object->debug_log(2, 'located key of: ' . $key); } |
1653 | 1678 | ||
1654 | if (isset($this->noise[$key])) | 1679 | if (isset($this->noise[$key])) |
1655 | { | 1680 | { |
@@ -1674,7 +1699,7 @@ class simple_html_dom | |||
1674 | function search_noise($text) | 1699 | function search_noise($text) |
1675 | { | 1700 | { |
1676 | global $debug_object; | 1701 | global $debug_object; |
1677 | if (is_object($debug_object)) { $debug_object->debugLogEntry(1); } | 1702 | if (is_object($debug_object)) { $debug_object->debug_log_entry(1); } |
1678 | 1703 | ||
1679 | foreach($this->noise as $noiseElement) | 1704 | foreach($this->noise as $noiseElement) |
1680 | { | 1705 | { |