diff options
author | nicosomb <nicolas@loeuillet.org> | 2013-04-23 15:19:33 +0200 |
---|---|---|
committer | nicosomb <nicolas@loeuillet.org> | 2013-04-23 15:19:33 +0200 |
commit | 3e7188185d9d4be054ed3807c8b910d1e5f504f8 (patch) | |
tree | 1caacbc5ed979eb4e0266fc35588e4d88c9cb9ed /inc/Readability.php | |
parent | 8d5aab49c185313338245f9c3a878628e16d7c85 (diff) | |
parent | cdcc8d2533d2ed65ac6a89c9a6d0041de7361ce1 (diff) | |
download | wallabag-3e7188185d9d4be054ed3807c8b910d1e5f504f8.tar.gz wallabag-3e7188185d9d4be054ed3807c8b910d1e5f504f8.tar.zst wallabag-3e7188185d9d4be054ed3807c8b910d1e5f504f8.zip |
Merge branch 'memiks-gestion_erreur_readability' into dev
Diffstat (limited to 'inc/Readability.php')
-rw-r--r-- | inc/Readability.php | 7 |
1 file changed, 4 insertions, 3 deletions
diff --git a/inc/Readability.php b/inc/Readability.php index c50bf2ef..2ce90f6d 100644 --- a/inc/Readability.php +++ b/inc/Readability.php | |||
@@ -80,7 +80,7 @@ class Readability | |||
80 | public $debug = false; | 80 | public $debug = false; |
81 | protected $body = null; // | 81 | protected $body = null; // |
82 | protected $bodyCache = null; // Cache the body HTML in case we need to re-use it later | 82 | protected $bodyCache = null; // Cache the body HTML in case we need to re-use it later |
83 | protected $flags = self::FLAG_CLEAN_CONDITIONALLY; // 1 | 2 | 4; // Start with all flags set. | 83 | protected $flags = self::FLAG_CLEAN_CONDITIONALLY; // 1 | 2 | 4; // Start with all flags set. |
84 | protected $success = false; // indicates whether we were able to extract or not | 84 | protected $success = false; // indicates whether we were able to extract or not |
85 | 85 | ||
86 | /** | 86 | /** |
@@ -90,7 +90,7 @@ class Readability | |||
90 | public $regexps = array( | 90 | public $regexps = array( |
91 | 'unlikelyCandidates' => '/combx|comment|comments|community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup|tweet|twitter/i', | 91 | 'unlikelyCandidates' => '/combx|comment|comments|community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup|tweet|twitter/i', |
92 | 'okMaybeItsACandidate' => '/and|article|body|column|main|shadow/i', | 92 | 'okMaybeItsACandidate' => '/and|article|body|column|main|shadow/i', |
93 | 'positive' => '/article|body|content|entry|hentry|main|page|pagination|post|text|blog|story/i', | 93 | 'positive' => '/article|body|content|entry|hentry|main|page|pagination|post|text|blog|story|attachment/i', |
94 | 'negative' => '/combx|comment|comments|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget/i', | 94 | 'negative' => '/combx|comment|comments|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget/i', |
95 | 'divToPElements' => '/<(a|blockquote|dl|div|ol|p|pre|table|ul)/i', | 95 | 'divToPElements' => '/<(a|blockquote|dl|div|ol|p|pre|table|ul)/i', |
96 | 'replaceBrs' => '/(<br[^>]*>[ \n\r\t]*){2,}/i', | 96 | 'replaceBrs' => '/(<br[^>]*>[ \n\r\t]*){2,}/i', |
@@ -106,7 +106,7 @@ class Readability | |||
106 | * Create instance of Readability | 106 | * Create instance of Readability |
107 | * @param string UTF-8 encoded string | 107 | * @param string UTF-8 encoded string |
108 | * @param string (optional) URL associated with HTML (used for footnotes) | 108 | * @param string (optional) URL associated with HTML (used for footnotes) |
109 | */ | 109 | */ |
110 | function __construct($html, $url=null) | 110 | function __construct($html, $url=null) |
111 | { | 111 | { |
112 | /* Turn all double br's into p's */ | 112 | /* Turn all double br's into p's */ |
@@ -185,6 +185,7 @@ class Readability | |||
185 | $articleContent = $this->dom->createElement('div'); | 185 | $articleContent = $this->dom->createElement('div'); |
186 | $articleContent->setAttribute('id', 'readability-content'); | 186 | $articleContent->setAttribute('id', 'readability-content'); |
187 | $articleContent->innerHTML = '<p>Sorry, Readability was unable to parse this page for content.</p>'; | 187 | $articleContent->innerHTML = '<p>Sorry, Readability was unable to parse this page for content.</p>'; |
188 | return $this->success; | ||
188 | } | 189 | } |
189 | 190 | ||
190 | $overlay->setAttribute('id', 'readOverlay'); | 191 | $overlay->setAttribute('id', 'readOverlay'); |