From 4ddbd26787ebbf4c5e01061ac9dbee4f8c21421b Mon Sep 17 00:00:00 2001 From: memiks Date: Tue, 23 Apr 2013 07:22:19 -0500 Subject: Ajout du global msg et gestion du retour dans Readability --- inc/Readability.php | 7 ++++--- inc/functions.php | 29 +++++++++++++++++++++++++---- 2 files changed, 29 insertions(+), 7 deletions(-) (limited to 'inc') diff --git a/inc/Readability.php b/inc/Readability.php index c50bf2ef..19298c13 100644 --- a/inc/Readability.php +++ b/inc/Readability.php @@ -80,7 +80,7 @@ class Readability public $debug = false; protected $body = null; // protected $bodyCache = null; // Cache the body HTML in case we need to re-use it later - protected $flags = self::FLAG_CLEAN_CONDITIONALLY; // 1 | 2 | 4; // Start with all flags set. + protected $flags = Self::FLAG_CLEAN_CONDITIONALLY; // 1 | 2 | 4; // Start with all flags set. protected $success = false; // indicates whether we were able to extract or not /** @@ -90,7 +90,7 @@ class Readability public $regexps = array( 'unlikelyCandidates' => '/combx|comment|comments|community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup|tweet|twitter/i', 'okMaybeItsACandidate' => '/and|article|body|column|main|shadow/i', - 'positive' => '/article|body|content|entry|hentry|main|page|pagination|post|text|blog|story/i', + 'positive' => '/article|body|content|entry|hentry|main|page|pagination|post|text|blog|story|attachment/i', 'negative' => '/combx|comment|comments|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget/i', 'divToPElements' => '/<(a|blockquote|dl|div|ol|p|pre|table|ul)/i', 'replaceBrs' => '/(]*>[ \n\r\t]*){2,}/i', @@ -106,7 +106,7 @@ class Readability * Create instance of Readability * @param string UTF-8 encoded string * @param string (optional) URL associated with HTML (used for footnotes) - */ + */ function __construct($html, $url=null) { /* Turn all double br's into p's */ @@ -185,6 +185,7 @@ class Readability $articleContent = $this->dom->createElement('div'); $articleContent->setAttribute('id', 'readability-content'); $articleContent->innerHTML = '

Sorry, Readability was unable to parse this page for content.

'; + return $this->success; } $overlay->setAttribute('id', 'readOverlay'); diff --git a/inc/functions.php b/inc/functions.php index 205f3968..b27120c5 100644 --- a/inc/functions.php +++ b/inc/functions.php @@ -39,6 +39,10 @@ function get_external_file($url) curl_setopt($curl, CURLOPT_RETURNTRANSFER, true); curl_setopt($curl, CURLOPT_HEADER, false); + // FOR SSL do not verified certificate + curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, FALSE); + curl_setopt($curl, CURLOPT_AUTOREFERER, TRUE ); + // FeedBurner requires a proper USER-AGENT... curl_setopt($curl, CURL_HTTP_VERSION_1_1, true); curl_setopt($curl, CURLOPT_ENCODING, "gzip, deflate"); @@ -54,7 +58,15 @@ function get_external_file($url) } else { // create http context and add timeout and user-agent - $context = stream_context_create(array('http'=>array('timeout' => $timeout,'header'=> "User-Agent: ".$useragent,/*spoot Mozilla Firefox*/'follow_location' => true))); + $context = stream_context_create(array( + 'http'=>array('timeout' => $timeout, + 'header'=> "User-Agent: ".$useragent, /*spoot Mozilla Firefox*/ + 'follow_location' => true), + // FOR SSL do not verified certificate + 'ssl' => array('verify_peer' => false, + 'allow_self_signed' => true) + ) + ); // only download page lesser than 4MB $data = @file_get_contents($url, false, $context, -1, 4000000); // We download at most 4 MB from source. @@ -98,6 +110,8 @@ function get_external_file($url) */ function prepare_url($url) { + global $msg; + $parametres = array(); $url = html_entity_decode(trim($url)); @@ -108,14 +122,21 @@ function prepare_url($url) $i=strpos($url,'#xtor=RSS-'); if ($i!==false) $url=substr($url,0,$i); $title = $url; - if (!preg_match('!^https?://!i', $url)) - $url = 'http://' . $url; + $html = Encoding::toUTF8(get_external_file($url,15)); + // If get_external_file if not able to retrieve HTTPS content try the same URL with HTTP protocol + if (!preg_match('!^https?://!i', $url) && (!isset($html) || strlen($html) <= 0)) { + $url = 'http://' . $url; + $html = Encoding::toUTF8(get_external_file($url,15)); + } - $html = Encoding::toUTF8(get_external_file($url,15)); if (isset($html) and strlen($html) > 0) { $r = new Readability($html, $url); + $r->convertLinksToFootnotes = CONVERT_LINKS_FOOTNOTES; + $r->debug=true; + $r->revertForcedParagraphElements = REVERT_FORCED_PARAGRAPH_ELEMENTS; + if($r->init()) { $content = $r->articleContent->innerHTML; -- cgit v1.2.3 From ae9571694f94ef363dba8564fef6b43ae833b508 Mon Sep 17 00:00:00 2001 From: memiks Date: Tue, 23 Apr 2013 07:29:16 -0500 Subject: Correction erreur sur le Self --- inc/Readability.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'inc') diff --git a/inc/Readability.php b/inc/Readability.php index 19298c13..2ce90f6d 100644 --- a/inc/Readability.php +++ b/inc/Readability.php @@ -80,7 +80,7 @@ class Readability public $debug = false; protected $body = null; // protected $bodyCache = null; // Cache the body HTML in case we need to re-use it later - protected $flags = Self::FLAG_CLEAN_CONDITIONALLY; // 1 | 2 | 4; // Start with all flags set. + protected $flags = self::FLAG_CLEAN_CONDITIONALLY; // 1 | 2 | 4; // Start with all flags set. protected $success = false; // indicates whether we were able to extract or not /** -- cgit v1.2.3 From cdcc8d2533d2ed65ac6a89c9a6d0041de7361ce1 Mon Sep 17 00:00:00 2001 From: memiks Date: Tue, 23 Apr 2013 08:09:54 -0500 Subject: Remove debug on Readability output --- inc/functions.php | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'inc') diff --git a/inc/functions.php b/inc/functions.php index b27120c5..0303aab6 100644 --- a/inc/functions.php +++ b/inc/functions.php @@ -134,8 +134,7 @@ function prepare_url($url) $r = new Readability($html, $url); $r->convertLinksToFootnotes = CONVERT_LINKS_FOOTNOTES; - $r->debug=true; - $r->revertForcedParagraphElements = REVERT_FORCED_PARAGRAPH_ELEMENTS; + $r->revertForcedParagraphElements = REVERT_FORCED_PARAGRAPH_ELEMENTS; if($r->init()) { @@ -372,4 +371,4 @@ function logm($message) { $t = strval(date('Y/m/d_H:i:s')).' - '.$_SERVER["REMOTE_ADDR"].' - '.strval($message)."\n"; file_put_contents('./log.txt',$t,FILE_APPEND); -} \ No newline at end of file +} -- cgit v1.2.3