From: Nicolas Lœuillet Date: Thu, 12 Sep 2013 17:28:59 +0000 (+0200) Subject: fix bug #209: titles with colon bad parsed X-Git-Tag: 1.0-beta5^2~21 X-Git-Url: https://git.immae.eu/?a=commitdiff_plain;h=b9523a0ba09b5641e93fcd8300dd1a9f5145da2d;p=github%2Fwallabag%2Fwallabag.git fix bug #209: titles with colon bad parsed --- diff --git a/inc/poche/PocheReadability.php b/inc/poche/PocheReadability.php new file mode 100644 index 00000000..48ae90d0 --- /dev/null +++ b/inc/poche/PocheReadability.php @@ -0,0 +1,46 @@ +getInnerText($this->dom->getElementsByTagName('title')->item(0)); + } catch(Exception $e) {} + + if (preg_match('/ [\|\-] /', $curTitle)) + { + $curTitle = preg_replace('/(.*)[\|\-] .*/i', '$1', $origTitle); + + if (count(explode(' ', $curTitle)) < 3) { + $curTitle = preg_replace('/[^\|\-]*[\|\-](.*)/i', '$1', $origTitle); + } + } + else if(strlen($curTitle) > 150 || strlen($curTitle) < 15) + { + $hOnes = $this->dom->getElementsByTagName('h1'); + if($hOnes->length == 1) + { + $curTitle = $this->getInnerText($hOnes->item(0)); + } + } + + $curTitle = trim($curTitle); + + if (count(explode(' ', $curTitle)) <= 4) { + $curTitle = $origTitle; + } + + $articleTitle = $this->dom->createElement('h1'); + $articleTitle->innerHTML = $curTitle; + + return $articleTitle; + } +} \ No newline at end of file diff --git a/inc/poche/Url.class.php b/inc/poche/Url.class.php index 5a893014..600a2166 100644 --- a/inc/poche/Url.class.php +++ b/inc/poche/Url.class.php @@ -354,7 +354,7 @@ class Url } if (isset($splink)) { // Build DOM tree from HTML - $readability = new Readability($html, $url); + $readability = new PocheReadability($html, $url); $xpath = new DOMXPath($readability->dom); // Loop through single_page_link xpath expressions $single_page_url = null; diff --git a/inc/poche/config.inc.php b/inc/poche/config.inc.php index 45526695..aaa26af8 100755 --- a/inc/poche/config.inc.php +++ b/inc/poche/config.inc.php @@ -20,6 +20,7 @@ require_once __DIR__ . 
'/../../inc/poche/Url.class.php'; require_once __DIR__ . '/../../inc/3rdparty/class.messages.php'; require_once __DIR__ . '/../../inc/poche/Poche.class.php'; require_once __DIR__ . '/../../inc/3rdparty/Readability.php'; +require_once __DIR__ . '/../../inc/poche/PocheReadability.php'; require_once __DIR__ . '/../../inc/3rdparty/Encoding.php'; require_once __DIR__ . '/../../inc/poche/Database.class.php'; require_once __DIR__ . '/../../vendor/autoload.php'; @@ -47,11 +48,4 @@ if (!ini_get('date.timezone') || !@date_default_timezone_set(ini_get('date.timez date_default_timezone_set('UTC'); } -$poche = new Poche(); -#XSRF protection with token -// if (!empty($_POST)) { -// if (!Session::isToken($_POST['token'])) { -// die(_('Wrong token')); -// } -// unset($_SESSION['tokens']); -// } \ No newline at end of file +$poche = new Poche(); \ No newline at end of file