From b9523a0ba09b5641e93fcd8300dd1a9f5145da2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20L=C5=93uillet?= Date: Thu, 12 Sep 2013 19:28:59 +0200 Subject: fix bug #209: titles with colon bad parsed --- inc/poche/PocheReadability.php | 46 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 inc/poche/PocheReadability.php (limited to 'inc/poche/PocheReadability.php') diff --git a/inc/poche/PocheReadability.php b/inc/poche/PocheReadability.php new file mode 100644 index 00000000..48ae90d0 --- /dev/null +++ b/inc/poche/PocheReadability.php @@ -0,0 +1,46 @@ +getInnerText($this->dom->getElementsByTagName('title')->item(0)); + } catch(Exception $e) {} + + if (preg_match('/ [\|\-] /', $curTitle)) + { + $curTitle = preg_replace('/(.*)[\|\-] .*/i', '$1', $origTitle); + + if (count(explode(' ', $curTitle)) < 3) { + $curTitle = preg_replace('/[^\|\-]*[\|\-](.*)/i', '$1', $origTitle); + } + } + else if(strlen($curTitle) > 150 || strlen($curTitle) < 15) + { + $hOnes = $this->dom->getElementsByTagName('h1'); + if($hOnes->length == 1) + { + $curTitle = $this->getInnerText($hOnes->item(0)); + } + } + + $curTitle = trim($curTitle); + + if (count(explode(' ', $curTitle)) <= 4) { + $curTitle = $origTitle; + } + + $articleTitle = $this->dom->createElement('h1'); + $articleTitle->innerHTML = $curTitle; + + return $articleTitle; + } +} \ No newline at end of file -- cgit v1.2.3