diff options
author | Nicolas Lœuillet <nicolas.loeuillet@gmail.com> | 2013-09-12 19:28:59 +0200 |
---|---|---|
committer | Nicolas Lœuillet <nicolas.loeuillet@gmail.com> | 2013-09-12 19:28:59 +0200 |
commit | b9523a0ba09b5641e93fcd8300dd1a9f5145da2d (patch) | |
tree | 404e6ec03b241003610aab888a65ab47e90e6e97 /inc/poche/PocheReadability.php | |
parent | 084ec2a63df90b775195ad874f8fc0d7fbefbc93 (diff) | |
download | wallabag-b9523a0ba09b5641e93fcd8300dd1a9f5145da2d.tar.gz wallabag-b9523a0ba09b5641e93fcd8300dd1a9f5145da2d.tar.zst wallabag-b9523a0ba09b5641e93fcd8300dd1a9f5145da2d.zip |
fix bug #209: titles with colon bad parsed
Diffstat (limited to 'inc/poche/PocheReadability.php')
-rw-r--r-- | inc/poche/PocheReadability.php | 46 |
1 files changed, 46 insertions, 0 deletions
diff --git a/inc/poche/PocheReadability.php b/inc/poche/PocheReadability.php new file mode 100644 index 00000000..48ae90d0 --- /dev/null +++ b/inc/poche/PocheReadability.php | |||
@@ -0,0 +1,46 @@ | |||
1 | <?php | ||
2 | |||
class PocheReadability extends Readability
{
    /**
     * Get the article title as an H1.
     *
     * The candidate title is the text of the document's first <title> element.
     * It is then refined with these heuristics:
     *  - if it contains " | " or " - ", keep the part before the last separator;
     *    when that part has fewer than three space-separated pieces, keep the
     *    part after the first separator instead;
     *  - otherwise, if it is longer than 150 or shorter than 15 bytes and the
     *    document has exactly one <h1>, use that heading's text;
     *  - if the result has four or fewer space-separated pieces, fall back to
     *    the original <title> text.
     *
     * A colon is not treated as a separator, so titles containing ": " are
     * kept whole.
     *
     * @return DOMElement a new, unattached <h1> whose only content is the title text
     */
    protected function getArticleTitle() {
        $curTitle = '';
        $origTitle = '';

        try {
            $curTitle = $origTitle = $this->getInnerText($this->dom->getElementsByTagName('title')->item(0));
        } catch(Exception $e) {
            // Best effort: if the <title> text cannot be read, continue with an empty title.
        }

        if (preg_match('/ [\|\-] /', $curTitle))
        {
            // Greedy match: keeps everything before the LAST "|" or "-" that is followed by a space.
            $curTitle = preg_replace('/(.*)[\|\-] .*/i', '$1', $origTitle);

            // Too short to be a real title: take what follows the FIRST separator instead.
            if (count(explode(' ', $curTitle)) < 3) {
                $curTitle = preg_replace('/[^\|\-]*[\|\-](.*)/i', '$1', $origTitle);
            }
        }
        else if(strlen($curTitle) > 150 || strlen($curTitle) < 15)
        {
            // Implausibly long or short <title>: trust a lone <h1> if the page has exactly one.
            $hOnes = $this->dom->getElementsByTagName('h1');
            if($hOnes->length == 1)
            {
                $curTitle = $this->getInnerText($hOnes->item(0));
            }
        }

        $curTitle = trim($curTitle);

        // The heuristics stripped too much; revert to the untouched <title> text.
        if (count(explode(' ', $curTitle)) <= 4) {
            $curTitle = $origTitle;
        }

        $articleTitle = $this->dom->createElement('h1');

        // The title is plain text, not markup. Append it as a text node rather than
        // assigning innerHTML, so characters such as "<", ">" and "&" are kept
        // literally instead of being re-parsed as HTML.
        // NOTE(review): assumes $this->dom is a DOMDocument (it is already used for
        // createElement/getElementsByTagName above) - confirm in Readability.
        $titleText = (string) $curTitle;
        if ($titleText !== '') {
            $articleTitle->appendChild($this->dom->createTextNode($titleText));
        }

        return $articleTitle;
    }
}