diff options
author | Nicolas Lœuillet <nicolas.loeuillet@gmail.com> | 2013-09-12 19:28:59 +0200 |
---|---|---|
committer | Nicolas Lœuillet <nicolas.loeuillet@gmail.com> | 2013-09-12 19:28:59 +0200 |
commit | b9523a0ba09b5641e93fcd8300dd1a9f5145da2d (patch) | |
tree | 404e6ec03b241003610aab888a65ab47e90e6e97 /inc | |
parent | 084ec2a63df90b775195ad874f8fc0d7fbefbc93 (diff) | |
download | wallabag-b9523a0ba09b5641e93fcd8300dd1a9f5145da2d.tar.gz wallabag-b9523a0ba09b5641e93fcd8300dd1a9f5145da2d.tar.zst wallabag-b9523a0ba09b5641e93fcd8300dd1a9f5145da2d.zip |
fix bug #209: titles with colon bad parsed
Diffstat (limited to 'inc')
-rw-r--r-- | inc/poche/PocheReadability.php | 46 | ||||
-rw-r--r-- | inc/poche/Url.class.php | 2 | ||||
-rwxr-xr-x | inc/poche/config.inc.php | 10 |
3 files changed, 49 insertions, 9 deletions
diff --git a/inc/poche/PocheReadability.php b/inc/poche/PocheReadability.php new file mode 100644 index 00000000..48ae90d0 --- /dev/null +++ b/inc/poche/PocheReadability.php | |||
@@ -0,0 +1,46 @@ | |||
1 | <?php | ||
2 | |||
3 | class PocheReadability extends Readability | ||
4 | { | ||
5 | /** | ||
6 | * Get the article title as an H1. | ||
7 | * | ||
8 | * @return DOMElement | ||
9 | */ | ||
10 | protected function getArticleTitle() { | ||
11 | $curTitle = ''; | ||
12 | $origTitle = ''; | ||
13 | |||
14 | try { | ||
15 | $curTitle = $origTitle = $this->getInnerText($this->dom->getElementsByTagName('title')->item(0)); | ||
16 | } catch(Exception $e) {} | ||
17 | |||
18 | if (preg_match('/ [\|\-] /', $curTitle)) | ||
19 | { | ||
20 | $curTitle = preg_replace('/(.*)[\|\-] .*/i', '$1', $origTitle); | ||
21 | |||
22 | if (count(explode(' ', $curTitle)) < 3) { | ||
23 | $curTitle = preg_replace('/[^\|\-]*[\|\-](.*)/i', '$1', $origTitle); | ||
24 | } | ||
25 | } | ||
26 | else if(strlen($curTitle) > 150 || strlen($curTitle) < 15) | ||
27 | { | ||
28 | $hOnes = $this->dom->getElementsByTagName('h1'); | ||
29 | if($hOnes->length == 1) | ||
30 | { | ||
31 | $curTitle = $this->getInnerText($hOnes->item(0)); | ||
32 | } | ||
33 | } | ||
34 | |||
35 | $curTitle = trim($curTitle); | ||
36 | |||
37 | if (count(explode(' ', $curTitle)) <= 4) { | ||
38 | $curTitle = $origTitle; | ||
39 | } | ||
40 | |||
41 | $articleTitle = $this->dom->createElement('h1'); | ||
42 | $articleTitle->innerHTML = $curTitle; | ||
43 | |||
44 | return $articleTitle; | ||
45 | } | ||
46 | } \ No newline at end of file | ||
diff --git a/inc/poche/Url.class.php b/inc/poche/Url.class.php index 5a893014..600a2166 100644 --- a/inc/poche/Url.class.php +++ b/inc/poche/Url.class.php | |||
@@ -354,7 +354,7 @@ class Url | |||
354 | } | 354 | } |
355 | if (isset($splink)) { | 355 | if (isset($splink)) { |
356 | // Build DOM tree from HTML | 356 | // Build DOM tree from HTML |
357 | $readability = new Readability($html, $url); | 357 | $readability = new PocheReadability($html, $url); |
358 | $xpath = new DOMXPath($readability->dom); | 358 | $xpath = new DOMXPath($readability->dom); |
359 | // Loop through single_page_link xpath expressions | 359 | // Loop through single_page_link xpath expressions |
360 | $single_page_url = null; | 360 | $single_page_url = null; |
diff --git a/inc/poche/config.inc.php b/inc/poche/config.inc.php index 45526695..aaa26af8 100755 --- a/inc/poche/config.inc.php +++ b/inc/poche/config.inc.php | |||
@@ -20,6 +20,7 @@ require_once __DIR__ . '/../../inc/poche/Url.class.php'; | |||
20 | require_once __DIR__ . '/../../inc/3rdparty/class.messages.php'; | 20 | require_once __DIR__ . '/../../inc/3rdparty/class.messages.php'; |
21 | require_once __DIR__ . '/../../inc/poche/Poche.class.php'; | 21 | require_once __DIR__ . '/../../inc/poche/Poche.class.php'; |
22 | require_once __DIR__ . '/../../inc/3rdparty/Readability.php'; | 22 | require_once __DIR__ . '/../../inc/3rdparty/Readability.php'; |
23 | require_once __DIR__ . '/../../inc/poche/PocheReadability.php'; | ||
23 | require_once __DIR__ . '/../../inc/3rdparty/Encoding.php'; | 24 | require_once __DIR__ . '/../../inc/3rdparty/Encoding.php'; |
24 | require_once __DIR__ . '/../../inc/poche/Database.class.php'; | 25 | require_once __DIR__ . '/../../inc/poche/Database.class.php'; |
25 | require_once __DIR__ . '/../../vendor/autoload.php'; | 26 | require_once __DIR__ . '/../../vendor/autoload.php'; |
@@ -47,11 +48,4 @@ if (!ini_get('date.timezone') || !@date_default_timezone_set(ini_get('date.timez | |||
47 | date_default_timezone_set('UTC'); | 48 | date_default_timezone_set('UTC'); |
48 | } | 49 | } |
49 | 50 | ||
50 | $poche = new Poche(); | 51 | $poche = new Poche(); \ No newline at end of file |
51 | #XSRF protection with token | ||
52 | // if (!empty($_POST)) { | ||
53 | // if (!Session::isToken($_POST['token'])) { | ||
54 | // die(_('Wrong token')); | ||
55 | // } | ||
56 | // unset($_SESSION['tokens']); | ||
57 | // } \ No newline at end of file | ||