aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author Nicolas Lœuillet <nicolas.loeuillet@gmail.com> 2013-09-12 19:28:59 +0200
committer Nicolas Lœuillet <nicolas.loeuillet@gmail.com> 2013-09-12 19:28:59 +0200
commitb9523a0ba09b5641e93fcd8300dd1a9f5145da2d (patch)
tree404e6ec03b241003610aab888a65ab47e90e6e97
parent084ec2a63df90b775195ad874f8fc0d7fbefbc93 (diff)
downloadwallabag-b9523a0ba09b5641e93fcd8300dd1a9f5145da2d.tar.gz
wallabag-b9523a0ba09b5641e93fcd8300dd1a9f5145da2d.tar.zst
wallabag-b9523a0ba09b5641e93fcd8300dd1a9f5145da2d.zip
fix bug #209: titles containing a colon were badly parsed
-rw-r--r--inc/poche/PocheReadability.php46
-rw-r--r--inc/poche/Url.class.php2
-rwxr-xr-xinc/poche/config.inc.php10
3 files changed, 49 insertions, 9 deletions
diff --git a/inc/poche/PocheReadability.php b/inc/poche/PocheReadability.php
new file mode 100644
index 00000000..48ae90d0
--- /dev/null
+++ b/inc/poche/PocheReadability.php
@@ -0,0 +1,46 @@
1<?php
2
class PocheReadability extends Readability
{
    /**
     * Build the article title and return it wrapped in a new H1 element.
     *
     * The text of the first <title> element is the starting point:
     *  - when it contains " | " or " - ", the separator patterns below are
     *    used to keep one side of the title (the leading part first; if that
     *    leaves fewer than three space-separated words, the trailing part);
     *  - otherwise, when its byte length is above 150 or below 15 and the
     *    document has exactly one <h1>, the text of that <h1> is used.
     *
     * After trimming, a candidate of four words or fewer is discarded in
     * favour of the untouched <title> text. This method does not split the
     * title on ':'.
     *
     * @return DOMElement
     */
    protected function getArticleTitle()
    {
        // Number of pieces obtained when splitting on single spaces.
        $countWords = function ($text) {
            return count(explode(' ', $text));
        };

        $fullTitle = '';

        try {
            $titleNode = $this->dom->getElementsByTagName('title')->item(0);
            $fullTitle = $this->getInnerText($titleNode);
        } catch (Exception $e) {
            // Best effort: on failure the title simply stays empty.
        }

        $candidate = $fullTitle;
        $byteLength = strlen($candidate);

        if (preg_match('/ [\|\-] /', $candidate)) {
            // Greedy match: keep what precedes the last "|" or "-" followed by a space.
            $candidate = preg_replace('/(.*)[\|\-] .*/i', '$1', $fullTitle);

            if ($countWords($candidate) < 3) {
                // Too short: keep what follows the first "|" or "-" instead.
                $candidate = preg_replace('/[^\|\-]*[\|\-](.*)/i', '$1', $fullTitle);
            }
        } elseif ($byteLength > 150 || $byteLength < 15) {
            $headings = $this->dom->getElementsByTagName('h1');

            if ($headings->length == 1) {
                $candidate = $this->getInnerText($headings->item(0));
            }
        }

        $candidate = trim($candidate);

        // A very short result falls back to the original (untrimmed) title text.
        if ($countWords($candidate) <= 4) {
            $candidate = $fullTitle;
        }

        $heading = $this->dom->createElement('h1');
        $heading->innerHTML = $candidate;

        return $heading;
    }
}
diff --git a/inc/poche/Url.class.php b/inc/poche/Url.class.php
index 5a893014..600a2166 100644
--- a/inc/poche/Url.class.php
+++ b/inc/poche/Url.class.php
@@ -354,7 +354,7 @@ class Url
354 } 354 }
355 if (isset($splink)) { 355 if (isset($splink)) {
356 // Build DOM tree from HTML 356 // Build DOM tree from HTML
357 $readability = new Readability($html, $url); 357 $readability = new PocheReadability($html, $url);
358 $xpath = new DOMXPath($readability->dom); 358 $xpath = new DOMXPath($readability->dom);
359 // Loop through single_page_link xpath expressions 359 // Loop through single_page_link xpath expressions
360 $single_page_url = null; 360 $single_page_url = null;
diff --git a/inc/poche/config.inc.php b/inc/poche/config.inc.php
index 45526695..aaa26af8 100755
--- a/inc/poche/config.inc.php
+++ b/inc/poche/config.inc.php
@@ -20,6 +20,7 @@ require_once __DIR__ . '/../../inc/poche/Url.class.php';
20require_once __DIR__ . '/../../inc/3rdparty/class.messages.php'; 20require_once __DIR__ . '/../../inc/3rdparty/class.messages.php';
21require_once __DIR__ . '/../../inc/poche/Poche.class.php'; 21require_once __DIR__ . '/../../inc/poche/Poche.class.php';
22require_once __DIR__ . '/../../inc/3rdparty/Readability.php'; 22require_once __DIR__ . '/../../inc/3rdparty/Readability.php';
23require_once __DIR__ . '/../../inc/poche/PocheReadability.php';
23require_once __DIR__ . '/../../inc/3rdparty/Encoding.php'; 24require_once __DIR__ . '/../../inc/3rdparty/Encoding.php';
24require_once __DIR__ . '/../../inc/poche/Database.class.php'; 25require_once __DIR__ . '/../../inc/poche/Database.class.php';
25require_once __DIR__ . '/../../vendor/autoload.php'; 26require_once __DIR__ . '/../../vendor/autoload.php';
@@ -47,11 +48,4 @@ if (!ini_get('date.timezone') || !@date_default_timezone_set(ini_get('date.timez
47 date_default_timezone_set('UTC'); 48 date_default_timezone_set('UTC');
48} 49}
49 50
50$poche = new Poche(); 51$poche = new Poche(); \ No newline at end of file
51#XSRF protection with token
52// if (!empty($_POST)) {
53// if (!Session::isToken($_POST['token'])) {
54// die(_('Wrong token'));
55// }
56// unset($_SESSION['tokens']);
57// } \ No newline at end of file