From cc1ec61b857cc793abab719c164496e8290291c1 Mon Sep 17 00:00:00 2001 From: Maryana Rozhankivska Date: Wed, 9 Jul 2014 16:50:52 +0300 Subject: fix of issue #619 and other similar, error in JSLikeHTMLElement: node no longer exists. --- inc/3rdparty/libraries/readability/Readability.php | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) mode change 100644 => 100755 inc/3rdparty/libraries/readability/Readability.php (limited to 'inc/3rdparty') diff --git a/inc/3rdparty/libraries/readability/Readability.php b/inc/3rdparty/libraries/readability/Readability.php old mode 100644 new mode 100755 index d0f09d74..9e77dc55 --- a/inc/3rdparty/libraries/readability/Readability.php +++ b/inc/3rdparty/libraries/readability/Readability.php @@ -679,6 +679,7 @@ class Readability } else { $topCandidate->innerHTML = $page->documentElement->innerHTML; $page->documentElement->innerHTML = ''; + $this->reinitBody(); $page->documentElement->appendChild($topCandidate); } } else { @@ -794,8 +795,7 @@ class Readability { // TODO: find out why element disappears sometimes, e.g. for this URL http://www.businessinsider.com/6-hedge-fund-etfs-for-average-investors-2011-7 // in the meantime, we check and create an empty element if it's not there. - if (!isset($this->body->childNodes)) $this->body = $this->dom->createElement('body'); - $this->body->innerHTML = $this->bodyCache; + $this->reinitBody(); if ($this->flagIsActive(self::FLAG_STRIP_UNLIKELYS)) { $this->removeFlag(self::FLAG_STRIP_UNLIKELYS); @@ -1134,5 +1134,18 @@ class Readability public function removeFlag($flag) { $this->flags = $this->flags & ~$flag; } + + /** + * Will recreate previously deleted body property + * + * @return void + */ + protected function reinitBody() { + if (!isset($this->body->childNodes)) { + $this->body = $this->dom->createElement('body'); + } + $this->body->innerHTML = $this->bodyCache; + } + } ?> \ No newline at end of file -- cgit v1.2.3 From c1aad6d5746ddb079e3b60d432212021c42c963b Mon Sep 17 00:00:00 2001 From: Maryana Rozhankivska Date: Wed, 9 Jul 2014 16:56:52 +0300 Subject: fix of issue #619 and other similar, error in JSLikeHTMLElement: node no longer exists. --- inc/3rdparty/libraries/readability/Readability.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'inc/3rdparty') diff --git a/inc/3rdparty/libraries/readability/Readability.php b/inc/3rdparty/libraries/readability/Readability.php index 9e77dc55..4fa3ba63 100755 --- a/inc/3rdparty/libraries/readability/Readability.php +++ b/inc/3rdparty/libraries/readability/Readability.php @@ -1142,9 +1142,9 @@ class Readability */ protected function reinitBody() { if (!isset($this->body->childNodes)) { - $this->body = $this->dom->createElement('body'); + $this->body = $this->dom->createElement('body'); + $this->body->innerHTML = $this->bodyCache; } - $this->body->innerHTML = $this->bodyCache; } } -- cgit v1.2.3 From d59536deea443f4bdac2c5cf1bfeea690810a817 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Si=C3=B4n=20Le=20Roux?= Date: Thu, 10 Jul 2014 18:30:44 +0200 Subject: Add support for *.about.com Includes next_page_link for multi-page articles and strips pesky in-line 'next' links from the article body. Also includes an Xpath for author but I can't see where this is used in the wallabag UI. The 'tidy' option is turned off because it messed up bulleted lists. Tested with psychology.about.com and food.about.com. --- inc/3rdparty/site_config/standard/.about.com.txt | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 inc/3rdparty/site_config/standard/.about.com.txt (limited to 'inc/3rdparty') diff --git a/inc/3rdparty/site_config/standard/.about.com.txt b/inc/3rdparty/site_config/standard/.about.com.txt new file mode 100644 index 00000000..e1ebaee3 --- /dev/null +++ b/inc/3rdparty/site_config/standard/.about.com.txt @@ -0,0 +1,14 @@ +body: //div[@id='articlebody'] +title: //h1 +author: //p[@id='by']//a + +next_page_link: //span[@class='next']/a +# Not the same as below! + +prune: yes +tidy: no + +# Annoying 'next' links plainly inside the article body +strip: //*[text()[contains(.,'Next: ')]] + +test_url: http://psychology.about.com/od/theoriesofpersonality/ss/defensemech.htm -- cgit v1.2.3 From 5594d7d05469bcff2a046a99d49990bd63a6fd4f Mon Sep 17 00:00:00 2001 From: Maryana Rozhankivska Date: Mon, 21 Jul 2014 19:34:59 +0300 Subject: issue #750 - config for dn.pt site added --- inc/3rdparty/site_config/standard/dn.pt.txt | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100755 inc/3rdparty/site_config/standard/dn.pt.txt (limited to 'inc/3rdparty') diff --git a/inc/3rdparty/site_config/standard/dn.pt.txt b/inc/3rdparty/site_config/standard/dn.pt.txt new file mode 100755 index 00000000..051b8cb9 --- /dev/null +++ b/inc/3rdparty/site_config/standard/dn.pt.txt @@ -0,0 +1,9 @@ +single_page_link: concat('http://www.dn.pt/Common/print.aspx?content_id=', //input[@type='hidden' and @name='link-comments']/@value) +# + +title: //h1 +author: //div[@class="Author"] + +strip: //div[@class="Patrocinio"] + +test_url: http://www.dn.pt/inicio/opiniao/interior.aspx?content_id=3972244&seccao=Alberto%20Gon%E7alves&tag=Opini%E3o%20-%20Em%20Foco&page=1 \ No newline at end of file -- cgit v1.2.3