diff options
author | Nicolas Lœuillet <nicolas@loeuillet.org> | 2014-07-23 13:42:30 +0200 |
---|---|---|
committer | Nicolas Lœuillet <nicolas@loeuillet.org> | 2014-07-23 13:42:30 +0200 |
commit | 505a74ad1de7cf2cd3605e793233365501f03d87 (patch) | |
tree | 41206132200aa9390e11d600ad2b84ffa23242e4 /inc/3rdparty | |
parent | a818ff2000c721c6f078c206c3f5214c558a5546 (diff) | |
parent | ebd6bf6007e0fad4c3e11dac0e79f687e1d195a2 (diff) | |
download | wallabag-505a74ad1de7cf2cd3605e793233365501f03d87.tar.gz wallabag-505a74ad1de7cf2cd3605e793233365501f03d87.tar.zst wallabag-505a74ad1de7cf2cd3605e793233365501f03d87.zip |
Merge branch 'dev' into refactor
Conflicts:
check_setup.php
index.php
Diffstat (limited to 'inc/3rdparty')
-rwxr-xr-x[-rw-r--r--] | inc/3rdparty/libraries/readability/Readability.php | 17 | ||||
-rw-r--r-- | inc/3rdparty/site_config/standard/.about.com.txt | 14 | ||||
-rwxr-xr-x | inc/3rdparty/site_config/standard/dn.pt.txt | 9 |
3 files changed, 38 insertions, 2 deletions
diff --git a/inc/3rdparty/libraries/readability/Readability.php b/inc/3rdparty/libraries/readability/Readability.php index d0f09d74..4fa3ba63 100644..100755 --- a/inc/3rdparty/libraries/readability/Readability.php +++ b/inc/3rdparty/libraries/readability/Readability.php | |||
@@ -679,6 +679,7 @@ class Readability | |||
679 | } else { | 679 | } else { |
680 | $topCandidate->innerHTML = $page->documentElement->innerHTML; | 680 | $topCandidate->innerHTML = $page->documentElement->innerHTML; |
681 | $page->documentElement->innerHTML = ''; | 681 | $page->documentElement->innerHTML = ''; |
682 | $this->reinitBody(); | ||
682 | $page->documentElement->appendChild($topCandidate); | 683 | $page->documentElement->appendChild($topCandidate); |
683 | } | 684 | } |
684 | } else { | 685 | } else { |
@@ -794,8 +795,7 @@ class Readability | |||
794 | { | 795 | { |
795 | // TODO: find out why element disappears sometimes, e.g. for this URL http://www.businessinsider.com/6-hedge-fund-etfs-for-average-investors-2011-7 | 796 | // TODO: find out why element disappears sometimes, e.g. for this URL http://www.businessinsider.com/6-hedge-fund-etfs-for-average-investors-2011-7 |
796 | // in the meantime, we check and create an empty element if it's not there. | 797 | // in the meantime, we check and create an empty element if it's not there. |
797 | if (!isset($this->body->childNodes)) $this->body = $this->dom->createElement('body'); | 798 | $this->reinitBody(); |
798 | $this->body->innerHTML = $this->bodyCache; | ||
799 | 799 | ||
800 | if ($this->flagIsActive(self::FLAG_STRIP_UNLIKELYS)) { | 800 | if ($this->flagIsActive(self::FLAG_STRIP_UNLIKELYS)) { |
801 | $this->removeFlag(self::FLAG_STRIP_UNLIKELYS); | 801 | $this->removeFlag(self::FLAG_STRIP_UNLIKELYS); |
@@ -1134,5 +1134,18 @@ class Readability | |||
1134 | public function removeFlag($flag) { | 1134 | public function removeFlag($flag) { |
1135 | $this->flags = $this->flags & ~$flag; | 1135 | $this->flags = $this->flags & ~$flag; |
1136 | } | 1136 | } |
1137 | |||
1138 | /** | ||
1139 | * Will recreate previously deleted body property | ||
1140 | * | ||
1141 | * @return void | ||
1142 | */ | ||
1143 | protected function reinitBody() { | ||
1144 | if (!isset($this->body->childNodes)) { | ||
1145 | $this->body = $this->dom->createElement('body'); | ||
1146 | $this->body->innerHTML = $this->bodyCache; | ||
1147 | } | ||
1148 | } | ||
1149 | |||
1137 | } | 1150 | } |
1138 | ?> \ No newline at end of file | 1151 | ?> \ No newline at end of file |
diff --git a/inc/3rdparty/site_config/standard/.about.com.txt b/inc/3rdparty/site_config/standard/.about.com.txt new file mode 100644 index 00000000..e1ebaee3 --- /dev/null +++ b/inc/3rdparty/site_config/standard/.about.com.txt | |||
@@ -0,0 +1,14 @@ | |||
1 | body: //div[@id='articlebody'] | ||
2 | title: //h1 | ||
3 | author: //p[@id='by']//a | ||
4 | |||
5 | next_page_link: //span[@class='next']/a | ||
6 | # Not the same as below! | ||
7 | |||
8 | prune: yes | ||
9 | tidy: no | ||
10 | |||
11 | # Annoying 'next' links plainly inside the article body | ||
12 | strip: //*[text()[contains(.,'Next: ')]] | ||
13 | |||
14 | test_url: http://psychology.about.com/od/theoriesofpersonality/ss/defensemech.htm | ||
diff --git a/inc/3rdparty/site_config/standard/dn.pt.txt b/inc/3rdparty/site_config/standard/dn.pt.txt new file mode 100755 index 00000000..051b8cb9 --- /dev/null +++ b/inc/3rdparty/site_config/standard/dn.pt.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | single_page_link: concat('http://www.dn.pt/Common/print.aspx?content_id=', //input[@type='hidden' and @name='link-comments']/@value) | ||
2 | #<input type="hidden" name="link-comments" class="link-comments" value="3972244"> | ||
3 | |||
4 | title: //h1 | ||
5 | author: //div[@class="Author"] | ||
6 | |||
7 | strip: //div[@class="Patrocinio"] | ||
8 | |||
9 | test_url: http://www.dn.pt/inicio/opiniao/interior.aspx?content_id=3972244&seccao=Alberto%20Gon%E7alves&tag=Opini%E3o%20-%20Em%20Foco&page=1 \ No newline at end of file | ||