diff options
author | Nicolas Lœuillet <nicolas@loeuillet.org> | 2014-07-15 11:49:24 +0200 |
---|---|---|
committer | Nicolas Lœuillet <nicolas@loeuillet.org> | 2014-07-15 11:49:24 +0200 |
commit | 0f6273cdb8c77436593782d42f271fddc7a7875d (patch) | |
tree | cd6e0959768f5fac7eac054572a97b3a30674af2 /inc/3rdparty/site_config/standard | |
parent | a9f5e572dde4f986a498d2fbe92a38a1b22f9595 (diff) | |
parent | 26452f891f3ba75f2636733dbfe943535636df06 (diff) | |
download | wallabag-0f6273cdb8c77436593782d42f271fddc7a7875d.tar.gz wallabag-0f6273cdb8c77436593782d42f271fddc7a7875d.tar.zst wallabag-0f6273cdb8c77436593782d42f271fddc7a7875d.zip |
Merge pull request #761 from wallabag/dev1.7.1
1.7.1
Diffstat (limited to 'inc/3rdparty/site_config/standard')
-rw-r--r-- | inc/3rdparty/site_config/standard/.about.com.txt | 14 | ||||
-rw-r--r-- | inc/3rdparty/site_config/standard/moo.nac.uci.edu.txt | 9 | ||||
-rwxr-xr-x[-rw-r--r--] | inc/3rdparty/site_config/standard/politico.com.txt | 4 |
3 files changed, 27 insertions, 0 deletions
diff --git a/inc/3rdparty/site_config/standard/.about.com.txt b/inc/3rdparty/site_config/standard/.about.com.txt new file mode 100644 index 00000000..e1ebaee3 --- /dev/null +++ b/inc/3rdparty/site_config/standard/.about.com.txt | |||
@@ -0,0 +1,14 @@ | |||
1 | body: //div[@id='articlebody'] | ||
2 | title: //h1 | ||
3 | author: //p[@id='by']//a | ||
4 | |||
5 | next_page_link: //span[@class='next']/a | ||
6 | # Not the same as below! | ||
7 | |||
8 | prune: yes | ||
9 | tidy: no | ||
10 | |||
11 | # Annoying 'next' links plainly inside the article body | ||
12 | strip: //*[text()[contains(.,'Next: ')]] | ||
13 | |||
14 | test_url: http://psychology.about.com/od/theoriesofpersonality/ss/defensemech.htm | ||
diff --git a/inc/3rdparty/site_config/standard/moo.nac.uci.edu.txt b/inc/3rdparty/site_config/standard/moo.nac.uci.edu.txt new file mode 100644 index 00000000..24c949e9 --- /dev/null +++ b/inc/3rdparty/site_config/standard/moo.nac.uci.edu.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | title: //div[@id='header']//h1[1] | ||
2 | |||
3 | body: //div[@id='content'] | ||
4 | |||
5 | strip_id_or_class: toc | ||
6 | |||
7 | prune: no | ||
8 | |||
9 | test_url: http://moo.nac.uci.edu/~hjm/HOWTO_move_data.html | ||
diff --git a/inc/3rdparty/site_config/standard/politico.com.txt b/inc/3rdparty/site_config/standard/politico.com.txt index 121fd5b9..c5302d1b 100644..100755 --- a/inc/3rdparty/site_config/standard/politico.com.txt +++ b/inc/3rdparty/site_config/standard/politico.com.txt | |||
@@ -4,10 +4,14 @@ body://div[contains(@class,"story-text")] | |||
4 | # Why doesn't this work? next_page_link://ul[contains(@class,"pagination")]/li/a[@rel="next"] | 4 | # Why doesn't this work? next_page_link://ul[contains(@class,"pagination")]/li/a[@rel="next"] |
5 | 5 | ||
6 | next_page_link://ul[contains(@class,"pagination")]/li[contains(@class, "current")]/following-sibling::node()/a | 6 | next_page_link://ul[contains(@class,"pagination")]/li[contains(@class, "current")]/following-sibling::node()/a |
7 | next_page_link://div[contains(@class,"pagination")]/ol/li[contains(@class, "current")]/following-sibling::node()/a | ||
7 | date://meta[@name="publish_date"]/@content | 8 | date://meta[@name="publish_date"]/@content |
8 | 9 | ||
9 | strip://div[contains(@class, "breadcrumbs")] | 10 | strip://div[contains(@class, "breadcrumbs")] |
10 | strip://a[contains(@class, "hidden")] | 11 | strip://a[contains(@class, "hidden")] |
11 | strip://div[contains(@class, "story-embed")] | 12 | strip://div[contains(@class, "story-embed")] |
12 | strip://div[contains(@class, "story-text")]//p/a[contains(text(), "Also on POLITICO:")]/.. | 13 | strip://div[contains(@class, "story-text")]//p/a[contains(text(), "Also on POLITICO:")]/.. |
14 | strip://div[contains(@class, "story-interrupt")] | ||
15 | strip://footer[contains(@class, "author-bio")] | ||
16 | |||
13 | test_url: http://www.politico.com/news/stories/0712/78105.html \ No newline at end of file | 17 | test_url: http://www.politico.com/news/stories/0712/78105.html \ No newline at end of file |