aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorSiôn Le Roux <sinisterstuf@gmail.com>2014-07-10 18:30:44 +0200
committerSiôn Le Roux <sinisterstuf@gmail.com>2014-07-11 00:04:24 +0200
commitd59536deea443f4bdac2c5cf1bfeea690810a817 (patch)
treec53ec785f3e36bcca07c09f58fa1b496741b6304
parent6400371ff93782d25cdbd50aa224c70145b3890a (diff)
downloadwallabag-d59536deea443f4bdac2c5cf1bfeea690810a817.tar.gz
wallabag-d59536deea443f4bdac2c5cf1bfeea690810a817.tar.zst
wallabag-d59536deea443f4bdac2c5cf1bfeea690810a817.zip
Add support for *.about.com
Includes next_page_link for multi-page articles and strips pesky inline 'next' links from the article body. Also includes an XPath for the author, but I can't see where this is used in the wallabag UI. The 'tidy' option is turned off because it messed up bulleted lists. Tested with psychology.about.com and food.about.com.
-rw-r--r--inc/3rdparty/site_config/standard/.about.com.txt14
1 files changed, 14 insertions, 0 deletions
diff --git a/inc/3rdparty/site_config/standard/.about.com.txt b/inc/3rdparty/site_config/standard/.about.com.txt
new file mode 100644
index 00000000..e1ebaee3
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/.about.com.txt
@@ -0,0 +1,14 @@
1body: //div[@id='articlebody']
2title: //h1
3author: //p[@id='by']//a
4
5next_page_link: //span[@class='next']/a
6# Not the same as the in-body 'Next: ' links stripped below!
7
8prune: yes
9tidy: no
10
11# Annoying 'next' links plainly inside the article body
12strip: //*[text()[contains(.,'Next: ')]]
13
14test_url: http://psychology.about.com/od/theoriesofpersonality/ss/defensemech.htm