about summary refs log tree commit diff homepage
path: root/inc/3rdparty/site_config/standard
diff options
context:
space:
mode:
Diffstat (limited to 'inc/3rdparty/site_config/standard')
-rw-r--r-- inc/3rdparty/site_config/standard/.about.com.txt 14
-rw-r--r-- inc/3rdparty/site_config/standard/moo.nac.uci.edu.txt 9
-rwxr-xr-x [-rw-r--r--] inc/3rdparty/site_config/standard/politico.com.txt 4
-rw-r--r-- inc/3rdparty/site_config/standard/version.txt 2
4 files changed, 28 insertions, 1 deletions
diff --git a/inc/3rdparty/site_config/standard/.about.com.txt b/inc/3rdparty/site_config/standard/.about.com.txt
new file mode 100644
index 00000000..e1ebaee3
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/.about.com.txt
@@ -0,0 +1,14 @@
+body: //div[@id='articlebody']
+title: //h1
+author: //p[@id='by']//a
+
+next_page_link: //span[@class='next']/a
+# Not the same as below!
+
+prune: yes
+tidy: no
+
+# Annoying 'next' links plainly inside the article body
+strip: //*[text()[contains(.,'Next: ')]]
+
+test_url: http://psychology.about.com/od/theoriesofpersonality/ss/defensemech.htm
diff --git a/inc/3rdparty/site_config/standard/moo.nac.uci.edu.txt b/inc/3rdparty/site_config/standard/moo.nac.uci.edu.txt
new file mode 100644
index 00000000..24c949e9
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/moo.nac.uci.edu.txt
@@ -0,0 +1,9 @@
+title: //div[@id='header']//h1[1]
+
+body: //div[@id='content']
+
+strip_id_or_class: toc
+
+prune: no
+
+test_url: http://moo.nac.uci.edu/~hjm/HOWTO_move_data.html
diff --git a/inc/3rdparty/site_config/standard/politico.com.txt b/inc/3rdparty/site_config/standard/politico.com.txt
index 121fd5b9..c5302d1b 100644..100755
--- a/inc/3rdparty/site_config/standard/politico.com.txt
+++ b/inc/3rdparty/site_config/standard/politico.com.txt
@@ -4,10 +4,14 @@ body://div[contains(@class,"story-text")]
 # Why doesn't this work? next_page_link://ul[contains(@class,"pagination")]/li/a[@rel="next"]
 
 next_page_link://ul[contains(@class,"pagination")]/li[contains(@class, "current")]/following-sibling::node()/a
+next_page_link://div[contains(@class,"pagination")]/ol/li[contains(@class, "current")]/following-sibling::node()/a
 date://meta[@name="publish_date"]/@content
 
 strip://div[contains(@class, "breadcrumbs")]
 strip://a[contains(@class, "hidden")]
 strip://div[contains(@class, "story-embed")]
 strip://div[contains(@class, "story-text")]//p/a[contains(text(), "Also on POLITICO:")]/..
+strip://div[contains(@class, "story-interrupt")]
+strip://footer[contains(@class, "author-bio")]
+
 test_url: http://www.politico.com/news/stories/0712/78105.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/version.txt b/inc/3rdparty/site_config/standard/version.txt
index bf0d87ab..eaf01ebd 100644
--- a/inc/3rdparty/site_config/standard/version.txt
+++ b/inc/3rdparty/site_config/standard/version.txt
@@ -1 +1 @@
-4
\ No newline at end of file
+2013-05-12T22:53:07Z
\ No newline at end of file