aboutsummaryrefslogtreecommitdiffhomepage
path: root/inc/3rdparty/site_config/standard/businessweek.com.txt
diff options
context:
space:
mode:
Diffstat (limited to 'inc/3rdparty/site_config/standard/businessweek.com.txt')
-rw-r--r-- inc/3rdparty/site_config/standard/businessweek.com.txt 30
1 files changed, 30 insertions, 0 deletions
diff --git a/inc/3rdparty/site_config/standard/businessweek.com.txt b/inc/3rdparty/site_config/standard/businessweek.com.txt
new file mode 100644
index 00000000..7b3d063b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/businessweek.com.txt
@@ -0,0 +1,30 @@
1# a story may span several pages; this should be detected
2body: //div[@id='storyBody']
3body: //div[@id='article_body']
4body: //div[@id='story_body']
5
6title: //h1[@id='article_headline']
7
8# article author
9author: //p[@class='author']/a
10# story author(s)
11author: substring-after(//p[@class='byline'], 'By ')
12
13# article date
14date: //span[@class='published_date']
15# story date
16date: //span[@class='date']
17
18date: substring-after(//div[contains(@class,'attributor')],'on')
19strip_id_or_class: inset
20strip: //p/span[@class='photoCredit']
21strip: //h1
22
23strip_id_or_class: page_count
24strip_id_or_class: tools
25strip_id_or_class: pagination
26
27single_page_link: //li[@id='stPrint']/a
28
29test_url: http://www.businessweek.com/magazine/buyback-insurance-a-good-deal-for-retailers-07282011.html
30test_url: http://www.businessweek.com/articles/2012-06-06/american-pain-the-largest-u-dot-s-dot-pill-mills-rise-and-fall
\ No newline at end of file