diff options
Diffstat (limited to 'inc/3rdparty/site_config/standard/businessweek.com.txt')
-rw-r--r-- | inc/3rdparty/site_config/standard/businessweek.com.txt | 30 |
1 files changed, 30 insertions, 0 deletions
diff --git a/inc/3rdparty/site_config/standard/businessweek.com.txt b/inc/3rdparty/site_config/standard/businessweek.com.txt
new file mode 100644
index 00000000..7b3d063b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/businessweek.com.txt
@@ -0,0 +1,30 @@ | |||
1 | # stories span several pages; the multi-page layout should be detected | ||
2 | body: //div[@id='storyBody'] | ||
3 | body: //div[@id='article_body'] | ||
4 | body: //div[@id='story_body'] | ||
5 | |||
6 | title: //h1[@id='article_headline'] | ||
7 | |||
8 | # article author | ||
9 | author: //p[@class='author']/a | ||
10 | # story author(s) | ||
11 | author: substring-after(//p[@class='byline'], 'By ') | ||
12 | |||
13 | # article date | ||
14 | date: //span[@class='published_date'] | ||
15 | # story date | ||
16 | date: //span[@class='date'] | ||
17 | |||
18 | date: substring-after(//div[contains(@class,'attributor')],'on') | ||
19 | strip_id_or_class: inset | ||
20 | strip: //p/span[@class='photoCredit'] | ||
21 | strip: //h1 | ||
22 | |||
23 | strip_id_or_class: page_count | ||
24 | strip_id_or_class: tools | ||
25 | strip_id_or_class: pagination | ||
26 | |||
27 | single_page_link: //li[@id='stPrint']/a | ||
28 | |||
29 | test_url: http://www.businessweek.com/magazine/buyback-insurance-a-good-deal-for-retailers-07282011.html | ||
30 | test_url: http://www.businessweek.com/articles/2012-06-06/american-pain-the-largest-u-dot-s-dot-pill-mills-rise-and-fall | ||
\ No newline at end of file