diff options
Diffstat (limited to 'inc/3rdparty/site_config/standard/businessweek.com.txt')
-rwxr-xr-x[-rw-r--r--] | inc/3rdparty/site_config/standard/businessweek.com.txt | 58 |
1 file changed, 29 insertions, 29 deletions
diff --git a/inc/3rdparty/site_config/standard/businessweek.com.txt b/inc/3rdparty/site_config/standard/businessweek.com.txt
index 7b3d063b..03085593 100644..100755
--- a/inc/3rdparty/site_config/standard/businessweek.com.txt
+++ b/inc/3rdparty/site_config/standard/businessweek.com.txt
@@ -1,30 +1,30 @@ | |||
1 | # story has several pages, should be detected | 1 | # story has several pages, should be detected |
2 | body: //div[@id='storyBody'] | 2 | body: //div[@id='storyBody'] |
3 | body: //div[@id='article_body'] | 3 | body: //div[@id='article_body'] |
4 | body: //div[@id='story_body'] | 4 | body: //div[@id='story_body'] |
5 | 5 | ||
6 | title://h1[@id='article_headline'] | 6 | title://h1[@id='article_headline'] |
7 | 7 | ||
8 | # article author | 8 | # article author |
9 | author: //p[@class='author']/a | 9 | author: //p[@class='author']/a |
10 | # story author(s) | 10 | # story author(s) |
11 | author: substring-after(//p[@class='byline'], 'By ') | 11 | author: substring-after(//p[@class='byline'], 'By ') |
12 | 12 | ||
13 | # article date | 13 | # article date |
14 | date: //span[@class='published_date'] | 14 | date: //span[@class='published_date'] |
15 | # story date | 15 | # story date |
16 | date: //span[@class='date'] | 16 | date: //span[@class='date'] |
17 | 17 | ||
18 | date: substring-after(//div[contains(@class,'attributor')],'on') | 18 | date: substring-after(//div[contains(@class,'attributor')],'on') |
19 | strip_id_or_class: inset | 19 | strip_id_or_class: inset |
20 | strip: //p/span[@class='photoCredit'] | 20 | strip: //p/span[@class='photoCredit'] |
21 | strip: //h1 | 21 | strip: //h1 |
22 | 22 | ||
23 | strip_id_or_class: page_count | 23 | strip_id_or_class: page_count |
24 | strip_id_or_class: tools | 24 | strip_id_or_class: tools |
25 | strip_id_or_class: pagination | 25 | strip_id_or_class: pagination |
26 | 26 | ||
27 | single_page_link: //li[@id='stPrint']/a | 27 | single_page_link: //li[@id='stPrint']/a |
28 | 28 | ||
29 | test_url: http://www.businessweek.com/magazine/buyback-insurance-a-good-deal-for-retailers-07282011.html | 29 | test_url: http://www.businessweek.com/magazine/buyback-insurance-a-good-deal-for-retailers-07282011.html |
30 | test_url: http://www.businessweek.com/articles/2012-06-06/american-pain-the-largest-u-dot-s-dot-pill-mills-rise-and-fall \ No newline at end of file | 30 | test_url: http://www.businessweek.com/articles/2012-06-06/american-pain-the-largest-u-dot-s-dot-pill-mills-rise-and-fall \ No newline at end of file |