diff options
Diffstat (limited to 'inc/3rdparty/site_config/standard/businessweek.com.txt')
-rwxr-xr-x | inc/3rdparty/site_config/standard/businessweek.com.txt | 41 |
1 files changed, 14 insertions, 27 deletions
diff --git a/inc/3rdparty/site_config/standard/businessweek.com.txt b/inc/3rdparty/site_config/standard/businessweek.com.txt
index 03085593..f546b708 100755
--- a/inc/3rdparty/site_config/standard/businessweek.com.txt
+++ b/inc/3rdparty/site_config/standard/businessweek.com.txt
@@ -1,30 +1,17 @@ | |||
1 | # story has several pages, should be detected | 1 | # include the lead graphic in the body, if available |
2 | body: //div[@id='storyBody'] | 2 | body: //div[contains(concat(' ', normalize-space(@id), ' '), ' lead_graphic ')] | //div[contains(concat(' ', normalize-space(@itemprop), ' '), ' articleBody ')] |
3 | body: //div[@id='article_body'] | 3 | title: //h1[contains(concat(' ', normalize-space(@itemprop), ' '), ' headline ')] |
4 | body: //div[@id='story_body'] | 4 | date: //time[contains(concat(' ', normalize-space(@itemprop), ' '), ' datePublished ')] |
5 | 5 | ||
6 | title://h1[@id='article_headline'] | 6 | strip_id_or_class: photo_credit |
7 | 7 | strip_id_or_class: photo_caption | |
8 | # article author | 8 | strip_id_or_class: inline_gallery |
9 | author: //p[@class='author']/a | 9 | # pull quote, often inside a blockquote element |
10 | # story author(s) | 10 | strip_id_or_class: pq |
11 | author: substring-after(//p[@class='byline'], 'By ') | 11 | strip_id_or_class: credit |
12 | 12 | strip_id_or_class: figcaption | |
13 | # article date | 13 | strip_id_or_class: related_item |
14 | date: //span[@class='published_date'] | ||
15 | # story date | ||
16 | date: //span[@class='date'] | ||
17 | |||
18 | date: substring-after(//div[contains(@class,'attributor')],'on') | ||
19 | strip_id_or_class: inset | ||
20 | strip: //p/span[@class='photoCredit'] | ||
21 | strip: //h1 | ||
22 | |||
23 | strip_id_or_class: page_count | ||
24 | strip_id_or_class: tools | ||
25 | strip_id_or_class: pagination | ||
26 | |||
27 | single_page_link: //li[@id='stPrint']/a | ||
28 | 14 | ||
29 | test_url: http://www.businessweek.com/magazine/buyback-insurance-a-good-deal-for-retailers-07282011.html | 15 | test_url: http://www.businessweek.com/magazine/buyback-insurance-a-good-deal-for-retailers-07282011.html |
30 | test_url: http://www.businessweek.com/articles/2012-06-06/american-pain-the-largest-u-dot-s-dot-pill-mills-rise-and-fall \ No newline at end of file | 16 | test_url: http://www.businessweek.com/articles/2012-06-06/american-pain-the-largest-u-dot-s-dot-pill-mills-rise-and-fall |
17 | test_url: http://www.businessweek.com/articles/2014-07-09/american-apparel-dov-charneys-sleazy-struggle-for-control | ||