]> git.immae.eu Git - github/wallabag/wallabag.git/blob - inc/3rdparty/site_config/standard/businessweek.com.txt
03085593f9587f1a1dea15f671922cf647d3c97a
[github/wallabag/wallabag.git] / inc / 3rdparty / site_config / standard / businessweek.com.txt
1 # Stories can span several pages (this should be detected). The body: lines below are alternative XPaths for the content container, one per div id.
2 body: //div[@id='storyBody']
3 body: //div[@id='article_body']
4 body: //div[@id='story_body']
5 # Headline: the h1 whose id is 'article_headline'
6 title://h1[@id='article_headline']
7
8 # article author: the link inside p.author
9 author: //p[@class='author']/a
10 # story author(s): the text of p.byline that follows the leading 'By '
11 author: substring-after(//p[@class='byline'], 'By ')
12
13 # article date: span.published_date
14 date: //span[@class='published_date']
15 # story date: span.date
16 date: //span[@class='date']
17 # NOTE(review): takes the text after the first 'on' in the div whose class contains 'attributor'; 'on' is matched as a plain substring, so it could also hit inside a word (e.g. a name) - confirm against real pages
18 date: substring-after(//div[contains(@class,'attributor')],'on')
19 strip_id_or_class: inset
20 strip: //p/span[@class='photoCredit']
21 strip: //h1
22 # Further id/class names to strip - presumably page-count, toolbar and pagination chrome; verify against the page markup
23 strip_id_or_class: page_count
24 strip_id_or_class: tools
25 strip_id_or_class: pagination
26 # Link inside li#stPrint - presumably the print view, which serves the whole story on one page; confirm
27 single_page_link: //li[@id='stPrint']/a
28 # Sample URLs for testing this config: one under /magazine/ and one under /articles/
29 test_url: http://www.businessweek.com/magazine/buyback-insurance-a-good-deal-for-retailers-07282011.html
30 test_url: http://www.businessweek.com/articles/2012-06-06/american-pain-the-largest-u-dot-s-dot-pill-mills-rise-and-fall