diff options
author | Thomas Citharel <tcit@tcit.fr> | 2014-10-27 09:28:30 +0100 |
---|---|---|
committer | Thomas Citharel <tcit@tcit.fr> | 2014-10-27 09:28:30 +0100 |
commit | 24479b479d6a9fc406c92def1f7609fbfa142bcd (patch) | |
tree | 8cf17d3e8a13bf21d9709c1df7c790107ab5c1f9 /inc/3rdparty/site_config/standard/businessweek.com.txt | |
parent | 4a50075784bb13ed0764a8a175779d9683782846 (diff) | |
parent | 90a1a78b1e2f4d40e1d9b8e6f46aca129a9d7bcf (diff) | |
download | wallabag-24479b479d6a9fc406c92def1f7609fbfa142bcd.tar.gz wallabag-24479b479d6a9fc406c92def1f7609fbfa142bcd.tar.zst wallabag-24479b479d6a9fc406c92def1f7609fbfa142bcd.zip |
Merge pull request #888 from wallabag/updated-site-config
updated site_config
Diffstat (limited to 'inc/3rdparty/site_config/standard/businessweek.com.txt')
-rwxr-xr-x | inc/3rdparty/site_config/standard/businessweek.com.txt | 41 |
1 file changed, 14 insertions, 27 deletions
diff --git a/inc/3rdparty/site_config/standard/businessweek.com.txt b/inc/3rdparty/site_config/standard/businessweek.com.txt
index 03085593..f546b708 100755
--- a/inc/3rdparty/site_config/standard/businessweek.com.txt
+++ b/inc/3rdparty/site_config/standard/businessweek.com.txt
@@ -1,30 +1,17 @@ | |||
1 | # story has several pages, should be detected | 1 | # include the lead graphic in the body, if available |
2 | body: //div[@id='storyBody'] | 2 | body: //div[contains(concat(' ', normalize-space(@id), ' '), ' lead_graphic ')] | //div[contains(concat(' ', normalize-space(@itemprop), ' '), ' articleBody ')] |
3 | body: //div[@id='article_body'] | 3 | title: //h1[contains(concat(' ', normalize-space(@itemprop), ' '), ' headline ')] |
4 | body: //div[@id='story_body'] | 4 | date: //time[contains(concat(' ', normalize-space(@itemprop), ' '), ' datePublished ')] |
5 | 5 | ||
6 | title://h1[@id='article_headline'] | 6 | strip_id_or_class: photo_credit |
7 | 7 | strip_id_or_class: photo_caption | |
8 | # article author | 8 | strip_id_or_class: inline_gallery |
9 | author: //p[@class='author']/a | 9 | # pull quote, often inside a blockquote element |
10 | # story author(s) | 10 | strip_id_or_class: pq |
11 | author: substring-after(//p[@class='byline'], 'By ') | 11 | strip_id_or_class: credit |
12 | 12 | strip_id_or_class: figcaption | |
13 | # article date | 13 | strip_id_or_class: related_item |
14 | date: //span[@class='published_date'] | ||
15 | # story date | ||
16 | date: //span[@class='date'] | ||
17 | |||
18 | date: substring-after(//div[contains(@class,'attributor')],'on') | ||
19 | strip_id_or_class: inset | ||
20 | strip: //p/span[@class='photoCredit'] | ||
21 | strip: //h1 | ||
22 | |||
23 | strip_id_or_class: page_count | ||
24 | strip_id_or_class: tools | ||
25 | strip_id_or_class: pagination | ||
26 | |||
27 | single_page_link: //li[@id='stPrint']/a | ||
28 | 14 | ||
29 | test_url: http://www.businessweek.com/magazine/buyback-insurance-a-good-deal-for-retailers-07282011.html | 15 | test_url: http://www.businessweek.com/magazine/buyback-insurance-a-good-deal-for-retailers-07282011.html |
30 | test_url: http://www.businessweek.com/articles/2012-06-06/american-pain-the-largest-u-dot-s-dot-pill-mills-rise-and-fall \ No newline at end of file | 16 | test_url: http://www.businessweek.com/articles/2012-06-06/american-pain-the-largest-u-dot-s-dot-pill-mills-rise-and-fall |
17 | test_url: http://www.businessweek.com/articles/2014-07-09/american-apparel-dov-charneys-sleazy-struggle-for-control | ||