about | summary | refs | log | tree | commit | diff | homepage
path: root/inc/3rdparty/site_config/standard/businessweek.com.txt
diff options
context:
space:
mode:
author Thomas Citharel <tcit@tcit.fr> 2014-10-27 09:28:30 +0100
committer Thomas Citharel <tcit@tcit.fr> 2014-10-27 09:28:30 +0100
commit 24479b479d6a9fc406c92def1f7609fbfa142bcd (patch)
tree 8cf17d3e8a13bf21d9709c1df7c790107ab5c1f9 /inc/3rdparty/site_config/standard/businessweek.com.txt
parent 4a50075784bb13ed0764a8a175779d9683782846 (diff)
parent 90a1a78b1e2f4d40e1d9b8e6f46aca129a9d7bcf (diff)
download wallabag-24479b479d6a9fc406c92def1f7609fbfa142bcd.tar.gz
wallabag-24479b479d6a9fc406c92def1f7609fbfa142bcd.tar.zst
wallabag-24479b479d6a9fc406c92def1f7609fbfa142bcd.zip
Merge pull request #888 from wallabag/updated-site-config
updated site_config
Diffstat (limited to 'inc/3rdparty/site_config/standard/businessweek.com.txt')
-rwxr-xr-x inc/3rdparty/site_config/standard/businessweek.com.txt 41
1 files changed, 14 insertions, 27 deletions
diff --git a/inc/3rdparty/site_config/standard/businessweek.com.txt b/inc/3rdparty/site_config/standard/businessweek.com.txt
index 03085593..f546b708 100755
--- a/inc/3rdparty/site_config/standard/businessweek.com.txt
+++ b/inc/3rdparty/site_config/standard/businessweek.com.txt
@@ -1,30 +1,17 @@
1# story has several pages, should be detected 1# include the lead graphic in the body, if available
2body: //div[@id='storyBody'] 2body: //div[contains(concat(' ', normalize-space(@id), ' '), ' lead_graphic ')] | //div[contains(concat(' ', normalize-space(@itemprop), ' '), ' articleBody ')]
3body: //div[@id='article_body'] 3title: //h1[contains(concat(' ', normalize-space(@itemprop), ' '), ' headline ')]
4body: //div[@id='story_body'] 4date: //time[contains(concat(' ', normalize-space(@itemprop), ' '), ' datePublished ')]
5 5
6title://h1[@id='article_headline'] 6strip_id_or_class: photo_credit
7 7strip_id_or_class: photo_caption
8# article author 8strip_id_or_class: inline_gallery
9author: //p[@class='author']/a 9# pull quote, often inside a blockquote element
10# story author(s) 10strip_id_or_class: pq
11author: substring-after(//p[@class='byline'], 'By ') 11strip_id_or_class: credit
12 12strip_id_or_class: figcaption
13# article date 13strip_id_or_class: related_item
14date: //span[@class='published_date']
15# story date
16date: //span[@class='date']
17
18date: substring-after(//div[contains(@class,'attributor')],'on')
19strip_id_or_class: inset
20strip: //p/span[@class='photoCredit']
21strip: //h1
22
23strip_id_or_class: page_count
24strip_id_or_class: tools
25strip_id_or_class: pagination
26
27single_page_link: //li[@id='stPrint']/a
28 14
29test_url: http://www.businessweek.com/magazine/buyback-insurance-a-good-deal-for-retailers-07282011.html 15test_url: http://www.businessweek.com/magazine/buyback-insurance-a-good-deal-for-retailers-07282011.html
30test_url: http://www.businessweek.com/articles/2012-06-06/american-pain-the-largest-u-dot-s-dot-pill-mills-rise-and-fall \ No newline at end of file 16test_url: http://www.businessweek.com/articles/2012-06-06/american-pain-the-largest-u-dot-s-dot-pill-mills-rise-and-fall
17test_url: http://www.businessweek.com/articles/2014-07-09/american-apparel-dov-charneys-sleazy-struggle-for-control