aboutsummaryrefslogtreecommitdiffhomepage
path: root/inc/3rdparty/site_config/standard/wsj.com.txt
blob: 467c39c27cb897a04d3260cc5560670d9183f927 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# Extraction rules for wsj.com articles. Each directive is `name: value`;
# values for title/body/author/date/strip are XPath expressions.
# NOTE(review): several `body:` rules are listed; presumably the extractor tries
# them in file order — confirm against the site-config parser before reordering.

# title comes from the content attribute of the og:title <meta> tag
title: //meta[@property="og:title"]/@content
# article container, matched by id
body: //div[@id='wsj-article-wrap']
# alternative article container id — is this still used? (open question, unverified)
body: //div[@id='article_story_body']

# author is the link inside the <h3 class="byline"> element
author: //h3[@class='byline']/a
# for slide show content: union of the image(s) inside the 'firstSlide' item of
# the #imageSlide list and the first paragraph found under div.txt_body
body: //ul[@id='imageSlide']//li[@class='firstSlide']//img | (//div[@class='txt_body']//p)[1]
# date is the <small> inside the <li class="dateStamp"> element
date: //li[@class='dateStamp']/small

# id/class names to strip (presumably any element whose id or class matches — TODO confirm)
strip_id_or_class: insetFullBracket
strip_id_or_class: insettipBox
# the next rule is commented out (disabled); reason not recorded here
#strip_id_or_class: legacyInset
strip_id_or_class: recipeACShopAndBuyText

# <cite> elements inside any div whose class contains 'insetContent'
strip: //div[contains(@class, 'insetContent')]//cite
# any element whose inline style attribute contains 'visibility: hidden;'
strip: //*[contains(@style, 'visibility: hidden;')]
# 'insetContent' divs, except those whose class also contains 'image'
strip: //div[contains(@class, 'insetContent') and not(contains(@class, 'image'))]
# any div whose class contains 'carousel'
strip: //div[contains(@class, 'carousel')]

# prune and tidy are both set to 'no' for this site
# NOTE(review): presumably these disable the extractor's pruning and HTML-tidy passes — confirm
prune: no
tidy: no

# test URLs; only the first one is followed by a test_contains expectation
test_url: http://www.wsj.com/articles/airasia-flight-8501-tail-recovered-1420878809
test_contains: Saturday evening that the black boxes
test_url: http://www.wsj.com/news/articles/SB10001424052702304626304579509100018004342
test_url: http://www.wsj.com/article/SB10001424052970203363504577185322849515102.html
# slide show
test_url: http://www.wsj.com/article/SB10001424052970204791104577110550376458164.html