git.immae.eu Git - github/wallabag/wallabag.git/blob - inc/3rdparty/site_config/standard/wsj.com.txt
update config from @fivefilters
[github/wallabag/wallabag.git] / inc / 3rdparty / site_config / standard / wsj.com.txt
1 title: //meta[@property="og:title"]/@content
2 body: //div[@id='wsj-article-wrap']
3 # TODO: is the body selector on the next line still used?
4 body: //div[@id='article_story_body']
5
6 author: //h3[@class='byline']/a
7 # for slide show content
8 body: //ul[@id='imageSlide']//li[@class='firstSlide']//img | (//div[@class='txt_body']//p)[1]
9 date: //li[@class='dateStamp']/small
10
11 strip_id_or_class: insetFullBracket
12 strip_id_or_class: insettipBox
13 #strip_id_or_class: legacyInset
14 strip_id_or_class: recipeACShopAndBuyText
15
16 strip: //div[contains(@class, 'insetContent')]//cite
17 strip: //*[contains(@style, 'visibility: hidden;')]
18 strip: //div[contains(@class, 'insetContent') and not(contains(@class, 'image'))]
19 strip: //div[contains(@class, 'carousel')]
20
21 prune: no
22 tidy: no
23
24 test_url: http://www.wsj.com/articles/airasia-flight-8501-tail-recovered-1420878809
25 test_contains: Saturday evening that the black boxes
26 test_url: http://www.wsj.com/news/articles/SB10001424052702304626304579509100018004342
27 test_url: http://www.wsj.com/article/SB10001424052970203363504577185322849515102.html
28 # slide show
29 test_url: http://www.wsj.com/article/SB10001424052970204791104577110550376458164.html