about | summary | refs | log | tree | commit | diff | homepage
path: root/inc/3rdparty/site_config/standard/wsj.com.txt
diff options
context:
space:
mode:
Diffstat (limited to 'inc/3rdparty/site_config/standard/wsj.com.txt')
-rwxr-xr-x inc/3rdparty/site_config/standard/wsj.com.txt 29
1 files changed, 29 insertions, 0 deletions
diff --git a/inc/3rdparty/site_config/standard/wsj.com.txt b/inc/3rdparty/site_config/standard/wsj.com.txt
new file mode 100755
index 00000000..467c39c2
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/wsj.com.txt
@@ -0,0 +1,29 @@
1title: //meta[@property="og:title"]/@content
2body: //div[@id='wsj-article-wrap']
3# TODO: is the 'article_story_body' selector below still used?
4body: //div[@id='article_story_body']
5
6author: //h3[@class='byline']/a
7# for slide show content: first slide image plus the first paragraph of the text body
8body: //ul[@id='imageSlide']//li[@class='firstSlide']//img | (//div[@class='txt_body']//p)[1]
9date: //li[@class='dateStamp']/small
10
11strip_id_or_class: insetFullBracket
12strip_id_or_class: insettipBox
13#strip_id_or_class: legacyInset
14strip_id_or_class: recipeACShopAndBuyText
15
16strip: //div[contains(@class, 'insetContent')]//cite
17strip: //*[contains(@style, 'visibility: hidden;')]
18strip: //div[contains(@class, 'insetContent') and not(contains(@class, 'image'))]
19strip: //div[contains(@class, 'carousel')]
20
21prune: no
22tidy: no
23
24test_url: http://www.wsj.com/articles/airasia-flight-8501-tail-recovered-1420878809
25test_contains: Saturday evening that the black boxes
26test_url: http://www.wsj.com/news/articles/SB10001424052702304626304579509100018004342
27test_url: http://www.wsj.com/article/SB10001424052970203363504577185322849515102.html
28# slide show article
29test_url: http://www.wsj.com/article/SB10001424052970204791104577110550376458164.html