diff options
Diffstat (limited to 'inc/3rdparty/site_config/standard/wsj.com.txt')
-rwxr-xr-x | inc/3rdparty/site_config/standard/wsj.com.txt | 29 |
1 files changed, 29 insertions, 0 deletions
diff --git a/inc/3rdparty/site_config/standard/wsj.com.txt b/inc/3rdparty/site_config/standard/wsj.com.txt new file mode 100755 index 00000000..467c39c2 --- /dev/null +++ b/inc/3rdparty/site_config/standard/wsj.com.txt | |||
@@ -0,0 +1,29 @@ | |||
1 | title: //meta[@property="og:title"]/@content | ||
2 | body: //div[@id='wsj-article-wrap'] | ||
3 | # TODO: is the article_story_body selector below still used? | ||
4 | body: //div[@id='article_story_body'] | ||
5 | |||
6 | author: //h3[@class='byline']/a | ||
7 | # For slideshow content: first slide image plus the first paragraph of the text body | ||
8 | body: //ul[@id='imageSlide']//li[@class='firstSlide']//img | (//div[@class='txt_body']//p)[1] | ||
9 | date: //li[@class='dateStamp']/small | ||
10 | |||
11 | strip_id_or_class: insetFullBracket | ||
12 | strip_id_or_class: insettipBox | ||
13 | #strip_id_or_class: legacyInset | ||
14 | strip_id_or_class: recipeACShopAndBuyText | ||
15 | |||
16 | strip: //div[contains(@class, 'insetContent')]//cite | ||
17 | strip: //*[contains(@style, 'visibility: hidden;')] | ||
18 | strip: //div[contains(@class, 'insetContent') and not(contains(@class, 'image'))] | ||
19 | strip: //div[contains(@class, 'carousel')] | ||
20 | |||
21 | prune: no | ||
22 | tidy: no | ||
23 | |||
24 | test_url: http://www.wsj.com/articles/airasia-flight-8501-tail-recovered-1420878809 | ||
25 | test_contains: Saturday evening that the black boxes | ||
26 | test_url: http://www.wsj.com/news/articles/SB10001424052702304626304579509100018004342 | ||
27 | test_url: http://www.wsj.com/article/SB10001424052970203363504577185322849515102.html | ||
28 | # Slideshow article | ||
29 | test_url: http://www.wsj.com/article/SB10001424052970204791104577110550376458164.html | ||