diff options
Diffstat (limited to 'inc/3rdparty/site_config/standard/online.wsj.com.txt')
-rw-r--r-- | inc/3rdparty/site_config/standard/online.wsj.com.txt | 23 |
1 files changed, 23 insertions, 0 deletions
diff --git a/inc/3rdparty/site_config/standard/online.wsj.com.txt b/inc/3rdparty/site_config/standard/online.wsj.com.txt new file mode 100644 index 00000000..edb52855 --- /dev/null +++ b/inc/3rdparty/site_config/standard/online.wsj.com.txt | |||
@@ -0,0 +1,23 @@ | |||
1 | title: //meta[@property="og:title"]/@content | ||
2 | body: //div[@id='article_story_body'] | ||
3 | |||
4 | author: //h3[@class='byline']/a | ||
5 | # for slide show content | ||
6 | body: //ul[@id='imageSlide']//li[@class='firstSlide']//img | (//div[@class='txt_body']//p)[1] | ||
7 | date: //li[@class='dateStamp']/small | ||
8 | |||
9 | strip_id_or_class: insetFullBracket | ||
10 | strip_id_or_class: insettipBox | ||
11 | #strip_id_or_class: legacyInset | ||
12 | strip_id_or_class: recipeACShopAndBuyText | ||
13 | |||
14 | strip: //div[contains(@class, 'insetContent')]//cite | ||
15 | strip: //*[contains(@style, 'visibility: hidden;')] | ||
16 | strip: //div[contains(@class, 'insetContent') and not(contains(@class, 'image'))] | ||
17 | |||
18 | prune: no | ||
19 | tidy: no | ||
20 | |||
21 | test_url: http://online.wsj.com/article/SB10001424052970203363504577185322849515102.html | ||
22 | # slide show | ||
23 | test_url: http://online.wsj.com/article/SB10001424052970204791104577110550376458164.html \ No newline at end of file | ||