diff options
Diffstat (limited to 'inc/3rdparty/site_config/standard/online.wsj.com.txt')
-rwxr-xr-x[-rw-r--r--] | inc/3rdparty/site_config/standard/online.wsj.com.txt | 48 |
1 files changed, 25 insertions, 23 deletions
diff --git a/inc/3rdparty/site_config/standard/online.wsj.com.txt b/inc/3rdparty/site_config/standard/online.wsj.com.txt index edb52855..448bb7e1 100644..100755 --- a/inc/3rdparty/site_config/standard/online.wsj.com.txt +++ b/inc/3rdparty/site_config/standard/online.wsj.com.txt | |||
@@ -1,23 +1,25 @@ | |||
1 | title: //meta[@property="og:title"]/@content | 1 | title: //meta[@property="og:title"]/@content |
2 | body: //div[@id='article_story_body'] | 2 | body: //div[@id='article_story_body'] |
3 | 3 | ||
4 | author: //h3[@class='byline']/a | 4 | author: //h3[@class='byline']/a |
5 | # for slid show content | 5 | # for slide show content |
6 | body: //ul[@id='imageSlide']//li[@class='firstSlide']//img | (//div[@class='txt_body']//p)[1] | 6 | body: //ul[@id='imageSlide']//li[@class='firstSlide']//img | (//div[@class='txt_body']//p)[1] |
7 | date: //li[@class='dateStamp']/small | 7 | date: //li[@class='dateStamp']/small |
8 | 8 | ||
9 | strip_id_or_class: insetFullBracket | 9 | strip_id_or_class: insetFullBracket |
10 | strip_id_or_class: insettipBox | 10 | strip_id_or_class: insettipBox |
11 | #strip_id_or_class: legacyInset | 11 | #strip_id_or_class: legacyInset |
12 | strip_id_or_class: recipeACShopAndBuyText | 12 | strip_id_or_class: recipeACShopAndBuyText |
13 | 13 | ||
14 | strip: //div[contains(@class, 'insetContent')]//cite | 14 | strip: //div[contains(@class, 'insetContent')]//cite |
15 | strip: //*[contains(@style, 'visibility: hidden;')] | 15 | strip: //*[contains(@style, 'visibility: hidden;')] |
16 | strip: //div[contains(@class, 'insetContent') and not(contains(@class, 'image'))] | 16 | strip: //div[contains(@class, 'insetContent') and not(contains(@class, 'image'))] |
17 | 17 | strip: //div[contains(@class, 'carousel')] | |
18 | prune: no | 18 | |
19 | tidy: no | 19 | prune: no |
20 | 20 | tidy: no | |
21 | test_url: http://online.wsj.com/article/SB10001424052970203363504577185322849515102.html | 21 | |
22 | # slide show | 22 | test_url: http://online.wsj.com/news/articles/SB10001424052702304626304579509100018004342 |
23 | test_url: http://online.wsj.com/article/SB10001424052970204791104577110550376458164.html \ No newline at end of file | 23 | test_url: http://online.wsj.com/article/SB10001424052970203363504577185322849515102.html |
24 | # slide show | ||
25 | test_url: http://online.wsj.com/article/SB10001424052970204791104577110550376458164.html | ||