aboutsummaryrefslogtreecommitdiffhomepage
path: root/inc/3rdparty/site_config/standard/online.wsj.com.txt
diff options
context:
space:
mode:
author Nicolas Lœuillet <nicolas@loeuillet.org> 2014-07-13 10:15:40 +0200
committer Nicolas Lœuillet <nicolas@loeuillet.org> 2014-07-13 10:15:40 +0200
commit4e067ceabd705201a16b4c92cf4b23f3b990326c (patch)
tree939f3a8e5ff3ab9ee414a57a895d3e78e1d46ce3 /inc/3rdparty/site_config/standard/online.wsj.com.txt
parent58dbe103889148def78b0fc8744d3f94c56a1561 (diff)
downloadwallabag-4e067ceabd705201a16b4c92cf4b23f3b990326c.tar.gz
wallabag-4e067ceabd705201a16b4c92cf4b23f3b990326c.tar.zst
wallabag-4e067ceabd705201a16b4c92cf4b23f3b990326c.zip
updated specific configuration for parsing
Diffstat (limited to 'inc/3rdparty/site_config/standard/online.wsj.com.txt')
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/online.wsj.com.txt48
1 files changed, 25 insertions, 23 deletions
diff --git a/inc/3rdparty/site_config/standard/online.wsj.com.txt b/inc/3rdparty/site_config/standard/online.wsj.com.txt
index edb52855..448bb7e1 100644..100755
--- a/inc/3rdparty/site_config/standard/online.wsj.com.txt
+++ b/inc/3rdparty/site_config/standard/online.wsj.com.txt
@@ -1,23 +1,25 @@
1title: //meta[@property="og:title"]/@content 1title: //meta[@property="og:title"]/@content
2body: //div[@id='article_story_body'] 2body: //div[@id='article_story_body']
3 3
4author: //h3[@class='byline']/a 4author: //h3[@class='byline']/a
5# for slid show content 5# for slide show content
6body: //ul[@id='imageSlide']//li[@class='firstSlide']//img | (//div[@class='txt_body']//p)[1] 6body: //ul[@id='imageSlide']//li[@class='firstSlide']//img | (//div[@class='txt_body']//p)[1]
7date: //li[@class='dateStamp']/small 7date: //li[@class='dateStamp']/small
8 8
9strip_id_or_class: insetFullBracket 9strip_id_or_class: insetFullBracket
10strip_id_or_class: insettipBox 10strip_id_or_class: insettipBox
11#strip_id_or_class: legacyInset 11#strip_id_or_class: legacyInset
12strip_id_or_class: recipeACShopAndBuyText 12strip_id_or_class: recipeACShopAndBuyText
13 13
14strip: //div[contains(@class, 'insetContent')]//cite 14strip: //div[contains(@class, 'insetContent')]//cite
15strip: //*[contains(@style, 'visibility: hidden;')] 15strip: //*[contains(@style, 'visibility: hidden;')]
16strip: //div[contains(@class, 'insetContent') and not(contains(@class, 'image'))] 16strip: //div[contains(@class, 'insetContent') and not(contains(@class, 'image'))]
17 17strip: //div[contains(@class, 'carousel')]
18prune: no 18
19tidy: no 19prune: no
20 20tidy: no
21test_url: http://online.wsj.com/article/SB10001424052970203363504577185322849515102.html 21
22# slide show 22test_url: http://online.wsj.com/news/articles/SB10001424052702304626304579509100018004342
23test_url: http://online.wsj.com/article/SB10001424052970204791104577110550376458164.html \ No newline at end of file 23test_url: http://online.wsj.com/article/SB10001424052970203363504577185322849515102.html
24# slide show
25test_url: http://online.wsj.com/article/SB10001424052970204791104577110550376458164.html