diff options
author | Nicolas Lœuillet <nicolas@loeuillet.org> | 2014-07-13 10:15:40 +0200 |
---|---|---|
committer | Nicolas Lœuillet <nicolas@loeuillet.org> | 2014-07-13 10:15:40 +0200 |
commit | 4e067ceabd705201a16b4c92cf4b23f3b990326c (patch) | |
tree | 939f3a8e5ff3ab9ee414a57a895d3e78e1d46ce3 /inc/3rdparty/site_config/standard/sportsillustrated.cnn.com.txt | |
parent | 58dbe103889148def78b0fc8744d3f94c56a1561 (diff) | |
download | wallabag-4e067ceabd705201a16b4c92cf4b23f3b990326c.tar.gz wallabag-4e067ceabd705201a16b4c92cf4b23f3b990326c.tar.zst wallabag-4e067ceabd705201a16b4c92cf4b23f3b990326c.zip |
updated specific configuration for parsing
Diffstat (limited to 'inc/3rdparty/site_config/standard/sportsillustrated.cnn.com.txt')
-rwxr-xr-x[-rw-r--r--] | inc/3rdparty/site_config/standard/sportsillustrated.cnn.com.txt | 50 |
1 files changed, 25 insertions, 25 deletions
diff --git a/inc/3rdparty/site_config/standard/sportsillustrated.cnn.com.txt b/inc/3rdparty/site_config/standard/sportsillustrated.cnn.com.txt index afc5879f..b3da8138 100644..100755 --- a/inc/3rdparty/site_config/standard/sportsillustrated.cnn.com.txt +++ b/inc/3rdparty/site_config/standard/sportsillustrated.cnn.com.txt | |||
@@ -1,26 +1,26 @@ | |||
1 | # main sportsillustrated.com articles | 1 | # main sportsillustrated.com articles |
2 | # | 2 | # |
3 | body: //div[@id="cnnStoryContent"] | 3 | body: //div[@id="cnnStoryContent"] |
4 | title: //div[@id="cnnStoryHeadline"]//h1 | 4 | title: //div[@id="cnnStoryHeadline"]//h1 |
5 | author: //div[@id="cnnSubBanner"]//strong | 5 | author: //div[@id="cnnSubBanner"]//strong |
6 | date: substring-after(//div[@id="cnnTimeStamp"], "Updated: ") | 6 | date: substring-after(//div[@id="cnnTimeStamp"], "Updated: ") |
7 | date: substring-after(//div[@id="cnnTimeStamp"], "Posted: ") | 7 | date: substring-after(//div[@id="cnnTimeStamp"], "Posted: ") |
8 | 8 | ||
9 | # kill ugly font buttons | 9 | # kill ugly font buttons |
10 | strip: //div[@id="cnnSCFontButtons"] | 10 | strip: //div[@id="cnnSCFontButtons"] |
11 | 11 | ||
12 | # kill misc filler videos & etc | 12 | # kill misc filler videos & etc |
13 | strip: //div[@class="cnnDivideContent"] | 13 | strip: //div[@class="cnnDivideContent"] |
14 | strip: //*[@class="cnnTMbox"] | 14 | strip: //*[@class="cnnTMbox"] |
15 | 15 | ||
16 | # si vault articles | 16 | # si vault articles |
17 | # ------------- | 17 | # ------------- |
18 | body: //div[@class="siv_artPara"] | 18 | body: //div[@class="siv_artPara"] |
19 | title: //div[@class="siv_artHeader"]//h1 | 19 | title: //div[@class="siv_artHeader"]//h1 |
20 | author: //div[@class="byline"] | 20 | author: //div[@class="byline"] |
21 | date: //div[@class="date"] | 21 | date: //div[@class="date"] |
22 | 22 | ||
23 | next_page_link: //div[@id='cnnStoryContinue']/a | 23 | next_page_link: //div[@id='cnnStoryContinue']/a |
24 | strip_id_or_class: cnnstorypagination | 24 | strip_id_or_class: cnnstorypagination |
25 | 25 | ||
26 | test_url: http://sportsillustrated.cnn.com/2012/writers/peter_king/02/27/combine/index.html \ No newline at end of file | 26 | test_url: http://sportsillustrated.cnn.com/2012/writers/peter_king/02/27/combine/index.html \ No newline at end of file |