diff options
Diffstat (limited to 'inc/3rdparty/site_config/standard/sportsillustrated.cnn.com.txt')
-rw-r--r-- | inc/3rdparty/site_config/standard/sportsillustrated.cnn.com.txt | 26 |
1 files changed, 26 insertions, 0 deletions
diff --git a/inc/3rdparty/site_config/standard/sportsillustrated.cnn.com.txt b/inc/3rdparty/site_config/standard/sportsillustrated.cnn.com.txt
new file mode 100644
index 00000000..afc5879f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/sportsillustrated.cnn.com.txt
@@ -0,0 +1,26 @@ | |||
1 | # main sportsillustrated.cnn.com articles | ||
2 | # | ||
3 | body: //div[@id="cnnStoryContent"] | ||
4 | title: //div[@id="cnnStoryHeadline"]//h1 | ||
5 | author: //div[@id="cnnSubBanner"]//strong | ||
6 | date: substring-after(//div[@id="cnnTimeStamp"], "Updated: ") | ||
7 | date: substring-after(//div[@id="cnnTimeStamp"], "Posted: ") | ||
8 | |||
9 | # kill ugly font buttons | ||
10 | strip: //div[@id="cnnSCFontButtons"] | ||
11 | |||
12 | # kill misc filler videos, etc. | ||
13 | strip: //div[@class="cnnDivideContent"] | ||
14 | strip: //*[@class="cnnTMbox"] | ||
15 | |||
16 | # SI Vault articles | ||
17 | # ------------- | ||
18 | body: //div[@class="siv_artPara"] | ||
19 | title: //div[@class="siv_artHeader"]//h1 | ||
20 | author: //div[@class="byline"] | ||
21 | date: //div[@class="date"] | ||
22 | |||
23 | next_page_link: //div[@id='cnnStoryContinue']/a | ||
24 | strip_id_or_class: cnnstorypagination | ||
25 | |||
26 | test_url: http://sportsillustrated.cnn.com/2012/writers/peter_king/02/27/combine/index.html \ No newline at end of file | ||