]>
Commit | Line | Data |
---|---|---|
1 | # main sportsillustrated.com articles | |
2 | # | |
3 | body: //div[@id="cnnStoryContent"] | |
4 | title: //div[@id="cnnStoryHeadline"]//h1 | |
5 | author: //div[@id="cnnSubBanner"]//strong | |
6 | date: substring-after(//div[@id="cnnTimeStamp"], "Updated: ") | |
7 | date: substring-after(//div[@id="cnnTimeStamp"], "Posted: ") | |
8 | ||
9 | # kill ugly font buttons | |
10 | strip: //div[@id="cnnSCFontButtons"] | |
11 | ||
12 | # kill miscellaneous filler content (videos, etc.) | |
13 | strip: //div[@class="cnnDivideContent"] | |
14 | strip: //*[@class="cnnTMbox"] | |
15 | ||
16 | # SI Vault articles | |
17 | # ------------- | |
18 | body: //div[@class="siv_artPara"] | |
19 | title: //div[@class="siv_artHeader"]//h1 | |
20 | author: //div[@class="byline"] | |
21 | date: //div[@class="date"] | |
22 | ||
23 | next_page_link: //div[@id='cnnStoryContinue']/a | |
24 | strip_id_or_class: cnnstorypagination | |
25 | ||
26 | test_url: http://sportsillustrated.cnn.com/2012/writers/peter_king/02/27/combine/index.html |