]>
Commit | Line | Data |
---|---|---|
4e067cea NL |
1 | title: //h1[@class='headline'] |
2 | body: //div[@id='storyText'] | |
3 | # for video entries: image injected as ff-img by replace_string below, plus the overview block | |
4 | body: //img[@id='ff-img'] | //div[@id='meta']//div[contains(@class, 'overview')] | |
5 | author: //div[@class='byline'] | |
6 | date: //div[@class='date'] | |
7 | strip: //*[@id='date_partner'] | |
8 | ||
9 | strip: //div[@class='breadcrumb'] | |
10 | strip: //div[contains(@class,'show_tools')] | |
11 | strip: //div[@id='sponsoredByAd'] | |
12 | strip: //div[contains(@class,'rel_container')] | |
13 | strip: //p[a[starts-with(@href, 'http://www.twitter.com')]] | |
14 | strip: //p[a[starts-with(@href, 'http://www.facebook.com')]] | |
15 | strip: //p[contains(., 'Click here to return to')] | |
16 | #strip_id_or_class: media | |
17 | strip_id_or_class: mediaplayer | |
18 | ||
19 | replace_string(<link rel="image_src" href="http): <img id="ff-img" src="http | |
20 | ||
21 | prune: no | |
22 | ||
23 | single_page_link: concat(//li[@class='pager']//a/@href, '&singlePage=true') | |
24 | ||
25 | test_url: http://abcnews.go.com/Politics/newt-gingrich-rocky-rollout-presidential-campaign-recover/story?id=13632744 | |
26 | # multi-page article (exercises single_page_link) | |
ac4d1142 | 27 | test_url: http://abcnews.go.com/Blotter/family-freed-american-hostage-somalia-seals-obama/story?id=15439544 |