]>
Commit | Line | Data |
---|---|---|
ac4d1142 NL |
1 | title: //h1[@class='headline']\r |
2 | body: //div[@id='storyText']\r | |
3 | # fallback body for video entries: the ff-img image (injected by replace_string below) plus the overview text\r | |
4 | body: //img[@id='ff-img'] | //div[@id='meta']//div[contains(@class, 'overview')]\r | |
5 | author: //div[@class='byline']\r | |
6 | date: //div[@class='date']\r | |
7 | strip: //*[@id='date_partner']\r | |
8 | \r | |
9 | strip: //div[@class='breadcrumb']\r | |
10 | strip: //div[contains(@class,'show_tools')]\r | |
11 | strip: //div[@id='sponsoredByAd']\r | |
12 | strip: //div[contains(@class,'rel_container')]\r | |
13 | strip: //p[a[starts-with(@href, 'http://www.twitter.com')]]\r | |
14 | strip: //p[a[starts-with(@href, 'http://www.facebook.com')]]\r | |
15 | strip: //p[contains(., 'Click here to return to')]\r | |
16 | #strip_id_or_class: media\r | |
17 | strip_id_or_class: mediaplayer\r | |
18 | \r | |
19 | replace_string(<link rel="image_src" href="http): <img id="ff-img" src="http\r | |
20 | \r | |
21 | prune: no\r | |
22 | \r | |
23 | single_page_link: concat(//li[@class='pager']//a/@href, '&singlePage=true')\r | |
24 | \r | |
25 | test_url: http://abcnews.go.com/Politics/newt-gingrich-rocky-rollout-presidential-campaign-recover/story?id=13632744\r | |
26 | # multi-page article (exercises single_page_link)\r | |
27 | test_url: http://abcnews.go.com/Blotter/family-freed-american-hostage-somalia-seals-obama/story?id=15439544 |