]>
Commit | Line | Data |
---|---|---|
ac4d1142 NL |
1 | title: //div[@id='storyHdr']/h1\r |
2 | title: //div[@id='print']//h2\r | |
3 | body: //div[@class="virtualpage"]\r | |
4 | body: //div[@id='print']//div[@id='bd']\r | |
5 | author: //meta[@name="AUTHOR"]/@content\r | |
6 | author: (//div[@id='print']//div[@id='bd']/h4)[1]\r | |
7 | date: //meta[@name="DATE"]/@content\r | |
8 | date: //div[@id='print']//div[@id='dte']\r | |
9 | \r | |
10 | strip_id_or_class: articleFooter\r | |
11 | strip_id_or_class: sidebar\r | |
12 | strip_id_or_class: ie6PrintSubhead\r | |
13 | strip_id_or_class: subHdr\r | |
14 | \r | |
15 | \r | |
16 | replace_string(<P/>): </p><p>\r | |
17 | \r | |
18 | prune: no\r | |
19 | \r | |
20 | # TODO: the single-page (print) link redirects back - perhaps it needs a Referer header to work\r | |
21 | single_page_link: //div[@id='storyDetail']//a[contains(@href, '/print/')]\r | |
22 | \r | |
23 | test_url: http://www.thestreet.com/story/11386556/1/which-of-these-10-dividend-stocks-is-worth-the-risk.html\r | |
24 | # multi-page article\r | |
25 | test_url: http://www.thestreet.com/story/11387090/1/7-ubs-stock-picks-for-2012.html |