]>
Commit | Line | Data |
---|---|---|
ac4d1142 NL |
1 | # Think there might be something up with your parser that it strips out 'print' from the title :)\r |
2 | \r | |
3 | title: //meta[@name='title']/@content\r | |
4 | author: //meta[@name='author']/@content\r | |
5 | date: //meta[@name='date']/@content\r | |
6 | \r | |
7 | body: //div[@class='articleText']\r | |
8 | \r | |
9 | strip: //div[contains(@class, 'day')]\r | |
10 | strip: //div[contains(@class, 'month')]\r | |
11 | strip: //div[contains(@class, 'year')]\r | |
12 | strip: //div[contains(@class, 'time')]\r | |
13 | strip: //h1[@class='gl_headline']\r | |
14 | strip: //div[@class='byline']\r | |
15 | strip: //div[@id='left_ear']\r | |
16 | strip: //div[@id='right_ear']\r | |
17 | strip: //div[contains(@class, 'PopularPosts')]\r | |
18 | strip: //div[@class='discuss_page_break']\r | |
19 | strip: //div[contains(@class, 'p-content_TagList')] | |
20 | test_url: http://redtape.msnbc.msn.com/_news/2011/09/28/8020661-sprint-raises-fee-but-wont-free-users-from-two-year-contracts?preview=true |