]>
Commit | Line | Data |
---|---|---|
4e067cea NL |
1 | # Think there might be something up with your parser that it strips out 'print' from the title :) |
2 | ||
3 | title: //meta[@name='title']/@content | |
4 | author: //meta[@name='author']/@content | |
5 | date: //meta[@name='date']/@content | |
6 | ||
7 | body: //div[@class='articleText'] | |
8 | ||
9 | strip: //div[contains(@class, 'day')] | |
10 | strip: //div[contains(@class, 'month')] | |
11 | strip: //div[contains(@class, 'year')] | |
12 | strip: //div[contains(@class, 'time')] | |
13 | strip: //h1[@class='gl_headline'] | |
14 | strip: //div[@class='byline'] | |
15 | strip: //div[@id='left_ear'] | |
16 | strip: //div[@id='right_ear'] | |
17 | strip: //div[contains(@class, 'PopularPosts')] | |
18 | strip: //div[@class='discuss_page_break'] |
ac4d1142 NL |
19 | strip: //div[contains(@class, 'p-content_TagList')] |
20 | test_url: http://redtape.msnbc.msn.com/_news/2011/09/28/8020661-sprint-raises-fee-but-wont-free-users-from-two-year-contracts?preview=true |