]>
Commit | Line | Data |
---|---|---|
ac4d1142 NL |
1 | # metadata\r |
2 | title://h1[@class = 'postTitle']\r | |
3 | author:substring-before(substring-after(//span[@class = 'byline'],'By '),'|')\r | |
4 | date://span[@class = 'datestamp']\r | |
5 | \r | |
6 | # body content\r | |
7 | body://div[@id = 'singleBlogPost']\r | |
8 | \r | |
9 | # reclaim author info: move the "about the author" box into the post body\r | |
10 | move_into(//div[@id = 'singleBlogPost'])://div[@id = 'aboutAuthorDiv']\r | |
11 | strip://p[@class = 'moreLink mobileHide']\r | |
12 | \r | |
13 | # clean up comments; there might be some unclosed <div> sections\r | |
14 | strip://div[@id = 'comments2']\r | |
15 | strip://h3[a[@href = '#add-comment']] | |
16 | test_url: http://blogs.scientificamerican.com/a-blog-around-the-clock/2012/07/10/science-blogs-definition-and-a-history/ |