]>
Commit | Line | Data |
---|---|---|
4e067cea NL |
1 | # metadata |
2 | title://h1[@class = 'postTitle'] | |
3 | author:substring-before(substring-after(//span[@class = 'byline'],'By '),'|') | |
4 | date://span[@class = 'datestamp'] | |
5 | ||
6 | # body content | |
7 | body://div[@id = 'singleBlogPost'] | |
8 | ||
9 | # reclaim author info: move the about-the-author box into the post body | |
10 | move_into(//div[@id = 'singleBlogPost'])://div[@id = 'aboutAuthorDiv'] | |
11 | strip://p[@class = 'moreLink mobileHide'] | |
12 | ||
13 | # clean up comments; there might be some open <div> sections | |
14 | strip://div[@id = 'comments2'] | |
ac4d1142 NL |
15 | strip://h3[a[@href = '#add-comment']] |
16 | test_url: http://blogs.scientificamerican.com/a-blog-around-the-clock/2012/07/10/science-blogs-definition-and-a-history/ |