# Content-extraction rules for articles on ignoredbydinosaurs.com (host taken from test_url below).
# NOTE(review): this looks like a Full-Text RSS style site-pattern file (key: XPath) — confirm
# the consuming tool and that it accepts '#' comment lines.

# Title: any <h1> whose class attribute is exactly 'page-title' (exact match, not contains()).
1 title://h1[@class='page-title']
# Body: <div> elements whose class contains 'node-content', anywhere beneath the element with id='content'.
2 body://*[@id='content']//div[contains(@class,'node-content')]
# Author: <a> elements that are direct children of a 'node-submitted' <div> beneath id='content'.
4 author://*[@id='content']//div[contains(@class,'node-submitted')]/a
# Date: the text following the first ' on ' in the string value of the first 'node-submitted' <div>
# in the document (not scoped to id='content', unlike the author rule). Per XPath 1.0,
# substring-after() yields an empty string when ' on ' is absent.
6 date:substring-after(//div[contains(@class,'node-submitted')],' on ')
# Sample article URL; presumably used to check the rules above — verify against the consuming tool.
7 test_url: http://ignoredbydinosaurs.com/2011/09/great-lie-lorem-ipsum