# Author: the post-meta paragraph reads "Posted by <author> on <date>".
# normalize-space() collapses line breaks and runs of whitespace so the
# delimiters below match reliably. Matching ' by ' and ' on ' as whole words
# (with surrounding spaces) keeps names that merely contain "on"
# (e.g. "Simon", "Jon") from being cut short.
author: substring-before(substring-after(normalize-space(//div[@class='post']/p[@class='post-meta']), ' by '), ' on ')

# Date: everything after the first whole-word " on " in the same
# "Posted by <author> on <date>" string.
date: substring-after(normalize-space(//div[@class='post']/p[@class='post-meta']), ' on ')

# this keeps thumbnail images
# NOTE(review): no directive follows this comment in the visible file -
# confirm whether the rule it describes was lost.

test_url: http://www.contemporist.com/2011/11/02/landing-200-lamp-by-kim-hyunjoo