# Extraction rules: one directive per line; lines starting with '#' are comments.
title://div[@class="article-title"]/h1[@class="title"]
date: //p[@class="article-date"]
body://div[contains(@class, "article-body")]

# Trim out related posts at the bottom of the article.
strip://blockquote[@class="memo"]

tidy: no

# Known issue: the author rule below does not take effect on this site.
# [Marco:] The page defines the "home" link under the author's name as
# rel="author", and a rel="author" defined by the page always gets priority
# over this XPath. The rule is kept here for reference.
author://div[@class="page-header article-header clearfix"]/p[@class="title"]

test_url: http://allthingsd.com/20120513/exclusive-yahoos-thompson-out-levinsohn-in-board-settlement-with-loeb-nears-completion/
test_url: http://allthingsd.com/20131010/google-cio-ben-fried-on-how-google-works/