blob: c5302d1bac527785345b3ec4e1f58fccff63d32c (
plain) (
tree)
|
|
# Extraction rules for politico.com article pages. Each line is `directive: XPath`.
# NOTE(review): directive names look like the FiveFilters Full-Text RSS site-config
# format - confirm against the tool that consumes this file.

# Headline: the <h1> directly inside a <div> whose class contains "article".
title://div[contains(@class, "article")]/h1
# Article text: any <div> whose class contains "story-text".
body://div[contains(@class,"story-text")]

# Disabled rule kept for reference. It targeted <a rel="next"> inside the pagination <ul>.
# Original author's note was "Why doesn't this work?" - the cause is not recorded here.
# next_page_link://ul[contains(@class,"pagination")]/li/a[@rel="next"]

# Pagination, variant 1: <ul class~"pagination"> with the current page marked on an <li>.
# following-sibling::node() matches EVERY later sibling of the current <li>, so this can
# return links to all later pages in document order, not only the adjacent one.
# NOTE(review): presumably the consumer takes the first match - verify.
next_page_link://ul[contains(@class,"pagination")]/li[contains(@class, "current")]/following-sibling::node()/a
# Pagination, variant 2: same idea, but the list is an <ol> wrapped in <div class~"pagination">.
next_page_link://div[contains(@class,"pagination")]/ol/li[contains(@class, "current")]/following-sibling::node()/a

# Publication date: the content attribute of <meta name="publish_date">.
date://meta[@name="publish_date"]/@content

# Elements matched by the strip rules below are selected for removal.
strip://div[contains(@class, "breadcrumbs")]
strip://a[contains(@class, "hidden")]
strip://div[contains(@class, "story-embed")]
# The trailing /.. selects the parent of the matching link, i.e. the whole <p> that holds
# an "Also on POLITICO:" cross-promo link inside the story text, not just the <a>.
strip://div[contains(@class, "story-text")]//p/a[contains(text(), "Also on POLITICO:")]/..
strip://div[contains(@class, "story-interrupt")]
strip://footer[contains(@class, "author-bio")]

# Sample article URL for checking the rules above.
test_url: http://www.politico.com/news/stories/0712/78105.html
|