blob: 4ba3da19f7dca6ccba284712ff77c4e23e069d88 (
plain) (
tree)
|
|
# Extraction rules for cbsnews.com article pages (host taken from test_url below).
# Each directive's value is an XPath expression evaluated against the article HTML.
# Publication date: the content attribute of the "published" meta tag is listed first,
# and the visible timeLine div is listed second.
# NOTE(review): presumably patterns are tried in order and the first match wins - confirm against the config format docs.
date: //meta[@name="published"]/@content
date: //div[@class="timeLine"]
# Headline: any h1 nested at any depth inside the div with id contentBody.
title: //div[@id='contentBody']//h1
# Byline: the link(s) inside the dd of the storyBlogByline definition list.
author: //dl[@class="storyBlogByline"]/dd/a
# Article content: union of the storyMediaBox div and every div whose class attribute contains "storyText".
body: //div[@id='storyMediaBox'] | //div[contains(@class, 'storyText')]
# Content Pruning
# The timeLine and storyBlogByline patterns below are the same ones used by the date and author rules above.
# NOTE(review): presumably strip removes matching elements from the extracted output - confirm.
strip: //div[@class="scrollingArrows"]
strip: //div[@class="timeLine"]
strip: //dl[@class="storyBlogByline"]
# NOTE(review): presumably "no" turns off the parser's automatic pruning of the extracted body - confirm.
prune: no
# Sample article URL for checking the rules above.
test_url: http://www.cbsnews.com/8301-201_162-57366361/rescued-americans-dad-proud-of-the-u.s/
|