blob: 04d2023098b8463f05f3c6f8f9110964c3ee7411 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
|
# Extraction rules for cbsnews.com article pages (sample page: see test_url below).
# Each directive is `name: XPath expression`.
# Publication date: two candidate expressions are given, the "published" meta tag's
# content attribute and the timeLine div.
# NOTE(review): presumably evaluated in order with the first match winning; confirm against the parser.
date: //meta[@name="published"]/@content
date: //div[@class="timeLine"]
# Headline: any h1 nested inside the element with id "contentBody".
title: //div[@id='contentBody']//h1
# Author: link(s) inside the dd of the storyBlogByline definition list.
author: //dl[@class="storyBlogByline"]/dd/a
# Article body: union of the storyMediaBox div and every div whose class contains "storyText".
body: //div[@id='storyMediaBox'] | //div[contains(@class, 'storyText')]
# Content Pruning
# Elements matched by the strip expressions below are to be removed from the output.
# timeLine and storyBlogByline are the same elements the date/author rules above read from.
strip: //div[@class="scrollingArrows"]
strip: //div[@class="timeLine"]
strip: //dl[@class="storyBlogByline"]
strip: //span[@class='image-credit']
# NOTE(review): presumably turns off the extractor's automatic pruning of the matched body; confirm against the parser docs.
prune: no
# Sample article URL for checking these rules.
test_url: http://www.cbsnews.com/8301-201_162-57366361/rescued-americans-dad-proud-of-the-u.s/
|