blob: ffb6b2d19b0cf3759aff23a40bcf6a8e40bb37a3 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
|
title: //div[@class="bodyText"]/h1/text()
body: //div[@class="bodyText"]
# author and date share one span, separated only by a newline;
# there is no way to split them yet, so both fields use the same XPath
author: //div[@class="bodyText"]/span[@class="info"]/text()
date: //div[@class="bodyText"]/span[@class="info"]/text()
# strip metadata from body text
strip: //div[@class="bodyText"]/h1/text()
strip: //div[@class="bodyText"]/span[@class="info"]
strip: //div[@class="bodyText"]/span[@class="info"]
test_url: http://www.wmnf.org/news_stories/light-rail-advocates-join-forces-to-combat-opposition-in-pinellas
|