]>
Commit | Line | Data |
---|---|---|
ac4d1142 NL |
1 | title: //div[contains(@class, 'storytitle')]//h1\r |
2 | author: //p[@class="byline"]/span\r | |
3 | body: //div[@id='storyspan02']//*[@class='duration' or @class='download' or contains(@class, 'photo')] | //div[@id='storytext'] | //div[@class='transcript']\r | |
4 | date: //meta[@name="date"]/@content\r | |
5 | \r | |
6 | strip: //div[@class='enlarge_measure']\r | |
7 | strip: //div[@class='enlarge_html']\r | |
8 | strip: //a[@class='enlargeicon']\r | |
9 | strip: //div[contains(@class, 'bookedition')]\r | |
10 | strip: //div[@class='textsize']\r | |
11 | strip: //ul[@class='genres']\r | |
12 | strip: //span[@class='bull']\r | |
13 | strip_id_or_class: secondary\r | |
14 | strip_id_or_class: con1col\r | |
15 | strip: //h3[@class='conheader']\r | |
16 | \r | |
17 | replace_string(<a name="more"> </a>): <!-- no more -->\r | |
18 | replace_string(<div class="transcript">): <div class="transcript"><h2>Transcript</h2>\r | |
19 | \r | |
20 | prune: no\r | |
21 | strip://div[@class="ecommercepop"]\r | |
22 | strip://span[@class="bull"]\r | |
23 | strip://span[@class="purchaseLink"]\r | |
24 | strip://div[@class="enlarge_html"]\r | |
25 | strip://div[@class="enlarge_measure"]\r | |
26 | strip://div[@class="container con1col small"]\r | |
27 | strip://a[contains(@class, "enlargebtn")]\r | |
28 | strip://div[contains(@class, "bucketwrap internallink")]\r | |
29 | \r | |
30 | test_url: http://www.npr.org/blogs/thetwo-way/2011/07/12/137799301/sports-loses-its-escapist-gleam-in-a-summer-of-court-dates\r | |
31 | test_url: http://www.npr.org/2012/07/04/156190948/feeling-under-siege-catholic-leadership-shifts-right\r | |
32 | test_url: http://www.npr.org/2012/12/13/166480907/the-years-best-sci-fi-crosses-galaxies-and-genres |