]>
Commit | Line | Data |
---|---|---|
4e067cea NL |
1 | title: //div[contains(@class, 'storytitle')]//h1 |
2 | author: //p[@class="byline"]/span | |
3 | body: //div[@id='primaryaudio']//*[@class='duration' or @class='download' or contains(@class, 'photo')] | //div[@id='storytext' or @id='supplementarycontent' or contains(@class, 'transcript')] | |
4 | date: //meta[@name="date"]/@content | |
5 | ||
6 | strip_id_or_class: enlarge_measure | |
7 | strip_id_or_class: enlarge_html | |
8 | strip: //a[contains(@class, 'enlargeicon')] | |
9 | strip: //div[contains(@class, 'bookedition')] | |
10 | strip: //div[@class='textsize'] | |
11 | strip: //ul[@class='genres'] | |
12 | strip: //span[@class='bull'] | |
13 | strip_id_or_class: secondary | |
14 | strip_id_or_class: con1col | |
15 | strip: //h3[@class='conheader'] | |
16 | ||
17 | replace_string(<a name="more"> </a>): <!-- no more --> | |
18 | replace_string(<div class="transcript">): <div class="transcript"><h2>Transcript</h2> | |
19 | replace_string(<div class="transcript storytext">): <div class="transcript storytext"><h2>Transcript</h2> | |
20 | ||
21 | prune: no | |
22 | strip://div[@class="ecommercepop"] | |
23 | strip://span[@class="bull"] | |
24 | strip://span[@class="purchaseLink"] | |
25 | strip://div[@class="enlarge_html"] | |
26 | strip://div[@class="enlarge_measure"] | |
27 | strip://div[@class="container con1col small"] | |
28 | strip://a[contains(@class, "enlargebtn")] | |
29 | strip://div[contains(@class, "bucketwrap internallink")] | |
30 | ||
31 | test_url: http://www.npr.org/blogs/thetwo-way/2011/07/12/137799301/sports-loses-its-escapist-gleam-in-a-summer-of-court-dates | |
32 | test_url: http://www.npr.org/2012/07/04/156190948/feeling-under-siege-catholic-leadership-shifts-right | |
33 | test_url: http://www.npr.org/2012/12/13/166480907/the-years-best-sci-fi-crosses-galaxies-and-genres | |
34 | test_url: http://www.npr.org/templates/story/story.php?storyId=229103221 |