aboutsummaryrefslogtreecommitdiffhomepage
path: root/inc/3rdparty/site_config/standard/npr.org.txt
blob: acd73e48c8a41d2b527c5181fd4640beaec5fa3c (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# Content extraction rules for npr.org story pages. Each value is an XPath expression.
# Headline: any <h1> nested under a <div> whose class contains 'storytitle'.
title: //div[contains(@class, 'storytitle')]//h1
# Byline: <span> children of <p class="byline">.
author: //p[@class="byline"]/span
# Body is the union (|) of two node sets:
#   1. inside div#primaryaudio, any element whose class is exactly 'duration' or
#      'download', or whose class contains 'photo';
#   2. any <div> whose id is 'storytext' or 'supplementarycontent', or whose class
#      contains 'transcript'.
body: //div[@id='primaryaudio']//*[@class='duration' or @class='download' or contains(@class, 'photo')] | //div[@id='storytext' or @id='supplementarycontent' or contains(@class, 'transcript')]
# Date: the content attribute of <meta name="date">.
date: //meta[@name="date"]/@content

# Elements removed from the extracted content.
# 'strip' takes an XPath expression; 'strip_id_or_class' takes a plain string that
# is matched against element id/class attributes.
# Image-enlarge helpers.
strip_id_or_class: enlarge_measure
strip_id_or_class: enlarge_html
strip: //a[contains(@class, 'enlargeicon')]
# <div> elements whose class contains 'bookedition'.
strip: //div[contains(@class, 'bookedition')]
# Page chrome: the 'textsize' div, the 'genres' list and 'bull' spans.
strip: //div[@class='textsize']
strip: //ul[@class='genres']
strip: //span[@class='bull']
# Elements tagged 'secondary' or 'con1col', and <h3 class="conheader"> headings.
strip_id_or_class: secondary
strip_id_or_class: con1col
strip: //h3[@class='conheader']

# Literal string substitutions: the text inside the parentheses is replaced by the
# text after the colon.
# Replace the empty "more" anchor with an HTML comment.
replace_string(<a name="more">&nbsp;</a>): <!-- no more -->
# Insert a "Transcript" heading at the start of the transcript container; one rule
# for each of the two class-attribute spellings.
replace_string(<div class="transcript">): <div class="transcript"><h2>Transcript</h2>
replace_string(<div class="transcript storytext">): <div class="transcript storytext"><h2>Transcript</h2>

# Turn pruning off for this site (the body XPath above already selects the
# wanted containers).
prune: no
# Additional elements removed from the extracted content.
# Rules for span.bull, enlarge_html, enlarge_measure and the con1col container
# are intentionally not repeated here: the strip / strip_id_or_class directives
# earlier in this file already remove those elements.
# E-commerce pop-up and purchase links.
strip: //div[@class="ecommercepop"]
strip: //span[@class="purchaseLink"]
# Image-enlarge button links.
strip: //a[contains(@class, "enlargebtn")]
# <div> elements whose class contains 'bucketwrap internallink'.
strip: //div[contains(@class, "bucketwrap internallink")]

# Sample npr.org pages for checking the rules above: a blog post, two story pages
# and a legacy templates/story.php?storyId= URL.
test_url: http://www.npr.org/blogs/thetwo-way/2011/07/12/137799301/sports-loses-its-escapist-gleam-in-a-summer-of-court-dates
test_url: http://www.npr.org/2012/07/04/156190948/feeling-under-siege-catholic-leadership-shifts-right
test_url: http://www.npr.org/2012/12/13/166480907/the-years-best-sci-fi-crosses-galaxies-and-genres
test_url: http://www.npr.org/templates/story/story.php?storyId=229103221