diff options
Diffstat (limited to 'inc/3rdparty/site_config/standard/npr.org.txt')
-rwxr-xr-x[-rw-r--r--] | inc/3rdparty/site_config/standard/npr.org.txt | 66 |
1 file changed, 34 insertions, 32 deletions
diff --git a/inc/3rdparty/site_config/standard/npr.org.txt b/inc/3rdparty/site_config/standard/npr.org.txt index afab0eb3..acd73e48 100644..100755 --- a/inc/3rdparty/site_config/standard/npr.org.txt +++ b/inc/3rdparty/site_config/standard/npr.org.txt | |||
@@ -1,32 +1,34 @@ | |||
1 | title: //div[contains(@class, 'storytitle')]//h1 | 1 | title: //div[contains(@class, 'storytitle')]//h1 |
2 | author: //p[@class="byline"]/span | 2 | author: //p[@class="byline"]/span |
3 | body: //div[@id='storyspan02']//*[@class='duration' or @class='download' or contains(@class, 'photo')] | //div[@id='storytext'] | //div[@class='transcript'] | 3 | body: //div[@id='primaryaudio']//*[@class='duration' or @class='download' or contains(@class, 'photo')] | //div[@id='storytext' or @id='supplementarycontent' or contains(@class, 'transcript')] |
4 | date: //meta[@name="date"]/@content | 4 | date: //meta[@name="date"]/@content |
5 | 5 | ||
6 | strip: //div[@class='enlarge_measure'] | 6 | strip_id_or_class: enlarge_measure |
7 | strip: //div[@class='enlarge_html'] | 7 | strip_id_or_class: enlarge_html |
8 | strip: //a[@class='enlargeicon'] | 8 | strip: //a[contains(@class, 'enlargeicon')] |
9 | strip: //div[contains(@class, 'bookedition')] | 9 | strip: //div[contains(@class, 'bookedition')] |
10 | strip: //div[@class='textsize'] | 10 | strip: //div[@class='textsize'] |
11 | strip: //ul[@class='genres'] | 11 | strip: //ul[@class='genres'] |
12 | strip: //span[@class='bull'] | 12 | strip: //span[@class='bull'] |
13 | strip_id_or_class: secondary | 13 | strip_id_or_class: secondary |
14 | strip_id_or_class: con1col | 14 | strip_id_or_class: con1col |
15 | strip: //h3[@class='conheader'] | 15 | strip: //h3[@class='conheader'] |
16 | 16 | ||
17 | replace_string(<a name="more"> </a>): <!-- no more --> | 17 | replace_string(<a name="more"> </a>): <!-- no more --> |
18 | replace_string(<div class="transcript">): <div class="transcript"><h2>Transcript</h2> | 18 | replace_string(<div class="transcript">): <div class="transcript"><h2>Transcript</h2> |
19 | 19 | replace_string(<div class="transcript storytext">): <div class="transcript storytext"><h2>Transcript</h2> | |
20 | prune: no | 20 | |
21 | strip://div[@class="ecommercepop"] | 21 | prune: no |
22 | strip://span[@class="bull"] | 22 | strip://div[@class="ecommercepop"] |
23 | strip://span[@class="purchaseLink"] | 23 | strip://span[@class="bull"] |
24 | strip://div[@class="enlarge_html"] | 24 | strip://span[@class="purchaseLink"] |
25 | strip://div[@class="enlarge_measure"] | 25 | strip://div[@class="enlarge_html"] |
26 | strip://div[@class="container con1col small"] | 26 | strip://div[@class="enlarge_measure"] |
27 | strip://a[contains(@class, "enlargebtn")] | 27 | strip://div[@class="container con1col small"] |
28 | strip://div[contains(@class, "bucketwrap internallink")] | 28 | strip://a[contains(@class, "enlargebtn")] |
29 | 29 | strip://div[contains(@class, "bucketwrap internallink")] | |
30 | test_url: http://www.npr.org/blogs/thetwo-way/2011/07/12/137799301/sports-loses-its-escapist-gleam-in-a-summer-of-court-dates | 30 | |
31 | test_url: http://www.npr.org/2012/07/04/156190948/feeling-under-siege-catholic-leadership-shifts-right | 31 | test_url: http://www.npr.org/blogs/thetwo-way/2011/07/12/137799301/sports-loses-its-escapist-gleam-in-a-summer-of-court-dates |
32 | test_url: http://www.npr.org/2012/12/13/166480907/the-years-best-sci-fi-crosses-galaxies-and-genres \ No newline at end of file | 32 | test_url: http://www.npr.org/2012/07/04/156190948/feeling-under-siege-catholic-leadership-shifts-right |
33 | test_url: http://www.npr.org/2012/12/13/166480907/the-years-best-sci-fi-crosses-galaxies-and-genres | ||
34 | test_url: http://www.npr.org/templates/story/story.php?storyId=229103221 \ No newline at end of file | ||