]>
Commit | Line | Data |
---|---|---|
4e067cea NL |
1 | # A. Niepel, narya.de@... |
2 | # - added single_page_link | |
3 | # - added author for default and single page view | |
4 | # - added date for single page view | |
5 | # fforst@... | |
6 | # - Fixed it | |
7 | # bode2104@... | |
8 | # - Fixed single_page_link | |
9 | # - Included intro text in single page view | |
10 | # - Added body in default view | |
11 | ||
12 | # body extraction (tidy is switched off) | |
13 | tidy: no | |
14 | # body in single page view | |
15 | body: //div[@id="spArticleContent"] | |
16 | # body in default view | |
17 | body: //div[@id="spArticleSection"] | |
18 | # body in photo gallery ("Fotostrecke") view | |
19 | body: //div[@id="spBigaContent"] | |
20 | ||
21 | # set date in single page view (h3 inside the article container) | |
22 | date: //div[@id="spArticleContent"]/h3 | |
23 | # strip that same h3 from the page | |
24 | strip: //div[@id="spArticleContent"]/h3 | |
25 | # set date in photo gallery ("Fotostrecke") view | |
26 | date: //div[@id="spBigaDatum"] | |
27 | ||
28 | # set title in single page view (h2 inside the article container) | |
29 | title: //div[@id='spArticleContent']/h2 | |
30 | # strip the h1 and h2 headings from the article container | |
31 | strip: //div[@id='spArticleContent']/h1 | |
32 | strip: //div[@id='spArticleContent']/h2 | |
33 | # set title in photo gallery ("Fotostrecke") view | |
34 | title: //div[@class='spBigaHeadline'] | |
35 | ||
36 | # set author: from the link inside the author paragraph, or from the paragraph text following 'Von ' | |
37 | author: //p[@class="spAuthor"]/a | |
38 | author: substring-after(//p[@class="spAuthor"], 'Von ') | |
39 | # strip the whole author paragraph from the page | |
40 | strip: //p[@class='spAuthor'] | |
41 | ||
42 | # remove captions; the paragraph following spCredit is stripped before spCredit itself, because it is located relative to that div | |
43 | strip: //*/span[@class='spPicLayerText'] | |
44 | strip: //*/div[@class='spPanoPlayerPaneControl'] | |
45 | strip: //*/div[@class='spCredit']/following-sibling::p | |
46 | strip: //*/div[@class='spCredit'] | |
47 | ||
48 | # remove ads (spMInline blocks) | |
49 | strip: //div[@class='spMInline'] | |
50 | ||
51 | # remove photo galleries and extras; the br following a gallery is stripped before the gallery itself, because it is located relative to that div | |
52 | strip: //div[@class='spPhotoGallery']/following-sibling::br | |
53 | strip: //div[@class='spPhotoGallery'] | |
54 | strip: //div[@class='spAssetAlignleft'] | |
55 | strip: //div[contains(@class,'spAsset')] | |
56 | strip: //br[@clear='all'] | |
57 | ||
58 | # remove community functions (social bookmarks, community and newsfeed boxes, article credit) | |
59 | strip: //div[@id='spSocialBookmark'] | |
60 | strip: //div[contains(@class, 'spCommunityBox')] | |
61 | strip: //div[contains(@class, 'spArticleNewsfeedBox')] | |
62 | strip: //div[@class='spArticleCredit'] | |
63 | ||
64 | # remove clutter in photo gallery ("Fotostrecke") view: breadcrumb, latest entries, navigation, dotted separators | |
65 | strip: //div[@id='spBreadcrumb'] | |
66 | strip: //div[@id='spBigaLatestEntries'] | |
67 | strip: //div[contains(@class, 'spBigaNavi')] | |
68 | strip: //div[@class='spDottedLine'] | |
69 | ||
70 | # use the link to the print version (href contains '-druck') for single page view | |
71 | single_page_link: //a[contains(@href, '-druck')] | |
72 | ||
73 | # use the forward control (spBigaControlForw) as next page link in photo gallery ("Fotostrecke") view | |
74 | next_page_link: //a[@class='spBigaControlForw'] | |
ac4d1142 | 75 | test_url: http://www.spiegel.de/politik/deutschland/0,1518,787602,00.html |