]>
Commit | Line | Data |
---|---|---|
ac4d1142 NL |
1 | # A. Niepel, narya.de@...\r |
2 | # - added single_page_link\r | |
3 | # - added author for default and single page view\r | |
4 | # - added date for single page view\r | |
5 | # fforst@...\r | |
6 | # - Fixed it\r | |
7 | # bode2104@...\r | |
8 | # - Fixed single_page_link\r | |
9 | # - Included intro text in single page view\r | |
10 | # - Added body in default view\r | |
11 | \r | |
12 | # set body\r | |
13 | tidy: no\r | |
14 | # body in single page view\r | |
15 | body: //div[@id="spArticleContent"]\r | |
16 | # body in default view\r | |
17 | body: //div[@id="spArticleSection"]\r | |
18 | # body in "Fotostrecke"\r | |
19 | body: //div[@id="spBigaContent"]\r | |
20 | \r | |
21 | # set date in single page view\r | |
22 | date: //div[@id="spArticleContent"]/h3\r | |
23 | # strip date\r | |
24 | strip: //div[@id="spArticleContent"]/h3\r | |
25 | # set date in "Fotostrecke"\r | |
26 | date: //div[@id="spBigaDatum"]\r | |
27 | \r | |
28 | # set title in single page view\r | |
29 | title: //div[@id='spArticleContent']/h2\r | |
30 | # strip title\r | |
31 | strip: //div[@id='spArticleContent']/h1\r | |
32 | strip: //div[@id='spArticleContent']/h2\r | |
33 | # set title in "Fotostrecke"\r | |
34 | title: //div[@class='spBigaHeadline']\r | |
35 | \r | |
36 | # set author\r | |
37 | author: //p[@class="spAuthor"]/a\r | |
38 | author: substring-after(//p[@class="spAuthor"], 'Von ')\r | |
39 | # strip author\r | |
40 | strip: //p[@class='spAuthor']\r | |
41 | \r | |
42 | # remove captions\r | |
43 | strip: //*/span[@class='spPicLayerText']\r | |
44 | strip: //*/div[@class='spPanoPlayerPaneControl']\r | |
45 | strip: //*/div[@class='spCredit']\r | |
46 | strip: //*/div[@class='spCredit']/following-sibling::p\r | |
47 | \r | |
48 | # remove ads\r | |
49 | strip: //div[@class='spMInline']\r | |
50 | \r | |
51 | # remove photogalleries and extras\r | |
52 | strip: //div[@class='spPhotoGallery']\r | |
53 | strip: //div[@class='spPhotoGallery']/following-sibling::br\r | |
54 | strip: //div[@class='spAssetAlignleft']\r | |
55 | strip: //div[contains(@class,'spAsset')]\r | |
56 | strip: //br[@clear='all']\r | |
57 | \r | |
58 | # remove community functions\r | |
59 | strip: //div[@id='spSocialBookmark']\r | |
60 | strip: //div[contains(@class, 'spCommunityBox')]\r | |
61 | strip: //div[contains(@class, 'spArticleNewsfeedBox')]\r | |
62 | strip: //div[@class='spArticleCredit']\r | |
63 | \r | |
64 | # remove clutter in "Fotostrecke"\r | |
65 | strip: //div[@id='spBreadcrumb']\r | |
66 | strip: //div[@id='spBigaLatestEntries']\r | |
67 | strip: //div[contains(@class, 'spBigaNavi')]\r | |
68 | strip: //div[@class='spDottedLine']\r | |
69 | \r | |
70 | # use the link to the print version of the article for single page view\r | |
71 | single_page_link: //a[contains(@href, '-druck')]\r | |
72 | \r | |
73 | # use next link in "Fotostrecke"\r | |
74 | next_page_link: //a[@class='spBigaControlForw']\r | |
75 | test_url: http://www.spiegel.de/politik/deutschland/0,1518,787602,00.html |