1 # A. Niepel, narya.de@...
2 # - added single_page_link
3 # - added author for default and single page view
4 # - added date for single page view
8 # - Fixed single_page_link
9 # - Included intro text in single page view
10 # - Added body in default view
12 # - removed copyright box
13 # - removed "print more" box
16 # Body candidates, one XPath per page layout. NOTE(review): presumably tried top to bottom until one matches - confirm.
17 # body in single-page view
18 body: //div[@id="spArticleContent"]
19 # body in default view: the spArticleSection container, or the union of article sections, top wide asset, intro paragraph and image boxes
20 body: //div[@id="spArticleSection"]
21 body: //div[contains(@class, 'article-section')] | //div[@id='js-article-top-wide-asset'] | //p[contains(@class, 'article-intro')] | //div[contains(@class, 'js-module-box-image')]
22 # body in "Fotostrecke" (photo gallery) view
23 body: //div[@id="spBigaContent"]
25 # date in single-page view: taken from the h3 inside the article container
26 date: //div[@id="spArticleContent"]/h3
27 # strip that same h3 (presumably so the date does not reappear in the extracted body - confirm)
28 strip: //div[@id="spArticleContent"]/h3
29 # date in "Fotostrecke" (photo gallery) view
30 date: //div[@id="spBigaDatum"]
32 # title in default view
33 title: //h2[contains(@class, 'article-title')]
34 # title in single-page view
35 title: //div[@id='spArticleContent']/h2
36 # strip the h1/h2 headings from the single-page article container
37 strip: //div[@id='spArticleContent']/h1
38 strip: //div[@id='spArticleContent']/h2
39 # title in "Fotostrecke" (photo gallery) view
40 title: //div[@class='spBigaHeadline']
42 # author: the link inside the spAuthor paragraph, or the paragraph text after 'Von ' (German for "by"). NOTE(review): presumably the first matching rule wins - confirm.
43 author: //p[@class="spAuthor"]/a
44 author: substring-after(//p[@class="spAuthor"], 'Von ')
45 # strip the author paragraph itself
46 strip: //p[@class='spAuthor']
47 # remove picture-layer text, panorama player controls and image credits
48 # The sibling rule must precede the spCredit rule: once the div is stripped, its following-sibling::p (every later <p> sibling) can no longer be selected.
49 strip: //*/span[@class='spPicLayerText']
50 strip: //*/div[@class='spPanoPlayerPaneControl']
51 strip: //*/div[@class='spCredit']/following-sibling::p
52 strip: //*/div[@class='spCredit']
54 # remove spMInline boxes
55 strip: //div[@class='spMInline']
56 # remove photo galleries and extras (asset boxes, clearing line breaks)
57 # The <br> sibling rule must precede the gallery rule: once the gallery div is stripped, its following-sibling::br can no longer be selected.
58 strip: //div[@class='spPhotoGallery']/following-sibling::br
59 strip: //div[contains(@class, 'spPhotoGallery')]
60 strip: //div[@class='spAssetAlignleft']
61 strip: //div[contains(@class,'spAsset')]
62 strip: //br[@clear='all']
64 # remove community functions (social bookmarks, community box, newsfeed box) and the article credit
65 strip: //div[@id='spSocialBookmark']
66 strip: //div[contains(@class, 'spCommunityBox')]
67 strip: //div[contains(@class, 'spArticleNewsfeedBox')]
68 strip: //div[@class='spArticleCredit']
70 # remove clutter in "Fotostrecke" (photo gallery) view: breadcrumb, latest entries, gallery navigation, dotted separator lines
71 strip: //div[@id='spBreadcrumb']
72 strip: //div[@id='spBigaLatestEntries']
73 strip: //div[contains(@class, 'spBigaNavi')]
74 strip: //div[@class='spDottedLine']
75 # remove the "print more" box, the copyright box and the image buttons
76 strip: //div[@class='asset-box article-print-more']
77 strip: //div[@class='article-copyright']
78 strip: //span[@class='image-buttons']
79 # NOTE(review): if_page_contains presumably limits the single_page_link rule to pages that have a multi-pager control - confirm.
80 # Use the link to the print version (href containing '-druck') for single-page view
81 single_page_link: //a[contains(@href, '-druck')]
82 if_page_contains: //div[contains(@class, 'multi-pager-control')]
84 # Clean up title in print view: drop the leading "Druckversion -" ("print version") from the <title> tag
85 find_string: <title>Druckversion -
86 replace_string: <title>
87 # The test_url entries below list sample pages (presumably used to check this config - confirm).
88 # use the forward-navigation link as next page in "Fotostrecke" (photo gallery) view
89 next_page_link: //a[@class='spBigaControlForw']
90 test_url: http://www.spiegel.de/politik/deutschland/0,1518,787602,00.html
93 test_url: http://www.spiegel.de/wirtschaft/soziales/griechenland-was-den-griechischen-buergern-nun-droht-a-1042682.html
96 test_url: http://www.spiegel.de/spiegel/a-710880.html