# Full-Text RSS site config for spiegel.de.
# The format is line-oriented: one "directive: value" pair per line,
# and lines starting with "#" are comments.
#
# Change history:
# A. Niepel, narya.de@...
#  - added single_page_link
#  - added author for default and single page view
#  - added date for single page view
# fforst@...
#  - Fixed it
# bode2104@...
#  - Fixed single_page_link
#  - Included intro text in single page view
#  - Added body in default view
# stesie@
#  - removed copyright box
#  - removed "print more" box

# set body
# (Tidy pre-processing is switched off for this site)
tidy: no

# body in single page view
body: //div[@id="spArticleContent"]

# body in default view
body: //div[@id="spArticleSection"]
body: //div[contains(@class, 'article-section')] | //div[@id='js-article-top-wide-asset'] | //p[contains(@class, 'article-intro')] | //div[contains(@class, 'js-module-box-image')]

# body in "Fotostrecke" (photo gallery)
body: //div[@id="spBigaContent"]

# set date in single page view
date: //div[@id="spArticleContent"]/h3

# strip date
strip: //div[@id="spArticleContent"]/h3

# set date in "Fotostrecke"
date: //div[@id="spBigaDatum"]

# title in default view
title: //h2[contains(@class, 'article-title')]

# set title in single page view
title: //div[@id='spArticleContent']/h2

# strip title
strip: //div[@id='spArticleContent']/h1
strip: //div[@id='spArticleContent']/h2

# set title in "Fotostrecke"
title: //div[@class='spBigaHeadline']

# set author
# First try the linked author name; otherwise take the text after the
# German byline prefix "Von " ("By ").
author: //p[@class="spAuthor"]/a
author: substring-after(//p[@class="spAuthor"], 'Von ')

# strip author
strip: //p[@class='spAuthor']

# remove captions
strip: //*/span[@class='spPicLayerText']
strip: //*/div[@class='spPanoPlayerPaneControl']
strip: //*/div[@class='spCredit']
strip: //*/div[@class='spCredit']/following-sibling::p

# remove ads
strip: //div[@class='spMInline']

# remove photogalleries and extras
strip: //div[contains(@class, 'spPhotoGallery')]
strip: //div[@class='spPhotoGallery']/following-sibling::br
strip: //div[@class='spAssetAlignleft']
strip: //div[contains(@class,'spAsset')]
strip: //br[@clear='all']

# remove community functions
strip: //div[@id='spSocialBookmark']
strip: //div[contains(@class, 'spCommunityBox')]
strip: //div[contains(@class, 'spArticleNewsfeedBox')]
strip: //div[@class='spArticleCredit']

# remove clutter in "Fotostrecke"
strip: //div[@id='spBreadcrumb']
strip: //div[@id='spBigaLatestEntries']
strip: //div[contains(@class, 'spBigaNavi')]
strip: //div[@class='spDottedLine']
strip: //div[@class='asset-box article-print-more']
strip: //div[@class='article-copyright']
strip: //span[@class='image-buttons']

# Use link to print article for single page view,
# but only when the page shows a multi-page control.
single_page_link: //a[contains(@href, '-druck')]
if_page_contains: //div[contains(@class, 'multi-pager-control')]

# Clean up title in print view: drop the "Druckversion - " ("print version")
# prefix from the <title> element. The search string has to include the
# opening tag; replacing the bare prefix with "<title>" would leave a
# doubled "<title><title>" in the markup.
find_string: <title>Druckversion - 
replace_string: <title>

# use next link in "Fotostrecke"
next_page_link: //a[@class='spBigaControlForw']

test_url: http://www.spiegel.de/politik/deutschland/0,1518,787602,00.html
# regular article
test_url: http://www.spiegel.de/wirtschaft/soziales/griechenland-was-den-griechischen-buergern-nun-droht-a-1042682.html
# multipage article
test_url: http://www.spiegel.de/spiegel/a-710880.html