From 4e067ceabd705201a16b4c92cf4b23f3b990326c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20L=C5=93uillet?= Date: Sun, 13 Jul 2014 10:15:40 +0200 Subject: updated specific configuration for parsing --- inc/3rdparty/site_config/standard/spiegel.de.txt | 148 +++++++++++------------ 1 file changed, 74 insertions(+), 74 deletions(-) mode change 100644 => 100755 inc/3rdparty/site_config/standard/spiegel.de.txt (limited to 'inc/3rdparty/site_config/standard/spiegel.de.txt') diff --git a/inc/3rdparty/site_config/standard/spiegel.de.txt b/inc/3rdparty/site_config/standard/spiegel.de.txt old mode 100644 new mode 100755 index 390c075c..413e0155 --- a/inc/3rdparty/site_config/standard/spiegel.de.txt +++ b/inc/3rdparty/site_config/standard/spiegel.de.txt @@ -1,75 +1,75 @@ -# A. Niepel, narya.de@... -# - added single_page_link -# - added author for default and single page view -# - added date for single page view -# fforst@... -# - Fixed it -# bode2104@... -# - Fixed single_page_link -# - Included intro text in single page view -# - Added body in default view - -# set body -tidy: no -# body in single page view -body: //div[@id="spArticleContent"] -# body in default view -body: //div[@id="spArticleSection"] -# body in "Fotostrecke" -body: //div[@id="spBigaContent"] - -# set date in single page view -date: //div[@id="spArticleContent"]/h3 -# strip date -strip: //div[@id="spArticleContent"]/h3 -# set date in "Fotostrecke" -date: //div[@id="spBigaDatum"] - -#set title in single page view -title: //div[@id='spArticleContent']/h2 -# strip title -strip: //div[@id='spArticleContent']/h1 -strip: //div[@id='spArticleContent']/h2 -#set title in "Fotostrecke" -title: //div[@class='spBigaHeadline'] - -# set author -author: //p[@class="spAuthor"]/a -author: substring-after(//p[@class="spAuthor"], 'Von ') -# strip author -strip: //p[@class='spAuthor'] - -# remove captions -strip: //*/span[@class='spPicLayerText'] -strip: //*/div[@class='spPanoPlayerPaneControl'] -strip: //*/div[@class='spCredit'] -strip: //*/div[@class='spCredit']/following-sibling::p - -# remove ads -strip: //div[@class='spMInline'] - -# remove photogalleries and extras -strip: //div[@class='spPhotoGallery'] -strip: //div[@class='spPhotoGallery']/following-sibling::br -strip: //div[@class='spAssetAlignleft'] -strip: //div[contains(@class,'spAsset')] -strip: //br[@clear='all'] - -# remove community functions -strip: //div[@id='spSocialBookmark'] -strip: //div[contains(@class, 'spCommunityBox')] -strip: //div[contains(@class, 'spArticleNewsfeedBox')] -strip: //div[@class='spArticleCredit'] - -# remove clutter in "Fotostrecke" -strip: //div[@id='spBreadcrumb'] -strip: //div[@id='spBigaLatestEntries'] -strip: //div[contains(@class, 'spBigaNavi')] -strip: //div[@class='spDottedLine'] - -# Use link to print article for single page view -single_page_link: //a[contains(@href, '-druck')] - -# use next link in "Fotostrecke" -next_page_link: //a[@class='spBigaControlForw'] +# A. Niepel, narya.de@... +# - added single_page_link +# - added author for default and single page view +# - added date for single page view +# fforst@... +# - Fixed it +# bode2104@... +# - Fixed single_page_link +# - Included intro text in single page view +# - Added body in default view + +# set body +tidy: no +# body in single page view +body: //div[@id="spArticleContent"] +# body in default view +body: //div[@id="spArticleSection"] +# body in "Fotostrecke" +body: //div[@id="spBigaContent"] + +# set date in single page view +date: //div[@id="spArticleContent"]/h3 +# strip date +strip: //div[@id="spArticleContent"]/h3 +# set date in "Fotostrecke" +date: //div[@id="spBigaDatum"] + +#set title in single page view +title: //div[@id='spArticleContent']/h2 +# strip title +strip: //div[@id='spArticleContent']/h1 +strip: //div[@id='spArticleContent']/h2 +#set title in "Fotostrecke" +title: //div[@class='spBigaHeadline'] + +# set author +author: //p[@class="spAuthor"]/a +author: substring-after(//p[@class="spAuthor"], 'Von ') +# strip author +strip: //p[@class='spAuthor'] + +# remove captions +strip: //*/span[@class='spPicLayerText'] +strip: //*/div[@class='spPanoPlayerPaneControl'] +strip: //*/div[@class='spCredit'] +strip: //*/div[@class='spCredit']/following-sibling::p + +# remove ads +strip: //div[@class='spMInline'] + +# remove photogalleries and extras +strip: //div[@class='spPhotoGallery'] +strip: //div[@class='spPhotoGallery']/following-sibling::br +strip: //div[@class='spAssetAlignleft'] +strip: //div[contains(@class,'spAsset')] +strip: //br[@clear='all'] + +# remove community functions +strip: //div[@id='spSocialBookmark'] +strip: //div[contains(@class, 'spCommunityBox')] +strip: //div[contains(@class, 'spArticleNewsfeedBox')] +strip: //div[@class='spArticleCredit'] + +# remove clutter in "Fotostrecke" +strip: //div[@id='spBreadcrumb'] +strip: //div[@id='spBigaLatestEntries'] +strip: //div[contains(@class, 'spBigaNavi')] +strip: //div[@class='spDottedLine'] + +# Use link to print article for single page view +single_page_link: //a[contains(@href, '-druck')] + +# use next link in "Fotostrecke" +next_page_link: //a[@class='spBigaControlForw'] test_url: http://www.spiegel.de/politik/deutschland/0,1518,787602,00.html \ No newline at end of file -- cgit v1.2.3