From 4e067ceabd705201a16b4c92cf4b23f3b990326c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20L=C5=93uillet?= Date: Sun, 13 Jul 2014 10:15:40 +0200 Subject: updated specific configuration for parsing --- inc/3rdparty/site_config/standard/expressen.se.txt | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) mode change 100644 => 100755 inc/3rdparty/site_config/standard/expressen.se.txt (limited to 'inc/3rdparty/site_config/standard/expressen.se.txt') diff --git a/inc/3rdparty/site_config/standard/expressen.se.txt b/inc/3rdparty/site_config/standard/expressen.se.txt old mode 100644 new mode 100755 index d0cb283e..d81d3251 --- a/inc/3rdparty/site_config/standard/expressen.se.txt +++ b/inc/3rdparty/site_config/standard/expressen.se.txt @@ -1,9 +1,10 @@ -title: //div[@id='article']/div[contains(@class, 'content')]/h1 -body: //div[@id='article']/div[contains(@class, 'content')] -date: //div[contains(@class, 'article-slot')]/descendant::div[contains(@id, 'articledates')] - -strip: //img[contains(@src, 'img/px.gif')] -prune: no -# remove Facebook banner and obtrusive ad -strip: //div[@id='article']/div[contains(@class, 'content')]/div[contains(@class, 'art-right')] -test_url: http://www.expressen.se/kultur/1.2683904/medan-natet-dras-at \ No newline at end of file +title: //h1[contains(@class, 'b-headline_article')] +body: //div[contains(@class, 'b-article_print')] + +single_page_link: //div[contains(@class, 'b-page__footer__actions')]//a[contains(@href, 'print=true')] + +prune: no + +test_url: http://www.expressen.se/kultur/1.2683904/medan-natet-dras-at +test_url: http://www.expressen.se/gt/polis-om-styckmordet-extremt-markligt-fall/ +test_url: http://www.expressen.se/Pages/OutboundFeedsPage.aspx?id=3642159&viewstyle=rss \ No newline at end of file -- cgit v1.2.3