From 4e067ceabd705201a16b4c92cf4b23f3b990326c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20L=C5=93uillet?= Date: Sun, 13 Jul 2014 10:15:40 +0200 Subject: updated specific configuration for parsing --- inc/3rdparty/site_config/standard/falter.at.txt | 32 +++++++++++-------------- 1 file changed, 14 insertions(+), 18 deletions(-) mode change 100644 => 100755 inc/3rdparty/site_config/standard/falter.at.txt (limited to 'inc/3rdparty/site_config/standard/falter.at.txt') diff --git a/inc/3rdparty/site_config/standard/falter.at.txt b/inc/3rdparty/site_config/standard/falter.at.txt old mode 100644 new mode 100755 index b941b740..2bfcc9b4 --- a/inc/3rdparty/site_config/standard/falter.at.txt +++ b/inc/3rdparty/site_config/standard/falter.at.txt @@ -1,18 +1,14 @@ -title: //h2[@class='related relatedTitle'] -author: //a[contains(@href, 'liste.php?author_id')] - -# can't think of a better way unfortunately, really bad markup on this site -date: substring-after(//td[@style='width:85%;'], 'vom') - -# not sure why, but instapaper seems to suck up the teaser paragraph -# not solved! -body: //div[contains(@class, 'teaser')] -body: //div[@id='content'] - -# cleanup -strip: //img[@src='http://www.falter.at/web/_pics/falterlogo_dblau.gif'] -strip: //div[@class='servicebox'] -strip: //h1 -strip: //br -strip: //td[@id='adcol'] -test_url: http://www.falter.at/web/print/detail.php?id=1634 \ No newline at end of file +title: //h1 +author: //a[contains(@href, '/kategorie/autoren')] +date: //a[contains(@href, '/falter/ausgabe')] +body: //article[@class='spanMain'] + +# cleanup +strip_id_or_class: 'respond' +strip: //img[@src='http://www.falter.at/web/_pics/falterlogo_dblau.gif'] +strip_id_or_class: 'meta' +strip_id_or_class: 'servicebox' +strip_id_or_class: 'related' +strip_id_or_class: 'twitter-share-button' +strip: //br +test_url: http://www.falter.at/falter/2013/03/26/der-dandy-auf-der-sinkenden-galeere/ \ No newline at end of file -- cgit v1.2.3