From 76b1e0babee9137974f7ce1677259b62c3b7fb4d Mon Sep 17 00:00:00 2001
From: Marmo
Date: Tue, 21 Oct 2014 19:33:40 +0200
Subject: update zeit.de.txt for removal of inline ads

---
 inc/3rdparty/site_config/standard/zeit.de.txt | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'inc/3rdparty/site_config')

diff --git a/inc/3rdparty/site_config/standard/zeit.de.txt b/inc/3rdparty/site_config/standard/zeit.de.txt
index 9815d478..8c9c1718 100755
--- a/inc/3rdparty/site_config/standard/zeit.de.txt
+++ b/inc/3rdparty/site_config/standard/zeit.de.txt
@@ -1,3 +1,4 @@
+# 2014-10-21 [Marmo] added stripping of inline ads and appropriate test_url
 # 2013.10.30 [rezor92] fixed single_page_link
 # 2012-12-23 [carlo@...] fixed half-assed headlines in articles, removed inline author profiles, adjusted picture captions
 # 2012-03-17 [dkless@...] Cut metadata parts in the beginning and the ends of the content block; copyright entries for pictures removed; Author fixed, not sure if old entries still valid (I left them); Weird problems with some pages addressed (see last section for removing hidden section)
@@ -16,6 +17,8 @@ author: substring-after(//li[@class='source first '], 'Quelle: ')
 strip_id_or_class: articleheader
 strip: //div[@id="comments"] | //div[@class="pagination block"] | //p[@class="ressortbacklink"] | //div[@id="relatedArticles"] | // div[@class="inline portrait"]
+#Remove inline ads
+strip: //div[@class="innerad"]
 #Removes author and date from the start
 strip: //ul[@class="tools"]
@@ -43,3 +46,4 @@ strip_id_or_class:"pagination"
 footnotes: no
 test_url: http://www.zeit.de/kultur/film/2012-12/Kurzfilmtag
+test_url: http://www.zeit.de/wissen/2014-10/ebola-nigeria-who
--
cgit v1.2.3