From: Marmo Date: Tue, 21 Oct 2014 17:33:40 +0000 (+0200) Subject: update zeit.de.txt for removal of inline ads X-Git-Tag: 1.8.1^2~26^2 X-Git-Url: https://git.immae.eu/?a=commitdiff_plain;h=refs%2Fpull%2F879%2Fhead;p=github%2Fwallabag%2Fwallabag.git update zeit.de.txt for removal of inline ads --- diff --git a/inc/3rdparty/site_config/standard/zeit.de.txt b/inc/3rdparty/site_config/standard/zeit.de.txt index 9815d478..8c9c1718 100755 --- a/inc/3rdparty/site_config/standard/zeit.de.txt +++ b/inc/3rdparty/site_config/standard/zeit.de.txt @@ -1,3 +1,4 @@ +# 2014-10-21 [Marmo] added stripping of inline ads and appropriate test_url # 2013.10.30 [rezor92] fixed single_page_link # 2012-12-23 [carlo@...] fixed half-assed headlines in articles, removed inline author profiles, adjusted picture captions # 2012-03-17 [dkless@...] Cut metadata parts in the beginning and the ends of the content block; copyright entries for pictures removed; Author fixed, not sure if old entries still valid (I left them); Weird problems with some pages addressed (see last section for removing hidden section) @@ -16,6 +17,8 @@ author: substring-after(//li[@class='source first '], 'Quelle: ') strip_id_or_class: articleheader strip: //div[@id="comments"] | //div[@class="pagination block"] | //p[@class="ressortbacklink"] | //div[@id="relatedArticles"] | // div[@class="inline portrait"] +#Remove inline ads +strip: //div[@class="innerad"] #Removes author and date from the start strip: //ul[@class="tools"] @@ -43,3 +46,4 @@ strip_id_or_class:"pagination" footnotes: no test_url: http://www.zeit.de/kultur/film/2012-12/Kurzfilmtag +test_url: http://www.zeit.de/wissen/2014-10/ebola-nigeria-who