From 90a1a78b1e2f4d40e1d9b8e6f46aca129a9d7bcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20L=C5=93uillet?= Date: Mon, 27 Oct 2014 06:46:13 +0100 Subject: updated site_config --- .../site_config/standard/blogs.faz.net.txt | 45 ++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100755 inc/3rdparty/site_config/standard/blogs.faz.net.txt (limited to 'inc/3rdparty/site_config/standard/blogs.faz.net.txt') diff --git a/inc/3rdparty/site_config/standard/blogs.faz.net.txt b/inc/3rdparty/site_config/standard/blogs.faz.net.txt new file mode 100755 index 00000000..4f2626f1 --- /dev/null +++ b/inc/3rdparty/site_config/standard/blogs.faz.net.txt @@ -0,0 +1,45 @@ +# Author: zinnober + +tidy: no +prune: no + +# Set author +author: //a[@rel='author'] + +# Set date +date: //span[@class='Datum'] + +# Content is here +body: //div[@class='Artikel'] + +# Tidy up before article +strip: //div[@id='FAZHeaderNeu'] +strip: //h2[@itemprop='headline'] +strip: //span[@class='Datum'] +strip: //span[@class='Autor'] +strip_id_or_class: ArticlePagerTop +strip: //div[@class='FAZArtikelEinleitung']/h2 + +# General cleanup +strip: //div[@class='clear'] +strip: //span[@class='Bildnachweis'] +strip: //iframe +strip_id_or_class: Community +strip: ' · ' + +# Remove tracking and ads +strip_image_src: /l.gif? +strip: //img[@width='1'] +strip_id_or_class: invisible +strip_id_or_class: Anzeige +strip_id_or_class: billboard + +# Remove clutter after article +strip_id_or_class: Tagline +strip_id_or_class: ArtikelAbbinder +strip_id_or_class: FAZArtikelKommentare +strip_id_or_class: ArtikelKommentieren +strip_id_or_class: FAZContentRight + +# Try it yourself +test_url: http://blogs.faz.net/wost/2014/08/17/viel-fuck-und-wenig-guter-sex-1239/ -- cgit v1.2.3