From 90a1a78b1e2f4d40e1d9b8e6f46aca129a9d7bcf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolas=20L=C5=93uillet?=
Date: Mon, 27 Oct 2014 06:46:13 +0100
Subject: updated site_config

---
 .../site_config/standard/tagesspiegel.de.txt | 60 ++++++++++++++++++++++
 1 file changed, 60 insertions(+)
 create mode 100755 inc/3rdparty/site_config/standard/tagesspiegel.de.txt

(limited to 'inc/3rdparty/site_config/standard/tagesspiegel.de.txt')

diff --git a/inc/3rdparty/site_config/standard/tagesspiegel.de.txt b/inc/3rdparty/site_config/standard/tagesspiegel.de.txt
new file mode 100755
index 00000000..57e7d3df
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/tagesspiegel.de.txt
@@ -0,0 +1,60 @@
+# Author: zinnober
+# Should work with "normal" articles as well as with image galleries
+
+prune: no
+
+# Title
+title: //h1/span[@class='hcf-headline']
+
+# Set author
+author: //a[@rel='author']
+
+# Set date
+date: //span[@class='date hcf-atlas']
+
+# Fetch full multipage articles
+next_page_link: //a[contains(@class, 'hcf-forward')]
+
+# Content is here
+body: //article
+body: //div[contains(@class, 'hcf-screen')]
+
+# Remove tracking and ads
+strip_id_or_class: hcf-ad
+strip_id_or_class: hcf-autoload-ad
+strip_id_or_class: hcf-content-ad
+
+# Tidy up before article
+strip: //article/h1
+strip_id_or_class: hcf-atlas
+strip_id_or_class: hcf-author
+strip_id_or_class: date hcf-atlas
+strip_id_or_class: date hcf-atlas
+
+# General cleanup
+strip: //div[contains(@class, 'hcf-screen')]//h1
+strip: //div[@class='hcf-subpage-titles']//ul
+strip_id_or_class: hcf-doctype-media
+strip_id_or_class: hcf-inline-gallery
+strip_id_or_class: hcf-doctype-video
+strip_id_or_class: hcf-links
+strip_id_or_class: hcf-mini-navi
+strip_id_or_class: hcf-media-control
+strip_id_or_class: hcf-hidden
+replace_string(Update): Update:
+
+# Fix pictures and captions
+replace_string():
+replace_string():
+
+# Fix image galleries
+replace_string(

):
+
+# Try it yourself
+test_url: http://www.tagesspiegel.de/berlin/bezirke/wedding/wedding-jetzt/auf-der-suche-nach-einem-stadtteil-wilder-weiter-wedding/8757156.html
+test_url: http://www.tagesspiegel.de/berlin/olympia-in-berlin-der-flughafen-tegel-soll-das-olympische-dorf-werden/10645036.html
+test_url: http://www.tagesspiegel.de/mediacenter/fotostrecken/berlin/bildergalerie-kreuzberger-der-woche/9305534.html
+
-- 
cgit v1.2.3