aboutsummaryrefslogblamecommitdiffhomepage
path: root/inc/3rdparty/site_config/standard/faz.net.txt
blob: d087d2aa2af5748b3d61aed5e5ea415ab1ab6568 (plain) (tree)



































                                                                                                                
# Title
# Taken from the <p> element whose class attribute is exactly 'Content HeadlineShort'.
title: //p[@class='Content HeadlineShort']

# Authors
# Some authors are known and have a link, others don't.
# The expression returns the text that follows the first 'Von' in the 'Autor' span.
# NOTE(review): XPath substring-after() yields an empty string when 'Von' does not
# occur in the span - confirm every byline carries that prefix.
author: substring-after(//span[@class='Autor'], 'Von')

# Date
date: //span[@class='Datum']

# Body
# The article container; the strip rules below remove unwanted elements by class or id.
body: //div[@class='Artikel']

# Removals before the body text
strip: //div[@class='Breadcrumbs']
strip: //div[@class='QuickSearchBox']
strip: //div[@class='FAZArtikelEinleitung']
strip: //div[@class='FAZArtikelReiter']
strip: //div[@class='clear']

# General removals
strip: //span[@class='Bildnachweis']
strip: //img[@class='MediaIcon']
strip: //div[@class='ArtikelMediaLink']
# Matches every <a> that has an <img> child.
# NOTE(review): presumably "dissolve" keeps the children and drops the link wrapper - confirm against the parser.
dissolve: //a[img]

# Removals after the body text
strip: //div[@class='ArtikelAbbinder']
# The class value below is matched as one exact string, including the 'GETS;tk;...' part.
strip: //div[@class='ArtikelKommentieren Artikelfuss GETS;tk;boxen.top-lesermeinungen;tp;content']
strip: //div[@class='FAZArtikelKommentare FAZArtikelContent']
strip: //div[@class='FAZArtikelFunktionen']
strip: //div[@id='FAZContentRight']

# Fix picture captions
# Targets the text nodes directly inside each 'Bildunterschrift' span.
# NOTE(review): presumably wraps each of them in a <small> element - confirm against the parser.
wrap_in(small): //span[@class='Bildunterschrift']/text()
# Sample article URL; presumably used to check this config against a real page.
test_url: http://www.faz.net/aktuell/feuilleton/zum-tod-von-margaret-thatcher-die-reizfigur-12141919.html#Drucken