]>
Commit | Line | Data |
---|---|---|
4e067cea NL |
1 | # Title |
2 | title: //p[@class='Content HeadlineShort'] | |
3 | ||
4 | # Authors | |
5 | # Some authors are known and have a link; others don't | |
6 | author: substring-after(//span[@class='Autor'], 'Von') | |
7 | ||
8 | # Date | |
9 | date: //span[@class='Datum'] | |
10 | ||
11 | # Body | |
12 | body: //div[@class='Artikel'] | |
13 | ||
14 | # Removals before body text | |
15 | strip: //div[@class='Breadcrumbs'] | |
16 | strip: //div[@class='QuickSearchBox'] | |
17 | strip: //div[@class='FAZArtikelEinleitung'] | |
18 | strip: //div[@class='FAZArtikelReiter'] | |
19 | strip: //div[@class='clear'] | |
20 | ||
21 | # General removals | |
22 | strip: //span[@class='Bildnachweis'] | |
23 | strip: //img[@class='MediaIcon'] | |
24 | strip: //div[@class='ArtikelMediaLink'] | |
25 | dissolve: //a[img] | |
26 | ||
27 | # Removals after body text | |
28 | strip: //div[@class='ArtikelAbbinder'] | |
29 | strip: //div[@class='ArtikelKommentieren Artikelfuss GETS;tk;boxen.top-lesermeinungen;tp;content'] | |
30 | strip: //div[@class='FAZArtikelKommentare FAZArtikelContent'] | |
31 | strip: //div[@class='FAZArtikelFunktionen'] | |
32 | strip: //div[@id='FAZContentRight'] | |
33 | ||
34 | # Fix picture captions | |
35 | wrap_in(small): //span[@class='Bildunterschrift']/text() | |
36 | test_url: http://www.faz.net/aktuell/feuilleton/zum-tod-von-margaret-thatcher-die-reizfigur-12141919.html#Drucken |