diff options
Diffstat (limited to 'inc/3rdparty/site_config/standard/faz.net.txt')
-rwxr-xr-x[-rw-r--r--] | inc/3rdparty/site_config/standard/faz.net.txt | 66 |
1 files changed, 36 insertions, 30 deletions
diff --git a/inc/3rdparty/site_config/standard/faz.net.txt b/inc/3rdparty/site_config/standard/faz.net.txt index 4fe5968b..d087d2aa 100644..100755 --- a/inc/3rdparty/site_config/standard/faz.net.txt +++ b/inc/3rdparty/site_config/standard/faz.net.txt | |||
@@ -1,30 +1,36 @@ | |||
1 | # Title | 1 | # Title |
2 | title: //p[@class='Content HeadlineShort'] | 2 | title: //p[@class='Content HeadlineShort'] |
3 | 3 | ||
4 | # Authors | 4 | # Authors |
5 | # some are known and have a link, others don't | 5 | # some are known and have a link, others don't |
6 | author: substring-after(//span[@class='Autor'], 'Von') | 6 | author: substring-after(//span[@class='Autor'], 'Von') |
7 | 7 | ||
8 | # Date | 8 | # Date |
9 | date: //span[@class='Datum'] | 9 | date: //span[@class='Datum'] |
10 | 10 | ||
11 | # Body | 11 | # Body |
12 | body: //div[@class='Artikel'] | 12 | body: //div[@class='Artikel'] |
13 | 13 | ||
14 | # Removals before body text | 14 | # Removals before body text |
15 | strip: //div[@class='Breadcrumbs'] | 15 | strip: //div[@class='Breadcrumbs'] |
16 | strip: //div[@class='QuickSearchBox'] | 16 | strip: //div[@class='QuickSearchBox'] |
17 | strip: //div[@class='FAZArtikelEinleitung'] | 17 | strip: //div[@class='FAZArtikelEinleitung'] |
18 | strip: //div[@class='FAZArtikelReiter'] | 18 | strip: //div[@class='FAZArtikelReiter'] |
19 | strip: //div[@class='clear'] | 19 | strip: //div[@class='clear'] |
20 | 20 | ||
21 | # General removals | 21 | # General removals |
22 | strip: //span[@class='Bildnachweis'] | 22 | strip: //span[@class='Bildnachweis'] |
23 | 23 | strip: //img[@class='MediaIcon'] | |
24 | # Removals after body text | 24 | strip: //div[@class='ArtikelMediaLink'] |
25 | strip: //div[@class='ArtikelAbbinder'] | 25 | dissolve: //a[img] |
26 | strip: //div[@class='ArtikelKommentieren Artikelfuss GETS;tk;boxen.top-lesermeinungen;tp;content'] | 26 | |
27 | strip: //div[@class='FAZArtikelKommentare FAZArtikelContent'] | 27 | # Removals after body text |
28 | strip: //div[@class='FAZArtikelFunktionen'] | 28 | strip: //div[@class='ArtikelAbbinder'] |
29 | strip: //div[@id='FAZContentRight'] | 29 | strip: //div[@class='ArtikelKommentieren Artikelfuss GETS;tk;boxen.top-lesermeinungen;tp;content'] |
30 | test_url: http://www.faz.net/aktuell/gesellschaft/ehe-haltbarkeitsformel-verliebe-dich-oft-verlobe-dich-selten-heirate-vielleicht-11685306.html \ No newline at end of file | 30 | strip: //div[@class='FAZArtikelKommentare FAZArtikelContent'] |
31 | strip: //div[@class='FAZArtikelFunktionen'] | ||
32 | strip: //div[@id='FAZContentRight'] | ||
33 | |||
34 | # Fix picture captions | ||
35 | wrap_in(small): //span[@class='Bildunterschrift']/text() | ||
36 | test_url: http://www.faz.net/aktuell/feuilleton/zum-tod-von-margaret-thatcher-die-reizfigur-12141919.html#Drucken \ No newline at end of file | ||