diff options
author | Nicolas Lœuillet <nicolas@loeuillet.org> | 2014-07-13 10:15:40 +0200 |
---|---|---|
committer | Nicolas Lœuillet <nicolas@loeuillet.org> | 2014-07-13 10:15:40 +0200 |
commit | 4e067ceabd705201a16b4c92cf4b23f3b990326c (patch) | |
tree | 939f3a8e5ff3ab9ee414a57a895d3e78e1d46ce3 /inc/3rdparty/site_config/standard/faz.net.txt | |
parent | 58dbe103889148def78b0fc8744d3f94c56a1561 (diff) | |
download | wallabag-4e067ceabd705201a16b4c92cf4b23f3b990326c.tar.gz wallabag-4e067ceabd705201a16b4c92cf4b23f3b990326c.tar.zst wallabag-4e067ceabd705201a16b4c92cf4b23f3b990326c.zip |
updated specific configuration for parsing
Diffstat (limited to 'inc/3rdparty/site_config/standard/faz.net.txt')
-rwxr-xr-x[-rw-r--r--] | inc/3rdparty/site_config/standard/faz.net.txt | 66 |
1 files changed, 36 insertions, 30 deletions
diff --git a/inc/3rdparty/site_config/standard/faz.net.txt b/inc/3rdparty/site_config/standard/faz.net.txt index 4fe5968b..d087d2aa 100644..100755 --- a/inc/3rdparty/site_config/standard/faz.net.txt +++ b/inc/3rdparty/site_config/standard/faz.net.txt | |||
@@ -1,30 +1,36 @@ | |||
1 | # Title | 1 | # Title |
2 | title: //p[@class='Content HeadlineShort'] | 2 | title: //p[@class='Content HeadlineShort'] |
3 | 3 | ||
4 | # Authors | 4 | # Authors |
5 | # some are known and have a link, others don't | 5 | # some are known and have a link, others don't |
6 | author: substring-after(//span[@class='Autor'], 'Von') | 6 | author: substring-after(//span[@class='Autor'], 'Von') |
7 | 7 | ||
8 | # Date | 8 | # Date |
9 | date: //span[@class='Datum'] | 9 | date: //span[@class='Datum'] |
10 | 10 | ||
11 | # Body | 11 | # Body |
12 | body: //div[@class='Artikel'] | 12 | body: //div[@class='Artikel'] |
13 | 13 | ||
14 | # Removements before body text | 14 | # Removements before body text |
15 | strip: //div[@class='Breadcrumbs'] | 15 | strip: //div[@class='Breadcrumbs'] |
16 | strip: //div[@class='QuickSearchBox'] | 16 | strip: //div[@class='QuickSearchBox'] |
17 | strip: //div[@class='FAZArtikelEinleitung'] | 17 | strip: //div[@class='FAZArtikelEinleitung'] |
18 | strip: //div[@class='FAZArtikelReiter'] | 18 | strip: //div[@class='FAZArtikelReiter'] |
19 | strip: //div[@class='clear'] | 19 | strip: //div[@class='clear'] |
20 | 20 | ||
21 | # General removements | 21 | # General removements |
22 | strip: //span[@class='Bildnachweis'] | 22 | strip: //span[@class='Bildnachweis'] |
23 | 23 | strip: //img[@class='MediaIcon'] | |
24 | # Removements after body text | 24 | strip: //div[@class='ArtikelMediaLink'] |
25 | strip: //div[@class='ArtikelAbbinder'] | 25 | dissolve: //a[img] |
26 | strip: //div[@class='ArtikelKommentieren Artikelfuss GETS;tk;boxen.top-lesermeinungen;tp;content'] | 26 | |
27 | strip: //div[@class='FAZArtikelKommentare FAZArtikelContent'] | 27 | # Removements after body text |
28 | strip: //div[@class='FAZArtikelFunktionen'] | 28 | strip: //div[@class='ArtikelAbbinder'] |
29 | strip: //div[@id='FAZContentRight'] | 29 | strip: //div[@class='ArtikelKommentieren Artikelfuss GETS;tk;boxen.top-lesermeinungen;tp;content'] |
30 | test_url: http://www.faz.net/aktuell/gesellschaft/ehe-haltbarkeitsformel-verliebe-dich-oft-verlobe-dich-selten-heirate-vielleicht-11685306.html \ No newline at end of file | 30 | strip: //div[@class='FAZArtikelKommentare FAZArtikelContent'] |
31 | strip: //div[@class='FAZArtikelFunktionen'] | ||
32 | strip: //div[@id='FAZContentRight'] | ||
33 | |||
34 | # Fix picture captions | ||
35 | wrap_in(small): //span[@class='Bildunterschrift']/text() | ||
36 | test_url: http://www.faz.net/aktuell/feuilleton/zum-tod-von-margaret-thatcher-die-reizfigur-12141919.html#Drucken \ No newline at end of file | ||