aboutsummaryrefslogtreecommitdiffhomepage
path: root/inc/3rdparty/site_config/standard/faz.net.txt
diff options
context:
space:
mode:
Diffstat (limited to 'inc/3rdparty/site_config/standard/faz.net.txt')
-rw-r--r--inc/3rdparty/site_config/standard/faz.net.txt131
1 files changed, 101 insertions, 30 deletions
diff --git a/inc/3rdparty/site_config/standard/faz.net.txt b/inc/3rdparty/site_config/standard/faz.net.txt
index 4fe5968b..47048a1b 100644
--- a/inc/3rdparty/site_config/standard/faz.net.txt
+++ b/inc/3rdparty/site_config/standard/faz.net.txt
@@ -1,30 +1,101 @@
1# Title 1# Author: zinnober
2title: //p[@class='Content HeadlineShort'] 2# Complete rewrite of the faz.net template as the standard one is broken
3 3# I tried to consider as many page variants as possible, which was some serious work
4# Authors 4
5# some are known and have a link, others don't 5tidy: no
6author: substring-after(//span[@class='Autor'], 'Von') 6prune: no
7 7
8# Date 8# Title
9date: //span[@class='Datum'] 9title: //p[@class='Content HeadlineShort']
10 10
11# Body 11# Set author
12body: //div[@class='Artikel'] 12author: substring-after(//span[@class='Autor'], 'von ')
13 13author: //span[@class='caps last']/span[@class='caps last']
14# Removals before body text 14author: //a[@rel='author']
15strip: //div[@class='Breadcrumbs'] 15
16strip: //div[@class='QuickSearchBox'] 16# Set date
17strip: //div[@class='FAZArtikelEinleitung'] 17date: //span[@class='Datum']
18strip: //div[@class='FAZArtikelReiter'] 18date: //span[@class='Datum'],/span
19strip: //div[@class='clear'] 19
20 20# Fetch full multipage articles
21# General removals 21next_page_link: //a[@title='Nächste Seite']
22strip: //span[@class='Bildnachweis'] 22
23 23# Content is here
24# Removals after body text 24body: //div[@class='Artikel']
25strip: //div[@class='ArtikelAbbinder'] 25
26strip: //div[@class='ArtikelKommentieren Artikelfuss GETS;tk;boxen.top-lesermeinungen;tp;content'] 26# Tidy up before article
27strip: //div[@class='FAZArtikelKommentare FAZArtikelContent'] 27strip: //div[@id='FAZHeaderNeu']
28strip: //div[@class='FAZArtikelFunktionen'] 28strip: //h2[@itemprop='headline']
29strip: //div[@id='FAZContentRight'] 29strip: //span[@class='Datum']
30test_url: http://www.faz.net/aktuell/gesellschaft/ehe-haltbarkeitsformel-verliebe-dich-oft-verlobe-dich-selten-heirate-vielleicht-11685306.html \ No newline at end of file 30strip: //span[@class='Autor']
31strip_id_or_class: ArticlePagerTop
32
33# General cleanup
34strip: //div[@class='clear']
35strip: //a[@title='Zur Homepage FAZ.NET']
36strip: //iframe
37replace_string( · ):
38
39# Remove tracking and ads
40strip_image_src: /l.gif?
41strip: //div[contains(@style, 'background-image')]
42strip: //img[@width='1']
43strip_id_or_class: invisible
44strip_id_or_class: Anzeige
45strip_id_or_class: billboard
46
47# Remove various text boxes and social media foo
48strip_id_or_class: WeitereBeitraege
49strip_id_or_class: WBListe
50strip_id_or_class: AutorenModul
51strip_id_or_class: Community
52strip_id_or_class: SocialMediaStatus
53strip_id_or_class: RelatedLinkBox
54strip_id_or_class: MultimediaNavigation
55strip_id_or_class: IndexTitel
56
57# Fix picture caps and pictures (use better resolution and remove clutter)
58strip_id_or_class: LightBoxOverlay
59strip_id_or_class: exitLarge
60strip_id_or_class: PagerBox
61strip_id_or_class: Bildnachweis
62strip_id_or_class: Bildueberschrift
63strip_id_or_class: Bildbeschreibung
64strip_id_or_class: ArtikelBild610
65strip_id_or_class: MediaLink
66strip_id_or_class: FotoBoxInnerLeft
67strip_id_or_class: BilderRelatedLinks
68
69# Remove clutter after article
70strip_id_or_class: ArticlePagerBottom
71strip_id_or_class: backToHome
72strip_id_or_class: ArtikelAbbinder
73strip_id_or_class: lesermeinungscontainer
74strip_id_or_class: ThemenLinks
75strip_id_or_class: rechtehinweis
76strip_id_or_class: FAZArtikelMap
77strip_id_or_class: FAZArtikelKommentare
78strip_id_or_class: ArtikelKommentieren
79strip_id_or_class: FAZArtikelFunktionen
80strip_id_or_class: mailLB
81strip_id_or_class: FAZContentRight
82strip_id_or_class: stageModule
83strip_id_or_class: ContentFooter
84strip_id_or_class: ServicesFooter
85strip_id_or_class: FAZFooter
86
87# Clean up stuff present just in some articles
88strip_id_or_class: Teaser620
89strip_id_or_class: TeaserMultimedia
90strip_id_or_class: VideoBox
91
92# Remove as soon as Wallabag might be able to embed flash video
93strip_id_or_class: mmoObjectAsTeaserInArticle
94strip_id_or_class: additionalStylesAudioVideo
95strip_id_or_class: hideMMElements
96
97# Try it yourself
98test_url: http://www.faz.net/aktuell/feuilleton/zum-tod-von-margaret-thatcher-die-reizfigur-12141919.html#Drucken
99test_url: http://www.faz.net/aktuell/politik/inland/allensbach-analyse-im-namen-des-volkes-13106492.html
100test_url: http://www.faz.net/aktuell/feuilleton/kino/video-filmkritiken/video-filmkritik-when-animals-dream-zerrissene-jugend-13105772.html
101