]> git.immae.eu Git - github/wallabag/wallabag.git/blob - inc/3rdparty/site_config/standard/faz.net.txt
47048a1b2dfbcc082e9fb64a3e8b3583af94b658
[github/wallabag/wallabag.git] / inc / 3rdparty / site_config / standard / faz.net.txt
1 # Author: zinnober
2 # Complete rewrite of the faz.net template as the standard one is broken
3 # I tried to consider as many page variants as possible, which was some serious work
4
5 tidy: no
6 prune: no
7
8 # Title
9 title: //p[@class='Content HeadlineShort']
10
11 # Set author
12 author: substring-after(//span[@class='Autor'], 'von ')
13 author: //span[@class='caps last']/span[@class='caps last']
14 author: //a[@rel='author']
15
16 # Set date
17 date: //span[@class='Datum']
18 date: //span[@class='Datum']/span
19
20 # Fetch full multipage articles
21 next_page_link: //a[@title='Nächste Seite']
22
23 # Content is here
24 body: //div[@class='Artikel']
25
26 # Tidy up before article
27 strip: //div[@id='FAZHeaderNeu']
28 strip: //h2[@itemprop='headline']
29 strip: //span[@class='Datum']
30 strip: //span[@class='Autor']
31 strip_id_or_class: ArticlePagerTop
32
33 # General cleanup
34 strip: //div[@class='clear']
35 strip: //a[@title='Zur Homepage FAZ.NET']
36 strip: //iframe
37 replace_string( · ):
38
39 # Remove tracking and ads
40 strip_image_src: /l.gif?
41 strip: //div[contains(@style, 'background-image')]
42 strip: //img[@width='1']
43 strip_id_or_class: invisible
44 strip_id_or_class: Anzeige
45 strip_id_or_class: billboard
46
47 # Remove various text boxes and social media foo
48 strip_id_or_class: WeitereBeitraege
49 strip_id_or_class: WBListe
50 strip_id_or_class: AutorenModul
51 strip_id_or_class: Community
52 strip_id_or_class: SocialMediaStatus
53 strip_id_or_class: RelatedLinkBox
54 strip_id_or_class: MultimediaNavigation
55 strip_id_or_class: IndexTitel
56
57 # Fix picture captions and pictures (use better resolution and remove clutter)
58 strip_id_or_class: LightBoxOverlay
59 strip_id_or_class: exitLarge
60 strip_id_or_class: PagerBox
61 strip_id_or_class: Bildnachweis
62 strip_id_or_class: Bildueberschrift
63 strip_id_or_class: Bildbeschreibung
64 strip_id_or_class: ArtikelBild610
65 strip_id_or_class: MediaLink
66 strip_id_or_class: FotoBoxInnerLeft
67 strip_id_or_class: BilderRelatedLinks
68
69 # Remove clutter after article
70 strip_id_or_class: ArticlePagerBottom
71 strip_id_or_class: backToHome
72 strip_id_or_class: ArtikelAbbinder
73 strip_id_or_class: lesermeinungscontainer
74 strip_id_or_class: ThemenLinks
75 strip_id_or_class: rechtehinweis
76 strip_id_or_class: FAZArtikelMap
77 strip_id_or_class: FAZArtikelKommentare
78 strip_id_or_class: ArtikelKommentieren
79 strip_id_or_class: FAZArtikelFunktionen
80 strip_id_or_class: mailLB
81 strip_id_or_class: FAZContentRight
82 strip_id_or_class: stageModule
83 strip_id_or_class: ContentFooter
84 strip_id_or_class: ServicesFooter
85 strip_id_or_class: FAZFooter
86
87 # Clean up stuff present just in some articles
88 strip_id_or_class: Teaser620
89 strip_id_or_class: TeaserMultimedia
90 strip_id_or_class: VideoBox
91
92 # Remove these rules as soon as Wallabag is able to embed Flash video
93 strip_id_or_class: mmoObjectAsTeaserInArticle
94 strip_id_or_class: additionalStylesAudioVideo
95 strip_id_or_class: hideMMElements
96
97 # Try it yourself
98 test_url: http://www.faz.net/aktuell/feuilleton/zum-tod-von-margaret-thatcher-die-reizfigur-12141919.html#Drucken
99 test_url: http://www.faz.net/aktuell/politik/inland/allensbach-analyse-im-namen-des-volkes-13106492.html
100 test_url: http://www.faz.net/aktuell/feuilleton/kino/video-filmkritiken/video-filmkritik-when-animals-dream-zerrissene-jugend-13105772.html
101