]>
Commit | Line | Data |
---|---|---|
ecb8c138 | 1 | # Author: zinnober |
2 | ||
3 | tidy: no | |
4 | prune: no | |
5 | ||
6 | # Set author | |
7 | author: //a[@rel='author'] | |
8 | ||
9 | # Set date | |
10 | date: //span[@class='Datum'] | |
11 | ||
12 | # Content is here | |
13 | body: //div[@class='Artikel'] | |
14 | ||
15 | # Tidy up before article | |
16 | strip: //div[@id='FAZHeaderNeu'] | |
17 | strip: //h2[@itemprop='headline'] | |
18 | strip: //span[@class='Datum'] | |
19 | strip: //span[@class='Autor'] | |
20 | strip_id_or_class: ArticlePagerTop | |
21 | strip: //div[@class='FAZArtikelEinleitung']/h2 | |
22 | ||
23 | # General cleanup | |
24 | strip: //div[@class='clear'] | |
25 | strip: //span[@class='Bildnachweis'] | |
26 | strip: //iframe | |
27 | strip_id_or_class: Community | |
28 | strip: ' · ' | |
29 | ||
30 | # Remove tracking and ads | |
31 | strip_image_src: /l.gif? | |
32 | strip: //img[@width='1'] | |
33 | strip_id_or_class: invisible | |
34 | strip_id_or_class: Anzeige | |
35 | strip_id_or_class: billboard | |
36 | ||
37 | # Remove clutter after article | |
38 | strip_id_or_class: Tagline | |
39 | strip_id_or_class: ArtikelAbbinder | |
40 | strip_id_or_class: FAZArtikelKommentare | |
41 | strip_id_or_class: ArtikelKommentieren | |
42 | strip_id_or_class: FAZContentRight | |
43 | ||
44 | # Try it yourself | |
45 | test_url: http://blogs.faz.net/wost/2014/08/17/viel-fuck-und-wenig-guter-sex-1239/ |