]>
Commit | Line | Data |
---|---|---|
90a1a78b NL |
1 | # Author: zinnober |
2 | # Should work with "normal" articles as well as with image galleries | |
3 | ||
4 | prune: no | |
5 | ||
6 | # Title | |
7 | title: //h1/span[@class='hcf-headline'] | |
8 | ||
9 | # Set author | |
10 | author: //a[@rel='author'] | |
11 | ||
12 | # Set date | |
13 | date: //span[@class='date hcf-atlas'] | |
14 | ||
15 | # Fetch full multipage articles | |
16 | next_page_link: //a[contains(@class, 'hcf-forward')] | |
17 | ||
18 | # Content is here | |
19 | body: //article | |
20 | body: //div[contains(@class, 'hcf-screen')] | |
21 | ||
22 | # Remove tracking and ads | |
23 | strip_id_or_class: hcf-ad | |
24 | strip_id_or_class: hcf-autoload-ad | |
25 | strip_id_or_class: hcf-content-ad | |
26 | ||
27 | # Tidy up before article (in-article headline, author and date markup) | |
28 | strip: //article/h1 | |
29 | strip_id_or_class: hcf-atlas | |
30 | strip_id_or_class: hcf-author | |
31 | strip_id_or_class: date hcf-atlas | |
32 | ||
33 | ||
34 | # General cleanup | |
35 | strip: //div[contains(@class, 'hcf-screen')]//h1 | |
36 | strip: //div[@class='hcf-subpage-titles']//ul | |
37 | strip_id_or_class: hcf-doctype-media | |
38 | strip_id_or_class: hcf-inline-gallery | |
39 | strip_id_or_class: hcf-doctype-video | |
40 | strip_id_or_class: hcf-links | |
41 | strip_id_or_class: hcf-mini-navi | |
42 | strip_id_or_class: hcf-media-control | |
43 | strip_id_or_class: hcf-hidden | |
44 | replace_string(<span class="hcf-update">Update</span>): <strong>Update: </strong> | |
45 | ||
46 | # Fix pictures and captions | |
47 | replace_string(<a class="hcf-doctype-gallery): <p class="hcf-doctype-gallery | |
48 | replace_string(<a class="hcf-doctype-enlarge): <p class="hcf-doctype-enlarge | |
49 | replace_string(<figcaption class="hcf-caption">): <br><small><em> | |
50 | replace_string(</figcaption>): </em></small> | |
51 | ||
52 | # Fix image galleries | |
53 | replace_string(<a class=" ajaxify): <p class="ajaxify | |
54 | replace_string(<div class="hcf-caption"><div><p>): <small><em> | |
55 | ||
56 | # Try it yourself | |
57 | test_url: http://www.tagesspiegel.de/berlin/bezirke/wedding/wedding-jetzt/auf-der-suche-nach-einem-stadtteil-wilder-weiter-wedding/8757156.html | |
58 | test_url: http://www.tagesspiegel.de/berlin/olympia-in-berlin-der-flughafen-tegel-soll-das-olympische-dorf-werden/10645036.html | |
59 | test_url: http://www.tagesspiegel.de/mediacenter/fotostrecken/berlin/bildergalerie-kreuzberger-der-woche/9305534.html | |
60 |