diff options
Diffstat (limited to 'inc/3rdparty/site_config/standard/tagesspiegel.de.txt')
-rwxr-xr-x | inc/3rdparty/site_config/standard/tagesspiegel.de.txt | 60 |
1 files changed, 60 insertions, 0 deletions
diff --git a/inc/3rdparty/site_config/standard/tagesspiegel.de.txt b/inc/3rdparty/site_config/standard/tagesspiegel.de.txt new file mode 100755 index 00000000..57e7d3df --- /dev/null +++ b/inc/3rdparty/site_config/standard/tagesspiegel.de.txt | |||
@@ -0,0 +1,60 @@ | |||
1 | # Author: zinnober | ||
2 | # Should work with "normal" articles as well as with image galleries | ||
3 | |||
4 | prune: no | ||
5 | |||
6 | # Title | ||
7 | title: //h1/span[@class='hcf-headline'] | ||
8 | |||
9 | # Set author | ||
10 | author: //a[@rel='author'] | ||
11 | |||
12 | # Set date | ||
13 | date: //span[@class='date hcf-atlas'] | ||
14 | |||
15 | # Fetch full multipage articles | ||
16 | next_page_link: //a[contains(@class, 'hcf-forward')] | ||
17 | |||
18 | # Article content container (body rules are tried in order until one matches) | ||
19 | body: //article | ||
20 | body: //div[contains(@class, 'hcf-screen')] | ||
21 | |||
22 | # Remove tracking and ads | ||
23 | strip_id_or_class: hcf-ad | ||
24 | strip_id_or_class: hcf-autoload-ad | ||
25 | strip_id_or_class: hcf-content-ad | ||
26 | |||
27 | # Remove the headline, author and date elements that precede the article text | ||
28 | strip: //article/h1 | ||
29 | strip_id_or_class: hcf-atlas | ||
30 | strip_id_or_class: hcf-author | ||
31 | strip_id_or_class: date hcf-atlas | ||
32 | strip_id_or_class: date hcf-atlas | ||
33 | |||
34 | # General cleanup | ||
35 | strip: //div[contains(@class, 'hcf-screen')]//h1 | ||
36 | strip: //div[@class='hcf-subpage-titles']//ul | ||
37 | strip_id_or_class: hcf-doctype-media | ||
38 | strip_id_or_class: hcf-inline-gallery | ||
39 | strip_id_or_class: hcf-doctype-video | ||
40 | strip_id_or_class: hcf-links | ||
41 | strip_id_or_class: hcf-mini-navi | ||
42 | strip_id_or_class: hcf-media-control | ||
43 | strip_id_or_class: hcf-hidden | ||
44 | replace_string(<span class="hcf-update">Update</span>): <strong>Update: </strong> | ||
45 | |||
46 | # Fix pictures and captions | ||
47 | replace_string(<a class="hcf-doctype-gallery): <p class="hcf-doctype-gallery | ||
48 | replace_string(<a class="hcf-doctype-enlarge): <p class="hcf-doctype-enlarge | ||
49 | replace_string(<figcaption class="hcf-caption">): <br><small><em> | ||
50 | replace_string(</figcaption>): </em></small> | ||
51 | |||
52 | # Fix image galleries | ||
53 | replace_string(<a class=" ajaxify): <p class="ajaxify | ||
54 | replace_string(<div class="hcf-caption"><div><p>): <small><em> | ||
55 | |||
56 | # Test URLs: regular articles and an image gallery | ||
57 | test_url: http://www.tagesspiegel.de/berlin/bezirke/wedding/wedding-jetzt/auf-der-suche-nach-einem-stadtteil-wilder-weiter-wedding/8757156.html | ||
58 | test_url: http://www.tagesspiegel.de/berlin/olympia-in-berlin-der-flughafen-tegel-soll-das-olympische-dorf-werden/10645036.html | ||
59 | test_url: http://www.tagesspiegel.de/mediacenter/fotostrecken/berlin/bildergalerie-kreuzberger-der-woche/9305534.html | ||
60 | |||