]>
Commit | Line | Data |
---|---|---|
4e067cea NL |
1 | title: //h1 |
2 | author: //p[contains(@class, 'author')]/a | |
3 | date: //p[contains(@class, 'time')] | |
4 | body: //div[@class='content']/div[contains(@class, 'text')] | |
5 | ||
6 | # prevent "no text" errors on multi-page articles | |
7 | tidy: no | |
8 | ||
9 | # we use a custom next-link detector instead of the print view because | |
10 | # it's pretty hard to strip out the unwanted parts in the print view | |
11 | autodetect_next_page: no | |
12 | next_page_link: //div[contains(@class, 'text')]/div/div[contains(@class, 'paging')]/a[@class='more '] | |
13 | ||
14 | strip: //h1 | |
15 | ||
16 | strip_id_or_class: meta | |
17 | strip_id_or_class: author | |
18 | strip_id_or_class: paging | |
19 | ||
20 | # prevent "Report an Error" from being recognized as a footnote | |
ac4d1142 NL |
21 | footnotes: no |
22 | test_url: http://jetzt.sueddeutsche.de/texte/anzeigen/544308/Alles-flicken |