]>
Commit | Line | Data |
---|---|---|
ac4d1142 NL |
1 | title: //h1\r |
2 | author: //p[contains(@class, 'author')]/a\r | |
3 | date: //p[contains(@class, 'time')]\r | |
4 | body: //div[@class='content']/div[contains(@class, 'text')]\r | |
5 | \r | |
6 | # prevent "no text" errors on multi-page articles\r | |
7 | tidy: no\r | |
8 | \r | |
9 | # we use a custom next-link detector instead of the print view because\r | |
10 | # it's pretty hard to strip out the unwanted parts in the print view\r | |
11 | autodetect_next_page: no\r | |
12 | next_page_link: //div[contains(@class, 'text')]/div/div[contains(@class, 'paging')]/a[@class='more ']\r | |
13 | \r | |
14 | strip: //h1\r | |
15 | \r | |
16 | strip_id_or_class: meta\r | |
17 | strip_id_or_class: author\r | |
18 | strip_id_or_class: paging\r | |
19 | \r | |
20 | # prevent "Report an Error" from being recognized as a footnote\r | |
21 | footnotes: no | |
22 | test_url: http://jetzt.sueddeutsche.de/texte/anzeigen/544308/Alles-flicken |