]>
Commit | Line | Data |
---|---|---|
ac4d1142 NL |
1 | # fforst@...\r |
2 | \r | |
3 | # Use link to print article for single page view\r | |
4 | single_page_link: //a[@class="print"]\r | |
5 | \r | |
6 | # set body\r | |
7 | tidy: no\r | |
8 | body: //div[@class='artikel-content']\r | |
9 | \r | |
10 | # strip title and subtitle since we already have them\r | |
11 | strip: //div[@class='issue']\r | |
12 | strip: //div[@class='artikel-content']/h2\r | |
13 | \r | |
14 | # some authors are known and have a link, others don't\r | |
15 | author: //a[contains(@href, 'autor?')]\r | |
16 | \r | |
17 | # publication date\r | |
18 | date: //span[@class='article-date']\r | |
19 | \r | |
20 | # strip the author element since the author has already been extracted\r | |
21 | strip_id_or_class: author\r | |
22 | \r | |
23 | # strip image credits and captions\r | |
24 | strip_id_or_class: field-name-field-image-credit\r | |
25 | strip_id_or_class: field-name-field-article-image-subtitle\r | |
26 | \r | |
27 | # remove community functions\r | |
28 | strip: //div[@class='meta']\r | |
29 | strip: //div[@id='comments']\r | |
30 | \r | |
31 | # remove "continue on the next page" text\r | |
32 | strip: //p[text()="[SEITE]"] | |
33 | test_url: http://www.cicero.de/weltbuehne/ihre-wut-ist-global-krise-jugend-revolten-aufstaende-zelte/43049 |