diff options
Diffstat (limited to 'inc/3rdparty/site_config/standard/handelsblatt.com.txt')
-rwxr-xr-x | inc/3rdparty/site_config/standard/handelsblatt.com.txt | 31 |
1 files changed, 31 insertions, 0 deletions
diff --git a/inc/3rdparty/site_config/standard/handelsblatt.com.txt b/inc/3rdparty/site_config/standard/handelsblatt.com.txt new file mode 100755 index 00000000..7d067aa6 --- /dev/null +++ b/inc/3rdparty/site_config/standard/handelsblatt.com.txt | |||
@@ -0,0 +1,31 @@ | |||
1 | # Single Page | ||
2 | single_page_link: //li[contains(@class,"hcf-print")]/a | ||
3 | |||
4 | # Title hcf-headline | ||
5 | title: //span[@class='hcf-headline'] | ||
6 | |||
7 | # Authors | ||
8 | author: //div[@class="hcf-author"]/a/text() | ||
9 | author: substring-after(//div[@class='hcf-author'], 'von ') | ||
10 | |||
11 | # Date | ||
12 | date: //div[@class='hcf-article-date'] | ||
13 | |||
14 | # Body | ||
15 | body: //div[@class='article'] | ||
16 | |||
17 | # General removals | ||
18 | strip: //div[contains(@class,"hcf-smartbox")] | ||
19 | strip: //div[contains(@class,"hcf-stopper")] | ||
20 | strip: //div[contains(@class,"hcf-img-controls")] | ||
21 | strip: //span[@class='hcf-location-mark'] | ||
22 | strip: //span[@class='hcf-copyright'] | ||
23 | strip: //div[@class='hcf-copyright'] | ||
24 | strip: //div[@class='hcf-origin'] | ||
25 | |||
26 | |||
27 | |||
28 | |||
29 | # Fix picture captions | ||
30 | wrap_in(small): //div[@class="hcf-caption"] | ||
31 | test_url: http://www.handelsblatt.com/meinung/gastbeitraege/gastkommentar-zum-emissionshandel-kurskorrekturen-fuehren-zum-kentern/8044326.html \ No newline at end of file | ||