diff options
Diffstat (limited to 'inc/3rdparty/site_config/standard/domusweb.it.txt')
-rw-r--r-- | inc/3rdparty/site_config/standard/domusweb.it.txt | 21 |
1 files changed, 21 insertions, 0 deletions
diff --git a/inc/3rdparty/site_config/standard/domusweb.it.txt b/inc/3rdparty/site_config/standard/domusweb.it.txt new file mode 100644 index 00000000..81683f02 --- /dev/null +++ b/inc/3rdparty/site_config/standard/domusweb.it.txt | |||
@@ -0,0 +1,21 @@ | |||
1 | # TODO: clean up the extra junk at the end of articles | ||
2 | |||
3 | # general text formatting | ||
4 | prune: no | ||
5 | convert_double_br_tags:yes | ||
6 | |||
7 | # where to find the basic metadata | ||
8 | author://a[@class='articleauthor'] | ||
9 | date://a[starts-with(@href,'/en/search/published/')] | ||
10 | title:substring-before(//h2[@class='title'],'—') | ||
11 | body://div[@id='maincontainer'] | ||
12 | |||
13 | dissolve://div[starts-with(@id,'commentableblock')] | ||
14 | |||
15 | # strip unwanted page elements | ||
16 | strip://div[contains(@class,'domusnetwork')] | ||
17 | strip://div[contains(@class,'relative_wrapper')] | ||
18 | |||
19 | strip://div[contains(@class,'captionsubimage')]/img[contains(@class,'arrow')] | ||
20 | wrap_in(em): //div[contains(@class,'captionsubimage')]/span | ||
21 | test_url: http://www.domusweb.it/en/design/in-praise-of-lost-time/ \ No newline at end of file | ||