]>
Commit | Line | Data |
---|---|---|
4e067cea NL |
1 | # TODO: clean up the extra junk at the end of articles |
2 | ||
3 | # general text formatting | |
4 | prune: no | |
5 | convert_double_br_tags:yes | |
6 | ||
7 | # where to find the basic metadata | |
8 | author://a[@class='articleauthor'] | |
9 | date://a[starts-with(@href,'/en/search/published/')] | |
10 | title:substring-before(//h2[@class='title'],'—') | |
11 | body://div[@id='maincontainer'] | |
12 | ||
13 | dissolve://div[starts-with(@id,'commentableblock')] | |
14 | ||
15 | # remove unwanted page elements | |
16 | strip://div[contains(@class,'domusnetwork')] | |
17 | strip://div[contains(@class,'relative_wrapper')] | |
18 | ||
19 | strip://div[contains(@class,'captionsubimage')]/img[contains(@class,'arrow')] | |
ac4d1142 NL |
20 | wrap_in(em): //div[contains(@class,'captionsubimage')]/span |
21 | test_url: http://www.domusweb.it/en/design/in-praise-of-lost-time/ |