]>
Commit | Line | Data |
---|---|---|
4e067cea NL |
1 | # Works with old posts too, such as http://www.joelonsoftware.com/articles/fog0000000332.html |
2 | ||
3 | author: substring-after(//div[@class="author"], 'by ') | |
4 | date: //div[@class="date"] | |
5 | ||
6 | ## Strip the leading headings, date and author from the top of the article ## | |
7 | ||
8 | strip: //h1[1] | |
9 | strip: //h2[1] | |
10 | strip: //div[@class="date"] | |
11 | strip: //div[@class="author"] | |
12 | ||
13 | ## Strip the text-message blockquote and the trailing p/h4/div elements at the bottom ## | |
14 | ||
15 | strip: //blockquote[@class="textmessage"] | |
16 | strip: //div[@style="width:500px"]/p[last()] | |
17 | strip: //div[@style="width:500px"]/p[last()-1] | |
18 | strip: //div[@style="width:500px"]/h4[last()] | |
19 | strip: //div[@style="width:500px"]/h4[last()-1] | |
ac4d1142 NL |
20 | strip: //div[@style="width:500px"]/div[last()] |
21 | test_url: http://www.joelonsoftware.com/items/2011/09/15.html |