]>
Commit | Line | Data |
---|---|---|
4e067cea NL |
1 | author: //p[@class='mastname'] |
2 | ||
3 | body: //div[@class='indivbody'] | |
4 | date: //div[@class='indivbody']/h2[1] | |
5 | ||
6 | # Remove blog title. Specify first occurrence in case h1 is used in article | |
7 | strip: //div[@class='indivbody']/h1[1] | |
8 | ||
9 | # Remove blog description (the first p element) | |
10 | strip: //div[@class='indivbody']/p[1] | |
11 | ||
12 | # Remove navigation (second p element) | |
13 | strip: //div[@class='indivbody']/p[2] | |
14 | ||
15 | # Remove duplicate of article title. Specify first occurrence in case h3 is used in article | |
16 | strip: //div[@class='indivbody']/h3[1] | |
17 | ||
18 | # Remove publishing date from the body; it is already extracted by the date rule above | |
19 | strip: //div[@class='indivbody']/h2[1] | |
20 | ||
21 | # Remove duplicate of date at end, and newsletter signup | |
22 | strip: //p[@class='posted'] | |
23 | ||
24 | # Note: the date at top (first h2) is extracted by the date rule and stripped from the body | |
ac4d1142 | 25 | test_url: http://www.schneier.com/blog/archives/2010/12/security_in_202.html |