diff options
Diffstat (limited to 'inc/3rdparty/site_config/standard/schneier.com.txt')
-rw-r--r-- | inc/3rdparty/site_config/standard/schneier.com.txt | 25 |
1 files changed, 25 insertions, 0 deletions
diff --git a/inc/3rdparty/site_config/standard/schneier.com.txt b/inc/3rdparty/site_config/standard/schneier.com.txt
new file mode 100644
index 00000000..67181b65
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/schneier.com.txt | |||
@@ -0,0 +1,25 @@ | |||
1 | author: //p[@class='mastname'] | ||
2 | |||
3 | body: //div[@class='indivbody'] | ||
4 | date: //div[@class='indivbody']/h2[1] | ||
5 | |||
6 | # Remove blog title. Specify first occurrence in case h1 is used in article | ||
7 | strip: //div[@class='indivbody']/h1[1] | ||
8 | |||
9 | # Remove blog description (the first p element) | ||
10 | strip: //div[@class='indivbody']/p[1] | ||
11 | |||
12 | # Remove navigation (second p element) | ||
13 | strip: //div[@class='indivbody']/p[2] | ||
14 | |||
15 | # Remove duplicate of article title. Specify first occurrence in case h3 is used in article | ||
16 | strip: //div[@class='indivbody']/h3[1] | ||
17 | |||
18 | # Remove publishing date, it's extracted by rule above | ||
19 | strip: //div[@class='indivbody']/h2[1] | ||
20 | |||
21 | # Remove duplicate of date at end, and newsletter signup | ||
22 | strip: //p[@class='posted'] | ||
23 | |||
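24 | # Sample article for testing the rules above (the date heading at the top is extracted by the date rule, then stripped from the body) | ||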
25 | test_url: http://www.schneier.com/blog/archives/2010/12/security_in_202.html | ||
\ No newline at end of file