diff options
Diffstat (limited to 'inc/3rdparty/site_config/standard/blog.sina.com.cn.txt')
-rw-r--r-- | inc/3rdparty/site_config/standard/blog.sina.com.cn.txt | 26 |
1 files changed, 26 insertions, 0 deletions
diff --git a/inc/3rdparty/site_config/standard/blog.sina.com.cn.txt b/inc/3rdparty/site_config/standard/blog.sina.com.cn.txt new file mode 100644 index 00000000..acb9ce81 --- /dev/null +++ b/inc/3rdparty/site_config/standard/blog.sina.com.cn.txt | |||
@@ -0,0 +1,26 @@ | |||
1 | # Sina blog, the most popular blog host in China. | ||
2 | # Its source code is horrible. | ||
3 | # | ||
4 | # Issue: | ||
5 | # Only the first image in the article is displayed. | ||
6 | # The remaining images are replaced with a 1x1 transparent GIF by the Sina blog host. | ||
7 | # | ||
8 | |||
9 | title://*[contains(@class,'titName SG_txta')] | ||
10 | author://*[contains(@id,'ownernick')] | ||
11 | date://*[contains(@class,'time SG_txtc')] | ||
12 | body://div[contains(@class,'articalContent')] | ||
13 | |||
14 | # Remove redundant <span> elements whose class starts with "MASS" | ||
15 | # Example <span class="MASSf21674ffeef7"></span> | ||
16 | strip://span[contains(@class,'MASS')] | ||
17 | |||
18 | # Remove the reader comments section | ||
19 | strip://div[contains(@class,'allComm')] | ||
20 | |||
21 | # Remove hidden text and links | ||
22 | strip://ins | ||
23 | |||
24 | tidy:no | ||
25 | convert_double_br_tags:yes | ||
26 | test_url: http://blog.sina.com.cn/s/blog_5054769e0102dtja.html \ No newline at end of file | ||