aboutsummaryrefslogtreecommitdiffhomepage
path: root/inc/3rdparty/site_config/standard/blog.sina.com.cn.txt
blob: acb9ce81a10a061272d1c8d6f9754c5c2a4057fe (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# Sina blog, the most popular blog host in China.
# Its source code is horrible.
#  
# Issue:
# Only the first image in the article is displayed.
# The remaining images are replaced with a 1x1 transparent GIF by the Sina blog host.
# 

# Extraction selectors: XPath expressions that locate the title, author,
# date and body elements by substring match on their class/id attributes.
# NOTE(review): 'articalContent' looks misspelled but presumably mirrors the
# site's own class name -- do not "correct" it without checking the page markup.
title://*[contains(@class,'titName SG_txta')]
author://*[contains(@id,'ownernick')]
date://*[contains(@class,'time SG_txtc')]
body://div[contains(@class,'articalContent')]

# Remove redundant <span> elements whose class contains "MASS".
# Example: <span class="MASSf21674ffeef7"></span>
strip://span[contains(@class,'MASS')]

# Remove the comments section.
strip://div[contains(@class,'allComm')]

# Remove hidden text and links (<ins> elements).
strip://ins

# NOTE(review): presumably disables HTML Tidy pre-processing and converts
# consecutive <br> tags into paragraph breaks -- confirm against the parser docs.
tidy:no
convert_double_br_tags:yes
# Sample article URL for checking this configuration.
test_url: http://blog.sina.com.cn/s/blog_5054769e0102dtja.html