From 4e067ceabd705201a16b4c92cf4b23f3b990326c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20L=C5=93uillet?= Date: Sun, 13 Jul 2014 10:15:40 +0200 Subject: updated specific configuration for parsing --- .../site_config/standard/blog.sina.com.cn.txt | 50 +++++++++++----------- 1 file changed, 25 insertions(+), 25 deletions(-) mode change 100644 => 100755 inc/3rdparty/site_config/standard/blog.sina.com.cn.txt (limited to 'inc/3rdparty/site_config/standard/blog.sina.com.cn.txt') diff --git a/inc/3rdparty/site_config/standard/blog.sina.com.cn.txt b/inc/3rdparty/site_config/standard/blog.sina.com.cn.txt old mode 100644 new mode 100755 index acb9ce81..4895272a --- a/inc/3rdparty/site_config/standard/blog.sina.com.cn.txt +++ b/inc/3rdparty/site_config/standard/blog.sina.com.cn.txt @@ -1,26 +1,26 @@ -# Sina blog, the most popular blog host in China. -# Its source code is horrible. -# -# Issue: -# Only the first image in the article is displayed. -# The rest images are replace by a 1x1 transparent gif by sina blog host. -# - -title://*[contains(@class,'titName SG_txta')] -author://*[contains(@id,'ownernick')] -date://*[contains(@class,'time SG_txtc')] -body://div[contains(@class,'articalContent')] - -# Remove redundant content which has span class start with "MASS" -# Example -strip://span[contains(@class,'MASS')] - -# Remove comment -strip://div[contains(@class,'allComm')] - -# Remove hiden text and link -strip://ins - -tidy:no -convert_double_br_tags:yes +# Sina blog, the most popular blog host in China. +# Its source code is horrible. +# +# Issue: +# Only the first image in the article is displayed. +# The remaining images are replaced with a 1x1 transparent GIF by the Sina blog host. 
+# + +title://*[contains(@class,'titName SG_txta')] +author://*[contains(@id,'ownernick')] +date://*[contains(@class,'time SG_txtc')] +body://div[contains(@class,'articalContent')] + +# Remove redundant content in spans whose class starts with "MASS" +# Example +strip://span[contains(@class,'MASS')] + +# Remove comments +strip://div[contains(@class,'allComm')] + +# Remove hidden text and links +strip://ins + +tidy:no +convert_double_br_tags:yes test_url: http://blog.sina.com.cn/s/blog_5054769e0102dtja.html \ No newline at end of file -- cgit v1.2.3