From 4e067ceabd705201a16b4c92cf4b23f3b990326c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20L=C5=93uillet?= Date: Sun, 13 Jul 2014 10:15:40 +0200 Subject: updated specific configuration for parsing --- .../standard/blogs.scientificamerican.com.txt | 28 +++++++++++----------- 1 file changed, 14 insertions(+), 14 deletions(-) mode change 100644 => 100755 inc/3rdparty/site_config/standard/blogs.scientificamerican.com.txt (limited to 'inc/3rdparty/site_config/standard/blogs.scientificamerican.com.txt') diff --git a/inc/3rdparty/site_config/standard/blogs.scientificamerican.com.txt b/inc/3rdparty/site_config/standard/blogs.scientificamerican.com.txt old mode 100644 new mode 100755 index a7d15081..2102015d --- a/inc/3rdparty/site_config/standard/blogs.scientificamerican.com.txt +++ b/inc/3rdparty/site_config/standard/blogs.scientificamerican.com.txt @@ -1,16 +1,16 @@ -# meta data -title://h1[@class = 'postTitle'] -author:substring-before(substring-after(//span[@class = 'byline'],'By '),'|') -date://span[@class = 'datestamp'] - -#body content -body://div[@id = 'singleBlogPost'] - -#reclaim author info -move_into(//div[@id = 'singleBlogPost'])://div[@id = 'aboutAuthorDiv'] -strip://p[@class = 'moreLink mobileHide'] - -#cleanup comments, there might be some open
sections -strip://div[@id = 'comments2'] +# meta data +title://h1[@class = 'postTitle'] +author:substring-before(substring-after(//span[@class = 'byline'],'By '),'|') +date://span[@class = 'datestamp'] + +#body content +body://div[@id = 'singleBlogPost'] + +#reclaim author info +move_into(//div[@id = 'singleBlogPost'])://div[@id = 'aboutAuthorDiv'] +strip://p[@class = 'moreLink mobileHide'] + +#cleanup comments, there might be some open
sections +strip://div[@id = 'comments2'] strip://h3[a[@href = '#add-comment']] test_url: http://blogs.scientificamerican.com/a-blog-around-the-clock/2012/07/10/science-blogs-definition-and-a-history/ \ No newline at end of file -- cgit v1.2.3