From 4e067ceabd705201a16b4c92cf4b23f3b990326c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20L=C5=93uillet?= Date: Sun, 13 Jul 2014 10:15:40 +0200 Subject: updated specific configuration for parsing --- inc/3rdparty/site_config/standard/newsmill.se.txt | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) mode change 100644 => 100755 inc/3rdparty/site_config/standard/newsmill.se.txt (limited to 'inc/3rdparty/site_config/standard/newsmill.se.txt') diff --git a/inc/3rdparty/site_config/standard/newsmill.se.txt b/inc/3rdparty/site_config/standard/newsmill.se.txt old mode 100644 new mode 100755 index eb7d3350..1a990319 --- a/inc/3rdparty/site_config/standard/newsmill.se.txt +++ b/inc/3rdparty/site_config/standard/newsmill.se.txt @@ -1,12 +1,12 @@ -title: //h1 -body: (//div[@class='articleImg']//img)[1] | //p[contains(@class, 'commentTextArticle') or contains(@class, 'articlePublished')] | //div[@id='articleLeftContent'] -author: //div[@class='byline']//a[contains(@href, '/user/')] - -strip_id_or_class: facts -strip_id_or_class: articleBlogsHolder -strip_id_or_class: byline - -prune: no -tidy: no - +title: //h1 +body: (//div[@class='articleImg']//img)[1] | //p[contains(@class, 'commentTextArticle') or contains(@class, 'articlePublished')] | //div[@id='articleLeftContent'] +author: //div[@class='byline']//a[contains(@href, '/user/')] + +strip_id_or_class: facts +strip_id_or_class: articleBlogsHolder +strip_id_or_class: byline + +prune: no +tidy: no + test_url: http://www.newsmill.se/artikel/2012/05/06/medielogiken-v-ger-tyngre-n-reportrarnas-sikter \ No newline at end of file -- cgit v1.2.3