From 4e067ceabd705201a16b4c92cf4b23f3b990326c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolas=20L=C5=93uillet?=
Date: Sun, 13 Jul 2014 10:15:40 +0200
Subject: updated specific configuration for parsing

---
 inc/3rdparty/site_config/standard/abc.net.au.txt | 26 ++++++++++++++++--------
 1 file changed, 17 insertions(+), 9 deletions(-)
 mode change 100644 => 100755 inc/3rdparty/site_config/standard/abc.net.au.txt

(limited to 'inc/3rdparty/site_config/standard/abc.net.au.txt')

diff --git a/inc/3rdparty/site_config/standard/abc.net.au.txt b/inc/3rdparty/site_config/standard/abc.net.au.txt
old mode 100644
new mode 100755
index 5e6269cb..22b3a0f4
--- a/inc/3rdparty/site_config/standard/abc.net.au.txt
+++ b/inc/3rdparty/site_config/standard/abc.net.au.txt
@@ -1,10 +1,18 @@
-title: //h1
-author: //div[@class="byline"]/a
-date: //span[@class="timestamp"]
-
-strip: //p[@class="topics"]
-strip: //h1
-strip: //div[@class="byline"]
-strip: //p[@class="published"]
+title: //div[@class='article section']//h1
+author: //div[@class="byline"]/a
+date: //span[@class="timestamp"]
+body: //div[@class="page section"]
+
+strip: //a[@class="inline-caption"]
+strip: //p[@class="ticker section noprint"]
+strip: //p[@class="topics"]
+strip: //h1
+strip: //div[@class="byline"]
+strip: //p[@class="published"]
 strip: //div[contains(@class,"featured-scroller")]
-test_url: http://www.abc.net.au/news/2011-11-08/crabb-carbon-legislation-abbott-demolition/3652544
\ No newline at end of file
+strip_id_or_class: footer
+
+tidy: no
+
+test_url: http://www.abc.net.au/news/2013-03-27/open-speed-highways-change-clp-giles/4597892
+test_url: http://www.abc.net.au/news/2013-04-30/credit-growth-remains-subdued/4660054?section=business
-- 
cgit v1.2.3