From 4e067ceabd705201a16b4c92cf4b23f3b990326c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20L=C5=93uillet?= Date: Sun, 13 Jul 2014 10:15:40 +0200 Subject: updated specific configuration for parsing --- inc/3rdparty/site_config/standard/slate.com.txt | 36 ++++++++++++------------- 1 file changed, 18 insertions(+), 18 deletions(-) mode change 100644 => 100755 inc/3rdparty/site_config/standard/slate.com.txt (limited to 'inc/3rdparty/site_config/standard/slate.com.txt') diff --git a/inc/3rdparty/site_config/standard/slate.com.txt b/inc/3rdparty/site_config/standard/slate.com.txt old mode 100644 new mode 100755 index e92f6a06..d5798e01 --- a/inc/3rdparty/site_config/standard/slate.com.txt +++ b/inc/3rdparty/site_config/standard/slate.com.txt @@ -1,19 +1,19 @@ -title: //h1[@class="sl-art-head-dek"] -body: //article//div[@class='sl-art-body']/div[contains(@class, 'body')] -strip: //div[@class="department_kicker"] -strip: //div[@id="insider_ad_wrapper" or @id="insider_ad_inner"] -strip: //div[@id="bottom_sponsored_links"] -strip: //div[@class="sl-art-ad-midflex"] -#strip: //dl -#strip: //p[em/a[contains(@href, 'facebook.com')]] -prune: no - -author: //div[@id='author_bio']//a[contains(@href, '/author/')] -author: //a[contains(@href, '/authors.')] - -date: substring-before(substring-after(//span[@class='sl-art-byline'], 'Posted '), ', at ') - -single_page_link: //a[@class='sl-art-sinpage'] - -test_url: http://www.slate.com/id/2274583/pagenum/all/ +title: //h1[@class="sl-art-head-dek"] +body: //article//div[@class='sl-art-body']/div[contains(@class, 'body')] +strip: //div[@class="department_kicker"] +strip: //div[@id="insider_ad_wrapper" or @id="insider_ad_inner"] +strip: //div[@id="bottom_sponsored_links"] +strip: //div[@class="sl-art-ad-midflex"] +#strip: //dl +#strip: //p[em/a[contains(@href, 'facebook.com')]] +prune: no + +author: //div[@id='author_bio']//a[contains(@href, '/author/')] +author: //a[contains(@href, '/authors.')] + +date: substring-before(substring-after(//span[@class='sl-art-byline'], 'Posted '), ', at ') + +single_page_link: //a[@class='sl-art-sinpage'] + +test_url: http://www.slate.com/id/2274583/pagenum/all/ test_url: http://www.slate.com/id/2293116/ \ No newline at end of file -- cgit v1.2.3