From 4e067ceabd705201a16b4c92cf4b23f3b990326c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20L=C5=93uillet?= Date: Sun, 13 Jul 2014 10:15:40 +0200 Subject: updated specific configuration for parsing --- inc/3rdparty/site_config/standard/thedaily.com.txt | 46 +++++++++++----------- 1 file changed, 23 insertions(+), 23 deletions(-) mode change 100644 => 100755 inc/3rdparty/site_config/standard/thedaily.com.txt (limited to 'inc/3rdparty/site_config/standard/thedaily.com.txt') diff --git a/inc/3rdparty/site_config/standard/thedaily.com.txt b/inc/3rdparty/site_config/standard/thedaily.com.txt old mode 100644 new mode 100755 index 24ebbbac..e255e6a8 --- a/inc/3rdparty/site_config/standard/thedaily.com.txt +++ b/inc/3rdparty/site_config/standard/thedaily.com.txt @@ -1,24 +1,24 @@ -#keep all body text -prune: no - -#title, body, metadata -title: //div[@class='story_header']/h1 -body: //div[@id='content'] -author: substring-after(//span[@class='byline'], "by ") -author: substring-after(//span[@class='byline'], "By ") -author: //span[@class='byline'] -date: //span[@class='date'] - -#formatting -convert_double_br_tags: yes -dissolve: //div[@class='slides_full']/ul/li - -# cleanup -strip: //a[@id='story_note'] -strip: //br -strip: //div[@class='intro'] -strip: //div[@class='share-block'] -strip: //div[@class='sidebar-social'] -strip: //div[@class='top-stories'] -strip: //div[@class='prevnext'] +#keep all body text +prune: no + +#title, body, metadata +title: //div[@class='story_header']/h1 +body: //div[@id='content'] +author: substring-after(//span[@class='byline'], "by ") +author: substring-after(//span[@class='byline'], "By ") +author: //span[@class='byline'] +date: //span[@class='date'] + +#formatting +convert_double_br_tags: yes +dissolve: //div[@class='slides_full']/ul/li + +# cleanup +strip: //a[@id='story_note'] +strip: //br +strip: //div[@class='intro'] +strip: //div[@class='share-block'] +strip: //div[@class='sidebar-social'] +strip: //div[@class='top-stories'] +strip: //div[@class='prevnext'] test_url: http://www.thedaily.com/page/2012/01/09/010912-news-college-costs-1-5/ \ No newline at end of file -- cgit v1.2.3