From 4e067ceabd705201a16b4c92cf4b23f3b990326c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolas=20L=C5=93uillet?=
Date: Sun, 13 Jul 2014 10:15:40 +0200
Subject: updated specific configuration for parsing

---
 inc/3rdparty/site_config/standard/cnn.com.txt | 42 +++++++++++++++------------
 1 file changed, 23 insertions(+), 19 deletions(-)
 mode change 100644 => 100755 inc/3rdparty/site_config/standard/cnn.com.txt

(limited to 'inc/3rdparty/site_config/standard/cnn.com.txt')

Review notes (not part of the original commit; free text placed before the
first "diff --git" line, outside the diff itself):
  * The hunk below adds a "body:" XPath, a "sharebar" strip rule, an empty
    "replace_string():" directive and a second "test_url:", and comments out
    the "cnn_containerwht" strip rule.
  * NOTE(review): "replace_string():" is added with an empty search string and
    no replacement value - confirm whether a find/replace pair was intended.
  * NOTE(review): the second test_url points at rss.cnn.com/rss/edition.rss,
    which looks like a feed rather than an article page - confirm it is a
    meaningful extraction test.
  * NOTE(review): the mode change 100644 => 100755 marks a plain-text config
    file as executable - presumably unintentional; verify.

diff --git a/inc/3rdparty/site_config/standard/cnn.com.txt b/inc/3rdparty/site_config/standard/cnn.com.txt
old mode 100644
new mode 100755
index 995e2c79..6f69e4e8
--- a/inc/3rdparty/site_config/standard/cnn.com.txt
+++ b/inc/3rdparty/site_config/standard/cnn.com.txt
@@ -1,19 +1,23 @@
-title: //div[@class="cnn_storyarea"]/h1
-author: //div[@class="cnnByline"]/strong
-date: substring-after(//div[@class="cnn_strytmstmp"], 'Sun')
-date: substring-after(//div[@class="cnn_strytmstmp"], 'Mon')
-date: substring-after(//div[@class="cnn_strytmstmp"], 'Tue')
-date: substring-after(//div[@class="cnn_strytmstmp"], 'Wed')
-date: substring-after(//div[@class="cnn_strytmstmp"], 'Thu')
-date: substring-after(//div[@class="cnn_strytmstmp"], 'Fri')
-date: substring-after(//div[@class="cnn_strytmstmp"], 'Sat')
-strip: //div[@class="cnn_storyarea"]/h1
-strip_id_or_class: cnnByline
-strip_id_or_class: cnn_strytmstmp
-strip_id_or_class: cnn_strycaptiontxt
-strip_id_or_class: cnn_strybtntoolsbttm
-strip_id_or_class: cnn_strybtntools
-strip_id_or_class: cnn_strybtmcntnt
-strip_id_or_class: cnn_containerwht
-strip_id_or_class: cnn_stryathrtmp
-test_url: http://www.cnn.com/2012/05/13/us/new-york-police-policy/index.html?eref=rss_topstories
\ No newline at end of file
+body: //div[@id='cnnContentContainer']//div[contains(@class, 'cnn_strycntntlft')]
+title: //div[@class="cnn_storyarea"]/h1
+author: //div[@class="cnnByline"]/strong
+date: substring-after(//div[@class="cnn_strytmstmp"], 'Sun')
+date: substring-after(//div[@class="cnn_strytmstmp"], 'Mon')
+date: substring-after(//div[@class="cnn_strytmstmp"], 'Tue')
+date: substring-after(//div[@class="cnn_strytmstmp"], 'Wed')
+date: substring-after(//div[@class="cnn_strytmstmp"], 'Thu')
+date: substring-after(//div[@class="cnn_strytmstmp"], 'Fri')
+date: substring-after(//div[@class="cnn_strytmstmp"], 'Sat')
+strip: //div[@class="cnn_storyarea"]/h1
+strip_id_or_class: cnnByline
+strip_id_or_class: cnn_strytmstmp
+strip_id_or_class: cnn_strycaptiontxt
+strip_id_or_class: cnn_strybtntoolsbttm
+strip_id_or_class: cnn_strybtntools
+strip_id_or_class: cnn_strybtmcntnt
+strip_id_or_class: sharebar
+#strip_id_or_class: cnn_containerwht
+strip_id_or_class: cnn_stryathrtmp
+replace_string():
+test_url: http://www.cnn.com/2012/05/13/us/new-york-police-policy/index.html?eref=rss_topstories
+test_url: http://rss.cnn.com/rss/edition.rss
\ No newline at end of file
-- 
cgit v1.2.3