From 4e067ceabd705201a16b4c92cf4b23f3b990326c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20L=C5=93uillet?= Date: Sun, 13 Jul 2014 10:15:40 +0200 Subject: updated specific configuration for parsing --- .../site_config/standard/searchengineland.com.txt | 36 +++++++++++----------- 1 file changed, 18 insertions(+), 18 deletions(-) mode change 100644 => 100755 inc/3rdparty/site_config/standard/searchengineland.com.txt (limited to 'inc/3rdparty/site_config/standard/searchengineland.com.txt') diff --git a/inc/3rdparty/site_config/standard/searchengineland.com.txt b/inc/3rdparty/site_config/standard/searchengineland.com.txt old mode 100644 new mode 100755 index f176d7c7..fb6a1074 --- a/inc/3rdparty/site_config/standard/searchengineland.com.txt +++ b/inc/3rdparty/site_config/standard/searchengineland.com.txt @@ -1,20 +1,20 @@ -body: //div[@class="storyBox"] -title: //div[@class="storyBox"]/h1 -author: //a[@rel="author"] -date: substring-before(//span[@class="dateline"], 'by') - -#Removes related content but cleans up article text -strip: //h1 -strip: //p[@class="homeStory tdmSideInfo"] -strip: //div[@id="bylineShare"] -strip: //script -strip: //hr - -strip_id_or_class: homeStory -strip_id_or_class: authorpic -strip_id_or_class: insideComments -strip_id_or_class: authorbio -strip_id_or_class: gpt-ad-sel-cube -strip_id_or_class: smxTextAd +body: //div[@class="storyBox"] +title: //div[@class="storyBox"]/h1 +author: //a[@rel="author"] +date: substring-before(//span[@class="dateline"], 'by') + +#Removes related content but cleans up article text +strip: //h1 +strip: //p[@class="homeStory tdmSideInfo"] +strip: //div[@id="bylineShare"] +strip: //script +strip: //hr + +strip_id_or_class: homeStory +strip_id_or_class: authorpic +strip_id_or_class: insideComments +strip_id_or_class: authorbio +strip_id_or_class: gpt-ad-sel-cube +strip_id_or_class: smxTextAd test_url: http://searchengineland.com/googles-jaw-dropping-sponsored-post-campaign-for-chrome-106348 \ No newline at end of file -- cgit v1.2.3