From 4e067ceabd705201a16b4c92cf4b23f3b990326c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolas=20L=C5=93uillet?=
Date: Sun, 13 Jul 2014 10:15:40 +0200
Subject: updated specific configuration for parsing

---
 inc/3rdparty/site_config/standard/mashable.com.txt | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)
 mode change 100644 => 100755 inc/3rdparty/site_config/standard/mashable.com.txt

(limited to 'inc/3rdparty/site_config/standard/mashable.com.txt')

diff --git a/inc/3rdparty/site_config/standard/mashable.com.txt b/inc/3rdparty/site_config/standard/mashable.com.txt
old mode 100644
new mode 100755
index 2c5a14a6..b6efb6c5
--- a/inc/3rdparty/site_config/standard/mashable.com.txt
+++ b/inc/3rdparty/site_config/standard/mashable.com.txt
@@ -1,4 +1,11 @@
-title: //header[@class='entry-title']/h1
-body: //div[@class='description']
+title: //h1[@class='title']
+author: substring-after(//span[@class='author_name'], 'By ')
+date: //time
+
+body: //article
 strip: //div[@class='ytm-gallery-box']
-test_url: http://mashable.com/2011/12/05/india-wants-google-and-facebook-to-censor-user-content/
\ No newline at end of file
+strip: //div[contains(@class, 'adsense')]
+strip: //aside[contains(@class, 'social')]
+strip_id_or_class: article-topics
+
+test_url: http://mashable.com/2013/05/24/myspace-architects-rebuilding-a-brand/
-- 
cgit v1.2.3