From 4e067ceabd705201a16b4c92cf4b23f3b990326c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20L=C5=93uillet?= Date: Sun, 13 Jul 2014 10:15:40 +0200 Subject: updated specific configuration for parsing --- inc/3rdparty/site_config/standard/gigaom.com.txt | 29 ++++++++++-------------- 1 file changed, 12 insertions(+), 17 deletions(-) mode change 100644 => 100755 inc/3rdparty/site_config/standard/gigaom.com.txt (limited to 'inc/3rdparty/site_config/standard/gigaom.com.txt') diff --git a/inc/3rdparty/site_config/standard/gigaom.com.txt b/inc/3rdparty/site_config/standard/gigaom.com.txt old mode 100644 new mode 100755 index 348bdf23..cc8fdfa0 --- a/inc/3rdparty/site_config/standard/gigaom.com.txt +++ b/inc/3rdparty/site_config/standard/gigaom.com.txt @@ -1,17 +1,12 @@ -date: //meta[@name='DC.date.issued']/@content -date: //span[@class='post-meta the-date'] - -title: //meta[@property='og:title']/@content - -author: //meta[@name='DC.creator']/@content - -body: //div[contains(@class, 'post-sub-head') or starts-with(@id, 'post-content-')] - -find_string: id="content" -replace_string: id="content-ignore" - -strip_id_or_class: sharedaddy - -prune: no - -test_url: http://gigaom.com/2011/10/24/groupon-google-lawsuit/ \ No newline at end of file +date: //meta[@name='dcterms.created']/@content +title: //meta[@property='og:title']/@content +author: //section[@class="post-meta"]//a[@rel="author"] + +body: //div[starts-with(@id, 'post-content-')] + +strip_id_or_class: sharedaddy + +prune: no + +test_url: http://gigaom.com/2011/10/24/groupon-google-lawsuit/ +test_url: http://gigaom.com/2012/12/26/snapchat-rises-why-pokes-decline-shows-facebooks-inability-to-invent/ \ No newline at end of file -- cgit v1.2.3