From 4e067ceabd705201a16b4c92cf4b23f3b990326c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolas=20L=C5=93uillet?=
Date: Sun, 13 Jul 2014 10:15:40 +0200
Subject: updated specific configuration for parsing

---
 inc/3rdparty/site_config/standard/reddit.com.txt | 36 +++++++++++++-----------
 1 file changed, 20 insertions(+), 16 deletions(-)
 mode change 100644 => 100755 inc/3rdparty/site_config/standard/reddit.com.txt

(limited to 'inc/3rdparty/site_config/standard/reddit.com.txt')

diff --git a/inc/3rdparty/site_config/standard/reddit.com.txt b/inc/3rdparty/site_config/standard/reddit.com.txt
old mode 100644
new mode 100755
index 58ca9ece..8871f564
--- a/inc/3rdparty/site_config/standard/reddit.com.txt
+++ b/inc/3rdparty/site_config/standard/reddit.com.txt
@@ -1,16 +1,20 @@
-# This setup grabs the text from a Reddit self post. It ignores all comments etc.
-
-title: //p[@class="title"]/a/text()
-
-author: //p[@class="tagline"]/a
-
-# this doesn't work for some reason...?
-date: //p[@class="tagline"]//@datetime
-
-body: //div[@class="expando"]//div[@class="usertext-body"]
-
-strip_id_or_class: tagline
-strip_id_or_class: unvotable-message
-strip_id_or_class: buttons
-
-test_url: http://www.reddit.com/r/truegaming/comments/wfe7r/i_wrote_about_the_problems_i_honestly_feel_that/
\ No newline at end of file
+# This setup grabs the text from a Reddit self post. It ignores all comments etc.
+
+title: //p[@class="title"]/a/text()
+
+author: //p[@class="tagline"]/a
+
+# this doesn't work for some reason...?
+date: //p[@class="tagline"]//@datetime
+
+body: //div[@class="expando"]//div[@class="usertext-body"]
+
+strip_id_or_class: tagline
+strip_id_or_class: unvotable-message
+strip_id_or_class: buttons
+
+# follow the posted link (unless it's a self post - relative URL, no http://)
+single_page_link: //p[@class="title"]/a[contains(@href, 'http://')]
+
+test_url: http://www.reddit.com/r/truegaming/comments/wfe7r/i_wrote_about_the_problems_i_honestly_feel_that/
+test_url: http://www.reddit.com/r/worldnews/comments/1as37r/twelve_north_korean_soldiers_attempting_to_defect/
\ No newline at end of file
-- 
cgit v1.2.3