From 4e067ceabd705201a16b4c92cf4b23f3b990326c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolas=20L=C5=93uillet?=
Date: Sun, 13 Jul 2014 10:15:40 +0200
Subject: updated specific configuration for parsing

---
 inc/3rdparty/site_config/standard/bt.no.txt | 12 ++++++++++++
 1 file changed, 12 insertions(+)
 create mode 100755 inc/3rdparty/site_config/standard/bt.no.txt

(limited to 'inc/3rdparty/site_config/standard/bt.no.txt')

diff --git a/inc/3rdparty/site_config/standard/bt.no.txt b/inc/3rdparty/site_config/standard/bt.no.txt
new file mode 100755
index 00000000..200c2e4e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/bt.no.txt
@@ -0,0 +1,12 @@
+title: //h1[contains(@class,'articleTitle')]
+author: //span[@itemprop='name']
+date: //time[@class='published']
+body: //div[contains(@class,'bodyText')]
+
+strip_id_or_class: 'pull1'
+strip_id_or_class: 'relationArticle'
+strip: //span[@class='quote']
+
+# strip h2 if at end of article (typically a request for comments)
+strip: //div[contains(@class,'bodyText')]/node()[last()-1]/self::h2
+test_url: http://www.bt.no/meninger/debatt/Typisk-norsk-a-vare-god-nok-2884108.html
\ No newline at end of file
--
cgit v1.2.3