From ac4d114214d820b20e18518a2dbc809337e39043 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolas=20L=C5=93uillet?=
Date: Fri, 6 Dec 2013 10:13:03 +0100
Subject: [add] new specific configuration files

---
 .../site_config/standard/www1.folha.uol.com.br.txt | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 inc/3rdparty/site_config/standard/www1.folha.uol.com.br.txt

(limited to 'inc/3rdparty/site_config/standard/www1.folha.uol.com.br.txt')

diff --git a/inc/3rdparty/site_config/standard/www1.folha.uol.com.br.txt b/inc/3rdparty/site_config/standard/www1.folha.uol.com.br.txt
new file mode 100644
index 00000000..0846be2c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/www1.folha.uol.com.br.txt
@@ -0,0 +1,15 @@
+body://div[@id='articleNew']
+strip://div[@id='articleBy']
+strip://div[@id='articleDate']
+strip://td[@class='articleGraphicCredit']
+strip://h1
+strip://div[@id='articleEnd']
+strip://p[@class='tagline']
+strip://div[@class='openBox adslibraryArticle']
+strip_id_or_class:ad-180x150-1
+
+
+title: //div[@id="articleNew"]/h1
+author: //div[@id="articleBy"]/p/b
+date: substring-before(//div[@id="articleDate"], "-")
+test_url: http://www1.folha.uol.com.br/mundo/1115805-ex-ditador-argentino-videla-e-condenado-a-50-anos-de-prisao.shtml
\ No newline at end of file
-- 
cgit v1.2.3