From 3bb6a8ed2ab40b17d3f3b9925664c8da38ea2570 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolas=20L=C5=93uillet?=
Date: Sat, 1 Aug 2015 21:20:43 +0200
Subject: update config from @fivefilters

---
 .../site_config/standard/contrepoints.org.txt | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100755 inc/3rdparty/site_config/standard/contrepoints.org.txt

(limited to 'inc/3rdparty/site_config/standard/contrepoints.org.txt')

diff --git a/inc/3rdparty/site_config/standard/contrepoints.org.txt b/inc/3rdparty/site_config/standard/contrepoints.org.txt
new file mode 100755
index 00000000..8a6a1250
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/contrepoints.org.txt
@@ -0,0 +1,21 @@
+# Contrepoints.org
+# As of 2015-04, it's a wordpress-powered website.
+
+title: //h1[contains(concat(' ',normalize-space(@class),' '),' page-title ')]//span[contains(concat(' ',normalize-space(@class),' '),' inner-text ')]
+date: //time[contains(concat(' ',normalize-space(@class),' '),' art-date ')]
+author: //h1[contains(concat(' ',normalize-space(@class),' '),' author-name ')]
+body: //article[contains(concat(' ',normalize-space(@class),' '),' plain-art ')]
+
+# no toolbar, meta, etc, but misses excerpt
+# body: //div[contains(concat(' ',normalize-space(@class),' '),' entry ')]
+
+# Thus, we need to strip useless elements from the "plain-art"
+strip: //div[contains(concat(' ',normalize-space(@class),' '),' plain-post-topbar ')]
+strip: //div[contains(concat(' ',normalize-space(@class),' '),' single-type-block ')]
+strip: //header[contains(concat(' ',normalize-space(@class),' '),' entry-header ')]
+
+# And no pruning is needed because we stripped unwanted elements.
+prune: no
+
+test_url: http://www.contrepoints.org/2015/04/25/205709-leconomie-selon-ray-dalio
+test_url: http://www.contrepoints.org/2015/04/25/205734-huile-et-gaz-de-schiste-revolution-durable
\ No newline at end of file
-- 
cgit v1.2.3