2 # As of 2015-04, contrepoints.org is a WordPress-powered website.
4 title: //h1[contains(concat(' ',normalize-space(@class),' '),' page-title ')]//span[contains(concat(' ',normalize-space(@class),' '),' inner-text ')]
5 date: //time[contains(concat(' ',normalize-space(@class),' '),' art-date ')]
6 author: //h1[contains(concat(' ',normalize-space(@class),' '),' author-name ')]
7 body: //article[contains(concat(' ',normalize-space(@class),' '),' plain-art ')]
9 # Alternative body selector (below): it excludes the toolbar, meta, etc., but misses the excerpt.
10 # body: //div[contains(concat(' ',normalize-space(@class),' '),' entry ')]
12 # Thus, we keep the "plain-art" article as the body and strip the useless elements from it.
13 strip: //div[contains(concat(' ',normalize-space(@class),' '),' plain-post-topbar ')]
14 strip: //div[contains(concat(' ',normalize-space(@class),' '),' single-type-block ')]
15 strip: //header[contains(concat(' ',normalize-space(@class),' '),' entry-header ')]
17 # No pruning is needed because the unwanted elements have already been stripped.
20 test_url: http://www.contrepoints.org/2015/04/25/205709-leconomie-selon-ray-dalio
21 test_url: http://www.contrepoints.org/2015/04/25/205734-huile-et-gaz-de-schiste-revolution-durable