From 4e067ceabd705201a16b4c92cf4b23f3b990326c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20L=C5=93uillet?= Date: Sun, 13 Jul 2014 10:15:40 +0200 Subject: updated specific configuration for parsing --- .../site_config/standard/lareviewofbooks.org.txt | 24 +++++++++++----------- 1 file changed, 12 insertions(+), 12 deletions(-) mode change 100644 => 100755 inc/3rdparty/site_config/standard/lareviewofbooks.org.txt (limited to 'inc/3rdparty/site_config/standard/lareviewofbooks.org.txt') diff --git a/inc/3rdparty/site_config/standard/lareviewofbooks.org.txt b/inc/3rdparty/site_config/standard/lareviewofbooks.org.txt old mode 100644 new mode 100755 index 504dbea1..25e36543 --- a/inc/3rdparty/site_config/standard/lareviewofbooks.org.txt +++ b/inc/3rdparty/site_config/standard/lareviewofbooks.org.txt @@ -1,12 +1,12 @@ -#meta data -title:substring-after(title,'|') - -author:substring-before( substring-after(//meta[@name = 'description']/@content, normalize-space(substring-after(//title,'|'))),' respond ') -date://h5[@class = 'postDate'] - -#text -body://div[@class = 'articleBody'] - -#clean up -strip://center -test_url: http://lareviewofbooks.org/post/14066007115/literary-transactions-and-their-vicissitudes \ No newline at end of file +#metadata +title: substring-before(//title,' |') +author: //a[contains(@class,'person') and starts-with(@href, '/contributor')] + +#text +body: //div[contains(@class, 'article_body')] + +#clean up +strip_id_or_class: recommended_section + +test_url: http://lareviewofbooks.org/review/american-politics-redeembale-robert-gates-hillary-clinton-two-memoirs-washington-dc +test_url: http://lareviewofbooks.org/interview/souvenirs-future -- cgit v1.2.3