]> git.immae.eu Git - github/wallabag/wallabag.git/blob - inc/3rdparty/site_config/standard/neh.gov.txt
e7cc43134211bfd3954d3ed26b8698be83a4f306
[github/wallabag/wallabag.git] / inc / 3rdparty / site_config / standard / neh.gov.txt
1 # Host configuration: intended for pages under http://www.neh.gov/news/humanities/
2
3
4 # metadata
5 title:substring-after(substring-after(//title,':'),':')
6 author:substring-after(//h2[@class = 'subHead'],'By')
7 date:substring-before(substring-after(//title,':'),':')
8
9 # image and caption handling
10 wrap_in(small)://div[@id = 'mainContent']/table/descendant::p/descendant::text()
11 wrap_in(fieldset)://div[@id = 'mainContent']/table
12
13 # clean up
14 strip: //table[@class = 'marginpaddingTop']
15 strip: //h2[@class = 'subHead']
16
17 test_url: http://www.neh.gov/news/humanities/2011-11/IslamicScholar.html