aboutsummaryrefslogtreecommitdiffhomepage
path: root/inc/3rdparty/site_config/standard/neh.gov.txt
diff options
context:
space:
mode:
authorNicolas LÅ“uillet <nicolas@loeuillet.org>2014-07-13 10:15:40 +0200
committerNicolas LÅ“uillet <nicolas@loeuillet.org>2014-07-13 10:15:40 +0200
commit4e067ceabd705201a16b4c92cf4b23f3b990326c (patch)
tree939f3a8e5ff3ab9ee414a57a895d3e78e1d46ce3 /inc/3rdparty/site_config/standard/neh.gov.txt
parent58dbe103889148def78b0fc8744d3f94c56a1561 (diff)
downloadwallabag-4e067ceabd705201a16b4c92cf4b23f3b990326c.tar.gz
wallabag-4e067ceabd705201a16b4c92cf4b23f3b990326c.tar.zst
wallabag-4e067ceabd705201a16b4c92cf4b23f3b990326c.zip
updated specific configuration for parsing
Diffstat (limited to 'inc/3rdparty/site_config/standard/neh.gov.txt')
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/neh.gov.txt30
1 files changed, 15 insertions, 15 deletions
diff --git a/inc/3rdparty/site_config/standard/neh.gov.txt b/inc/3rdparty/site_config/standard/neh.gov.txt
index 45136a2b..e7cc4313 100644..100755
--- a/inc/3rdparty/site_config/standard/neh.gov.txt
+++ b/inc/3rdparty/site_config/standard/neh.gov.txt
@@ -1,17 +1,17 @@
1#host configuration should be http://www.neh.gov/news/humanities/ 1#host configuration should be http://www.neh.gov/news/humanities/
2 2
3 3
4#meta data 4#meta data
5title:substring-after(substring-after(//title,':'),':') 5title:substring-after(substring-after(//title,':'),':')
6author:substring-after(//h2[@class = 'subHead'],'By') 6author:substring-after(//h2[@class = 'subHead'],'By')
7date:substring-before(substring-after(//title,':'),':') 7date:substring-before(substring-after(//title,':'),':')
8 8
9#img and caption handling 9#img and caption handling
10wrap_in(small)://div[@id = 'mainContent']/table/descendant::p/descendant::text() 10wrap_in(small)://div[@id = 'mainContent']/table/descendant::p/descendant::text()
11wrap_in(fieldset)://div[@id = 'mainContent']/table 11wrap_in(fieldset)://div[@id = 'mainContent']/table
12 12
13# clean up 13# clean up
14strip: //table[@class = 'marginpaddingTop'] 14strip: //table[@class = 'marginpaddingTop']
15strip: //h2[@class = 'subHead'] 15strip: //h2[@class = 'subHead']
16 16
17test_url: http://www.neh.gov/news/humanities/2011-11/IslamicScholar.html \ No newline at end of file 17test_url: http://www.neh.gov/news/humanities/2011-11/IslamicScholar.html \ No newline at end of file