diff options
Diffstat (limited to 'inc/3rdparty/site_config/standard/neh.gov.txt')
-rwxr-xr-x[-rw-r--r--] | inc/3rdparty/site_config/standard/neh.gov.txt | 30 |
1 files changed, 15 insertions, 15 deletions
diff --git a/inc/3rdparty/site_config/standard/neh.gov.txt b/inc/3rdparty/site_config/standard/neh.gov.txt index 45136a2b..e7cc4313 100644..100755 --- a/inc/3rdparty/site_config/standard/neh.gov.txt +++ b/inc/3rdparty/site_config/standard/neh.gov.txt | |||
@@ -1,17 +1,17 @@ | |||
1 | #host configuration should be http://www.neh.gov/news/humanities/ | 1 | #host configuration should be http://www.neh.gov/news/humanities/ |
2 | 2 | ||
3 | 3 | ||
4 | #meta data | 4 | #meta data |
5 | title:substring-after(substring-after(//title,':'),':') | 5 | title:substring-after(substring-after(//title,':'),':') |
6 | author:substring-after(//h2[@class = 'subHead'],'By') | 6 | author:substring-after(//h2[@class = 'subHead'],'By') |
7 | date:substring-before(substring-after(//title,':'),':') | 7 | date:substring-before(substring-after(//title,':'),':') |
8 | 8 | ||
9 | #img and caption handling | 9 | #img and caption handling |
10 | wrap_in(small)://div[@id = 'mainContent']/table/descendant::p/descendant::text() | 10 | wrap_in(small)://div[@id = 'mainContent']/table/descendant::p/descendant::text() |
11 | wrap_in(fieldset)://div[@id = 'mainContent']/table | 11 | wrap_in(fieldset)://div[@id = 'mainContent']/table |
12 | 12 | ||
13 | # clean up | 13 | # clean up |
14 | strip: //table[@class = 'marginpaddingTop'] | 14 | strip: //table[@class = 'marginpaddingTop'] |
15 | strip: //h2[@class = 'subHead'] | 15 | strip: //h2[@class = 'subHead'] |
16 | 16 | ||
17 | test_url: http://www.neh.gov/news/humanities/2011-11/IslamicScholar.html \ No newline at end of file | 17 | test_url: http://www.neh.gov/news/humanities/2011-11/IslamicScholar.html \ No newline at end of file |