diff options
author | tcit <tcit@tcit.fr> | 2014-10-08 19:26:26 +0200 |
---|---|---|
committer | tcit <tcit@tcit.fr> | 2014-10-08 19:26:26 +0200 |
commit | 8327f1c371ad1d930bf9c9a13e443f2aa29ecfe3 (patch) | |
tree | ea559def90e546716f3d6016fe8f06f333249ef6 /inc/3rdparty/site_config/standard/neh.gov.txt | |
parent | d05f5eeb1dfd989e76f6040b220fe52738284841 (diff) | |
parent | 73c833780c37278a319fd3bfff172eede1a040bd (diff) | |
download | wallabag-8327f1c371ad1d930bf9c9a13e443f2aa29ecfe3.tar.gz wallabag-8327f1c371ad1d930bf9c9a13e443f2aa29ecfe3.tar.zst wallabag-8327f1c371ad1d930bf9c9a13e443f2aa29ecfe3.zip |
Merge branch 'dev' into data-for-mysql
Diffstat (limited to 'inc/3rdparty/site_config/standard/neh.gov.txt')
-rwxr-xr-x[-rw-r--r--] | inc/3rdparty/site_config/standard/neh.gov.txt | 30 |
1 files changed, 15 insertions, 15 deletions
diff --git a/inc/3rdparty/site_config/standard/neh.gov.txt b/inc/3rdparty/site_config/standard/neh.gov.txt index 45136a2b..e7cc4313 100644..100755 --- a/inc/3rdparty/site_config/standard/neh.gov.txt +++ b/inc/3rdparty/site_config/standard/neh.gov.txt | |||
@@ -1,17 +1,17 @@ | |||
1 | #host configuration should be http://www.neh.gov/news/humanities/ | 1 | #host configuration should be http://www.neh.gov/news/humanities/ |
2 | 2 | ||
3 | 3 | ||
4 | #meta data | 4 | #meta data |
5 | title:substring-after(substring-after(//title,':'),':') | 5 | title:substring-after(substring-after(//title,':'),':') |
6 | author:substring-after(//h2[@class = 'subHead'],'By') | 6 | author:substring-after(//h2[@class = 'subHead'],'By') |
7 | date:substring-before(substring-after(//title,':'),':') | 7 | date:substring-before(substring-after(//title,':'),':') |
8 | 8 | ||
9 | #img and caption handling | 9 | #img and caption handling |
10 | wrap_in(small)://div[@id = 'mainContent']/table/descendant::p/descendant::text() | 10 | wrap_in(small)://div[@id = 'mainContent']/table/descendant::p/descendant::text() |
11 | wrap_in(fieldset)://div[@id = 'mainContent']/table | 11 | wrap_in(fieldset)://div[@id = 'mainContent']/table |
12 | 12 | ||
13 | # clean up | 13 | # clean up |
14 | strip: //table[@class = 'marginpaddingTop'] | 14 | strip: //table[@class = 'marginpaddingTop'] |
15 | strip: //h2[@class = 'subHead'] | 15 | strip: //h2[@class = 'subHead'] |
16 | 16 | ||
17 | test_url: http://www.neh.gov/news/humanities/2011-11/IslamicScholar.html \ No newline at end of file | 17 | test_url: http://www.neh.gov/news/humanities/2011-11/IslamicScholar.html \ No newline at end of file |