diff options
author | Nicolas Lœuillet <nicolas@loeuillet.org> | 2014-07-23 13:44:48 +0200 |
---|---|---|
committer | Nicolas Lœuillet <nicolas@loeuillet.org> | 2014-07-23 13:44:48 +0200 |
commit | 887b015def3098f1e898e7bf3338fa2d093b6d95 (patch) | |
tree | 41206132200aa9390e11d600ad2b84ffa23242e4 /inc/3rdparty/site_config/standard/neh.gov.txt | |
parent | ebd6bf6007e0fad4c3e11dac0e79f687e1d195a2 (diff) | |
parent | 505a74ad1de7cf2cd3605e793233365501f03d87 (diff) | |
download | wallabag-887b015def3098f1e898e7bf3338fa2d093b6d95.tar.gz wallabag-887b015def3098f1e898e7bf3338fa2d093b6d95.tar.zst wallabag-887b015def3098f1e898e7bf3338fa2d093b6d95.zip |
Merge branch 'refactor' into dev
Diffstat (limited to 'inc/3rdparty/site_config/standard/neh.gov.txt')
-rwxr-xr-x[-rw-r--r--] | inc/3rdparty/site_config/standard/neh.gov.txt | 30 |
1 files changed, 15 insertions, 15 deletions
diff --git a/inc/3rdparty/site_config/standard/neh.gov.txt b/inc/3rdparty/site_config/standard/neh.gov.txt index 45136a2b..e7cc4313 100644..100755 --- a/inc/3rdparty/site_config/standard/neh.gov.txt +++ b/inc/3rdparty/site_config/standard/neh.gov.txt | |||
@@ -1,17 +1,17 @@ | |||
1 | #host configuration should be http://www.neh.gov/news/humanities/ | 1 | #host configuration should be http://www.neh.gov/news/humanities/ |
2 | 2 | ||
3 | 3 | ||
4 | #meta data | 4 | #meta data |
5 | title:substring-after(substring-after(//title,':'),':') | 5 | title:substring-after(substring-after(//title,':'),':') |
6 | author:substring-after(//h2[@class = 'subHead'],'By') | 6 | author:substring-after(//h2[@class = 'subHead'],'By') |
7 | date:substring-before(substring-after(//title,':'),':') | 7 | date:substring-before(substring-after(//title,':'),':') |
8 | 8 | ||
9 | #img and caption handling | 9 | #img and caption handling |
10 | wrap_in(small)://div[@id = 'mainContent']/table/descendant::p/descendant::text() | 10 | wrap_in(small)://div[@id = 'mainContent']/table/descendant::p/descendant::text() |
11 | wrap_in(fieldset)://div[@id = 'mainContent']/table | 11 | wrap_in(fieldset)://div[@id = 'mainContent']/table |
12 | 12 | ||
13 | # clean up | 13 | # clean up |
14 | strip: //table[@class = 'marginpaddingTop'] | 14 | strip: //table[@class = 'marginpaddingTop'] |
15 | strip: //h2[@class = 'subHead'] | 15 | strip: //h2[@class = 'subHead'] |
16 | 16 | ||
17 | test_url: http://www.neh.gov/news/humanities/2011-11/IslamicScholar.html \ No newline at end of file | 17 | test_url: http://www.neh.gov/news/humanities/2011-11/IslamicScholar.html \ No newline at end of file |