diff options
author | tcitworld <thomas.citharet@gmail.com> | 2014-01-04 12:30:31 -0800 |
---|---|---|
committer | tcitworld <thomas.citharet@gmail.com> | 2014-01-04 12:30:31 -0800 |
commit | 7f667839764621b5aa01c9db8ce5dde2a29ef18f (patch) | |
tree | 93d8241ee81c87e18494325ae02f0589a8e328a2 /inc/3rdparty/site_config/standard/neh.gov.txt | |
parent | a84f77d6ba15a64ff00453f5d5190c021ce460ed (diff) | |
parent | 2abcccb37180c17318f5226f5d4bc28f30b621ea (diff) | |
download | wallabag-7f667839764621b5aa01c9db8ce5dde2a29ef18f.tar.gz wallabag-7f667839764621b5aa01c9db8ce5dde2a29ef18f.tar.zst wallabag-7f667839764621b5aa01c9db8ce5dde2a29ef18f.zip |
Merge pull request #1 from inthepoche/dev
Dev
Diffstat (limited to 'inc/3rdparty/site_config/standard/neh.gov.txt')
-rw-r--r-- | inc/3rdparty/site_config/standard/neh.gov.txt | 17 |
1 files changed, 17 insertions, 0 deletions
diff --git a/inc/3rdparty/site_config/standard/neh.gov.txt b/inc/3rdparty/site_config/standard/neh.gov.txt
new file mode 100644
index 00000000..45136a2b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/neh.gov.txt | |||
@@ -0,0 +1,17 @@ | |||
1 | #host configuration should be http://www.neh.gov/news/humanities/ | ||
2 | |||
3 | |||
4 | #meta data | ||
5 | title:substring-after(substring-after(//title,':'),':') | ||
6 | author:substring-after(//h2[@class = 'subHead'],'By') | ||
7 | date:substring-before(substring-after(//title,':'),':') | ||
8 | |||
9 | #img and caption handling | ||
10 | wrap_in(small)://div[@id = 'mainContent']/table/descendant::p/descendant::text() | ||
11 | wrap_in(fieldset)://div[@id = 'mainContent']/table | ||
12 | |||
13 | # clean up | ||
14 | strip: //table[@class = 'marginpaddingTop'] | ||
15 | strip: //h2[@class = 'subHead'] | ||
16 | |||
17 | test_url: http://www.neh.gov/news/humanities/2011-11/IslamicScholar.html | ||
\ No newline at end of file