diff options
author | Nicolas Lœuillet <nicolas.loeuillet@gmail.com> | 2013-12-06 10:13:03 +0100 |
---|---|---|
committer | Nicolas Lœuillet <nicolas.loeuillet@gmail.com> | 2013-12-06 10:13:03 +0100 |
commit | ac4d114214d820b20e18518a2dbc809337e39043 (patch) | |
tree | 27886128ef949b7f8dd174b0646b5a4d99883b44 /inc/3rdparty/site_config/standard/neh.gov.txt | |
parent | d5501950e2470d52f6bf5954d2179010cdee0475 (diff) | |
download | wallabag-ac4d114214d820b20e18518a2dbc809337e39043.tar.gz wallabag-ac4d114214d820b20e18518a2dbc809337e39043.tar.zst wallabag-ac4d114214d820b20e18518a2dbc809337e39043.zip |
[add] new specific configuration files
Diffstat (limited to 'inc/3rdparty/site_config/standard/neh.gov.txt')
-rw-r--r-- | inc/3rdparty/site_config/standard/neh.gov.txt | 17 |
1 files changed, 17 insertions, 0 deletions
diff --git a/inc/3rdparty/site_config/standard/neh.gov.txt b/inc/3rdparty/site_config/standard/neh.gov.txt new file mode 100644 index 00000000..45136a2b --- /dev/null +++ b/inc/3rdparty/site_config/standard/neh.gov.txt | |||
@@ -0,0 +1,17 @@ | |||
1 | #host configuration should be http://www.neh.gov/news/humanities/ | ||
2 | |||
3 | |||
4 | #meta data | ||
5 | title:substring-after(substring-after(//title,':'),':') | ||
6 | author:substring-after(//h2[@class = 'subHead'],'By') | ||
7 | date:substring-before(substring-after(//title,':'),':') | ||
8 | |||
9 | #img and caption handling | ||
10 | wrap_in(small)://div[@id = 'mainContent']/table/descendant::p/descendant::text() | ||
11 | wrap_in(fieldset)://div[@id = 'mainContent']/table | ||
12 | |||
13 | # clean up | ||
14 | strip: //table[@class = 'marginpaddingTop'] | ||
15 | strip: //h2[@class = 'subHead'] | ||
16 | |||
17 | test_url: http://www.neh.gov/news/humanities/2011-11/IslamicScholar.html \ No newline at end of file | ||