diff options
author | Nicolas LÅ“uillet <nicolas.loeuillet@gmail.com> | 2013-12-23 02:28:56 -0800 |
---|---|---|
committer | Nicolas LÅ“uillet <nicolas.loeuillet@gmail.com> | 2013-12-23 02:28:56 -0800 |
commit | 60fc4f4b1ab37fbfe9021f3fa1395d66a4424ed2 (patch) | |
tree | 515c4b9c8286ae363f77722c91acb878151dc386 /inc/3rdparty/site_config/standard/neh.gov.txt | |
parent | cbfd5a1019f47fadefd8490dae9f039ae894298d (diff) | |
parent | da5fc42f615eeb45a702604970f94967507fb432 (diff) | |
download | wallabag-60fc4f4b1ab37fbfe9021f3fa1395d66a4424ed2.tar.gz wallabag-60fc4f4b1ab37fbfe9021f3fa1395d66a4424ed2.tar.zst wallabag-60fc4f4b1ab37fbfe9021f3fa1395d66a4424ed2.zip |
Merge pull request #363 from inthepoche/dev1.3.0
poche 1.3.0
Diffstat (limited to 'inc/3rdparty/site_config/standard/neh.gov.txt')
-rw-r--r-- | inc/3rdparty/site_config/standard/neh.gov.txt | 17 |
1 files changed, 17 insertions, 0 deletions
diff --git a/inc/3rdparty/site_config/standard/neh.gov.txt b/inc/3rdparty/site_config/standard/neh.gov.txt new file mode 100644 index 00000000..45136a2b --- /dev/null +++ b/inc/3rdparty/site_config/standard/neh.gov.txt | |||
@@ -0,0 +1,17 @@ | |||
1 | #host configuration should be http://www.neh.gov/news/humanities/ | ||
2 | |||
3 | |||
4 | #meta data | ||
5 | title:substring-after(substring-after(//title,':'),':') | ||
6 | author:substring-after(//h2[@class = 'subHead'],'By') | ||
7 | date:substring-before(substring-after(//title,':'),':') | ||
8 | |||
9 | #img and caption handling | ||
10 | wrap_in(small)://div[@id = 'mainContent']/table/descendant::p/descendant::text() | ||
11 | wrap_in(fieldset)://div[@id = 'mainContent']/table | ||
12 | |||
13 | # clean up | ||
14 | strip: //table[@class = 'marginpaddingTop'] | ||
15 | strip: //h2[@class = 'subHead'] | ||
16 | |||
17 | test_url: http://www.neh.gov/news/humanities/2011-11/IslamicScholar.html \ No newline at end of file | ||