diff options
author | Nicolas Lœuillet <nicolas.loeuillet@gmail.com> | 2013-12-23 02:28:56 -0800 |
---|---|---|
committer | Nicolas Lœuillet <nicolas.loeuillet@gmail.com> | 2013-12-23 02:28:56 -0800 |
commit | 60fc4f4b1ab37fbfe9021f3fa1395d66a4424ed2 (patch) | |
tree | 515c4b9c8286ae363f77722c91acb878151dc386 /inc/3rdparty/site_config/standard/smithsonianmag.com.txt | |
parent | cbfd5a1019f47fadefd8490dae9f039ae894298d (diff) | |
parent | da5fc42f615eeb45a702604970f94967507fb432 (diff) | |
download | wallabag-1.3.0.tar.gz wallabag-1.3.0.tar.zst wallabag-1.3.0.zip |
Merge pull request #363 from inthepoche/dev1.3.0
poche 1.3.0
Diffstat (limited to 'inc/3rdparty/site_config/standard/smithsonianmag.com.txt')
-rw-r--r-- | inc/3rdparty/site_config/standard/smithsonianmag.com.txt | 20 |
1 files changed, 20 insertions, 0 deletions
diff --git a/inc/3rdparty/site_config/standard/smithsonianmag.com.txt b/inc/3rdparty/site_config/standard/smithsonianmag.com.txt new file mode 100644 index 00000000..10a3f717 --- /dev/null +++ b/inc/3rdparty/site_config/standard/smithsonianmag.com.txt | |||
@@ -0,0 +1,20 @@ | |||
1 | # meta data | ||
2 | title://h1[@id = 'articleTitle'] | ||
3 | author:substring-after(//ul[@id = 'byLine']/li[1],'By ') | ||
4 | date:substring-before(substring-after(//ul[@id = 'byLine']/li[last()],','),',') | ||
5 | body://div[@id = 'article-body'] | ||
6 | |||
7 | # full content | ||
8 | single_page_link://td/li[@class = 'article-singlepage']/a | ||
9 | |||
10 | # caption clean up | ||
11 | wrap_in(i)://span[@class='articleImageCaptionwide'] | ||
12 | move_into (//span[@class='articleImageCaptionwide'])://div[@id = 'articleImage']/p | ||
13 | |||
14 | |||
15 | # clean up | ||
16 | strip://p[@id = 'articlePaginationWrapper'] | ||
17 | strip://ul[contains(@class, 'cat-breadcrumb')] | ||
18 | strip://div [@class= 'viewMorePhotos'] | ||
19 | |||
20 | test_url: http://www.smithsonianmag.com/history-archaeology/The-Goddess-Goes-Home.html \ No newline at end of file | ||