]> git.immae.eu Git - github/wallabag/wallabag.git/blame - inc/3rdparty/site_config/standard/smithsonianmag.com.txt
update config from @fivefilters
[github/wallabag/wallabag.git] / inc / 3rdparty / site_config / standard / smithsonianmag.com.txt
CommitLineData
4e067cea
NL
1# metadata
2title://h1[@id = 'articleTitle']
3author:substring-after(//ul[@id = 'byLine']/li[1],'By ')
4date:substring-before(substring-after(//ul[@id = 'byLine']/li[last()],','),',')
5body://div[@id = 'article-body']
6
7# full content (link to the single-page view of the article)
8single_page_link://td/li[@class = 'article-singlepage']/a
9
3bb6a8ed
NL
10# continue link (also used as a single-page link)
11single_page_link: //a[@id='continue-btn']
12
4e067cea
NL
13# caption cleanup
14wrap_in(i)://span[@class='articleImageCaptionwide']
15move_into (//span[@class='articleImageCaptionwide'])://div[@id = 'articleImage']/p
16
17
18# cleanup
19strip://p[@id = 'articlePaginationWrapper']
20strip://ul[contains(@class, 'cat-breadcrumb')]
21strip://div [@class= 'viewMorePhotos']
ac4d1142 22
3bb6a8ed 23test_url: http://www.smithsonianmag.com/history-archaeology/The-Goddess-Goes-Home.html