git.immae.eu Git - github/wallabag/wallabag.git/blame - inc/3rdparty/site_config/standard/smithsonianmag.com.txt
[add] new specific configuration files
[github/wallabag/wallabag.git] / inc / 3rdparty / site_config / standard / smithsonianmag.com.txt
CommitLineData
ac4d1142
NL
1# meta data\r
2title://h1[@id = 'articleTitle']\r
3author:substring-after(//ul[@id = 'byLine']/li[1],'By ')\r
4date:substring-before(substring-after(//ul[@id = 'byLine']/li[last()],','),',')\r
5body://div[@id = 'article-body']\r
6\r
7# full content\r
8single_page_link://td/li[@class = 'article-singlepage']/a\r
9\r
10# caption clean up\r
11wrap_in(i)://span[@class='articleImageCaptionwide']\r
12move_into (//span[@class='articleImageCaptionwide'])://div[@id = 'articleImage']/p\r
13\r
14\r
15# clean up\r
16strip://p[@id = 'articlePaginationWrapper']\r
17strip://ul[contains(@class, 'cat-breadcrumb')]\r
18strip://div [@class= 'viewMorePhotos']\r
19
20test_url: http://www.smithsonianmag.com/history-archaeology/The-Goddess-Goes-Home.html