aboutsummaryrefslogblamecommitdiffhomepage
path: root/inc/3rdparty/site_config/standard/smithsonianmag.com.txt
blob: fc479c2a81fcdbffc0d275e2cee59245f7ef52a6 (plain) (tree)
1
2
3
4
5
6
7
8
9








                                                                               


                                         








                                                                                  
 
                                                                                      
# Metadata extraction rules (one XPath expression per field).
# title: the <h1> element whose id is 'articleTitle'.
title://h1[@id = 'articleTitle']
# author: string value of the first <li> in the <ul id="byLine"> list,
# keeping only the text that follows the literal prefix 'By '.
author:substring-after(//ul[@id = 'byLine']/li[1],'By ')
# date: string value of the last <li> in the same list, keeping only the
# text between its first and second commas.
date:substring-before(substring-after(//ul[@id = 'byLine']/li[last()],','),',')
# body: the <div> element whose id is 'article-body'.
body://div[@id = 'article-body']

# Full-content (single page) link: the <a> inside an <li> that is a direct
# child of a <td> and whose class attribute is exactly 'article-singlepage'.
single_page_link://td/li[@class = 'article-singlepage']/a

# "Continue" link: the <a> whose id is 'continue-btn', also declared as a
# single-page link.
# NOTE(review): this uses single_page_link rather than a next-page directive;
# confirm the continue button leads to the complete article - TODO confirm.
single_page_link: //a[@id='continue-btn']

# Caption clean-up.
# Both rules target <span> elements whose class attribute is exactly
# 'articleImageCaptionwide'; the second also selects the <p> children of
# <div id="articleImage">.
# NOTE(review): presumably wrap_in(i) wraps the matched spans in <i>, and
# move_into(...) relocates the matched <p> elements into those spans - confirm
# that the parser consuming this file supports both directives.
wrap_in(i)://span[@class='articleImageCaptionwide']
move_into (//span[@class='articleImageCaptionwide'])://div[@id = 'articleImage']/p


# Clean-up: elements matched by the strip rules below.
# <p> element whose id is 'articlePaginationWrapper'.
strip://p[@id = 'articlePaginationWrapper']
# Any <ul> whose class attribute contains the substring 'cat-breadcrumb'.
strip://ul[contains(@class, 'cat-breadcrumb')]
# <div> elements whose class attribute is exactly 'viewMorePhotos'.
strip://div [@class= 'viewMorePhotos']

# Sample article URL on this site, recorded for checking the rules above.
test_url: http://www.smithsonianmag.com/history-archaeology/The-Goddess-Goes-Home.html