diff options
Diffstat (limited to 'inc/3rdparty/site_config/standard/smithsonianmag.com.txt')
-rwxr-xr-x | inc/3rdparty/site_config/standard/smithsonianmag.com.txt | 5 |
1 files changed, 4 insertions, 1 deletions
diff --git a/inc/3rdparty/site_config/standard/smithsonianmag.com.txt b/inc/3rdparty/site_config/standard/smithsonianmag.com.txt
index 3e8fee95..fc479c2a 100755
--- a/inc/3rdparty/site_config/standard/smithsonianmag.com.txt
+++ b/inc/3rdparty/site_config/standard/smithsonianmag.com.txt
@@ -7,6 +7,9 @@ body://div[@id = 'article-body'] | |||
7 | # full content | 7 | # full content |
8 | single_page_link://td/li[@class = 'article-singlepage']/a | 8 | single_page_link://td/li[@class = 'article-singlepage']/a |
9 | 9 | ||
10 | # continue link | ||
11 | single_page_link: //a[@id='continue-btn'] | ||
12 | |||
10 | # caption clean up | 13 | # caption clean up |
11 | wrap_in(i)://span[@class='articleImageCaptionwide'] | 14 | wrap_in(i)://span[@class='articleImageCaptionwide'] |
12 | move_into (//span[@class='articleImageCaptionwide'])://div[@id = 'articleImage']/p | 15 | move_into (//span[@class='articleImageCaptionwide'])://div[@id = 'articleImage']/p |
@@ -17,4 +20,4 @@ strip://p[@id = 'articlePaginationWrapper'] | |||
17 | strip://ul[contains(@class, 'cat-breadcrumb')] | 20 | strip://ul[contains(@class, 'cat-breadcrumb')] |
18 | strip://div [@class= 'viewMorePhotos'] | 21 | strip://div [@class= 'viewMorePhotos'] |
19 | 22 | ||
20 | test_url: http://www.smithsonianmag.com/history-archaeology/The-Goddess-Goes-Home.html \ No newline at end of file | 23 | test_url: http://www.smithsonianmag.com/history-archaeology/The-Goddess-Goes-Home.html |