diff options
Diffstat (limited to 'inc/3rdparty/site_config/standard/blogs.smithsonianmag.com.txt')
-rw-r--r-- | inc/3rdparty/site_config/standard/blogs.smithsonianmag.com.txt | 15 |
1 file changed, 15 insertions, 0 deletions
diff --git a/inc/3rdparty/site_config/standard/blogs.smithsonianmag.com.txt b/inc/3rdparty/site_config/standard/blogs.smithsonianmag.com.txt
new file mode 100644
index 00000000..ba8bc6e7
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blogs.smithsonianmag.com.txt | |||
@@ -0,0 +1,15 @@ | |||
1 | # metadata | ||
2 | author://div[@class = 'post']/div[@class='meta']/a[1] | ||
3 | date://div[@id = 'rap']/h2[1] | ||
4 | body://div[@class = 'post'] | ||
5 | |||
6 | # wrap each WordPress caption block (div with a 'wp-caption' class) in a fieldset | ||
7 | wrap_in(fieldset)://div[contains(@class, 'wp-caption')] | ||
8 | |||
9 | |||
10 | # clean up: strip the story title heading, the social block, and hidden or zero-size images | ||
11 | strip://div[@class = 'post']/h3[@class = 'storytitle'] | ||
12 | strip://div[@class = 'post']/div[@class = 'social'] | ||
13 | strip://img[@style = 'display:none;'] | ||
14 | strip://img[@height='0' and @width='0'] | ||
15 | test_url: http://blogs.smithsonianmag.com/adventure/2011/10/tips-for-women-traveling-in-turkey/ \ No newline at end of file | ||