blob: edb52855f7cce15b9e7e7f7a235aa0fc79b40c09 (
plain) (
tree)
|
|
# Extraction rules for Wall Street Journal article pages (online.wsj.com, per the test URLs below).
# The title/body/author/date/strip values are XPath expressions evaluated against the page.
# NOTE(review): how repeated directives (two body lines, several strip lines) are combined is
# decided by the consuming tool, so keep their relative order unless that is confirmed.

# Title comes from the Open Graph og:title meta tag.
title: //meta[@property="og:title"]/@content
# Regular stories: the div with id article_story_body.
body: //div[@id='article_story_body']
# Byline link inside the h3 with class byline.
author: //h3[@class='byline']/a
# Slide show content: the image in the first slide, plus the first paragraph found in a txt_body div.
body: //ul[@id='imageSlide']//li[@class='firstSlide']//img | (//div[@class='txt_body']//p)[1]
# Date is the small element inside the dateStamp list item.
date: //li[@class='dateStamp']/small
# Inset elements targeted by id or class name.
strip_id_or_class: insetFullBracket
strip_id_or_class: insettipBox
# The legacyInset rule below is commented out; the reason is not recorded in this file.
#strip_id_or_class: legacyInset
strip_id_or_class: recipeACShopAndBuyText
# cite elements nested in any div whose class contains insetContent.
strip: //div[contains(@class, 'insetContent')]//cite
# Any element whose inline style contains 'visibility: hidden;'.
strip: //*[contains(@style, 'visibility: hidden;')]
# insetContent divs whose class does not also contain 'image' (image insets are not matched).
strip: //div[contains(@class, 'insetContent') and not(contains(@class, 'image'))]
# NOTE(review): prune and tidy are both switched off here; their exact effect is defined by the
# consuming tool -- confirm before changing.
prune: no
tidy: no
# Sample page: regular article.
test_url: http://online.wsj.com/article/SB10001424052970203363504577185322849515102.html
# Sample page: slide show.
test_url: http://online.wsj.com/article/SB10001424052970204791104577110550376458164.html
|