# blob: cee50c9be4e9c4bf6fde4e6ad4ba701965f95204 (plain) (tree)
# Metadata selectors: the author link inside the byline paragraph, and the
# datetime attribute of the <time pubdate> element inside the publish-date span.
author: //p[contains(@class, "byline")]/a[contains(@class, "author")]
date: //span[contains(@class, "publish-date")]/time[@pubdate]/@datetime
# Primary article body: the div whose class contains 'entry-content'.
body: //div[contains(@class, 'entry-content')]
# Second body selector for Vergecast pages, e.g. http://www.theverge.com/2013/8/22/4648566/the-vergecast-090-august-22th-2013-video
# NOTE(review): presumably only consulted when the selector above matches nothing - confirm against the extractor.
body: //article
# NOTE(review): presumably turns off the extractor's automatic clean-up of the matched body - confirm.
prune: no
# Disabled directive, kept for reference.
#tidy: no
# Page chrome to remove: article header, sticky menu, asides and navigation.
strip: //article/header
strip: //*[@id='sticky-menu']
strip: //aside
strip: //nav
# Images whose class contains 'vox-lazy-load'.
# NOTE(review): presumably lazy-load placeholders - confirm.
strip: //img[contains(@class, 'vox-lazy-load')]
# Work around bad parsing: drop divs inside story-image blocks whose text
# contains 'function(' (i.e. script source that ended up as visible text).
strip: //div[contains(@class, 'story-image')]//div[contains(., 'function(')]
# Link sets, entry sidebar, source lists and chorus-emc content blocks.
strip: //div[contains(@class, 'm-linkset')]
strip: //div[contains(@class, 'm-entry__sidebar')]
strip: //ul[contains(@class, 'm-article__sources')]
strip: //div[contains(@class, 'chorus-emc__content')]
# Elements removed by id/class name rather than by XPath.
strip_id_or_class: gallery
strip_id_or_class: article-meta
strip_id_or_class: story-navigation
strip_id_or_class: slegend
strip_id_or_class: related-product-meta
strip_id_or_class: comments
strip_id_or_class: ui-jump-list
strip_id_or_class: pullquote
strip_id_or_class: m-ad
strip_id_or_class: social-sharing
strip_id_or_class: m-video-entry__excerpt
strip_id_or_class: hidden
strip_id_or_class: m-article__follow-bar
strip_id_or_class: m-article__share-buttons
# Literal string substitutions, all written in the
# replace_string(<find>): <replacement> form.
# <noscript> wrappers are turned into plain <div> elements.
replace_string(<noscript>): <div>
replace_string(</noscript>): </div>
# <script> elements are rewritten into hidden <div> elements.
replace_string(<script): <div style="display:none"
replace_string(</script>): </div>
# Remove every <q> element and any link whose class contains 'entry-section-title'.
strip: //q
strip: //a[contains(@class, 'entry-section-title')]
# Sample theverge.com URLs for checking these rules.
test_url: http://www.theverge.com/2012/2/29/2821763/lytro-review
test_url: http://www.theverge.com/2011/11/3/2534861/nokia-lumia-800-review
test_url: http://www.theverge.com/2013/2/24/4026114/barnes-noble-shifting-focus-away-from-nook-hardware
test_url: http://www.theverge.com/2014/6/19/5824072/top-shelf-living-the-dream
# NOTE(review): the next entry is a feed path rather than an article page - confirm it is intended.
test_url: http://www.theverge.com/rss/frontpage
|