From 4e067ceabd705201a16b4c92cf4b23f3b990326c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20L=C5=93uillet?= Date: Sun, 13 Jul 2014 10:15:40 +0200 Subject: updated specific configuration for parsing --- inc/3rdparty/site_config/standard/theverge.com.txt | 79 +++++++++++++--------- 1 file changed, 48 insertions(+), 31 deletions(-) mode change 100644 => 100755 inc/3rdparty/site_config/standard/theverge.com.txt (limited to 'inc/3rdparty/site_config/standard/theverge.com.txt') diff --git a/inc/3rdparty/site_config/standard/theverge.com.txt b/inc/3rdparty/site_config/standard/theverge.com.txt old mode 100644 new mode 100755 index 11c5c153..1e1ce58f --- a/inc/3rdparty/site_config/standard/theverge.com.txt +++ b/inc/3rdparty/site_config/standard/theverge.com.txt @@ -1,31 +1,48 @@ -title: //h1[contains(@class, "headline")] - -author: //p[contains(@class, "byline")]/a[contains(@class, "author")] - -date: substring-after(normalize-space(//p[contains(@class, "byline")]/span[contains(@class, "publish-date")]), "on ") - -body: //article[contains(@class, 'feature-entry')] -body: //article -prune: no -tidy: no - -strip: //article/header -strip: //*[@id='sticky-menu'] -strip: //aside -strip: //nav - -strip_id_or_class: gallery -strip_id_or_class: article-meta -strip_id_or_class: story-navigation -strip_id_or_class: slegend -strip_id_or_class: related-product-meta -strip_id_or_class: comments -strip_id_or_class: ui-jump-list -strip_id_or_class: pullquote - -strip: //q - -strip: //a[contains(@class, 'entry-section-title')] - -test_url: http://www.theverge.com/2012/2/29/2821763/lytro-review -test_url: http://www.theverge.com/2011/11/3/2534861/nokia-lumia-800-review \ No newline at end of file +author: //p[contains(@class, "byline")]/a[contains(@class, "author")] + +date: //span[contains(@class, "publish-date")]/time[@pubdate]/@datetime + +body: //div[contains(@class, 'entry-content')] +# for vergecasts, e.g. http://www.theverge.com/2013/8/22/4648566/the-vergecast-090-august-22th-2013-video +body: //article +prune: no +#tidy: no + +strip: //article/header +strip: //*[@id='sticky-menu'] +strip: //aside +strip: //nav +strip: //img[contains(@class, 'vox-lazy-load')] +# deal with bad parsing +strip: //div[contains(@class, 'story-image')]//div[contains(., 'function(')] + +strip_id_or_class: gallery +strip_id_or_class: article-meta +strip_id_or_class: story-navigation +strip_id_or_class: slegend +strip_id_or_class: related-product-meta +strip_id_or_class: comments +strip_id_or_class: ui-jump-list +strip_id_or_class: pullquote +strip_id_or_class: m-ad +strip_id_or_class: social-sharing +strip_id_or_class: m-video-entry__excerpt +strip_id_or_class: hidden + +replace_string(