From ac4d114214d820b20e18518a2dbc809337e39043 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20L=C5=93uillet?= Date: Fri, 6 Dec 2013 10:13:03 +0100 Subject: [add] new specific configuration files --- inc/3rdparty/site_config/standard/wired.com.txt | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 inc/3rdparty/site_config/standard/wired.com.txt (limited to 'inc/3rdparty/site_config/standard/wired.com.txt') diff --git a/inc/3rdparty/site_config/standard/wired.com.txt b/inc/3rdparty/site_config/standard/wired.com.txt new file mode 100644 index 00000000..69bbf5b7 --- /dev/null +++ b/inc/3rdparty/site_config/standard/wired.com.txt @@ -0,0 +1,22 @@ +title: //meta[@property="og:title"]/@content +title: //h1 +title: //*[@class='posttitle'] +author: //*[@class='entryAuthor']/a[1] +author://*[@class='member-title'] +author://li[@class='author']/a[contains(@href, '/author/')] +date: substring-after(//div[@class='entryAuthor'], '·') +date: substring-before(//*[@class='entryDate'], '|') +body: //div[@class='entry'] +strip: //span[contains(@class, 'nextprev')] +#strip_id_or_class: ngg-galleryoverview +# ngg-galleryoverview is the whole content sometimes, e.g. http://www.wired.com/underwire/2011/12/best-mixtapes-of-2011/?pid=5736&viewall=true + +strip: //p[span[contains(@class, 'contentjump')]] +strip: //text()[contains(., 'nextpage')] + +prune: no + +single_page_link: //a[contains(@href, '/all/1') and contains(@class, 'contentjumpall')] + +test_url: http://www.wired.com/cloudline/2011/10/meet-arms-cortex-a15-the-future-of-the-ipad-and-possibly-the-macbook-air/ +test_url: http://www.wired.com/threatlevel/2012/05/ff_counterfeiter/all/1 \ No newline at end of file -- cgit v1.2.3