From ac4d114214d820b20e18518a2dbc809337e39043 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolas=20L=C5=93uillet?=
Date: Fri, 6 Dec 2013 10:13:03 +0100
Subject: [add] new specific configuration files

---
 inc/3rdparty/site_config/standard/thedaily.com.txt | 24 ++++++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 inc/3rdparty/site_config/standard/thedaily.com.txt

(limited to 'inc/3rdparty/site_config/standard/thedaily.com.txt')

diff --git a/inc/3rdparty/site_config/standard/thedaily.com.txt b/inc/3rdparty/site_config/standard/thedaily.com.txt
new file mode 100644
index 00000000..24ebbbac
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/thedaily.com.txt
@@ -0,0 +1,24 @@
+#keep all body text
+prune: no
+
+#title, body, metadata
+title: //div[@class='story_header']/h1
+body: //div[@id='content']
+author: substring-after(//span[@class='byline'], "by ")
+author: substring-after(//span[@class='byline'], "By ")
+author: //span[@class='byline']
+date: //span[@class='date']
+
+#formatting
+convert_double_br_tags: yes
+dissolve: //div[@class='slides_full']/ul/li
+
+# cleanup
+strip: //a[@id='story_note']
+strip: //br
+strip: //div[@class='intro']
+strip: //div[@class='share-block']
+strip: //div[@class='sidebar-social']
+strip: //div[@class='top-stories']
+strip: //div[@class='prevnext']
+test_url: http://www.thedaily.com/page/2012/01/09/010912-news-college-costs-1-5/
\ No newline at end of file
-- 
cgit v1.2.3