diff options
Diffstat (limited to 'inc/3rdparty/site_config/standard/thedaily.com.txt')
-rw-r--r-- | inc/3rdparty/site_config/standard/thedaily.com.txt | 24 |
1 files changed, 24 insertions, 0 deletions
diff --git a/inc/3rdparty/site_config/standard/thedaily.com.txt b/inc/3rdparty/site_config/standard/thedaily.com.txt new file mode 100644 index 00000000..24ebbbac --- /dev/null +++ b/inc/3rdparty/site_config/standard/thedaily.com.txt | |||
@@ -0,0 +1,24 @@ | |||
1 | #keep all body text | ||
2 | prune: no | ||
3 | |||
4 | #title, body, metadata | ||
5 | title: //div[@class='story_header']/h1 | ||
6 | body: //div[@id='content'] | ||
7 | author: substring-after(//span[@class='byline'], "by ") | ||
8 | author: substring-after(//span[@class='byline'], "By ") | ||
9 | author: //span[@class='byline'] | ||
10 | date: //span[@class='date'] | ||
11 | |||
12 | #formatting | ||
13 | convert_double_br_tags: yes | ||
14 | dissolve: //div[@class='slides_full']/ul/li | ||
15 | |||
16 | # cleanup | ||
17 | strip: //a[@id='story_note'] | ||
18 | strip: //br | ||
19 | strip: //div[@class='intro'] | ||
20 | strip: //div[@class='share-block'] | ||
21 | strip: //div[@class='sidebar-social'] | ||
22 | strip: //div[@class='top-stories'] | ||
23 | strip: //div[@class='prevnext'] | ||
24 | test_url: http://www.thedaily.com/page/2012/01/09/010912-news-college-costs-1-5/ \ No newline at end of file | ||