diff options
author | Nicolas Lœuillet <nicolas.loeuillet@gmail.com> | 2013-12-06 10:13:03 +0100 |
---|---|---|
committer | Nicolas Lœuillet <nicolas.loeuillet@gmail.com> | 2013-12-06 10:13:03 +0100 |
commit | ac4d114214d820b20e18518a2dbc809337e39043 (patch) | |
tree | 27886128ef949b7f8dd174b0646b5a4d99883b44 /inc/3rdparty/site_config/standard/thedaily.com.txt | |
parent | d5501950e2470d52f6bf5954d2179010cdee0475 (diff) | |
download | wallabag-ac4d114214d820b20e18518a2dbc809337e39043.tar.gz wallabag-ac4d114214d820b20e18518a2dbc809337e39043.tar.zst wallabag-ac4d114214d820b20e18518a2dbc809337e39043.zip |
[add] new specific configuration files
Diffstat (limited to 'inc/3rdparty/site_config/standard/thedaily.com.txt')
-rw-r--r-- | inc/3rdparty/site_config/standard/thedaily.com.txt | 24 |
1 files changed, 24 insertions, 0 deletions
diff --git a/inc/3rdparty/site_config/standard/thedaily.com.txt b/inc/3rdparty/site_config/standard/thedaily.com.txt new file mode 100644 index 00000000..24ebbbac --- /dev/null +++ b/inc/3rdparty/site_config/standard/thedaily.com.txt | |||
@@ -0,0 +1,24 @@ | |||
1 | #keep all body text | ||
2 | prune: no | ||
3 | |||
4 | #title, body, metadata | ||
5 | title: //div[@class='story_header']/h1 | ||
6 | body: //div[@id='content'] | ||
7 | author: substring-after(//span[@class='byline'], "by ") | ||
8 | author: substring-after(//span[@class='byline'], "By ") | ||
9 | author: //span[@class='byline'] | ||
10 | date: //span[@class='date'] | ||
11 | |||
12 | #formatting | ||
13 | convert_double_br_tags: yes | ||
14 | dissolve: //div[@class='slides_full']/ul/li | ||
15 | |||
16 | # cleanup | ||
17 | strip: //a[@id='story_note'] | ||
18 | strip: //br | ||
19 | strip: //div[@class='intro'] | ||
20 | strip: //div[@class='share-block'] | ||
21 | strip: //div[@class='sidebar-social'] | ||
22 | strip: //div[@class='top-stories'] | ||
23 | strip: //div[@class='prevnext'] | ||
24 | test_url: http://www.thedaily.com/page/2012/01/09/010912-news-college-costs-1-5/ \ No newline at end of file | ||