diff options
author | Nicolas Lœuillet <nicolas@loeuillet.org> | 2014-07-13 10:15:40 +0200 |
---|---|---|
committer | Nicolas Lœuillet <nicolas@loeuillet.org> | 2014-07-13 10:15:40 +0200 |
commit | 4e067ceabd705201a16b4c92cf4b23f3b990326c (patch) | |
tree | 939f3a8e5ff3ab9ee414a57a895d3e78e1d46ce3 /inc/3rdparty/site_config/standard/thedaily.com.txt | |
parent | 58dbe103889148def78b0fc8744d3f94c56a1561 (diff) | |
download | wallabag-4e067ceabd705201a16b4c92cf4b23f3b990326c.tar.gz wallabag-4e067ceabd705201a16b4c92cf4b23f3b990326c.tar.zst wallabag-4e067ceabd705201a16b4c92cf4b23f3b990326c.zip |
updated specific configuration for parsing
Diffstat (limited to 'inc/3rdparty/site_config/standard/thedaily.com.txt')
-rwxr-xr-x[-rw-r--r--] | inc/3rdparty/site_config/standard/thedaily.com.txt | 46 |
1 files changed, 23 insertions, 23 deletions
diff --git a/inc/3rdparty/site_config/standard/thedaily.com.txt b/inc/3rdparty/site_config/standard/thedaily.com.txt index 24ebbbac..e255e6a8 100644..100755 --- a/inc/3rdparty/site_config/standard/thedaily.com.txt +++ b/inc/3rdparty/site_config/standard/thedaily.com.txt | |||
@@ -1,24 +1,24 @@ | |||
1 | #keep all body text | 1 | #keep all body text |
2 | prune: no | 2 | prune: no |
3 | 3 | ||
4 | #title, body, metadata | 4 | #title, body, metadata |
5 | title: //div[@class='story_header']/h1 | 5 | title: //div[@class='story_header']/h1 |
6 | body: //div[@id='content'] | 6 | body: //div[@id='content'] |
7 | author: substring-after(//span[@class='byline'], "by ") | 7 | author: substring-after(//span[@class='byline'], "by ") |
8 | author: substring-after(//span[@class='byline'], "By ") | 8 | author: substring-after(//span[@class='byline'], "By ") |
9 | author: //span[@class='byline'] | 9 | author: //span[@class='byline'] |
10 | date: //span[@class='date'] | 10 | date: //span[@class='date'] |
11 | 11 | ||
12 | #formatting | 12 | #formatting |
13 | convert_double_br_tags: yes | 13 | convert_double_br_tags: yes |
14 | dissolve: //div[@class='slides_full']/ul/li | 14 | dissolve: //div[@class='slides_full']/ul/li |
15 | 15 | ||
16 | # cleanup | 16 | # cleanup |
17 | strip: //a[@id='story_note'] | 17 | strip: //a[@id='story_note'] |
18 | strip: //br | 18 | strip: //br |
19 | strip: //div[@class='intro'] | 19 | strip: //div[@class='intro'] |
20 | strip: //div[@class='share-block'] | 20 | strip: //div[@class='share-block'] |
21 | strip: //div[@class='sidebar-social'] | 21 | strip: //div[@class='sidebar-social'] |
22 | strip: //div[@class='top-stories'] | 22 | strip: //div[@class='top-stories'] |
23 | strip: //div[@class='prevnext'] | 23 | strip: //div[@class='prevnext'] |
24 | test_url: http://www.thedaily.com/page/2012/01/09/010912-news-college-costs-1-5/ \ No newline at end of file | 24 | test_url: http://www.thedaily.com/page/2012/01/09/010912-news-college-costs-1-5/ \ No newline at end of file |