diff options
author | Nicolas Lœuillet <nicolas@loeuillet.org> | 2014-07-13 10:15:40 +0200 |
---|---|---|
committer | Nicolas Lœuillet <nicolas@loeuillet.org> | 2014-07-13 10:15:40 +0200 |
commit | 4e067ceabd705201a16b4c92cf4b23f3b990326c (patch) | |
tree | 939f3a8e5ff3ab9ee414a57a895d3e78e1d46ce3 /inc/3rdparty/site_config/standard/businessweek.com.txt | |
parent | 58dbe103889148def78b0fc8744d3f94c56a1561 (diff) | |
download | wallabag-4e067ceabd705201a16b4c92cf4b23f3b990326c.tar.gz wallabag-4e067ceabd705201a16b4c92cf4b23f3b990326c.tar.zst wallabag-4e067ceabd705201a16b4c92cf4b23f3b990326c.zip |
updated specific configuration for parsing
Diffstat (limited to 'inc/3rdparty/site_config/standard/businessweek.com.txt')
-rwxr-xr-x[-rw-r--r--] | inc/3rdparty/site_config/standard/businessweek.com.txt | 58 |
1 files changed, 29 insertions, 29 deletions
diff --git a/inc/3rdparty/site_config/standard/businessweek.com.txt b/inc/3rdparty/site_config/standard/businessweek.com.txt index 7b3d063b..03085593 100644..100755 --- a/inc/3rdparty/site_config/standard/businessweek.com.txt +++ b/inc/3rdparty/site_config/standard/businessweek.com.txt | |||
@@ -1,30 +1,30 @@ | |||
1 | # story has several pages, should be detected | 1 | # story has several pages, should be detected |
2 | body: //div[@id='storyBody'] | 2 | body: //div[@id='storyBody'] |
3 | body: //div[@id='article_body'] | 3 | body: //div[@id='article_body'] |
4 | body: //div[@id='story_body'] | 4 | body: //div[@id='story_body'] |
5 | 5 | ||
6 | title://h1[@id='article_headline'] | 6 | title://h1[@id='article_headline'] |
7 | 7 | ||
8 | # article author | 8 | # article author |
9 | author: //p[@class='author']/a | 9 | author: //p[@class='author']/a |
10 | # story author(s) | 10 | # story author(s) |
11 | author: substring-after(//p[@class='byline'], 'By ') | 11 | author: substring-after(//p[@class='byline'], 'By ') |
12 | 12 | ||
13 | # article date | 13 | # article date |
14 | date: //span[@class='published_date'] | 14 | date: //span[@class='published_date'] |
15 | # story date | 15 | # story date |
16 | date: //span[@class='date'] | 16 | date: //span[@class='date'] |
17 | 17 | ||
18 | date: substring-after(//div[contains(@class,'attributor')],'on') | 18 | date: substring-after(//div[contains(@class,'attributor')],'on') |
19 | strip_id_or_class: inset | 19 | strip_id_or_class: inset |
20 | strip: //p/span[@class='photoCredit'] | 20 | strip: //p/span[@class='photoCredit'] |
21 | strip: //h1 | 21 | strip: //h1 |
22 | 22 | ||
23 | strip_id_or_class: page_count | 23 | strip_id_or_class: page_count |
24 | strip_id_or_class: tools | 24 | strip_id_or_class: tools |
25 | strip_id_or_class: pagination | 25 | strip_id_or_class: pagination |
26 | 26 | ||
27 | single_page_link: //li[@id='stPrint']/a | 27 | single_page_link: //li[@id='stPrint']/a |
28 | 28 | ||
29 | test_url: http://www.businessweek.com/magazine/buyback-insurance-a-good-deal-for-retailers-07282011.html | 29 | test_url: http://www.businessweek.com/magazine/buyback-insurance-a-good-deal-for-retailers-07282011.html |
30 | test_url: http://www.businessweek.com/articles/2012-06-06/american-pain-the-largest-u-dot-s-dot-pill-mills-rise-and-fall \ No newline at end of file | 30 | test_url: http://www.businessweek.com/articles/2012-06-06/american-pain-the-largest-u-dot-s-dot-pill-mills-rise-and-fall \ No newline at end of file |