diff options
author | Nicolas Lœuillet <nicolas@loeuillet.org> | 2014-07-13 10:15:40 +0200 |
---|---|---|
committer | Nicolas Lœuillet <nicolas@loeuillet.org> | 2014-07-13 10:15:40 +0200 |
commit | 4e067ceabd705201a16b4c92cf4b23f3b990326c (patch) | |
tree | 939f3a8e5ff3ab9ee414a57a895d3e78e1d46ce3 /inc/3rdparty/site_config/standard/pcworld.com.txt | |
parent | 58dbe103889148def78b0fc8744d3f94c56a1561 (diff) | |
download | wallabag-4e067ceabd705201a16b4c92cf4b23f3b990326c.tar.gz wallabag-4e067ceabd705201a16b4c92cf4b23f3b990326c.tar.zst wallabag-4e067ceabd705201a16b4c92cf4b23f3b990326c.zip |
updated specific configuration for parsing
Diffstat (limited to 'inc/3rdparty/site_config/standard/pcworld.com.txt')
-rwxr-xr-x[-rw-r--r--] | inc/3rdparty/site_config/standard/pcworld.com.txt | 36 |
1 files changed, 18 insertions, 18 deletions
diff --git a/inc/3rdparty/site_config/standard/pcworld.com.txt b/inc/3rdparty/site_config/standard/pcworld.com.txt index 30ccbb5f..7193f87e 100644..100755 --- a/inc/3rdparty/site_config/standard/pcworld.com.txt +++ b/inc/3rdparty/site_config/standard/pcworld.com.txt | |||
@@ -1,19 +1,19 @@ | |||
1 | title: //div[@class='articleHead']//h1 | 1 | title: //div[@class='articleHead']//h1 |
2 | author: //div[@class="author-name"]/a[1] | 2 | author: //div[@class="author-name"]/a[1] |
3 | body: //div[@class="main"] | 3 | body: //div[@class="main"] |
4 | 4 | ||
5 | # remove 'From the Lab' and 'Recent posts' text | 5 | # remove 'From the Lab' and 'Recent posts' text |
6 | strip: //div[@class='blogLabel'] | 6 | strip: //div[@class='blogLabel'] |
7 | 7 | ||
8 | # remove byline and meta info | 8 | # remove byline and meta info |
9 | strip: //h1 | 9 | strip: //h1 |
10 | strip: //div[@class="article-meta"] | 10 | strip: //div[@class="article-meta"] |
11 | strip: //div[@class="author-info"] | 11 | strip: //div[@class="author-info"] |
12 | 12 | ||
13 | #strip tags and categories | 13 | #strip tags and categories |
14 | strip: //div[@class="department"] | 14 | strip: //div[@class="department"] |
15 | 15 | ||
16 | #strip product cap links | 16 | #strip product cap links |
17 | strip: //div[@class="cap-main"] | 17 | strip: //div[@class="cap-main"] |
18 | strip: //div[@id="compare-lede"] | 18 | strip: //div[@id="compare-lede"] |
19 | test_url: http://www.pcworld.com/article/262034/are-printer-companies-gouging-us-on-laser-toner-pricing.html \ No newline at end of file | 19 | test_url: http://www.pcworld.com/article/262034/are-printer-companies-gouging-us-on-laser-toner-pricing.html \ No newline at end of file |