Commit | Line | Data |
---|---|---|
90a1a78b NL |
1 | body: //div[@id='column_1'] |
2 | next_page_link: //div[@class='next']/a[not(contains(@href, '/comments') or contains(@href, '/news/'))] | |
3 | prune: no | |
4 | ||
5 | author: substring-after(//p[@class='byline'], 'by ') | |
6 | date: substring-before(substring-after(//p[@class='byline'], 'on '), ' by') | |
7 | ||
8 | strip: //h1 | |
9 | strip_id_or_class: socialLinks | |
10 | strip_id_or_class: byline | |
11 | strip_id_or_class: pageSelector | |
12 | strip_id_or_class: articleTabs | |
13 | strip_id_or_class: pageNav | |
14 | strip_id_or_class: share | |
15 | strip_id_or_class: commentsContainer | |
16 | strip_id_or_class: below_article_related | |
17 | ||
18 | test_url: http://www.bit-tech.net/hardware/storage/2014/08/13/ocz-arc-100-240gb-review/1 | |
19 | test_url: http://www.bit-tech.net/news/bits/2014/08/15/google-trojan/1 |