]>
Commit | Line | Data |
---|---|---|
4e067cea NL |
1 | title: //h1[@class="sl-art-head-dek"] |
2 | body: //article//div[@class='sl-art-body']/div[contains(@class, 'body')] | |
3 | strip: //div[@class="department_kicker"] | |
4 | strip: //div[@id="insider_ad_wrapper" or @id="insider_ad_inner"] | |
5 | strip: //div[@id="bottom_sponsored_links"] | |
6 | strip: //div[@class="sl-art-ad-midflex"] | |
7 | #strip: //dl | |
8 | #strip: //p[em/a[contains(@href, 'facebook.com')]] | |
9 | prune: no | |
10 | ||
11 | author: //div[@id='author_bio']//a[contains(@href, '/author/')] | |
12 | author: //a[contains(@href, '/authors.')] | |
13 | ||
14 | date: substring-before(substring-after(//span[@class='sl-art-byline'], 'Posted '), ', at ') | |
15 | ||
16 | single_page_link: //a[@class='sl-art-sinpage'] | |
17 | ||
18 | test_url: http://www.slate.com/id/2274583/pagenum/all/ | |
ac4d1142 | 19 | test_url: http://www.slate.com/id/2293116/ |