git.immae.eu Git - github/wallabag/wallabag.git/blob - inc/3rdparty/site_config/standard/nytimes.com.txt
Merge pull request #789 from wallabag/feature/someMoreSitesConfig
[github/wallabag/wallabag.git] / inc / 3rdparty / site_config / standard / nytimes.com.txt
1 title://h1[@class="articleHeadline"]
2 body://div[@id="article"]
3 strip_id_or_class:articleTools
4 strip_id_or_class:readerscomment
5 #strip://div[contains(@class, "articleInline runaroundLeft")]
6 strip: //div[contains(@class, "doubleRule")]
7 # strip image credit - appears as a bold heading
8 strip: //div[contains(@class, "articleInline")]//h6
9 strip_id_or_class:enlargeThis
10 strip_id_or_class:pageLinks
11 strip_id_or_class:memberTools
12 strip_id_or_class:articleExtras
13 strip_id_or_class:singleAd
14 strip_id_or_class:byline
15 strip_id_or_class:dateline
16 strip_id_or_class:articleheadline
17 strip_id_or_class:articleBottomExtra
18 strip://a[contains(@href, 'nytimes.com/adx/')]
19 strip: //nyt_byline
20 strip: //span[contains(@class, 'slideshow') or contains(@class, 'video')]
21 strip: //p[@class='caption']//a[contains(., 'More Photos')]
22
23 prune: no
24 tidy: no
25
26 date: substring-after(//*[contains(@class, 'dateline')], 'Published:')
27
28 single_page_link: //link[contains(@href, 'pagewanted=all')]
29 #single_page_link: //a[contains(@href, 'pagewanted=all') and not(contains(@href, 'login'))]
30
31 strip://ul[@id = 'toolsList']
32 strip://h6[@class = 'kicker']
33 author:substring-after(//h6[@class='byline'],'By ')
34
35 test_url: http://www.nytimes.com/2011/07/24/books/review/an-academic-authors-unintentional-masterpiece.html
36 test_url: http://www.nytimes.com/2012/06/10/arts/television/the-newsroom-aaron-sorkins-return-to-tv.html