aboutsummaryrefslogtreecommitdiffhomepage
path: root/inc/3rdparty/site_config/standard/nytimes.com.txt
diff options
context:
space:
mode:
Diffstat (limited to 'inc/3rdparty/site_config/standard/nytimes.com.txt')
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/nytimes.com.txt85
1 files changed, 49 insertions, 36 deletions
diff --git a/inc/3rdparty/site_config/standard/nytimes.com.txt b/inc/3rdparty/site_config/standard/nytimes.com.txt
index 8d9a794a..23c9ad11 100644..100755
--- a/inc/3rdparty/site_config/standard/nytimes.com.txt
+++ b/inc/3rdparty/site_config/standard/nytimes.com.txt
@@ -1,36 +1,49 @@
1title://h1[@class="articleHeadline"] 1title://h1[@class="articleHeadline"]
2body://div[@id="article"] 2body://div[@id="article"]
3strip_id_or_class:articleTools 3body://*[@itemprop="articleBody"]
4strip_id_or_class:readerscomment 4strip_id_or_class:articleTools
5#strip://div[contains(@class, "articleInline runaroundLeft")] 5strip_id_or_class:readerscomment
6strip: //div[contains(@class, "doubleRule")] 6#strip://div[contains(@class, "articleInline runaroundLeft")]
7# strip image credit - appears as a bold heading 7strip: //div[contains(@class, "doubleRule")]
8strip: //div[contains(@class, "articleInline")]//h6 8# strip image credit - appears as a bold heading
9strip_id_or_class:enlargeThis 9strip: //div[contains(@class, "articleInline")]//h6
10strip_id_or_class:pageLinks 10strip_id_or_class:enlargeThis
11strip_id_or_class:memberTools 11strip_id_or_class:pageLinks
12strip_id_or_class:articleExtras 12strip_id_or_class:memberTools
13strip_id_or_class:singleAd 13strip_id_or_class:articleExtras
14strip_id_or_class:byline 14strip_id_or_class:singleAd
15strip_id_or_class:dateline 15strip_id_or_class:byline
16strip_id_or_class:articleheadline 16strip_id_or_class:dateline
17strip_id_or_class:articleBottomExtra 17strip_id_or_class:articleheadline
18strip://a[contains(@href, 'nytimes.com/adx/')] 18strip_id_or_class:articleBottomExtra
19strip: //nyt_byline 19strip_id_or_class:shareTools
20strip: //span[contains(@class, 'slideshow') or contains(@class, 'video')] 20strip://a[contains(@href, 'nytimes.com/adx/')]
21strip: //p[@class='caption']//a[contains(., 'More Photos')] 21strip: //nyt_byline
22 22strip: //span[contains(@class, 'slideshow') or contains(@class, 'video')]
23prune: no 23strip: //p[@class='caption']//a[contains(., 'More Photos')]
24tidy: no 24
25 25prune: no
26date: substring-after(//*[contains(@class, 'dateline')], 'Published:') 26tidy: no
27 27
28single_page_link: //link[contains(@href, 'pagewanted=all')] 28find_string: <script
29#single_page_link: //a[contains(@href, 'pagewanted=all') and not(contains(@href, 'login'))] 29replace_string: <div style="display:none"
30 30find_string: </script>
31strip://ul[@id = 'toolsList'] 31replace_string: </div>
32strip://h6[@class = 'kicker'] 32
33author:substring-after(//h6[@class='byline'],'By ') 33date: substring-after(//*[contains(@class, 'dateline')], 'Published:')
34 34
35test_url: http://www.nytimes.com/2011/07/24/books/review/an-academic-authors-unintentional-masterpiece.html 35single_page_link: //link[contains(@href, 'pagewanted=all')]
36test_url: http://www.nytimes.com/2012/06/10/arts/television/the-newsroom-aaron-sorkins-return-to-tv.html \ No newline at end of file 36single_page_link: //link[@rel='alternate' and contains(@href, 'mobile.nytimes.com')]/@href
37single_page_link: concat(substring-before(//div[@id='pageLinks']//a[contains(@href, 'pagewanted=')]/@href, 'pagewanted='), 'pagewanted=all')
38#single_page_link: //a[contains(@href, 'pagewanted=all') and not(contains(@href, 'login'))]
39
40strip://ul[@id = 'toolsList']
41strip://h6[@class = 'kicker']
42author:substring-after(//h6[@class='byline'],'By ')
43
44test_url: http://www.nytimes.com/2011/07/24/books/review/an-academic-authors-unintentional-masterpiece.html
45test_url: http://www.nytimes.com/2012/06/10/arts/television/the-newsroom-aaron-sorkins-return-to-tv.html
46test_url: http://www.nytimes.com/2013/03/25/world/middleeast/israeli-military-responds-after-patrols-come-under-fire-from-syria.html
47test_url: http://www.nytimes.com/2013/08/15/nyregion/when-the-new-york-city-subway-ran-without-rails.html
48test_url: http://www.nytimes.com/2004/02/29/weekinreview/correspondence-class-consciousness-china-s-wealthy-live-creed-hobbes-darwin-meet.html
49test_url: http://www.nytimes.com/2014/06/19/opinion/gail-collins-romney-and-the-2016-contenders-huddle.html \ No newline at end of file