git.immae.eu Git - github/wallabag/wallabag.git/blobdiff - inc/3rdparty/site_config/standard/nytimes.com.txt
Merge pull request #802 from tcitworld/traductionfix
[github/wallabag/wallabag.git] / inc / 3rdparty / site_config / standard / nytimes.com.txt
old mode 100644 (file)
new mode 100755 (executable)
index 8d9a794..23c9ad1
@@ -1,36 +1,49 @@
-title://h1[@class="articleHeadline"]\r
-body://div[@id="article"]\r
-strip_id_or_class:articleTools\r
-strip_id_or_class:readerscomment\r
-#strip://div[contains(@class, "articleInline runaroundLeft")]\r
-strip: //div[contains(@class, "doubleRule")]\r
-# strip image credit - appears as a bold heading\r
-strip: //div[contains(@class, "articleInline")]//h6\r
-strip_id_or_class:enlargeThis\r
-strip_id_or_class:pageLinks\r
-strip_id_or_class:memberTools\r
-strip_id_or_class:articleExtras\r
-strip_id_or_class:singleAd\r
-strip_id_or_class:byline\r
-strip_id_or_class:dateline\r
-strip_id_or_class:articleheadline\r
-strip_id_or_class:articleBottomExtra\r
-strip://a[contains(@href, 'nytimes.com/adx/')]\r
-strip: //nyt_byline\r
-strip: //span[contains(@class, 'slideshow') or contains(@class, 'video')]\r
-strip: //p[@class='caption']//a[contains(., 'More Photos')]\r
-\r
-prune: no\r
-tidy: no\r
-\r
-date: substring-after(//*[contains(@class, 'dateline')], 'Published:')\r
-\r
-single_page_link: //link[contains(@href, 'pagewanted=all')]\r
-#single_page_link: //a[contains(@href, 'pagewanted=all') and not(contains(@href, 'login'))]\r
-\r
-strip://ul[@id = 'toolsList']\r
-strip://h6[@class = 'kicker']\r
-author:substring-after(//h6[@class='byline'],'By ')\r
-\r
-test_url: http://www.nytimes.com/2011/07/24/books/review/an-academic-authors-unintentional-masterpiece.html\r
-test_url: http://www.nytimes.com/2012/06/10/arts/television/the-newsroom-aaron-sorkins-return-to-tv.html
\ No newline at end of file
+title://h1[@class="articleHeadline"]
+body://div[@id="article"]
+body://*[@itemprop="articleBody"]
+strip_id_or_class:articleTools
+strip_id_or_class:readerscomment
+#strip://div[contains(@class, "articleInline runaroundLeft")]
+strip: //div[contains(@class, "doubleRule")]
+# strip image credit - appears as a bold heading
+strip: //div[contains(@class, "articleInline")]//h6
+strip_id_or_class:enlargeThis
+strip_id_or_class:pageLinks
+strip_id_or_class:memberTools
+strip_id_or_class:articleExtras
+strip_id_or_class:singleAd
+strip_id_or_class:byline
+strip_id_or_class:dateline
+strip_id_or_class:articleheadline
+strip_id_or_class:articleBottomExtra
+strip_id_or_class:shareTools
+strip://a[contains(@href, 'nytimes.com/adx/')]
+strip: //nyt_byline
+strip: //span[contains(@class, 'slideshow') or contains(@class, 'video')]
+strip: //p[@class='caption']//a[contains(., 'More Photos')]
+
+prune: no
+tidy: no
+
+find_string: <script 
+replace_string: <div style="display:none" 
+find_string: </script>
+replace_string: </div>
+
+date: substring-after(//*[contains(@class, 'dateline')], 'Published:')
+
+single_page_link: //link[contains(@href, 'pagewanted=all')]
+single_page_link: //link[@rel='alternate' and contains(@href, 'mobile.nytimes.com')]/@href
+single_page_link: concat(substring-before(//div[@id='pageLinks']//a[contains(@href, 'pagewanted=')]/@href, 'pagewanted='), 'pagewanted=all') 
+#single_page_link: //a[contains(@href, 'pagewanted=all') and not(contains(@href, 'login'))]
+
+strip://ul[@id = 'toolsList']
+strip://h6[@class = 'kicker']
+author:substring-after(//h6[@class='byline'],'By ')
+
+test_url: http://www.nytimes.com/2011/07/24/books/review/an-academic-authors-unintentional-masterpiece.html
+test_url: http://www.nytimes.com/2012/06/10/arts/television/the-newsroom-aaron-sorkins-return-to-tv.html
+test_url: http://www.nytimes.com/2013/03/25/world/middleeast/israeli-military-responds-after-patrols-come-under-fire-from-syria.html
+test_url: http://www.nytimes.com/2013/08/15/nyregion/when-the-new-york-city-subway-ran-without-rails.html
+test_url: http://www.nytimes.com/2004/02/29/weekinreview/correspondence-class-consciousness-china-s-wealthy-live-creed-hobbes-darwin-meet.html
+test_url: http://www.nytimes.com/2014/06/19/opinion/gail-collins-romney-and-the-2016-contenders-huddle.html
\ No newline at end of file