aboutsummaryrefslogtreecommitdiffhomepage
path: root/inc/3rdparty/site_config/standard/nytimes.com.txt
diff options
context:
space:
mode:
authorNicolas Lœuillet <nicolas.loeuillet@gmail.com>2013-12-06 10:13:03 +0100
committerNicolas Lœuillet <nicolas.loeuillet@gmail.com>2013-12-06 10:13:03 +0100
commitac4d114214d820b20e18518a2dbc809337e39043 (patch)
tree27886128ef949b7f8dd174b0646b5a4d99883b44 /inc/3rdparty/site_config/standard/nytimes.com.txt
parentd5501950e2470d52f6bf5954d2179010cdee0475 (diff)
downloadwallabag-ac4d114214d820b20e18518a2dbc809337e39043.tar.gz
wallabag-ac4d114214d820b20e18518a2dbc809337e39043.tar.zst
wallabag-ac4d114214d820b20e18518a2dbc809337e39043.zip
[add] new specific configuration files
Diffstat (limited to 'inc/3rdparty/site_config/standard/nytimes.com.txt')
-rw-r--r--inc/3rdparty/site_config/standard/nytimes.com.txt36
1 files changed, 36 insertions, 0 deletions
diff --git a/inc/3rdparty/site_config/standard/nytimes.com.txt b/inc/3rdparty/site_config/standard/nytimes.com.txt
new file mode 100644
index 00000000..8d9a794a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/nytimes.com.txt
@@ -0,0 +1,36 @@
1title://h1[@class="articleHeadline"]
2body://div[@id="article"]
3strip_id_or_class:articleTools
4strip_id_or_class:readerscomment
5#strip://div[contains(@class, "articleInline runaroundLeft")]
6strip: //div[contains(@class, "doubleRule")]
7# strip image credit - appears as a bold heading
8strip: //div[contains(@class, "articleInline")]//h6
9strip_id_or_class:enlargeThis
10strip_id_or_class:pageLinks
11strip_id_or_class:memberTools
12strip_id_or_class:articleExtras
13strip_id_or_class:singleAd
14strip_id_or_class:byline
15strip_id_or_class:dateline
16strip_id_or_class:articleheadline
17strip_id_or_class:articleBottomExtra
18strip://a[contains(@href, 'nytimes.com/adx/')]
19strip: //nyt_byline
20strip: //span[contains(@class, 'slideshow') or contains(@class, 'video')]
21strip: //p[@class='caption']//a[contains(., 'More Photos')]
22
23prune: no
24tidy: no
25
26date: substring-after(//*[contains(@class, 'dateline')], 'Published:')
27
28single_page_link: //link[contains(@href, 'pagewanted=all')]
29#single_page_link: //a[contains(@href, 'pagewanted=all') and not(contains(@href, 'login'))]
30
31strip://ul[@id = 'toolsList']
32strip://h6[@class = 'kicker']
33author:substring-after(//h6[@class='byline'],'By ')
34
35test_url: http://www.nytimes.com/2011/07/24/books/review/an-academic-authors-unintentional-masterpiece.html
36test_url: http://www.nytimes.com/2012/06/10/arts/television/the-newsroom-aaron-sorkins-return-to-tv.html
\ No newline at end of file