diff options
author | Nicolas Lœuillet <nicolas.loeuillet@gmail.com> | 2013-12-06 10:13:03 +0100 |
---|---|---|
committer | Nicolas Lœuillet <nicolas.loeuillet@gmail.com> | 2013-12-06 10:13:03 +0100 |
commit | ac4d114214d820b20e18518a2dbc809337e39043 (patch) | |
tree | 27886128ef949b7f8dd174b0646b5a4d99883b44 /inc/3rdparty/site_config/standard/nytimes.com.txt | |
parent | d5501950e2470d52f6bf5954d2179010cdee0475 (diff) | |
download | wallabag-ac4d114214d820b20e18518a2dbc809337e39043.tar.gz wallabag-ac4d114214d820b20e18518a2dbc809337e39043.tar.zst wallabag-ac4d114214d820b20e18518a2dbc809337e39043.zip |
[add] new specific configuration files
Diffstat (limited to 'inc/3rdparty/site_config/standard/nytimes.com.txt')
-rw-r--r-- | inc/3rdparty/site_config/standard/nytimes.com.txt | 36 |
1 files changed, 36 insertions, 0 deletions
diff --git a/inc/3rdparty/site_config/standard/nytimes.com.txt b/inc/3rdparty/site_config/standard/nytimes.com.txt new file mode 100644 index 00000000..8d9a794a --- /dev/null +++ b/inc/3rdparty/site_config/standard/nytimes.com.txt | |||
@@ -0,0 +1,36 @@ | |||
1 | title://h1[@class="articleHeadline"] | ||
2 | body://div[@id="article"] | ||
3 | strip_id_or_class:articleTools | ||
4 | strip_id_or_class:readerscomment | ||
5 | #strip://div[contains(@class, "articleInline runaroundLeft")] | ||
6 | strip: //div[contains(@class, "doubleRule")] | ||
7 | # strip image credit - appears as a bold heading | ||
8 | strip: //div[contains(@class, "articleInline")]//h6 | ||
9 | strip_id_or_class:enlargeThis | ||
10 | strip_id_or_class:pageLinks | ||
11 | strip_id_or_class:memberTools | ||
12 | strip_id_or_class:articleExtras | ||
13 | strip_id_or_class:singleAd | ||
14 | strip_id_or_class:byline | ||
15 | strip_id_or_class:dateline | ||
16 | strip_id_or_class:articleheadline | ||
17 | strip_id_or_class:articleBottomExtra | ||
18 | strip://a[contains(@href, 'nytimes.com/adx/')] | ||
19 | strip: //nyt_byline | ||
20 | strip: //span[contains(@class, 'slideshow') or contains(@class, 'video')] | ||
21 | strip: //p[@class='caption']//a[contains(., 'More Photos')] | ||
22 | |||
23 | prune: no | ||
24 | tidy: no | ||
25 | |||
26 | date: substring-after(//*[contains(@class, 'dateline')], 'Published:') | ||
27 | |||
28 | single_page_link: //link[contains(@href, 'pagewanted=all')] | ||
29 | #single_page_link: //a[contains(@href, 'pagewanted=all') and not(contains(@href, 'login'))] | ||
30 | |||
31 | strip://ul[@id = 'toolsList'] | ||
32 | strip://h6[@class = 'kicker'] | ||
33 | author:substring-after(//h6[@class='byline'],'By ') | ||
34 | |||
35 | test_url: http://www.nytimes.com/2011/07/24/books/review/an-academic-authors-unintentional-masterpiece.html | ||
36 | test_url: http://www.nytimes.com/2012/06/10/arts/television/the-newsroom-aaron-sorkins-return-to-tv.html \ No newline at end of file | ||