diff options
author | Nicolas Lœuillet <nicolas.loeuillet@smile.fr> | 2014-10-10 13:33:54 +0200 |
---|---|---|
committer | Nicolas Lœuillet <nicolas.loeuillet@smile.fr> | 2014-10-10 13:33:54 +0200 |
commit | 44d35257e805856b4913c63fcbed3c0acb64bae8 (patch) | |
tree | 11e9d276c34b1b287706cb61182bdc71729661e2 /inc/3rdparty/site_config/standard/nytimes.com.txt | |
parent | af8292c1de1886cd975d79f0f42df40e0bd1c5bd (diff) | |
parent | cf8a5e1eedbed484dbcb1ddc9f7a13fc19b7a27b (diff) | |
download | wallabag-1.8.0.tar.gz wallabag-1.8.0.tar.zst wallabag-1.8.0.zip |
Merge branch 'dev' 1.8.0
Diffstat (limited to 'inc/3rdparty/site_config/standard/nytimes.com.txt')
-rwxr-xr-x[-rw-r--r--] | inc/3rdparty/site_config/standard/nytimes.com.txt | 85 |
1 files changed, 49 insertions, 36 deletions
diff --git a/inc/3rdparty/site_config/standard/nytimes.com.txt b/inc/3rdparty/site_config/standard/nytimes.com.txt index 8d9a794a..23c9ad11 100644..100755 --- a/inc/3rdparty/site_config/standard/nytimes.com.txt +++ b/inc/3rdparty/site_config/standard/nytimes.com.txt | |||
@@ -1,36 +1,49 @@ | |||
1 | title://h1[@class="articleHeadline"] | 1 | title://h1[@class="articleHeadline"] |
2 | body://div[@id="article"] | 2 | body://div[@id="article"] |
3 | strip_id_or_class:articleTools | 3 | body://*[@itemprop="articleBody"] |
4 | strip_id_or_class:readerscomment | 4 | strip_id_or_class:articleTools |
5 | #strip://div[contains(@class, "articleInline runaroundLeft")] | 5 | strip_id_or_class:readerscomment |
6 | strip: //div[contains(@class, "doubleRule")] | 6 | #strip://div[contains(@class, "articleInline runaroundLeft")] |
7 | # strip image credit - appears as a bold heading | 7 | strip: //div[contains(@class, "doubleRule")] |
8 | strip: //div[contains(@class, "articleInline")]//h6 | 8 | # strip image credit - appears as a bold heading |
9 | strip_id_or_class:enlargeThis | 9 | strip: //div[contains(@class, "articleInline")]//h6 |
10 | strip_id_or_class:pageLinks | 10 | strip_id_or_class:enlargeThis |
11 | strip_id_or_class:memberTools | 11 | strip_id_or_class:pageLinks |
12 | strip_id_or_class:articleExtras | 12 | strip_id_or_class:memberTools |
13 | strip_id_or_class:singleAd | 13 | strip_id_or_class:articleExtras |
14 | strip_id_or_class:byline | 14 | strip_id_or_class:singleAd |
15 | strip_id_or_class:dateline | 15 | strip_id_or_class:byline |
16 | strip_id_or_class:articleheadline | 16 | strip_id_or_class:dateline |
17 | strip_id_or_class:articleBottomExtra | 17 | strip_id_or_class:articleheadline |
18 | strip://a[contains(@href, 'nytimes.com/adx/')] | 18 | strip_id_or_class:articleBottomExtra |
19 | strip: //nyt_byline | 19 | strip_id_or_class:shareTools |
20 | strip: //span[contains(@class, 'slideshow') or contains(@class, 'video')] | 20 | strip://a[contains(@href, 'nytimes.com/adx/')] |
21 | strip: //p[@class='caption']//a[contains(., 'More Photos')] | 21 | strip: //nyt_byline |
22 | 22 | strip: //span[contains(@class, 'slideshow') or contains(@class, 'video')] | |
23 | prune: no | 23 | strip: //p[@class='caption']//a[contains(., 'More Photos')] |
24 | tidy: no | 24 | |
25 | 25 | prune: no | |
26 | date: substring-after(//*[contains(@class, 'dateline')], 'Published:') | 26 | tidy: no |
27 | 27 | ||
28 | single_page_link: //link[contains(@href, 'pagewanted=all')] | 28 | find_string: <script |
29 | #single_page_link: //a[contains(@href, 'pagewanted=all') and not(contains(@href, 'login'))] | 29 | replace_string: <div style="display:none" |
30 | 30 | find_string: </script> | |
31 | strip://ul[@id = 'toolsList'] | 31 | replace_string: </div> |
32 | strip://h6[@class = 'kicker'] | 32 | |
33 | author:substring-after(//h6[@class='byline'],'By ') | 33 | date: substring-after(//*[contains(@class, 'dateline')], 'Published:') |
34 | 34 | ||
35 | test_url: http://www.nytimes.com/2011/07/24/books/review/an-academic-authors-unintentional-masterpiece.html | 35 | single_page_link: //link[contains(@href, 'pagewanted=all')] |
36 | test_url: http://www.nytimes.com/2012/06/10/arts/television/the-newsroom-aaron-sorkins-return-to-tv.html \ No newline at end of file | 36 | single_page_link: //link[@rel='alternate' and contains(@href, 'mobile.nytimes.com')]/@href |
37 | single_page_link: concat(substring-before(//div[@id='pageLinks']//a[contains(@href, 'pagewanted=')]/@href, 'pagewanted='), 'pagewanted=all') | ||
38 | #single_page_link: //a[contains(@href, 'pagewanted=all') and not(contains(@href, 'login'))] | ||
39 | |||
40 | strip://ul[@id = 'toolsList'] | ||
41 | strip://h6[@class = 'kicker'] | ||
42 | author:substring-after(//h6[@class='byline'],'By ') | ||
43 | |||
44 | test_url: http://www.nytimes.com/2011/07/24/books/review/an-academic-authors-unintentional-masterpiece.html | ||
45 | test_url: http://www.nytimes.com/2012/06/10/arts/television/the-newsroom-aaron-sorkins-return-to-tv.html | ||
46 | test_url: http://www.nytimes.com/2013/03/25/world/middleeast/israeli-military-responds-after-patrols-come-under-fire-from-syria.html | ||
47 | test_url: http://www.nytimes.com/2013/08/15/nyregion/when-the-new-york-city-subway-ran-without-rails.html | ||
48 | test_url: http://www.nytimes.com/2004/02/29/weekinreview/correspondence-class-consciousness-china-s-wealthy-live-creed-hobbes-darwin-meet.html | ||
49 | test_url: http://www.nytimes.com/2014/06/19/opinion/gail-collins-romney-and-the-2016-contenders-huddle.html \ No newline at end of file | ||