diff options
113 files changed, 742 insertions, 119 deletions
diff --git a/inc/3rdparty/site_config/standard/20min.ch.txt b/inc/3rdparty/site_config/standard/20min.ch.txt new file mode 100755 index 00000000..cd8e3fc0 --- /dev/null +++ b/inc/3rdparty/site_config/standard/20min.ch.txt | |||
@@ -0,0 +1,24 @@ | |||
1 | # Author: cirnod@gmail.com | ||
2 | |||
3 | tidy: no | ||
4 | prune: no | ||
5 | |||
6 | title: //h1 | ||
7 | date: /html/body/div[3]/div[1]/div[6]/div/div[1]/div[2]/div[1]/div/p | ||
8 | body: //div[@class='published clearfix'] | //div[@class='story_titles']/h3 | //div[@class='story_text'] | ||
9 | |||
10 | # General Cleanup | ||
11 | strip_id_or_class: info_panel | ||
12 | strip_id_or_class: info_poll | ||
13 | strip_id_or_class: teaser | ||
14 | strip_id_or_class: panelbox | ||
15 | strip_id_or_class: polls | ||
16 | strip_id_or_class: warning | ||
17 | strip_id_or_class: vplaceholder | ||
18 | |||
19 | # visual removal only -> complete removal doesn't work | ||
20 | replace_string(Print</a>): </a> | ||
21 | |||
22 | # Try yourself | ||
23 | test_url: http://www.20min.ch/wissen/news/story/31588952 | ||
24 | test_url: http://www.20min.ch/digital/dossier/apple/story/So-einfach-laesst-sich-das-iPhone-6-Plus-verbiegen-24651169 | ||
diff --git a/inc/3rdparty/site_config/standard/24.ae.txt b/inc/3rdparty/site_config/standard/24.ae.txt new file mode 100755 index 00000000..6e515076 --- /dev/null +++ b/inc/3rdparty/site_config/standard/24.ae.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title: //div[@id='DivTitle'] | ||
2 | body: //div[@id='divImages' or @id='Divkhabarcontent'] | ||
3 | author: //div[@id='DivAuthor'] | ||
4 | |||
5 | prune: no | ||
6 | |||
7 | test_url: http://24.ae/article.aspx?ArticleId=123304 | ||
8 | test_url: http://24.ae/rss.aspx?pageId=30 | ||
diff --git a/inc/3rdparty/site_config/standard/9gag.com.txt b/inc/3rdparty/site_config/standard/9gag.com.txt new file mode 100755 index 00000000..4ebb62ad --- /dev/null +++ b/inc/3rdparty/site_config/standard/9gag.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | # Generated by FiveFilters.org's web-based selection tool | ||
2 | # Place this file inside your site_config/custom/ folder | ||
3 | # Source: http://siteconfig.fivefilters.org/grab.php?url=http%3A%2F%2F9gag.com%2Fgag%2FaDwQnO7 | ||
4 | |||
5 | body: //div[contains(concat(' ',normalize-space(@class),' '),' badge-post-container ')] | ||
6 | test_url: http://9gag.com/gag/aDwQnO7 | ||
diff --git a/inc/3rdparty/site_config/standard/ad.nl.txt b/inc/3rdparty/site_config/standard/ad.nl.txt new file mode 100755 index 00000000..422faa57 --- /dev/null +++ b/inc/3rdparty/site_config/standard/ad.nl.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | #bypass cookie check | ||
2 | single_page_link: //a[contains(@href, '/acceptCookieCheck.do?url=')] | ||
3 | |||
4 | test_url: http://www.ad.nl/ad/nl/10444/Offside/article/detail/4043834/2015/05/31/Dani-Alves-voetbalt-met-drol-op-zijn-hoofd.dhtml | ||
5 | test_contains: De nieuwe coupe van Alves | ||
6 | |||
7 | test_url: http://www.ad.nl/digitaal/rss.xml \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/albayan.ae.txt b/inc/3rdparty/site_config/standard/albayan.ae.txt index f6c093d2..d52700b3 100755 --- a/inc/3rdparty/site_config/standard/albayan.ae.txt +++ b/inc/3rdparty/site_config/standard/albayan.ae.txt | |||
@@ -1,5 +1,7 @@ | |||
1 | body: //div[@id='main-column']//div[@class='content'] | 1 | body: //div[@id='main-column']//div[@class='content'] |
2 | 2 | ||
3 | strip_id_or_class: social-buttons | ||
4 | |||
3 | prune: no | 5 | prune: no |
4 | 6 | ||
5 | test_url: http://www.albayan.ae/across-the-uae/education/2013-08-29-1.1949645 | 7 | test_url: http://www.albayan.ae/across-the-uae/education/2013-08-29-1.1949645 |
diff --git a/inc/3rdparty/site_config/standard/androidpolice.com.txt b/inc/3rdparty/site_config/standard/androidpolice.com.txt index 8f9b1a21..660f29d9 100755 --- a/inc/3rdparty/site_config/standard/androidpolice.com.txt +++ b/inc/3rdparty/site_config/standard/androidpolice.com.txt | |||
@@ -1,5 +1,6 @@ | |||
1 | body: //div[@class='post_content'] | 1 | body: //div[@class='post_content'] |
2 | date: //div[@class='date_day'] | div[@class='date_month'] | 2 | date: //div[@class='date_day'] | div[@class='date_month'] |
3 | strip_id_or_class: author-box | ||
4 | author: //h2[@class='author-box-heading']/a | ||
3 | 5 | ||
4 | test_url: http://www.androidpolice.com/2014/03/30/music-boss-for-pebble-can-now-control-playback-and-volume-on-chromecast-content-from-your-smartwatch/ | 6 | test_url: http://www.androidpolice.com/2014/03/30/music-boss-for-pebble-can-now-control-playback-and-volume-on-chromecast-content-from-your-smartwatch/ |
5 | |||
diff --git a/inc/3rdparty/site_config/standard/artofmanliness.com.txt b/inc/3rdparty/site_config/standard/artofmanliness.com.txt new file mode 100755 index 00000000..b29ea0db --- /dev/null +++ b/inc/3rdparty/site_config/standard/artofmanliness.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | parser: html5php | ||
2 | date: //article/p[contains(@class, 'single-date')] | ||
3 | author: //article/p[contains(@class, 'byline')] | ||
4 | |||
5 | test_url: http://www.artofmanliness.com/2013/01/31/relationship-red-flags/ | ||
6 | test_contains: It seems that once we get close to a person \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/au.businessinsider.com.txt b/inc/3rdparty/site_config/standard/au.businessinsider.com.txt new file mode 100755 index 00000000..46bcddf2 --- /dev/null +++ b/inc/3rdparty/site_config/standard/au.businessinsider.com.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | title://div[@class="sl-layout-post"]/h1 | ||
2 | body: //div[@id='content_post'] | ||
3 | strip: //div[contains(@class, "post-sidebar")] | ||
4 | strip: //div[@id='related-links'] | ||
5 | strip: //img[@class='size_xlarge'] | ||
6 | author://div[@class="byline"]/a | ||
7 | date://div[@class="byline"]/span[@class="date"] | ||
8 | prune: no | ||
9 | tidy: no | ||
10 | |||
11 | |||
12 | test_url: http://www.businessinsider.com/microsoft-just-put-one-of-its-hardcore-technical-geniuses-on-xbox-2012-1 | ||
diff --git a/inc/3rdparty/site_config/standard/au.news.yahoo.com.txt b/inc/3rdparty/site_config/standard/au.news.yahoo.com.txt new file mode 100755 index 00000000..8e84cbbb --- /dev/null +++ b/inc/3rdparty/site_config/standard/au.news.yahoo.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | strip: //a[contains(text(), "RELATED:")] | ||
2 | author: //div[@class="info"]//span[@class="association printer-source"] | ||
3 | date: //div[@class="info"]//span[@class="stamp printer-date"] | ||
4 | |||
diff --git a/inc/3rdparty/site_config/standard/bbc.co.uk.txt b/inc/3rdparty/site_config/standard/bbc.co.uk.txt index bad77654..7bef73ad 100755 --- a/inc/3rdparty/site_config/standard/bbc.co.uk.txt +++ b/inc/3rdparty/site_config/standard/bbc.co.uk.txt | |||
@@ -30,6 +30,12 @@ strip: //div[contains(@class, 'comment-introduction')] | |||
30 | strip: //div[contains(@class, 'share-tools')] | 30 | strip: //div[contains(@class, 'share-tools')] |
31 | strip: //div[@id='also-related-links'] | 31 | strip: //div[@id='also-related-links'] |
32 | 32 | ||
33 | find_string: http://ichef.bbci.co.uk/news/200/ | ||
34 | replace_string: http://ichef.bbci.co.uk/news/624/ | ||
35 | |||
36 | find_string: http://ichef.bbci.co.uk/news/304/ | ||
37 | replace_string: http://ichef.bbci.co.uk/news/624/ | ||
38 | |||
33 | strip_id_or_class: share-help | 39 | strip_id_or_class: share-help |
34 | strip_id_or_class: comments_module | 40 | strip_id_or_class: comments_module |
35 | 41 | ||
diff --git a/inc/3rdparty/site_config/standard/bbc.com.txt b/inc/3rdparty/site_config/standard/bbc.com.txt index c04a683e..200dba63 100755 --- a/inc/3rdparty/site_config/standard/bbc.com.txt +++ b/inc/3rdparty/site_config/standard/bbc.com.txt | |||
@@ -33,6 +33,12 @@ strip: //div[@id='also-related-links'] | |||
33 | strip_id_or_class: share-help | 33 | strip_id_or_class: share-help |
34 | strip_id_or_class: comments_module | 34 | strip_id_or_class: comments_module |
35 | 35 | ||
36 | find_string: http://ichef.bbci.co.uk/news/200/ | ||
37 | replace_string: http://ichef.bbci.co.uk/news/624/ | ||
38 | |||
39 | find_string: http://ichef.bbci.co.uk/news/304/ | ||
40 | replace_string: http://ichef.bbci.co.uk/news/624/ | ||
41 | |||
36 | replace_string(<noscript>): <div> | 42 | replace_string(<noscript>): <div> |
37 | replace_string(</noscript>): </div> | 43 | replace_string(</noscript>): </div> |
38 | 44 | ||
diff --git a/inc/3rdparty/site_config/standard/blog.cloudflare.com.txt b/inc/3rdparty/site_config/standard/blog.cloudflare.com.txt index 9b7cf25c..db80a35f 100755 --- a/inc/3rdparty/site_config/standard/blog.cloudflare.com.txt +++ b/inc/3rdparty/site_config/standard/blog.cloudflare.com.txt | |||
@@ -3,11 +3,7 @@ title: substring-before(//title, '-') | |||
3 | 3 | ||
4 | author: //a[ contains(@href, '/people') ] | 4 | author: //a[ contains(@href, '/people') ] |
5 | 5 | ||
6 | body: //article[contains(concat(' ',normalize-space(@class),' '),' post ')] | 6 | body: //div[ @class='post' ] |
7 | |||
8 | strip_id_or_class: section learn-more | ||
9 | strip_id_or_class: section comments | ||
10 | strip_id_or_class: disqus_thread | ||
11 | 7 | ||
12 | # Date is impossible to retrieve since they use those stupid "fuzzy" dates, inserted through javascript, at posterous. | 8 | # Date is impossible to retrieve since they use those stupid "fuzzy" dates, inserted through javascript, at posterous. |
13 | test_url: http://blog.cloudflare.com/understanding-analytics-when-is-a-page-view-n | 9 | test_url: http://blog.cloudflare.com/understanding-analytics-when-is-a-page-view-n \ No newline at end of file |
diff --git a/inc/3rdparty/site_config/standard/blogs.msdn.com.txt b/inc/3rdparty/site_config/standard/blogs.msdn.com.txt index b2ff8332..11b8d42d 100755 --- a/inc/3rdparty/site_config/standard/blogs.msdn.com.txt +++ b/inc/3rdparty/site_config/standard/blogs.msdn.com.txt | |||
@@ -1,6 +1,6 @@ | |||
1 | title: //h3[@class="post-name"] | 1 | title: //h3[@class="post-name"] |
2 | author: //span[@class="user-name"] | 2 | author: //span[@class="user-name"] |
3 | date: //div[@class="post-date"] | 3 | date: //div[@class="post-date"]/span[@class="value"] |
4 | body: //div[@class="post-content user-defined-markup"] | 4 | body: //div[@class="post-content user-defined-markup"] |
5 | footnotes: no | 5 | footnotes: no |
6 | test_url: http://blogs.msdn.com/b/b8/archive/2011/10/04/designing-the-start-screen.aspx \ No newline at end of file | 6 | test_url: http://blogs.msdn.com/b/b8/archive/2011/10/04/designing-the-start-screen.aspx |
diff --git a/inc/3rdparty/site_config/standard/brandeins.de.txt b/inc/3rdparty/site_config/standard/brandeins.de.txt index 36aa2efa..be326346 100755 --- a/inc/3rdparty/site_config/standard/brandeins.de.txt +++ b/inc/3rdparty/site_config/standard/brandeins.de.txt | |||
@@ -1,7 +1,9 @@ | |||
1 | # set body | ||
2 | body: //div[@id='theContent'] | ||
3 | 1 | ||
4 | # set title | 2 | body: //div[@class="articleTeaser"] | //section[@class="contentSection"] |
5 | title: //div[@id='theContent']/h3 | 3 | |
6 | strip: //div[@id='theContent']/h3 | 4 | strip: //section[@class="greenBox italic"] |
7 | test_url: http://www.brandeins.de/archiv/magazin/gegessen-wird-immer/artikel/hunger.html \ No newline at end of file | 5 | |
6 | author: //div[@class="articleAuthor"] | ||
7 | # no publish date on page (the articles are from a monthly periodical) | ||
8 | |||
9 | test_url: http://www.brandeins.de/archiv/2015/fuehrung/ministry-group-mach-doch-mal-ne-ansage/ | ||
diff --git a/inc/3rdparty/site_config/standard/brokernews.com.au.txt b/inc/3rdparty/site_config/standard/brokernews.com.au.txt new file mode 100755 index 00000000..814da38a --- /dev/null +++ b/inc/3rdparty/site_config/standard/brokernews.com.au.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | author: //span[@itemprop="author"] | ||
2 | date: //span[@itemprop="datePublished"] | ||
diff --git a/inc/3rdparty/site_config/standard/business.time.com.txt b/inc/3rdparty/site_config/standard/business.time.com.txt new file mode 100755 index 00000000..5502beae --- /dev/null +++ b/inc/3rdparty/site_config/standard/business.time.com.txt | |||
@@ -0,0 +1,17 @@ | |||
1 | # 2011-10-25 - carlo@... - Initial setup. | ||
2 | |||
3 | single_page_link: //li[@class='print']/a/@href | ||
4 | |||
5 | title: //h1 | ||
6 | author: //meta[@name="byline"]/@content | ||
7 | date: //meta[@name="date"]/@content | ||
8 | |||
9 | strip: //span[@class="see"] | ||
10 | strip: //div[@class="byline"] | ||
11 | strip: //div[@id="date2"] | ||
12 | strip: //h1 | ||
13 | strip: //div[@class='post-rail-ad'] | ||
14 | strip: //div[@class='post-rail-content'] | ||
15 | strip: //aside[@class='post-rail'] | ||
16 | |||
17 | test_url: http://www.time.com/time/specials/packages/article/0,28804,2094921_2094923_2094924,00.html | ||
diff --git a/inc/3rdparty/site_config/standard/choice.com.au.txt b/inc/3rdparty/site_config/standard/choice.com.au.txt new file mode 100755 index 00000000..02714755 --- /dev/null +++ b/inc/3rdparty/site_config/standard/choice.com.au.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | |||
2 | body: //div[@id='content']//div[@id='mainBlogContentWrapper']//*[self::p or self::img or self::ul] | //div[@class='mainArticleIntro'] | ||
3 | |||
4 | date: //span[@class='date'] | ||
diff --git a/inc/3rdparty/site_config/standard/cnet.com.au.txt b/inc/3rdparty/site_config/standard/cnet.com.au.txt new file mode 100755 index 00000000..d5719d40 --- /dev/null +++ b/inc/3rdparty/site_config/standard/cnet.com.au.txt | |||
@@ -0,0 +1,17 @@ | |||
1 | title: //meta[@property="og:title"]/@content | ||
2 | body: //div[contains(@class, 'postBody')] | ||
3 | date: //div[@id='nameAndTime']/time | ||
4 | author: //div[@id='nameAndTime']/span[@class='author'] | ||
5 | |||
6 | strip_id_or_class: image-credit | ||
7 | strip_id_or_class: noAutolink | ||
8 | strip_id_or_class: related | ||
9 | strip_id_or_class: cite | ||
10 | |||
11 | prune: no | ||
12 | tidy: no | ||
13 | |||
14 | # early end | ||
15 | replace_string(Download today's podcast</a>): Download today's podcast</a></div></body></html> | ||
16 | |||
17 | test_url: http://www.cnet.com/8301-13952_1-57367607-81/the-404-981-where-the-world-is-a-vampire-podcast/ | ||
diff --git a/inc/3rdparty/site_config/standard/computerbase.de.txt b/inc/3rdparty/site_config/standard/computerbase.de.txt index 5973c50b..214fcceb 100755 --- a/inc/3rdparty/site_config/standard/computerbase.de.txt +++ b/inc/3rdparty/site_config/standard/computerbase.de.txt | |||
@@ -2,7 +2,7 @@ title://h1 | |||
2 | 2 | ||
3 | author://div[@id="news-meta"]/a | 3 | author://div[@id="news-meta"]/a |
4 | 4 | ||
5 | body://*[@id="main"]/div[1] | 5 | body: //div[contains(@class, 'text-content')] |
6 | 6 | ||
7 | strip://*[@id="main"]/div[2] | 7 | strip://*[@id="main"]/div[2] |
8 | strip://*[@id="main"]/div[3] | 8 | strip://*[@id="main"]/div[3] |
@@ -15,4 +15,4 @@ strip://img | |||
15 | 15 | ||
16 | #figures are not displayed in instapaper... | 16 | #figures are not displayed in instapaper... |
17 | strip://figure | //figcaption | 17 | strip://figure | //figcaption |
18 | test_url: http://www.computerbase.de/news/2012-06/verbraucherzentrale-mahnt-blizzard-fuer-diablo-3-ab/ \ No newline at end of file | 18 | test_url: http://www.computerbase.de/news/2012-06/verbraucherzentrale-mahnt-blizzard-fuer-diablo-3-ab/ |
diff --git a/inc/3rdparty/site_config/standard/contrepoints.org.txt b/inc/3rdparty/site_config/standard/contrepoints.org.txt new file mode 100755 index 00000000..8a6a1250 --- /dev/null +++ b/inc/3rdparty/site_config/standard/contrepoints.org.txt | |||
@@ -0,0 +1,21 @@ | |||
1 | # Contrepoints.org | ||
2 | # As of 2015-04, it's a wordpress-powered website. | ||
3 | |||
4 | title: //h1[contains(concat(' ',normalize-space(@class),' '),' page-title ')]//span[contains(concat(' ',normalize-space(@class),' '),' inner-text ')] | ||
5 | date: //time[contains(concat(' ',normalize-space(@class),' '),' art-date ')] | ||
6 | author: //h1[contains(concat(' ',normalize-space(@class),' '),' author-name ')] | ||
7 | body: //article[contains(concat(' ',normalize-space(@class),' '),' plain-art ')] | ||
8 | |||
9 | # no toolbar, meta, etc, but misses excerpt | ||
10 | # body: //div[contains(concat(' ',normalize-space(@class),' '),' entry ')] | ||
11 | |||
12 | # Thus, we need to strip useless elements from the "plain-art" | ||
13 | strip: //div[contains(concat(' ',normalize-space(@class),' '),' plain-post-topbar ')] | ||
14 | strip: //div[contains(concat(' ',normalize-space(@class),' '),' single-type-block ')] | ||
15 | strip: //header[contains(concat(' ',normalize-space(@class),' '),' entry-header ')] | ||
16 | |||
17 | # And no pruning is needed because we stripped unwanted elements. | ||
18 | prune: no | ||
19 | |||
20 | test_url: http://www.contrepoints.org/2015/04/25/205709-leconomie-selon-ray-dalio | ||
21 | test_url: http://www.contrepoints.org/2015/04/25/205734-huile-et-gaz-de-schiste-revolution-durable \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/cooper.com.txt b/inc/3rdparty/site_config/standard/cooper.com.txt index a4244097..fc156f7b 100755 --- a/inc/3rdparty/site_config/standard/cooper.com.txt +++ b/inc/3rdparty/site_config/standard/cooper.com.txt | |||
@@ -1,4 +1,4 @@ | |||
1 | body: //*[contains(@class,'body')] | 1 | body: //div[contains(@class,'post-body')] |
2 | date: //abbr[@class='published'] | 2 | date: //abbr[@class='published'] |
3 | 3 | ||
4 | test_url: http://www.cooper.com/journal/2012/08/2-weeks-left-to-win-your-way-to-the-woodstock-of-ux-coopers-ux-boot-camp.html/ \ No newline at end of file | 4 | test_url: http://www.cooper.com/journal/2015/6/creating-personas |
diff --git a/inc/3rdparty/site_config/standard/cwnp.com.txt b/inc/3rdparty/site_config/standard/cwnp.com.txt new file mode 100755 index 00000000..169fdf84 --- /dev/null +++ b/inc/3rdparty/site_config/standard/cwnp.com.txt | |||
@@ -0,0 +1,14 @@ | |||
1 | title: //div[@class='entry-pad']//h2 | ||
2 | body: //div[contains(concat(' ',normalize-space(@class),' '),' entry-pad ')] | ||
3 | strip: //h1 | ||
4 | strip: //p | ||
5 | strip: //h2 | ||
6 | strip: //div[@class='clear'] | ||
7 | |||
8 | prune: no | ||
9 | tidy: no | ||
10 | |||
11 | autodetect_on_failure: no | ||
12 | |||
13 | test_url: https://www.cwnp.com/wotd.php | ||
14 | test_url: https://www.cwnp.com/qotd.php | ||
diff --git a/inc/3rdparty/site_config/standard/dailymail.co.uk.txt b/inc/3rdparty/site_config/standard/dailymail.co.uk.txt index cd29a4d4..8535b19f 100755 --- a/inc/3rdparty/site_config/standard/dailymail.co.uk.txt +++ b/inc/3rdparty/site_config/standard/dailymail.co.uk.txt | |||
@@ -7,6 +7,15 @@ strip_id_or_class: googleAds | |||
7 | strip_id_or_class: digg-button | 7 | strip_id_or_class: digg-button |
8 | strip_id_or_class: article-icon-links-container | 8 | strip_id_or_class: article-icon-links-container |
9 | strip_id_or_class: clickToEnlarge | 9 | strip_id_or_class: clickToEnlarge |
10 | strip_id_or_class: articleIconLinksContainer | ||
11 | strip_id_or_class: related-carousel | ||
12 | strip_id_or_class: reader-comments | ||
13 | strip_id_or_class: most-watched | ||
14 | strip_id_or_class: most-read | ||
15 | |||
16 | find_string:blkBorder img-share | ||
17 | replace_string: nothing | ||
18 | |||
10 | tidy: no | 19 | tidy: no |
11 | 20 | ||
12 | test_url: http://www.dailymail.co.uk/news/article-1375423/Royal-wedding-Texan-billionaire-Joe-Albritton-invited-Prince-Charles.html \ No newline at end of file | 21 | test_url: http://www.dailymail.co.uk/news/article-1375423/Royal-wedding-Texan-billionaire-Joe-Albritton-invited-Prince-Charles.html \ No newline at end of file |
diff --git a/inc/3rdparty/site_config/standard/dailytelegraph.com.au.txt b/inc/3rdparty/site_config/standard/dailytelegraph.com.au.txt new file mode 100755 index 00000000..571e8111 --- /dev/null +++ b/inc/3rdparty/site_config/standard/dailytelegraph.com.au.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title: //h1[@class="heading"] | ||
2 | author: //cite[@class='author'] | ||
3 | date: //li[contains(@class, 'date-and-time')] | ||
4 | |||
5 | |||
diff --git a/inc/3rdparty/site_config/standard/deadspin.com.txt b/inc/3rdparty/site_config/standard/deadspin.com.txt new file mode 100755 index 00000000..e6ca16ae --- /dev/null +++ b/inc/3rdparty/site_config/standard/deadspin.com.txt | |||
@@ -0,0 +1 @@ | |||
http_header(user-agent): PHP/5.3 \ No newline at end of file | |||
diff --git a/inc/3rdparty/site_config/standard/derbund.ch.txt b/inc/3rdparty/site_config/standard/derbund.ch.txt new file mode 100755 index 00000000..1363eff6 --- /dev/null +++ b/inc/3rdparty/site_config/standard/derbund.ch.txt | |||
@@ -0,0 +1,13 @@ | |||
1 | # Author: cirnod@gmail.com | ||
2 | |||
3 | tidy: no | ||
4 | prune: no | ||
5 | |||
6 | body: //div[@id="article"]/h3 | //*[@id="mainContent"] | ||
7 | |||
8 | # General Cleanup | ||
9 | #strip_id_or_class: info_panel | ||
10 | |||
11 | |||
12 | # Try yourself | ||
13 | test_url: http://www.derbund.ch/bern/nachrichten/Fossilienforscher-stehen-auf-Heavy-Metal/story/20919522 | ||
diff --git a/inc/3rdparty/site_config/standard/designbuildsource.com.au.txt b/inc/3rdparty/site_config/standard/designbuildsource.com.au.txt new file mode 100755 index 00000000..93d3507e --- /dev/null +++ b/inc/3rdparty/site_config/standard/designbuildsource.com.au.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | date: substring-after(//p[@class='post_date'], 'on') | ||
2 | |||
diff --git a/inc/3rdparty/site_config/standard/dilbert.com.txt b/inc/3rdparty/site_config/standard/dilbert.com.txt index 85cc78e5..b8788553 100755 --- a/inc/3rdparty/site_config/standard/dilbert.com.txt +++ b/inc/3rdparty/site_config/standard/dilbert.com.txt | |||
@@ -1,11 +1,9 @@ | |||
1 | #title: substring(substring-after(//title, ':'), 1, string-length(substring-after(//title, ':')) - 10) | 1 | title: //a[@class="post-title"]/text() |
2 | title: //div[contains(@class, 'SB_Title')]//a | 2 | title: //meta[@name="twitter:title"]/@content |
3 | body: //div[contains(@class, 'STR_Image')] | 3 | body: //img[@class="img-responsive img-comic"] |
4 | body: //*[contains(@class, 'SB_Content')] | ||
5 | author: string('Scott Adams') | 4 | author: string('Scott Adams') |
6 | date: //*[contains(@class, 'SB_Detail')]/text()[1] | 5 | date: //meta[@property="article:publish_date"]/@content |
7 | |||
8 | 6 | ||
9 | test_url: http://dilbert.com/blog/entry/death_by_hypnosis_or_not/ | 7 | test_url: http://dilbert.com/blog/entry/death_by_hypnosis_or_not/ |
10 | test_url: http://dilbert.com/strips/comic/2013-10-22 | 8 | test_url: http://dilbert.com/strips/comic/2013-10-22 |
11 | test_url: http://feed.dilbert.com/dilbert/daily_strip \ No newline at end of file | 9 | test_url: http://feed.dilbert.com/dilbert/daily_strip |
diff --git a/inc/3rdparty/site_config/standard/dn.se.txt b/inc/3rdparty/site_config/standard/dn.se.txt index 5283a0cd..a2ad609b 100755 --- a/inc/3rdparty/site_config/standard/dn.se.txt +++ b/inc/3rdparty/site_config/standard/dn.se.txt | |||
@@ -15,6 +15,9 @@ strip_id_or_class: hook | |||
15 | strip_id_or_class: right | 15 | strip_id_or_class: right |
16 | strip_id_or_class: footer | 16 | strip_id_or_class: footer |
17 | 17 | ||
18 | strip_id_or_class: ad-head | ||
19 | strip_id_or_class: atc-share-title | ||
20 | |||
18 | # Other news | 21 | # Other news |
19 | strip: //div[@id="mirrors"] | 22 | strip: //div[@id="mirrors"] |
20 | 23 | ||
@@ -25,4 +28,5 @@ author: //div[@id="byline"]/div/p/strong | |||
25 | date: substring(substring-after(//p[@class="published"], 'Publicerad '), 0, 11) | 28 | date: substring(substring-after(//p[@class="published"], 'Publicerad '), 0, 11) |
26 | 29 | ||
27 | test_url: http://www.dn.se/nyheter/varlden/landade-flygplan-mitt-i-villaomrade | 30 | test_url: http://www.dn.se/nyheter/varlden/landade-flygplan-mitt-i-villaomrade |
28 | test_url: http://www.dn.se/m/rss/senaste-nytt \ No newline at end of file | 31 | test_contains: Ett tekniskt haveri tvingade |
32 | test_url: http://www.dn.se/rss/senaste-nytt | ||
diff --git a/inc/3rdparty/site_config/standard/economie.gouv.fr.txt b/inc/3rdparty/site_config/standard/economie.gouv.fr.txt new file mode 100755 index 00000000..b0db03c1 --- /dev/null +++ b/inc/3rdparty/site_config/standard/economie.gouv.fr.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | body: //div[contains(@class, 'txtVisu')] | ||
2 | prune: no | ||
3 | |||
4 | test_url: http://www.economie.gouv.fr/dgccrf/Publications/Vie-pratique/Fiches-pratiques/Assurance \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/entwickler.de.txt b/inc/3rdparty/site_config/standard/entwickler.de.txt new file mode 100755 index 00000000..316f3991 --- /dev/null +++ b/inc/3rdparty/site_config/standard/entwickler.de.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | title: //h1[@class="post-title"] | ||
2 | body: //section[@class="article-content"] | ||
3 | author: //div[@class="post-bottom-meta"]/span[@class="post-author"] | ||
4 | date: //div[@class="post-date"]/time/@datetime | ||
5 | |||
6 | test_url: https://entwickler.de/online/mobile-welt-offline-welt-was-der-offline-first-ansatz-fuer-app-entwickler-heisst-140602.html | ||
7 | test_url: https://entwickler.de/online/development/plex-docker-joomla-165345.html | ||
diff --git a/inc/3rdparty/site_config/standard/explosm.net.txt b/inc/3rdparty/site_config/standard/explosm.net.txt new file mode 100755 index 00000000..f2d0a20f --- /dev/null +++ b/inc/3rdparty/site_config/standard/explosm.net.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | body: //img[@id='main-comic'] | ||
2 | author: substring(//small[@class="author-credit-name"], 4) | ||
3 | |||
4 | test_url: http://explosm.net/comics/3954/ | ||
diff --git a/inc/3rdparty/site_config/standard/facebook.com.txt b/inc/3rdparty/site_config/standard/facebook.com.txt index 26d4f905..2641a0b2 100755 --- a/inc/3rdparty/site_config/standard/facebook.com.txt +++ b/inc/3rdparty/site_config/standard/facebook.com.txt | |||
@@ -1,12 +1,14 @@ | |||
1 | body: //div[@id='imagestage'] | 1 | body: //div[@id='imagestage'] |
2 | body: //div[contains(@class, 'userContentWrapper')] | 2 | body: //div[contains(@class, 'userContentWrapper')] |
3 | 3 | body: //div[@id='m_story_permalink_view' or contains(@data-sigil, 'm-story-view')] | |
4 | strip_id_or_class: commentable | 4 | strip_id_or_class: commentable |
5 | strip: //div[contains(@data-sigil, 'm-mentions-expand')] | ||
5 | 6 | ||
6 | prune: no | 7 | prune: no |
7 | tidy: no | 8 | tidy: no |
8 | 9 | ||
9 | # single_page_link: replace(substring-after(//noscript//meta[@http-equiv="refresh"]/@content, 'URL='), "&amp;", "&") | 10 | single_page_link: concat("https://m.", substring-after(//link[@rel="alternate" and @media="handheld"]/@href, "//www.")) |
11 | if_page_contains: //link[@rel="alternate" and @media="handheld"] | ||
10 | 12 | ||
11 | test_url: https://www.facebook.com/permalink.php?story_fbid=10154584776550183&id=294468630182 | 13 | test_url: https://www.facebook.com/permalink.php?story_fbid=10154584776550183&id=294468630182 |
12 | test_contains: holding an extraordinary session in Brussels this month | 14 | test_contains: holding an extraordinary session in Brussels this month |
diff --git a/inc/3rdparty/site_config/standard/fastcompany.com.txt b/inc/3rdparty/site_config/standard/fastcompany.com.txt index a6417237..bf8375ee 100755 --- a/inc/3rdparty/site_config/standard/fastcompany.com.txt +++ b/inc/3rdparty/site_config/standard/fastcompany.com.txt | |||
@@ -1,16 +1,20 @@ | |||
1 | title: //h1 | 1 | author: //div[@class='byline']//a |
2 | author: //h5[@class='byline']//a | 2 | date: //meta[@property='article:published_time']/@content |
3 | date: //h5[@class='date'] | 3 | body: //figure[@class='jumbotron'] | //div[@itemprop='body'] |
4 | body: //figure[@class='node-poster'] | //div[contains(@class, "node-content")] | 4 | |
5 | strip_id_or_class: article-top-wrapper | 5 | prune: no |
6 | strip_id_or_class: footer-message | 6 | |
7 | strip_id_or_class: print-logo | 7 | #strip_id_or_class: article-top-wrapper |
8 | strip: //cite | 8 | #strip_id_or_class: footer-message |
9 | strip://*[@class='timestamp'] | 9 | #strip_id_or_class: print-logo |
10 | strip://div[@id='page_right'] | 10 | #strip: //cite |
11 | strip://section[@id='header_region'] | 11 | #strip://*[@class='timestamp'] |
12 | strip://h1[@class='node-title'] | 12 | #strip://div[@id='page_right'] |
13 | strip://div[@class='node-submitted'] | 13 | #strip://section[@id='header_region'] |
14 | strip_id_or_class: skipnav | 14 | #strip://h1[@class='node-title'] |
15 | #strip://div[@class='node-submitted'] | ||
16 | #strip_id_or_class: skipnav | ||
17 | |||
15 | test_url: http://www.fastcompany.com/3000226/link-between-quietness-and-productivity | 18 | test_url: http://www.fastcompany.com/3000226/link-between-quietness-and-productivity |
16 | test_url: http://www.fastcompany.com/3003586/6-simple-rituals-reach-your-potential-every-day \ No newline at end of file | 19 | test_contains: Some of you may have tried to reach me this morning |
20 | test_url: http://www.fastcompany.com/3003586/6-simple-rituals-reach-your-potential-every-day | ||
diff --git a/inc/3rdparty/site_config/standard/fok.nl.txt b/inc/3rdparty/site_config/standard/fok.nl.txt new file mode 100755 index 00000000..012f07df --- /dev/null +++ b/inc/3rdparty/site_config/standard/fok.nl.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | # skip cookie warning | ||
2 | single_page_link: concat(//form/@action, '?allowcookies=yes') | ||
3 | |||
4 | test_url: http://fok.nl/687116 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/forsvaret.no.txt b/inc/3rdparty/site_config/standard/forsvaret.no.txt index c1bd2bac..ec9e5807 100755 --- a/inc/3rdparty/site_config/standard/forsvaret.no.txt +++ b/inc/3rdparty/site_config/standard/forsvaret.no.txt | |||
@@ -6,4 +6,5 @@ strip: //div[contains(@class,"aside")] | |||
6 | # remove some SharePoint webpart label junk | 6 | # remove some SharePoint webpart label junk |
7 | strip: //div[@id="ctl00_PlaceHolderMain_ArticleLeadField_label"] | 7 | strip: //div[@id="ctl00_PlaceHolderMain_ArticleLeadField_label"] |
8 | strip: //div[@id="ctl00_PlaceHolderMain_PublishingPageContentField_label"] | 8 | strip: //div[@id="ctl00_PlaceHolderMain_PublishingPageContentField_label"] |
9 | test_url: http://forsvaret.no/aktuelt/publisert/nyheter/Sider/F5-fly-til-Skedsmo.aspx \ No newline at end of file | 9 | test_url: https://forsvaret.no/aktuelt/historisk-medaljeutdeling |
10 | test_contains: Samarbeidet med Marinen har vært en sann glede | ||
diff --git a/inc/3rdparty/site_config/standard/france24.com.txt b/inc/3rdparty/site_config/standard/france24.com.txt new file mode 100755 index 00000000..6356e048 --- /dev/null +++ b/inc/3rdparty/site_config/standard/france24.com.txt | |||
@@ -0,0 +1,14 @@ | |||
1 | # Generated by FiveFilters.org's web-based selection tool | ||
2 | # Place this file inside your site_config/custom/ folder | ||
3 | # Source: http://siteconfig.fivefilters.org/grab.php?url=http%3A%2F%2Fwww.france24.com%2Fen%2F20150427-togo-gnassingbe-poised-extend-power-election%2F | ||
4 | |||
5 | body: //article[contains(concat(' ',normalize-space(@class),' '),' article-long ')]//div[contains(concat(' ',normalize-space(@class),' '),' bd ')] | ||
6 | title: //h1[@class="title"] | ||
7 | author://p[@class="author"] | ||
8 | date://p[@class="modification"] | ||
9 | |||
10 | find_string: <p class="modification">Latest update : | ||
11 | replace_string: <p class="modification"> | ||
12 | |||
13 | |||
14 | test_url: http://www.france24.com/en/20150427-togo-gnassingbe-poised-extend-power-election/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/galwayindependent.com.txt b/inc/3rdparty/site_config/standard/galwayindependent.com.txt new file mode 100755 index 00000000..d45b7acf --- /dev/null +++ b/inc/3rdparty/site_config/standard/galwayindependent.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | title: //div[@class='leftCol']/h1 | ||
2 | |||
3 | prune: no | ||
diff --git a/inc/3rdparty/site_config/standard/gameblog.fr.txt b/inc/3rdparty/site_config/standard/gameblog.fr.txt index 73f8342f..227d39ac 100755 --- a/inc/3rdparty/site_config/standard/gameblog.fr.txt +++ b/inc/3rdparty/site_config/standard/gameblog.fr.txt | |||
@@ -1,5 +1,7 @@ | |||
1 | title: //meta[@property="og:title"]/@content | 1 | title: //meta[@property="og:title"]/@content |
2 | body: //div[@id='GBTVPlayer'] | //div[contains(@class, 'col490')] | 2 | body: //div[@id='GBTVPlayer'] | //div[contains(@class, 'col490')] |
3 | author: //span[contains(concat(' ',normalize-space(@class),' '),' author ')] | ||
4 | date: //header[@id='gbArticleHeader']//div//time/@datetime | ||
3 | 5 | ||
4 | prune: no | 6 | prune: no |
5 | 7 | ||
@@ -7,4 +9,4 @@ strip_id_or_class: noprint | |||
7 | strip: //div[@id='gbNewsTextContent']/following-sibling::* | 9 | strip: //div[@id='gbNewsTextContent']/following-sibling::* |
8 | 10 | ||
9 | test_url: http://www.gameblog.fr/news/26330-les-sims-3-showtime-s-annonce-en-video | 11 | test_url: http://www.gameblog.fr/news/26330-les-sims-3-showtime-s-annonce-en-video |
10 | test_url: http://www.gameblog.fr/news/26306-mise-a-jour-du-dashboard-de-la-xbox-360-disponible \ No newline at end of file | 12 | test_url: http://www.gameblog.fr/news/26306-mise-a-jour-du-dashboard-de-la-xbox-360-disponible |
diff --git a/inc/3rdparty/site_config/standard/gawker.com.txt b/inc/3rdparty/site_config/standard/gawker.com.txt index 9bc5613a..27e4b4bb 100755 --- a/inc/3rdparty/site_config/standard/gawker.com.txt +++ b/inc/3rdparty/site_config/standard/gawker.com.txt | |||
@@ -3,4 +3,6 @@ body: //div[@class="post-body"] | |||
3 | # Remove 'content is restricted' | 3 | # Remove 'content is restricted' |
4 | strip: //div[@id='agegate_IDHERE'] | 4 | strip: //div[@id='agegate_IDHERE'] |
5 | 5 | ||
6 | http_header(user-agent): PHP/5.3 | ||
7 | |||
6 | test_url: http://gawker.com/#!5782070/russian-bomb-squad-successfully-defuses-sex-toy \ No newline at end of file | 8 | test_url: http://gawker.com/#!5782070/russian-bomb-squad-successfully-defuses-sex-toy \ No newline at end of file |
diff --git a/inc/3rdparty/site_config/standard/getpocket.com.txt b/inc/3rdparty/site_config/standard/getpocket.com.txt new file mode 100755 index 00000000..e6ca16ae --- /dev/null +++ b/inc/3rdparty/site_config/standard/getpocket.com.txt | |||
@@ -0,0 +1 @@ | |||
http_header(user-agent): PHP/5.3 \ No newline at end of file | |||
diff --git a/inc/3rdparty/site_config/standard/gist.github.com.txt b/inc/3rdparty/site_config/standard/gist.github.com.txt index f11b7b42..90207862 100755 --- a/inc/3rdparty/site_config/standard/gist.github.com.txt +++ b/inc/3rdparty/site_config/standard/gist.github.com.txt | |||
@@ -1,4 +1,6 @@ | |||
1 | body: //div[@class="highlight"]/pre | ||
1 | 2 | ||
2 | title: //div[contains(@class,'gist-description')] | 3 | prune: no |
3 | body: //div[contains(@class,'blob-wrapper')] | 4 | tidy: no |
4 | test_url: https://gist.github.com/staltz/868e7e9bc2a7b8c1f754 | 5 | |
6 | test_url: https://gist.github.com/1258908 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gizmodo.com.au.txt b/inc/3rdparty/site_config/standard/gizmodo.com.au.txt new file mode 100755 index 00000000..9dbfc152 --- /dev/null +++ b/inc/3rdparty/site_config/standard/gizmodo.com.au.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | body: //div[@id='content_post' or @class="post-body" or contains(@class, 'illustration top')] | ||
2 | author: (//cite//span[@class="plus-icon"])[1] | ||
3 | date: //span[@class="date"] | ||
4 | date: //time | ||
5 | |||
6 | prune: no | ||
7 | |||
8 | test_url: http://gizmodo.com/5880147/kuhn-rikon-improves-their-spice-grinder-with-grade-school-science | ||
diff --git a/inc/3rdparty/site_config/standard/gizmodo.com.txt b/inc/3rdparty/site_config/standard/gizmodo.com.txt index e73ec9d2..535041cd 100755 --- a/inc/3rdparty/site_config/standard/gizmodo.com.txt +++ b/inc/3rdparty/site_config/standard/gizmodo.com.txt | |||
@@ -6,6 +6,8 @@ date: //span[@class="date"] | |||
6 | 6 | ||
7 | prune: no | 7 | prune: no |
8 | 8 | ||
9 | http_header(user-agent): PHP/5.3 | ||
10 | |||
9 | test_url: http://gizmodo.com/5880147/kuhn-rikon-improves-their-spice-grinder-with-grade-school-science | 11 | test_url: http://gizmodo.com/5880147/kuhn-rikon-improves-their-spice-grinder-with-grade-school-science |
10 | test_url: http://gizmodo.com/what-van-goghs-paintings-would-look-like-if-they-came-874035680 | 12 | test_url: http://gizmodo.com/what-van-goghs-paintings-would-look-like-if-they-came-874035680 |
11 | test_url: http://gizmodo.com/vip.xml \ No newline at end of file | 13 | test_url: http://gizmodo.com/vip.xml \ No newline at end of file |
diff --git a/inc/3rdparty/site_config/standard/globalgrind.com.txt b/inc/3rdparty/site_config/standard/globalgrind.com.txt new file mode 100755 index 00000000..e2f4e233 --- /dev/null +++ b/inc/3rdparty/site_config/standard/globalgrind.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | body: //div[contains(@class, 'content-body')] | ||
2 | |||
3 | prune: no | ||
4 | |||
5 | test_url: http://globalgrind.com/2015/04/26/listen-jeremih-featuring-chance-the-rapper-the-social-experiment-planes-remix-new-music/ | ||
6 | test_contains: The Chicago rapper has made a name for himself \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gocomics.com.txt b/inc/3rdparty/site_config/standard/gocomics.com.txt new file mode 100755 index 00000000..212c02d5 --- /dev/null +++ b/inc/3rdparty/site_config/standard/gocomics.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | body: //a[@class="photo"]/img[@class="strip"] | ||
2 | author: //meta[@name="author"]/@content | ||
3 | date: //meta[@property="gocomics:publish_date"]/@content | ||
4 | |||
5 | test_url: http://www.gocomics.com/garfield/2015/06/13 | ||
diff --git a/inc/3rdparty/site_config/standard/help.fivefilters.org.txt b/inc/3rdparty/site_config/standard/help.fivefilters.org.txt new file mode 100755 index 00000000..70a7d156 --- /dev/null +++ b/inc/3rdparty/site_config/standard/help.fivefilters.org.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | title: //div[@class="title"]/h3 | ||
2 | date: substring-after(//div[@class="meta"], ": ") | ||
diff --git a/inc/3rdparty/site_config/standard/heraldsun.com.au.txt b/inc/3rdparty/site_config/standard/heraldsun.com.au.txt new file mode 100755 index 00000000..b0ce56c5 --- /dev/null +++ b/inc/3rdparty/site_config/standard/heraldsun.com.au.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | #body: //div[@class='story-body'] | ||
2 | body: //div[contains(@class, 'story-body')] | ||
3 | title: //div[@class='story-headline']//h1 | ||
4 | author: //cite[contains(@class, 'author')] | ||
5 | date: //span[@class='datestamp'] | ||
6 | |||
7 | strip_id_or_class: story-info | ||
8 | strip: //div[contains(@class, 'story-promo')] | ||
9 | strip: //div[contains(@class, 'story-related')] | ||
10 | |||
11 | prune: no | ||
12 | tidy: no | ||
diff --git a/inc/3rdparty/site_config/standard/hiiraan.com.txt b/inc/3rdparty/site_config/standard/hiiraan.com.txt new file mode 100755 index 00000000..cf1f7942 --- /dev/null +++ b/inc/3rdparty/site_config/standard/hiiraan.com.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | # Generated by FiveFilters.org's web-based selection tool | ||
2 | # Place this file inside your site_config/custom/ folder | ||
3 | # Source: http://siteconfig.fivefilters.org/grab.php?url=http%3A%2F%2Fwww.hiiraan.com%2Fnews%2F2014%2FDec%2Fwararka_maanta20-89428.htm | ||
4 | |||
5 | body: //div[contains(concat(' ',normalize-space(@class),' '),' single ')]//div[contains(concat(' ',normalize-space(@class),' '),' description ')] | ||
6 | |||
7 | prune: no | ||
8 | |||
9 | test_url: http://www.hiiraan.com/news/2014/Dec/wararka_maanta20-89428.htm | ||
10 | test_url: http://rss.hiiraan.com/wararka_maanta_rss.xml \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/independent.co.uk.txt b/inc/3rdparty/site_config/standard/independent.co.uk.txt index af742209..6711a0a2 100755 --- a/inc/3rdparty/site_config/standard/independent.co.uk.txt +++ b/inc/3rdparty/site_config/standard/independent.co.uk.txt | |||
@@ -1,9 +1,16 @@ | |||
1 | title: //meta[@property='og:title']/@content | 1 | title: //meta[@property='og:title']/@content |
2 | body: //div[contains(@class, 'articleContent')] | 2 | body: //img[contains(@class, 'FirstImage')] | //div[contains(@class, 'articleContent')] |
3 | date: //meta[@property='article:published_time']/@content | 3 | date: //meta[@property='article:published_time']/@content |
4 | author: //div[@id='main']//div[@class='byline']//span[@class='authorName'] | 4 | author: //div[@id='main']//div[@class='byline']//span[@class='authorName'] |
5 | 5 | ||
6 | strip_id_or_class: RelatedArtTag | 6 | strip_id_or_class: RelatedArtTag |
7 | 7 | ||
8 | strip: //h5[contains(., 'READ MORE:')] | ||
9 | strip: //h5[contains(., 'Read more:')] | ||
10 | |||
8 | tidy: no | 11 | tidy: no |
9 | test_url: http://www.independent.co.uk/news/world/middle-east/syria-could-face-human-rights-probe-2274326.html \ No newline at end of file | 12 | test_url: http://www.independent.co.uk/news/world/middle-east/syria-could-face-human-rights-probe-2274326.html |
13 | test_url: http://www.independent.co.uk/voices/comment/robert-fisk-on-the-cia-torture-report-once-again-language-is-distorted-in-order-to-hide-us-state-wrongdoing-9924501.html | ||
14 | test_contains: Thank God for Noam Chomsky. | ||
15 | |||
16 | test_url: http://www.independent.co.uk/news/uk/rss \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/io9.com.txt b/inc/3rdparty/site_config/standard/io9.com.txt new file mode 100755 index 00000000..e6ca16ae --- /dev/null +++ b/inc/3rdparty/site_config/standard/io9.com.txt | |||
@@ -0,0 +1 @@ | |||
http_header(user-agent): PHP/5.3 \ No newline at end of file | |||
diff --git a/inc/3rdparty/site_config/standard/ippmedia.com.txt b/inc/3rdparty/site_config/standard/ippmedia.com.txt new file mode 100755 index 00000000..99f25dc0 --- /dev/null +++ b/inc/3rdparty/site_config/standard/ippmedia.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | title: //div[@class="content_title"]//h2 | ||
2 | author: substring-after(//div[@class="byline"], "By ") | ||
3 | date: //div[@class="publish_date"] | ||
4 | strip: //div[@class="read_image_box"] | ||
diff --git a/inc/3rdparty/site_config/standard/itnews.com.au.txt b/inc/3rdparty/site_config/standard/itnews.com.au.txt new file mode 100755 index 00000000..47cbb0f3 --- /dev/null +++ b/inc/3rdparty/site_config/standard/itnews.com.au.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title: //h1[@class='article-header'] | ||
2 | body: //div[@class='body-content'] | ||
3 | author: //span[@class='author-byline']/a[contains(@id, 'Author')] | ||
4 | |||
5 | strip: //span[contains(@id, 'Article_SourceLabel')] | ||
diff --git a/inc/3rdparty/site_config/standard/jalopnik.com.txt b/inc/3rdparty/site_config/standard/jalopnik.com.txt index fc2eef8e..7823dbd7 100755 --- a/inc/3rdparty/site_config/standard/jalopnik.com.txt +++ b/inc/3rdparty/site_config/standard/jalopnik.com.txt | |||
@@ -1,2 +1,5 @@ | |||
1 | author: //span[@class='plus-icon'] | 1 | author: //span[@class='plus-icon'] |
2 | |||
3 | http_header(user-agent): PHP/5.3 | ||
4 | |||
2 | test_url: http://jalopnik.com/5892124/1955-porsche-550-spyder-sells-for-record-3685-million/ \ No newline at end of file | 5 | test_url: http://jalopnik.com/5892124/1955-porsche-550-spyder-sells-for-record-3685-million/ \ No newline at end of file |
diff --git a/inc/3rdparty/site_config/standard/jezebel.com.txt b/inc/3rdparty/site_config/standard/jezebel.com.txt new file mode 100755 index 00000000..e6ca16ae --- /dev/null +++ b/inc/3rdparty/site_config/standard/jezebel.com.txt | |||
@@ -0,0 +1 @@ | |||
http_header(user-agent): PHP/5.3 \ No newline at end of file | |||
diff --git a/inc/3rdparty/site_config/standard/kotaku.com.txt b/inc/3rdparty/site_config/standard/kotaku.com.txt index be439d75..61ccbc46 100755 --- a/inc/3rdparty/site_config/standard/kotaku.com.txt +++ b/inc/3rdparty/site_config/standard/kotaku.com.txt | |||
@@ -1,2 +1,5 @@ | |||
1 | author: //span[@class="plus-icon"] | 1 | author: //span[@class="plus-icon"] |
2 | |||
3 | http_header(user-agent): PHP/5.3 | ||
4 | |||
2 | test_url: http://kotaku.com/5920211/save-the-furries-on-your-wii-in-this-weeks-nintendo-download \ No newline at end of file | 5 | test_url: http://kotaku.com/5920211/save-the-furries-on-your-wii-in-this-weeks-nintendo-download \ No newline at end of file |
diff --git a/inc/3rdparty/site_config/standard/lefigaro.fr.txt b/inc/3rdparty/site_config/standard/lefigaro.fr.txt index e720e377..7e1d12d7 100755 --- a/inc/3rdparty/site_config/standard/lefigaro.fr.txt +++ b/inc/3rdparty/site_config/standard/lefigaro.fr.txt | |||
@@ -2,7 +2,8 @@ title: //meta[@name='title']/@content | |||
2 | author: //span[@class='sign']//a[@class='journaliste'] | 2 | author: //span[@class='sign']//a[@class='journaliste'] |
3 | author: //meta[@name='author']/@content | 3 | author: //meta[@name='author']/@content |
4 | body: //*[@id='article']/div[@class='photo'] | //*[@id='article']/h2 | //*[@id='article']/div[@class='texte'] | 4 | body: //*[@id='article']/div[@class='photo'] | //*[@id='article']/h2 | //*[@id='article']/div[@class='texte'] |
5 | date: //time[@pubdate]/@datetime | 5 | date: //li[contains(concat(' ',normalize-space(@class),' '),' fig-date-pub ')]//time |
6 | prune: no | 6 | prune: no |
7 | test_url: http://www.lefigaro.fr/environnement/2011/11/10/01029-20111110ARTFIG00801-la-chine-confrontee-a-un-immense-defi-ecologique.php | 7 | test_url: http://www.lefigaro.fr/environnement/2011/11/10/01029-20111110ARTFIG00801-la-chine-confrontee-a-un-immense-defi-ecologique.php |
8 | test_url: http://www.lefigaro.fr/conjoncture/2012/11/20/20002-20121120ARTFIG00609-l-usager-devrait-payer-plus-pour-financer-les-transports.php \ No newline at end of file | 8 | test_url: http://www.lefigaro.fr/conjoncture/2012/11/20/20002-20121120ARTFIG00609-l-usager-devrait-payer-plus-pour-financer-les-transports.php |
9 | test_url: http://www.lefigaro.fr/social/2015/03/10/09010-20150310ARTFIG00312-encore-une-annee-noire-pour-l-emploi-salarie.php | ||
diff --git a/inc/3rdparty/site_config/standard/lifehacker.com.txt b/inc/3rdparty/site_config/standard/lifehacker.com.txt index ec97f06c..330c4e78 100755 --- a/inc/3rdparty/site_config/standard/lifehacker.com.txt +++ b/inc/3rdparty/site_config/standard/lifehacker.com.txt | |||
@@ -42,6 +42,12 @@ strip: //p[@class="arrow"] | |||
42 | 42 | ||
43 | # Remove "track" image from article body | 43 | # Remove "track" image from article body |
44 | strip: //img[@alt="track"] | 44 | strip: //img[@alt="track"] |
45 | |||
46 | # Remove hidden URLs | ||
47 | strip: //a[@x-inset="hidden"] | ||
48 | |||
49 | http_header(user-agent): PHP/5.3 | ||
50 | |||
45 | test_url: http://lifehacker.com/5925801/how-can-i-turn-vague-goals-into-actionable-to+dos | 51 | test_url: http://lifehacker.com/5925801/how-can-i-turn-vague-goals-into-actionable-to+dos |
46 | test_url: http://lifehacker.com/5941600/hack-an-old-computer-mouse-into-a-wireless-bluetooth-mouse | 52 | test_url: http://lifehacker.com/5941600/hack-an-old-computer-mouse-into-a-wireless-bluetooth-mouse |
47 | test_url: http://lifehacker.com/what-happens-to-the-brain-when-you-meditate-and-how-it-1202533314 \ No newline at end of file | 53 | test_url: http://lifehacker.com/what-happens-to-the-brain-when-you-meditate-and-how-it-1202533314 |
diff --git a/inc/3rdparty/site_config/standard/linuxjournal.com.txt b/inc/3rdparty/site_config/standard/linuxjournal.com.txt new file mode 100755 index 00000000..c5e64463 --- /dev/null +++ b/inc/3rdparty/site_config/standard/linuxjournal.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | body: //div[@class='content-area'] | ||
2 | next_page_link: //a[@title='Go to next page'] | ||
3 | author: //a[@title='View user profile.'] | ||
4 | strip_id_or_class: comments | ||
5 | |||
6 | test_url: http://www.linuxjournal.com/content/be-mechanicwith-android-and-linux | ||
diff --git a/inc/3rdparty/site_config/standard/m.bbc.co.uk.txt b/inc/3rdparty/site_config/standard/m.bbc.co.uk.txt index d1ff0b43..7037c64b 100755 --- a/inc/3rdparty/site_config/standard/m.bbc.co.uk.txt +++ b/inc/3rdparty/site_config/standard/m.bbc.co.uk.txt | |||
@@ -3,6 +3,12 @@ body: //div[@class="story-body"] | |||
3 | date: //p[@class='date']/strong | 3 | date: //p[@class='date']/strong |
4 | author: substring-after(//div[@class="story-inner"]/div[@class="byline"]//span[@class='name'], 'By') | 4 | author: substring-after(//div[@class="story-inner"]/div[@class="byline"]//span[@class='name'], 'By') |
5 | 5 | ||
6 | find_string: http://ichef.bbci.co.uk/news/200/ | ||
7 | replace_string: http://ichef.bbci.co.uk/news/624/ | ||
8 | |||
9 | find_string: http://ichef.bbci.co.uk/news/304/ | ||
10 | replace_string: http://ichef.bbci.co.uk/news/624/ | ||
11 | |||
6 | strip: //div[@class="story-inner"]/div[@class="byline"] | 12 | strip: //div[@class="story-inner"]/div[@class="byline"] |
7 | 13 | ||
8 | test_url: http://m.bbc.co.uk/news/science-environment-19144464 \ No newline at end of file | 14 | test_url: http://m.bbc.co.uk/news/science-environment-19144464 \ No newline at end of file |
diff --git a/inc/3rdparty/site_config/standard/m.facebook.com.txt b/inc/3rdparty/site_config/standard/m.facebook.com.txt new file mode 100755 index 00000000..1b9c1b34 --- /dev/null +++ b/inc/3rdparty/site_config/standard/m.facebook.com.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | body: //div[@id='m_story_permalink_view' or contains(@data-sigil, 'm-story-view')] | ||
2 | |||
3 | title: //div[@id='m_story_permalink_view' or contains(@data-sigil, 'm-story-view')]//h3 | ||
4 | |||
5 | strip_id_or_class: commentable | ||
6 | strip: //*[contains(@data-sigil, 'm-mentions-expand') or contains(@data-sigil, 'story-popup-context') or contains(@data-sigil, 'share') or contains(@data-sigil, 'translate')] | ||
7 | |||
8 | prune: no | ||
9 | tidy: no | ||
10 | |||
11 | test_url: https://www.facebook.com/permalink.php?story_fbid=10154584776550183&id=294468630182 | ||
12 | test_contains: holding an extraordinary session in Brussels this month | ||
diff --git a/inc/3rdparty/site_config/standard/m.theregister.co.uk.txt b/inc/3rdparty/site_config/standard/m.theregister.co.uk.txt new file mode 100755 index 00000000..64cb1c32 --- /dev/null +++ b/inc/3rdparty/site_config/standard/m.theregister.co.uk.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | strip: //div[@class='wptl btm'] | ||
2 | body: //div[@id='article']//h2 | //div[@id='body'] | ||
3 | |||
4 | test_url: http://m.theregister.co.uk/2015/07/06/geeks_guide_spaceguard_center/ | ||
diff --git a/inc/3rdparty/site_config/standard/marketingmag.com.au.txt b/inc/3rdparty/site_config/standard/marketingmag.com.au.txt new file mode 100755 index 00000000..910741f3 --- /dev/null +++ b/inc/3rdparty/site_config/standard/marketingmag.com.au.txt | |||
@@ -0,0 +1 @@ | |||
strip: //h3[@class="related-posts"] | |||
diff --git a/inc/3rdparty/site_config/standard/medium.com.txt b/inc/3rdparty/site_config/standard/medium.com.txt index 9e9c6895..5ab3ac5e 100755 --- a/inc/3rdparty/site_config/standard/medium.com.txt +++ b/inc/3rdparty/site_config/standard/medium.com.txt | |||
@@ -1,4 +1,4 @@ | |||
1 | body: //div[contains(@class, 'postContent-inner')] | 1 | body: //div[contains(@class, 'postArticle-content')] |
2 | strip_id_or_class: supplementalPostContent | 2 | strip_id_or_class: supplementalPostContent |
3 | 3 | ||
4 | prune: no | 4 | prune: no |
diff --git a/inc/3rdparty/site_config/standard/menshealth.com.sg.txt b/inc/3rdparty/site_config/standard/menshealth.com.sg.txt index 6a669253..af450b5e 100755 --- a/inc/3rdparty/site_config/standard/menshealth.com.sg.txt +++ b/inc/3rdparty/site_config/standard/menshealth.com.sg.txt | |||
@@ -3,10 +3,5 @@ body: //div[@style="float:left;width:740px;"] | |||
3 | 3 | ||
4 | tidy: no | 4 | tidy: no |
5 | 5 | ||
6 | test_url: http://www.menshealth.com.sg/fitness/mh-picks-under-armour-clutchfit-nitro-mid-cleats | 6 | # broken feed? |
7 | test_contains: These cleats are made for one thing | 7 | test_url: http://www.menshealth.com.sg/fitness/feed |
8 | |||
9 | test_url: http://www.menshealth.com.sg/fitness/top-10-fat-burning-bodyweight-moves-you-can-do-10-minutes | ||
10 | test_contains: let this workout fool you | ||
11 | |||
12 | test_url: http://www.menshealth.com.sg/fitness/feed \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mitchellrepublic.com.txt b/inc/3rdparty/site_config/standard/mitchellrepublic.com.txt new file mode 100755 index 00000000..fae858a3 --- /dev/null +++ b/inc/3rdparty/site_config/standard/mitchellrepublic.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | body: //div[@class='section'] | ||
2 | strip_id_or_class: mediumtxt | ||
3 | strip: //strong[contains | ||
diff --git a/inc/3rdparty/site_config/standard/mobile.nytimes.com.txt b/inc/3rdparty/site_config/standard/mobile.nytimes.com.txt index c60252ef..ef1ce98d 100755 --- a/inc/3rdparty/site_config/standard/mobile.nytimes.com.txt +++ b/inc/3rdparty/site_config/standard/mobile.nytimes.com.txt | |||
@@ -1,4 +1,7 @@ | |||
1 | title: //h1[contains(@class, 'headline')] | 1 | title: //h1[contains(@class, 'headline')] |
2 | body: //article[contains(@class, 'full-art')] | 2 | body: //article[contains(@class, 'full-art')] |
3 | date: //meta[@name="pdate"]/@content | ||
4 | author: //meta[@name="byl"]/@content | ||
5 | |||
3 | strip_id_or_class: image-credit | 6 | strip_id_or_class: image-credit |
4 | test_url: http://mobile.nytimes.com/2014/06/19/opinion/gail-collins-romney-and-the-2016-contenders-huddle.html \ No newline at end of file | 7 | test_url: http://mobile.nytimes.com/2014/06/19/opinion/gail-collins-romney-and-the-2016-contenders-huddle.html \ No newline at end of file |
diff --git a/inc/3rdparty/site_config/standard/moneymanagement.com.au.txt b/inc/3rdparty/site_config/standard/moneymanagement.com.au.txt new file mode 100755 index 00000000..9892f662 --- /dev/null +++ b/inc/3rdparty/site_config/standard/moneymanagement.com.au.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | date: //span[@class="publishdate"]//time | ||
2 | author: //span[@class="byline"] | ||
diff --git a/inc/3rdparty/site_config/standard/nbnnews.com.au.txt b/inc/3rdparty/site_config/standard/nbnnews.com.au.txt new file mode 100755 index 00000000..a2409878 --- /dev/null +++ b/inc/3rdparty/site_config/standard/nbnnews.com.au.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' entry ') or contains(@class, 'single-post-thumb')] | ||
2 | test_url: http://www.nbnnews.com.au/2015/03/24/lismore-man-will-attempt-to-run-around-australia/ | ||
3 | test_url: http://www.nbnnews.com.au/category/nthn-rivers-sport/feed/ | ||
diff --git a/inc/3rdparty/site_config/standard/news.com.au.txt b/inc/3rdparty/site_config/standard/news.com.au.txt new file mode 100755 index 00000000..57b89a54 --- /dev/null +++ b/inc/3rdparty/site_config/standard/news.com.au.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | body: //div[@class='story-body'] | ||
2 | prune: no | ||
3 | tidy: no | ||
diff --git a/inc/3rdparty/site_config/standard/news.menshealth.com.txt b/inc/3rdparty/site_config/standard/news.menshealth.com.txt new file mode 100755 index 00000000..a07fdacc --- /dev/null +++ b/inc/3rdparty/site_config/standard/news.menshealth.com.txt | |||
@@ -0,0 +1 @@ | |||
strip: //span[@style="color: #cf1206;"] | |||
diff --git a/inc/3rdparty/site_config/standard/news.ninemsn.com.au.txt b/inc/3rdparty/site_config/standard/news.ninemsn.com.au.txt new file mode 100755 index 00000000..ddd64065 --- /dev/null +++ b/inc/3rdparty/site_config/standard/news.ninemsn.com.au.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | strip: //a[@class="contact"] | ||
2 | strip: //div[@class="article-media video-item"] | ||
3 | date: //div[@class='display-date'] | ||
diff --git a/inc/3rdparty/site_config/standard/parool.nl.txt b/inc/3rdparty/site_config/standard/parool.nl.txt new file mode 100755 index 00000000..2ceef940 --- /dev/null +++ b/inc/3rdparty/site_config/standard/parool.nl.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | #bypass cookie check | ||
2 | single_page_link: //a[contains(@href, '/acceptCookieCheck.do?url=')] | ||
3 | |||
4 | test_url: http://www.parool.nl/parool/nl/4/AMSTERDAM/article/detail/4042734/2015/05/29/MRSA-bacterie-niet-verder-verspreid-in-Bijlmerbajes.dhtml | ||
5 | test_contains: De twee gevangenen die | ||
6 | |||
7 | test_url: http://www.parool.nl/amsterdam/rss.xml \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/perthnow.com.au.txt b/inc/3rdparty/site_config/standard/perthnow.com.au.txt new file mode 100755 index 00000000..b0ce56c5 --- /dev/null +++ b/inc/3rdparty/site_config/standard/perthnow.com.au.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | #body: //div[@class='story-body'] | ||
2 | body: //div[contains(@class, 'story-body')] | ||
3 | title: //div[@class='story-headline']//h1 | ||
4 | author: //cite[contains(@class, 'author')] | ||
5 | date: //span[@class='datestamp'] | ||
6 | |||
7 | strip_id_or_class: story-info | ||
8 | strip: //div[contains(@class, 'story-promo')] | ||
9 | strip: //div[contains(@class, 'story-related')] | ||
10 | |||
11 | prune: no | ||
12 | tidy: no | ||
diff --git a/inc/3rdparty/site_config/standard/planetsave.com.txt b/inc/3rdparty/site_config/standard/planetsave.com.txt new file mode 100755 index 00000000..d6f34e22 --- /dev/null +++ b/inc/3rdparty/site_config/standard/planetsave.com.txt | |||
@@ -0,0 +1 @@ | |||
strip_id_or_class: author-bio-box | |||
diff --git a/inc/3rdparty/site_config/standard/presseportal.de.txt b/inc/3rdparty/site_config/standard/presseportal.de.txt new file mode 100755 index 00000000..703806d8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/presseportal.de.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | body: //div[contains(concat(' ',normalize-space(@class),' '),' story-text ')] | ||
2 | |||
3 | strip_id_or_class: news-bodycopy | ||
4 | |||
5 | parser: html5php | ||
6 | tidy: no | ||
7 | |||
8 | test_url: http://www.presseportal.de/pm/103258/2930232/felix-neureuther-vor-der-ski-wm-ich-denke-von-rennen-zu-rennen | ||
9 | test_url: http://www.presseportal.de/pm/66749/2933779/koelner-stadt-anzeiger-bahnmitarbeiter-werden-in-nrw-immer-haeufiger-angegriffen-zahl-der/rss | ||
10 | test_contains: kleineren Bahnhöfen installieren und erhofft | ||
11 | test_url: http://www.presseportal.de/rss/presseportal.rss2 | ||
diff --git a/inc/3rdparty/site_config/standard/quechoisir.org.txt b/inc/3rdparty/site_config/standard/quechoisir.org.txt new file mode 100644 index 00000000..a8bacdb7 --- /dev/null +++ b/inc/3rdparty/site_config/standard/quechoisir.org.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | title: //h1[@id='titre'] | ||
2 | body://h2[@id="surtitre"]|//div[@id="ctn_introarticle"]|//div[@class="ctn_globalcontent"] | ||
3 | |||
4 | strip_id_or_class: qc-container-main | ||
5 | strip_id_or_class: article_footer | ||
6 | |||
7 | tidy: no | ||
8 | prune: no | ||
9 | |||
10 | test_url: http://www.quechoisir.org/alimentation/securite-hygiene/actualite-acrylamide-un-contaminant-trop-present-dans-nos-assiettes | ||
diff --git a/inc/3rdparty/site_config/standard/quora.com.txt b/inc/3rdparty/site_config/standard/quora.com.txt index 732d12d7..f2b75a99 100755 --- a/inc/3rdparty/site_config/standard/quora.com.txt +++ b/inc/3rdparty/site_config/standard/quora.com.txt | |||
@@ -1,8 +1,10 @@ | |||
1 | tidy: no | 1 | tidy: no |
2 | prune: no | 2 | prune: no |
3 | body: //div[contains(@class, 'main_col')] | 3 | body: //div[contains(concat(' ',normalize-space(@class),' '),' Answer ')] | //div[contains(concat(' ',normalize-space(@class),' '),' header ')] | //div[contains(concat(' ',normalize-space(@class),' '),' AnswerWikiArea ')] | //hr |
4 | title: //h1 | 4 | #body: //div[contains(@class, 'main_col')] |
5 | 5 | ||
6 | strip_id_or_class: AnswerFooter | ||
7 | strip_id_or_class: ActionBar | ||
6 | strip_id_or_class: hidden | 8 | strip_id_or_class: hidden |
7 | strip_id_or_class: item_action_bar | 9 | strip_id_or_class: item_action_bar |
8 | strip_id_or_class: answer_voters | 10 | strip_id_or_class: answer_voters |
@@ -13,5 +15,15 @@ strip_id_or_class: view_tag | |||
13 | strip_id_or_class: include_details | 15 | strip_id_or_class: include_details |
14 | strip_id_or_class: sig_edit | 16 | strip_id_or_class: sig_edit |
15 | strip_id_or_class: profile_photo_img | 17 | strip_id_or_class: profile_photo_img |
18 | strip_id_or_class: question_text_icons | ||
16 | 19 | ||
17 | test_url: http://www.quora.com/What-everyday-habit-do-you-wish-you-had-developed-earlier-in-life \ No newline at end of file | 20 | # insert hr between answers |
21 | find_string: <div class="Answer" | ||
22 | replace_string: <hr /><div class="Answer" | ||
23 | |||
24 | test_url: http://www.quora.com/What-everyday-habit-do-you-wish-you-had-developed-earlier-in-life | ||
25 | test_contains: Please provide a specific practical/measurable action-based everyday | ||
26 | test_contains: Exercise every day | ||
27 | |||
28 | test_url: http://www.quora.com/What-is-the-greatest-illusion-in-life | ||
29 | test_contains: What is the greatest illusion in life? | ||
diff --git a/inc/3rdparty/site_config/standard/reddit.com.txt b/inc/3rdparty/site_config/standard/reddit.com.txt index c3f2d3e5..ba342c7c 100755 --- a/inc/3rdparty/site_config/standard/reddit.com.txt +++ b/inc/3rdparty/site_config/standard/reddit.com.txt | |||
@@ -7,9 +7,7 @@ author: //p[@class="tagline"]/a | |||
7 | # this doesn't work for some reason...? | 7 | # this doesn't work for some reason...? |
8 | date: //p[@class="tagline"]//@datetime | 8 | date: //p[@class="tagline"]//@datetime |
9 | 9 | ||
10 | #body: (//div[contains(@class, 'noncollapsed')]//div[contains(@class, 'usertext-body')])[1] | 10 | body: (//div[contains(@class, 'noncollapsed')]//div[contains(@class, 'usertext-body')])[1] |
11 | |||
12 | body: //div[contains(concat(' ',normalize-space(@class),' '),' usertext-body ') and (contains(concat(' ',normalize-space(@class),' '),' may-blank-within ')) and (contains(concat(' ',normalize-space(@class),' '),' md-container '))]//div[contains(concat(' ',normalize-space(@class),' '),' md ')] | ||
13 | 11 | ||
14 | strip_id_or_class: tagline | 12 | strip_id_or_class: tagline |
15 | strip_id_or_class: unvotable-message | 13 | strip_id_or_class: unvotable-message |
@@ -20,4 +18,4 @@ single_page_link: //p[@class="title"]/a[contains(@href, 'http://')] | |||
20 | 18 | ||
21 | test_url: http://www.reddit.com/r/truegaming/comments/wfe7r/i_wrote_about_the_problems_i_honestly_feel_that/ | 19 | test_url: http://www.reddit.com/r/truegaming/comments/wfe7r/i_wrote_about_the_problems_i_honestly_feel_that/ |
22 | test_url: http://www.reddit.com/r/worldnews/comments/1as37r/twelve_north_korean_soldiers_attempting_to_defect/ | 20 | test_url: http://www.reddit.com/r/worldnews/comments/1as37r/twelve_north_korean_soldiers_attempting_to_defect/ |
23 | test_url: http://www.reddit.com/r/WritingPrompts/comments/2786lw/wp_in_a_world_where_puns_are_illegal_one_man/chybk8e | 21 | test_url: http://www.reddit.com/r/WritingPrompts/comments/2786lw/wp_in_a_world_where_puns_are_illegal_one_man/chybk8e \ No newline at end of file |
diff --git a/inc/3rdparty/site_config/standard/rockpapershotgun.com.txt b/inc/3rdparty/site_config/standard/rockpapershotgun.com.txt index f8c9541f..83342cb7 100755 --- a/inc/3rdparty/site_config/standard/rockpapershotgun.com.txt +++ b/inc/3rdparty/site_config/standard/rockpapershotgun.com.txt | |||
@@ -1,8 +1,10 @@ | |||
1 | title: //h2 | 1 | title: //h2 |
2 | 2 | ||
3 | strip: //div[ contains(@class, 'respond') ] | //h2 | //h1 | 3 | strip: //div[ contains(@class, 'respond') ] | //h2 | //h1 |
4 | strip_id_or_class: social | ||
5 | strip_id_or_class: dd_post_share | ||
4 | 6 | ||
5 | date: substring-after(//p[@class='info'], ' on ') | 7 | date: substring-after(//p[@class='info'], ' on ') |
6 | 8 | ||
7 | author: //p[@class='info']//a | 9 | author: //p[@class='info']//a |
8 | test_url: http://www.rockpapershotgun.com/2010/07/29/rps-half-verdict-starcraft-2/ \ No newline at end of file | 10 | test_url: http://www.rockpapershotgun.com/2010/07/29/rps-half-verdict-starcraft-2/ |
diff --git a/inc/3rdparty/site_config/standard/saadaalnews.net.txt b/inc/3rdparty/site_config/standard/saadaalnews.net.txt new file mode 100755 index 00000000..b9ce04e5 --- /dev/null +++ b/inc/3rdparty/site_config/standard/saadaalnews.net.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | body: //div[contains(@class, 'section-content-left')] | ||
2 | |||
3 | strip_id_or_class: related | ||
4 | strip_id_or_class: nocontent | ||
5 | strip_id_or_class: comment | ||
6 | strip_id_or_class: widget | ||
7 | strip_id_or_class: respond | ||
8 | strip: //h3[.='Comments'] | ||
9 | strip: //p[.='comments'] | ||
10 | |||
11 | test_url: http://saadaalnews.net/?p=42624 | ||
diff --git a/inc/3rdparty/site_config/standard/smh.com.au.txt b/inc/3rdparty/site_config/standard/smh.com.au.txt new file mode 100755 index 00000000..f647f812 --- /dev/null +++ b/inc/3rdparty/site_config/standard/smh.com.au.txt | |||
@@ -0,0 +1,14 @@ | |||
1 | body: //div[@id='content'] | ||
2 | title: //h1[@class='cN-headingPage'] | ||
3 | author: //h3[@class='authorName'] | ||
4 | date: //dd[@class='updated dtstamp'] | ||
5 | |||
6 | strip: //ul[@class='social sponsored cfix'] | ||
7 | strip: //div[contains(@class, 'hiddenVisually')] | ||
8 | strip: //dd[@class='updated dtstamp'] | ||
9 | strip: //h3[@class='authorName'] | ||
10 | strip: //ul[@class='social cfix'] | ||
11 | strip: //div[contains(@id, 'adspot')] | ||
12 | |||
13 | strip: //div[contains(@class, 'overlayPlayCountdown')] | ||
14 | strip: //div[@class='fdVideoWof']//span[@class='gone'] | ||
diff --git a/inc/3rdparty/site_config/standard/smh.drive.com.au.txt b/inc/3rdparty/site_config/standard/smh.drive.com.au.txt new file mode 100755 index 00000000..463fd88b --- /dev/null +++ b/inc/3rdparty/site_config/standard/smh.drive.com.au.txt | |||
@@ -0,0 +1,13 @@ | |||
1 | body: //div[@id='content'] | ||
2 | title: //h1[@class='cN-headingPage'] | ||
3 | author: //h3[@class='authorName'] | ||
4 | date: //dd[@class='updated dtstamp'] | ||
5 | |||
6 | strip: //ul[@class='social sponsored cfix'] | ||
7 | strip: //div[contains(@class, 'hiddenVisually')] | ||
8 | strip: //dd[@class='updated dtstamp'] | ||
9 | strip: //h3[@class='authorName'] | ||
10 | strip: //ul[@class='social cfix'] | ||
11 | strip: //div[contains(@id, 'adspot')] | ||
12 | |||
13 | test_url: http://smh.drive.com.au/roads-and-traffic/driver-distraction-responsible-for-more-car-crashes-than-alcohol-20130503-2iyg0.html | ||
diff --git a/inc/3rdparty/site_config/standard/smithsonianmag.com.txt b/inc/3rdparty/site_config/standard/smithsonianmag.com.txt index 3e8fee95..fc479c2a 100755 --- a/inc/3rdparty/site_config/standard/smithsonianmag.com.txt +++ b/inc/3rdparty/site_config/standard/smithsonianmag.com.txt | |||
@@ -7,6 +7,9 @@ body://div[@id = 'article-body'] | |||
7 | # full content | 7 | # full content |
8 | single_page_link://td/li[@class = 'article-singlepage']/a | 8 | single_page_link://td/li[@class = 'article-singlepage']/a |
9 | 9 | ||
10 | # continue link | ||
11 | single_page_link: //a[@id='continue-btn'] | ||
12 | |||
10 | # caption clean up | 13 | # caption clean up |
11 | wrap_in(i)://span[@class='articleImageCaptionwide'] | 14 | wrap_in(i)://span[@class='articleImageCaptionwide'] |
12 | move_into (//span[@class='articleImageCaptionwide'])://div[@id = 'articleImage']/p | 15 | move_into (//span[@class='articleImageCaptionwide'])://div[@id = 'articleImage']/p |
@@ -17,4 +20,4 @@ strip://p[@id = 'articlePaginationWrapper'] | |||
17 | strip://ul[contains(@class, 'cat-breadcrumb')] | 20 | strip://ul[contains(@class, 'cat-breadcrumb')] |
18 | strip://div [@class= 'viewMorePhotos'] | 21 | strip://div [@class= 'viewMorePhotos'] |
19 | 22 | ||
20 | test_url: http://www.smithsonianmag.com/history-archaeology/The-Goddess-Goes-Home.html \ No newline at end of file | 23 | test_url: http://www.smithsonianmag.com/history-archaeology/The-Goddess-Goes-Home.html |
diff --git a/inc/3rdparty/site_config/standard/snip.ly.txt b/inc/3rdparty/site_config/standard/snip.ly.txt new file mode 100755 index 00000000..4e80fcae --- /dev/null +++ b/inc/3rdparty/site_config/standard/snip.ly.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | single_page_link: //meta[@property="og:url"]/@content | ||
2 | |||
3 | test_url: http://snip.ly/qa1R \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/soundcity.tv.txt b/inc/3rdparty/site_config/standard/soundcity.tv.txt new file mode 100755 index 00000000..c26b9f95 --- /dev/null +++ b/inc/3rdparty/site_config/standard/soundcity.tv.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | strip_id_or_class: sharing | ||
2 | |||
3 | test_url: http://soundcity.tv/feed/ | ||
diff --git a/inc/3rdparty/site_config/standard/spiegel.de.txt b/inc/3rdparty/site_config/standard/spiegel.de.txt index 413e0155..7b7b1752 100755 --- a/inc/3rdparty/site_config/standard/spiegel.de.txt +++ b/inc/3rdparty/site_config/standard/spiegel.de.txt | |||
@@ -8,6 +8,9 @@ | |||
8 | # - Fixed single_page_link | 8 | # - Fixed single_page_link |
9 | # - Included intro text in single page view | 9 | # - Included intro text in single page view |
10 | # - Added body in default view | 10 | # - Added body in default view |
11 | # stesie@ | ||
12 | # - removed copyright box | ||
13 | # - removed "print more" box | ||
11 | 14 | ||
12 | # set body | 15 | # set body |
13 | tidy: no | 16 | tidy: no |
@@ -15,6 +18,7 @@ tidy: no | |||
15 | body: //div[@id="spArticleContent"] | 18 | body: //div[@id="spArticleContent"] |
16 | # body in default view | 19 | # body in default view |
17 | body: //div[@id="spArticleSection"] | 20 | body: //div[@id="spArticleSection"] |
21 | body: //div[contains(@class, 'article-section')] | //div[@id='js-article-top-wide-asset'] | //p[contains(@class, 'article-intro')] | //div[contains(@class, 'js-module-box-image')] | ||
18 | # body in "Fotostrecke" | 22 | # body in "Fotostrecke" |
19 | body: //div[@id="spBigaContent"] | 23 | body: //div[@id="spBigaContent"] |
20 | 24 | ||
@@ -25,6 +29,8 @@ strip: //div[@id="spArticleContent"]/h3 | |||
25 | # set date in "Fotostrecke" | 29 | # set date in "Fotostrecke" |
26 | date: //div[@id="spBigaDatum"] | 30 | date: //div[@id="spBigaDatum"] |
27 | 31 | ||
32 | # title in default view | ||
33 | title: //h2[contains(@class, 'article-title')] | ||
28 | #set title in single page view | 34 | #set title in single page view |
29 | title: //div[@id='spArticleContent']/h2 | 35 | title: //div[@id='spArticleContent']/h2 |
30 | # strip title | 36 | # strip title |
@@ -49,7 +55,7 @@ strip: //*/div[@class='spCredit']/following-sibling::p | |||
49 | strip: //div[@class='spMInline'] | 55 | strip: //div[@class='spMInline'] |
50 | 56 | ||
51 | # remove photogalleries and extras | 57 | # remove photogalleries and extras |
52 | strip: //div[@class='spPhotoGallery'] | 58 | strip: //div[contains(@class, 'spPhotoGallery')] |
53 | strip: //div[@class='spPhotoGallery']/following-sibling::br | 59 | strip: //div[@class='spPhotoGallery']/following-sibling::br |
54 | strip: //div[@class='spAssetAlignleft'] | 60 | strip: //div[@class='spAssetAlignleft'] |
55 | strip: //div[contains(@class,'spAsset')] | 61 | strip: //div[contains(@class,'spAsset')] |
@@ -67,9 +73,24 @@ strip: //div[@id='spBigaLatestEntries'] | |||
67 | strip: //div[contains(@class, 'spBigaNavi')] | 73 | strip: //div[contains(@class, 'spBigaNavi')] |
68 | strip: //div[@class='spDottedLine'] | 74 | strip: //div[@class='spDottedLine'] |
69 | 75 | ||
76 | strip: //div[@class='asset-box article-print-more'] | ||
77 | strip: //div[@class='article-copyright'] | ||
78 | strip: //span[@class='image-buttons'] | ||
79 | |||
70 | # Use link to print article for single page view | 80 | # Use link to print article for single page view |
71 | single_page_link: //a[contains(@href, '-druck')] | 81 | single_page_link: //a[contains(@href, '-druck')] |
82 | if_page_contains: //div[contains(@class, 'multi-pager-control')] | ||
83 | |||
84 | # Clean up title in print view | ||
85 | find_string: <title>Druckversion - | ||
86 | replace_string: <title> | ||
72 | 87 | ||
73 | # use next link in "Fotostrecke" | 88 | # use next link in "Fotostrecke" |
74 | next_page_link: //a[@class='spBigaControlForw'] | 89 | next_page_link: //a[@class='spBigaControlForw'] |
75 | test_url: http://www.spiegel.de/politik/deutschland/0,1518,787602,00.html \ No newline at end of file | 90 | test_url: http://www.spiegel.de/politik/deutschland/0,1518,787602,00.html |
91 | |||
92 | # regular article | ||
93 | test_url: http://www.spiegel.de/wirtschaft/soziales/griechenland-was-den-griechischen-buergern-nun-droht-a-1042682.html | ||
94 | |||
95 | # multipage article | ||
96 | test_url: http://www.spiegel.de/spiegel/a-710880.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/srf.ch.txt b/inc/3rdparty/site_config/standard/srf.ch.txt new file mode 100755 index 00000000..d07a9050 --- /dev/null +++ b/inc/3rdparty/site_config/standard/srf.ch.txt | |||
@@ -0,0 +1,24 @@ | |||
1 | # Author: cirnod@gmail.com | ||
2 | |||
3 | tidy: no | ||
4 | prune: no | ||
5 | |||
6 | body: //div[@id="article-content"]/p | //div[@class="main-article-content clearfix"] | ||
7 | |||
8 | # General Cleanup | ||
9 | strip_id_or_class: offscreen | ||
10 | strip_id_or_class: video-description | ||
11 | strip_id_or_class: v2 big-video | ||
12 | strip_id_or_class: module smb freetext | ||
13 | strip_id_or_class: asset span3 | ||
14 | strip_id_or_class: module smb related-links | ||
15 | |||
16 | # fix image-galleries | ||
17 | strip_id_or_class: module lightbox-gallery image hide | ||
18 | replace_string(width="624"): width="100%" | ||
19 | replace_string(height="468"): height="%" | ||
20 | |||
21 | # Try yourself | ||
22 | test_url: http://www.srf.ch/news/wirtschaft/weltbank-korrigiert-konjunktur-erwartungen-nach-unten | ||
23 | test_url: http://www.srf.ch/news/wirtschaft/ural-statt-alpen-russische-touristen-bleiben-zuhause | ||
24 | test_url: http://www.srf.ch/news/international/zwei-schweizer-bei-blutigem-attentat-in-mali-verletzt \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sueddeutsche.de.txt b/inc/3rdparty/site_config/standard/sueddeutsche.de.txt index 74b8d451..26e05605 100755 --- a/inc/3rdparty/site_config/standard/sueddeutsche.de.txt +++ b/inc/3rdparty/site_config/standard/sueddeutsche.de.txt | |||
@@ -1,6 +1,6 @@ | |||
1 | # 2012-12-04: complete rewrite after Süddeutsche.de relaunch - carlo@... | 1 | # 2012-12-04: complete rewrite after Süddeutsche.de relaunch - carlo@... |
2 | 2 | ||
3 | single_page_link: //a[ contains( @href, "/2.220/" ) ] | 3 | single_page_link: //li[@id="article-sidebar-action-print"]/@data-clickurl |
4 | 4 | ||
5 | body: //article[@id="sitecontent"]/section[@class="body"] | 5 | body: //article[@id="sitecontent"]/section[@class="body"] |
6 | author: //address[@class="author"] | 6 | author: //address[@class="author"] |
diff --git a/inc/3rdparty/site_config/standard/sunshinecoastdaily.com.au.txt b/inc/3rdparty/site_config/standard/sunshinecoastdaily.com.au.txt new file mode 100755 index 00000000..bf5e9189 --- /dev/null +++ b/inc/3rdparty/site_config/standard/sunshinecoastdaily.com.au.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | body: //section//article//p | ||
2 | |||
3 | strip: //aside | ||
4 | strip: //div[@class='margin-top-15'] | ||
5 | strip: //p[@class='tags'] | ||
6 | |||
7 | author: //span[@class='byline']//ul[@class='piped']//li[1] | ||
8 | date: //span[@class='byline']//ul[@class='piped']//li[2] | ||
9 | |||
10 | parser: html5lib | ||
diff --git a/inc/3rdparty/site_config/standard/sz.de.txt b/inc/3rdparty/site_config/standard/sz.de.txt index f67637d2..f194271f 100755 --- a/inc/3rdparty/site_config/standard/sz.de.txt +++ b/inc/3rdparty/site_config/standard/sz.de.txt | |||
@@ -1,6 +1,6 @@ | |||
1 | # 2012-12-04: complete rewrite after Süddeutsche.de relaunch - carlo@... | 1 | # 2012-12-04: complete rewrite after Süddeutsche.de relaunch - carlo@... |
2 | 2 | ||
3 | single_page_link: //a[ contains( @href, "/2.220/" ) ] | 3 | single_page_link: //li[@id="article-sidebar-action-print"]/@data-clickurl |
4 | 4 | ||
5 | body: //article[@id="sitecontent"]/section[@class="body"] | 5 | body: //article[@id="sitecontent"]/section[@class="body"] |
6 | author: //address[@class="author"] | 6 | author: //address[@class="author"] |
diff --git a/inc/3rdparty/site_config/standard/tagesanzeiger.ch.txt b/inc/3rdparty/site_config/standard/tagesanzeiger.ch.txt new file mode 100755 index 00000000..45c5cd02 --- /dev/null +++ b/inc/3rdparty/site_config/standard/tagesanzeiger.ch.txt | |||
@@ -0,0 +1,14 @@ | |||
1 | # Author: cirnod@gmail.com | ||
2 | |||
3 | tidy: no | ||
4 | prune: no | ||
5 | |||
6 | body: //div[@id="article"]/h3 | //*[@id="mainContent"] | ||
7 | |||
8 | # General Cleanup | ||
9 | #strip_id_or_class: info_panel | ||
10 | |||
11 | |||
12 | # Try yourself | ||
13 | test_url: http://www.tagesanzeiger.ch/zuerich/stadt/Nach-spektakulaerer-Abseilaktion-verhaftet/story/18039895 | ||
14 | test_url: http://www.tagesanzeiger.ch/ausland/naher-osten-und-afrika/IS-zerstoert-auch-das-antike-Hatra/story/19865699 | ||
diff --git a/inc/3rdparty/site_config/standard/tagesschau.de.txt b/inc/3rdparty/site_config/standard/tagesschau.de.txt index be76cd05..ba3b1d3b 100755 --- a/inc/3rdparty/site_config/standard/tagesschau.de.txt +++ b/inc/3rdparty/site_config/standard/tagesschau.de.txt | |||
@@ -1,23 +1,14 @@ | |||
1 | title://h1[1] | 1 | body: //div[contains(@class, 'sectionArticle')]//div[contains(concat(' ',normalize-space(@class),' '),' box ')] |
2 | 2 | ||
3 | author: substring-after(//em, 'Von ') | 3 | strip_id_or_class: infokasten |
4 | author:string('tagesschau.de') | 4 | strip_id_or_class: teaserImTeaser |
5 | strip_id_or_class: Comments | ||
6 | strip_id_or_class: mediaInfo | ||
7 | strip: //div[contains(@class, 'mediaCon')]//iframe | ||
5 | 8 | ||
6 | date:substring-after(//div[@class='standDatum'], 'Stand: ') | 9 | prune: no |
7 | 10 | ||
8 | body://div[contains(@class, 'article')] | //div[contains(@class, 'centerCol')] | 11 | test_url: http://www.tagesschau.de/ausland/snowden-dateien-entschluesselung-101.html |
12 | test_contains: Snowden hatte zunächst für | ||
9 | 13 | ||
10 | strip://h1[1] | 14 | test_url: http://www.tagesschau.de/xml/rss2 |
11 | strip: //div[contains(@class, 'directLinks')] | ||
12 | strip: //div[contains(@class, 'zitatBox')] | ||
13 | strip: //div[contains(@class, 'teaserBox metaBlock')] | ||
14 | strip: //*[contains(@class, 'inv')] | ||
15 | strip: //span[@class='imgSubline'] | ||
16 | strip: //*[contains(@class, 'topline')][1] | ||
17 | strip: //div[@id='rightCol'][1] | ||
18 | strip: //div[@id="footer"][1] | ||
19 | strip: //div[@class="fPlayer"] | ||
20 | strip: //div[@id='seitenanfang'] | ||
21 | strip: //div[@class='standDatum'] | ||
22 | strip: //em | ||
23 | test_url: http://www.tagesschau.de/ausland/wahlkampffrankreich102.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/taz.de.txt b/inc/3rdparty/site_config/standard/taz.de.txt index cf853662..a3368568 100755 --- a/inc/3rdparty/site_config/standard/taz.de.txt +++ b/inc/3rdparty/site_config/standard/taz.de.txt | |||
@@ -1,8 +1,9 @@ | |||
1 | date: //div[@class='secthead'] | 1 | date: //div[@class='secthead'] |
2 | body: //div[@class='sectbody'] | 2 | body: (//div[@class='sectbody'])[1] |
3 | title: concat(//div[@class='sectbody']/h4,': ',//div[@class='sectbody']/h1) | 3 | title: concat(//div[@class='sectbody']/h4,': ',//div[@class='sectbody']/h1) |
4 | author: //span[@class='author'] | 4 | author: //span[@class='author'] |
5 | strip: //p[@class='caption'] | 5 | strip: //p[@class='caption'] |
6 | strip_id_or_class: ad_bin | ||
6 | strip_id_or_class: rack | 7 | strip_id_or_class: rack |
7 | 8 | ||
8 | test_url: http://www.taz.de/Protestbewegung-Occupy/!80188/ \ No newline at end of file | 9 | test_url: http://www.taz.de/Protestbewegung-Occupy/!80188/ \ No newline at end of file |
diff --git a/inc/3rdparty/site_config/standard/theatlantic.com.txt b/inc/3rdparty/site_config/standard/theatlantic.com.txt index 3fc5611b..36864197 100755 --- a/inc/3rdparty/site_config/standard/theatlantic.com.txt +++ b/inc/3rdparty/site_config/standard/theatlantic.com.txt | |||
@@ -1,5 +1,6 @@ | |||
1 | title: //div[contains(@class, 'articleHead')]//h1 | 1 | title: //div[contains(@class, 'articleHead')]//h1 |
2 | 2 | ||
3 | body: //div[@itemprop='articleBody'] | ||
3 | body: //div[@class='articleText'] | 4 | body: //div[@class='articleText'] |
4 | body: //div[@class='articleContent'] | 5 | body: //div[@class='articleContent'] |
5 | body: //div[@id='article'] | 6 | body: //div[@id='article'] |
@@ -13,10 +14,14 @@ strip: //p[contains(., 'This article available online at:')] | |||
13 | strip: //p[contains(., 'This article available online at:')]/following::* | 14 | strip: //p[contains(., 'This article available online at:')]/following::* |
14 | strip: //div[@class='earthbox'] | 15 | strip: //div[@class='earthbox'] |
15 | 16 | ||
16 | single_page_link: //article//a[contains(@class, 'print')] | 17 | single_page_link: //div[contains(@class, 'article-tools')]//a[contains(@class, 'print')] |
17 | 18 | ||
18 | native_ad_clue: //meta[@property="og:url" and contains(@content, '/sponsored/')] | 19 | native_ad_clue: //meta[@property="og:url" and contains(@content, '/sponsored/')] |
19 | 20 | ||
21 | #multi-page article | ||
22 | test_url: http://www.theatlantic.com/magazine/archive/2014/12/the-real-roots-of-midlife-crisis/382235/ | ||
23 | test_contains: The curve tends to evince itself | ||
24 | |||
20 | test_url: http://www.theatlantic.com/technology/archive/2011/04/want-to-see-how-crazy-a-bot-run-market-can-be/237773/ | 25 | test_url: http://www.theatlantic.com/technology/archive/2011/04/want-to-see-how-crazy-a-bot-run-market-can-be/237773/ |
21 | test_url: http://www.theatlantic.com/magazine/archive/2007/11/the-autumn-of-the-multitaskers/6342/ | 26 | test_url: http://www.theatlantic.com/magazine/archive/2007/11/the-autumn-of-the-multitaskers/6342/ |
22 | test_url: http://www.theatlantic.com/entertainment/archive/2012/04/30-rock-live-a-funny-reminder-of-why-sitcoms-arent-shot-live-anymore/256447/ \ No newline at end of file | 27 | test_url: http://www.theatlantic.com/entertainment/archive/2012/04/30-rock-live-a-funny-reminder-of-why-sitcoms-arent-shot-live-anymore/256447/ |
diff --git a/inc/3rdparty/site_config/standard/theaustralian.com.au.txt b/inc/3rdparty/site_config/standard/theaustralian.com.au.txt new file mode 100755 index 00000000..1245efca --- /dev/null +++ b/inc/3rdparty/site_config/standard/theaustralian.com.au.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | body: //div[contains(@class, 'story-body')] | ||
2 | author: //cite[contains(@class, 'author')] | ||
3 | date: //span[@class='datestamp'] | ||
4 | |||
5 | strip: //div[@class='story-info'] | ||
6 | |||
diff --git a/inc/3rdparty/site_config/standard/thebostonchannel.com.txt b/inc/3rdparty/site_config/standard/thebostonchannel.com.txt index b74442de..808876da 100755 --- a/inc/3rdparty/site_config/standard/thebostonchannel.com.txt +++ b/inc/3rdparty/site_config/standard/thebostonchannel.com.txt | |||
@@ -1,7 +1,7 @@ | |||
1 | title: //meta[@name='og:title']/@content | 1 | title: //meta[@name='og:title']/@content |
2 | date: //meta[@name='created']/@content | 2 | date: //meta[@name='created']/@content |
3 | body: //div[@class="StoryBody" or @class="storyTeaser"] | 3 | body: //div[contains(@class, "article-body")] |
4 | 4 | ||
5 | replace_string(<p></p>): <br /><br /> | 5 | replace_string(<p></p>): <br /><br /> |
6 | 6 | ||
7 | test_url: http://www.thebostonchannel.com/slideshow/news/28210648/detail.html \ No newline at end of file | 7 | test_url: http://www.wcvb.com/news/2-teens-arrested-in-fatal-dorchester-shooting-of-16yearold-boy/33564886 |
diff --git a/inc/3rdparty/site_config/standard/theguardian.com.txt b/inc/3rdparty/site_config/standard/theguardian.com.txt index 88e2ecf4..c8b70e6f 100755 --- a/inc/3rdparty/site_config/standard/theguardian.com.txt +++ b/inc/3rdparty/site_config/standard/theguardian.com.txt | |||
@@ -1,5 +1,5 @@ | |||
1 | title: //div[@id='main-article-info']//h1 | 1 | title: //div[@id='main-article-info']//h1 |
2 | body: //div[@id='article-wrapper'] | 2 | body: //figure[contains(@itemprop, "associatedMedia")] | //div[contains(@itemprop, "articleBody")] |
3 | date: //li[@class='publication']//time[@pubdate] | //li[@class='publication']//data[@pubdate] | 3 | date: //li[@class='publication']//time[@pubdate] | //li[@class='publication']//data[@pubdate] |
4 | strip: //div[contains(@class, 'email-subscription')] | 4 | strip: //div[contains(@class, 'email-subscription')] |
5 | strip: //div[contains(@class, 'kindleWidget')] | 5 | strip: //div[contains(@class, 'kindleWidget')] |
@@ -11,6 +11,8 @@ native_ad_clue: //meta[@property="video:tag" and contains(@content, "Partner zon | |||
11 | prune: no | 11 | prune: no |
12 | tidy: no | 12 | tidy: no |
13 | 13 | ||
14 | strip_id_or_class: -expand- | ||
15 | |||
14 | test_url: http://www.theguardian.com/world/2013/oct/04/nsa-gchq-attack-tor-network-encryption | 16 | test_url: http://www.theguardian.com/world/2013/oct/04/nsa-gchq-attack-tor-network-encryption |
15 | test_contains: The National Security Agency has made repeated attempts to develop | 17 | test_contains: The National Security Agency has made repeated attempts to develop |
16 | test_contains: The agency did not directly address those questions, instead providing a statement. | 18 | test_contains: The agency did not directly address those questions, instead providing a statement. |
diff --git a/inc/3rdparty/site_config/standard/theregister.co.uk.txt b/inc/3rdparty/site_config/standard/theregister.co.uk.txt index 5d30230d..70d3d437 100755 --- a/inc/3rdparty/site_config/standard/theregister.co.uk.txt +++ b/inc/3rdparty/site_config/standard/theregister.co.uk.txt | |||
@@ -1,8 +1,9 @@ | |||
1 | # Updated 25-Jan-2014 | 1 | single_page_link: //link[contains(@href, 'm.theregister')] |
2 | single_page_link: //a[contains(@href, '/Print/')] | 2 | if_page_contains: //div[@id='nextpage'] |
3 | strip: //div[@class='wptl btm'] | ||
4 | body: //div[contains(@class,'article_head')]//h2 | //div[@id='body'] | ||
3 | 5 | ||
4 | title: //div[@id="article"]/h2 | 6 | #multipage |
5 | author: //p[@class="byline"]/a | 7 | test_url: http://www.theregister.co.uk/2015/07/06/geeks_guide_spaceguard_center/ |
6 | date: //p[@class="dateline"]/a[last()] | 8 | #singlepage |
7 | 9 | test_url: http://www.theregister.co.uk/2015/07/06/us_japan_massive_robots_in_the_ring/ | |
8 | test_url: http://www.theregister.co.uk/2014/01/24/thirty_years_of_the_apple_macintosh_part_2/ | ||
diff --git a/inc/3rdparty/site_config/standard/theverge.com.txt b/inc/3rdparty/site_config/standard/theverge.com.txt index 78f8654a..cee50c9b 100755 --- a/inc/3rdparty/site_config/standard/theverge.com.txt +++ b/inc/3rdparty/site_config/standard/theverge.com.txt | |||
@@ -33,6 +33,8 @@ strip_id_or_class: m-ad | |||
33 | strip_id_or_class: social-sharing | 33 | strip_id_or_class: social-sharing |
34 | strip_id_or_class: m-video-entry__excerpt | 34 | strip_id_or_class: m-video-entry__excerpt |
35 | strip_id_or_class: hidden | 35 | strip_id_or_class: hidden |
36 | strip_id_or_class: m-article__follow-bar | ||
37 | strip_id_or_class: m-article__share-buttons | ||
36 | 38 | ||
37 | replace_string(<noscript>): <div> | 39 | replace_string(<noscript>): <div> |
38 | replace_string(</noscript>): </div> | 40 | replace_string(</noscript>): </div> |
diff --git a/inc/3rdparty/site_config/standard/titanic-magazin.de.txt b/inc/3rdparty/site_config/standard/titanic-magazin.de.txt new file mode 100755 index 00000000..70108e36 --- /dev/null +++ b/inc/3rdparty/site_config/standard/titanic-magazin.de.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | body: //div[contains(@class, 'tt_news-bodytext')] | ||
2 | |||
3 | # cut html short | ||
4 | find_string: <!--TYPO3SEARCH_end--> | ||
5 | replace_string: </div></body></html> | ||
6 | |||
7 | test_url: http://www.titanic-magazin.de/ich.war.bei.der.waffen.rss | ||
8 | test_url: http://www.titanic-magazin.de/news/wenig-bekannte-fakten-ueber-2014-6986/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tofugu.com.txt b/inc/3rdparty/site_config/standard/tofugu.com.txt new file mode 100644 index 00000000..5ac9d6a0 --- /dev/null +++ b/inc/3rdparty/site_config/standard/tofugu.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | body://div[@class='entry-content'] | ||
2 | |||
3 | test_url: http://www.tofugu.com/2015/07/20/interview-with-toriena-japanese-chiptune/ | ||
diff --git a/inc/3rdparty/site_config/standard/truongtx.me.txt b/inc/3rdparty/site_config/standard/truongtx.me.txt new file mode 100755 index 00000000..6b10adce --- /dev/null +++ b/inc/3rdparty/site_config/standard/truongtx.me.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | prune: false | ||
2 | tidy: false | ||
3 | |||
4 | body: //div[@class='col-md-9'] | ||
5 | author: //meta[@name='author'] | ||
6 | date: //i[@class='fa fa-calendar']/../span | ||
7 | title: //div[@class='page-header']/h1 | ||
8 | |||
9 | test_url: https://truongtx.me/2014/04/20/emacs-javascript-completion-and-refactoring/ | ||
diff --git a/inc/3rdparty/site_config/standard/utdailybeacon.com.txt b/inc/3rdparty/site_config/standard/utdailybeacon.com.txt index d37911bc..c4593d55 100755 --- a/inc/3rdparty/site_config/standard/utdailybeacon.com.txt +++ b/inc/3rdparty/site_config/standard/utdailybeacon.com.txt | |||
@@ -1,5 +1,2 @@ | |||
1 | title: //h1 | 1 | body: //div[@id='blox-story-text'] |
2 | author: //*[@class='byline'] | 2 | test_url: http://www.utdailybeacon.com/news/article_ccf6d024-0f15-11e5-ae29-9f63598deb81.html |
3 | date: substring-after(//*[@class='pubdatetime'], 'Published: ') | ||
4 | body: //*[@class='body-block'] | ||
5 | test_url: http://utdailybeacon.com/news/2012/oct/8/energy-forum-continues/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/volkskrant.nl.txt b/inc/3rdparty/site_config/standard/volkskrant.nl.txt new file mode 100755 index 00000000..c2770349 --- /dev/null +++ b/inc/3rdparty/site_config/standard/volkskrant.nl.txt | |||
@@ -0,0 +1,15 @@ | |||
1 | #bypass cookie check | ||
2 | single_page_link: //a[contains(@href, '/cookiewall/accept.do?')] | ||
3 | |||
4 | title: //h1[@itemprop="headline"] | ||
5 | body: //figure[contains(@class, 'article__top-image')] | //div[@itemprop="articleBody"] | ||
6 | |||
7 | strip: //div[contains(@class, 'media-container') and contains(@class, 'pull-right')] | ||
8 | |||
9 | tidy: no | ||
10 | prune: no | ||
11 | |||
12 | test_url: http://www.volkskrant.nl/sport/dossier-wereldvoetbalbond-fifa-wankelt~a4042695/ | ||
13 | test_contains: De belangrijkste spil in het | ||
14 | |||
15 | test_url: http://www.volkskrant.nl/nieuws/rss.xml \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/warnerbros.fr.txt b/inc/3rdparty/site_config/standard/warnerbros.fr.txt index 21f56352..6215b727 100755 --- a/inc/3rdparty/site_config/standard/warnerbros.fr.txt +++ b/inc/3rdparty/site_config/standard/warnerbros.fr.txt | |||
@@ -1,3 +1,2 @@ | |||
1 | title: //h3 | 1 | body: //div[@class="article-body"] |
2 | body: //div[@class="content_wysiwyg"] | 2 | test_url: https://www.warnerbros.fr/articles/magic-mike-xxl-adam-rodriguez-portrait |
3 | test_url: http://www.warnerbros.fr/game-of-thrones-un-junket-vu-de-l-interieur-268.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/washingtonpost.com.txt b/inc/3rdparty/site_config/standard/washingtonpost.com.txt index 0aa9f1d8..c29af00f 100755 --- a/inc/3rdparty/site_config/standard/washingtonpost.com.txt +++ b/inc/3rdparty/site_config/standard/washingtonpost.com.txt | |||
@@ -5,9 +5,14 @@ body: //div[contains(@class, "article_body")] | |||
5 | body: //div[@id='print_facet']//div[@id='body'] | 5 | body: //div[@id='print_facet']//div[@id='body'] |
6 | 6 | ||
7 | author://meta[@name='DC.creator']/@content | 7 | author://meta[@name='DC.creator']/@content |
8 | author://span[@class="pb-byline"] | ||
9 | author://h3[@property='dc.creator']//a[@rel='author'] | ||
8 | title://meta[@name='title']/@content | 10 | title://meta[@name='title']/@content |
9 | date://div[contains(@class,'byline')]//span[contains(@class,'published')]/@title | 11 | date://div[contains(@class,'byline')]//span[contains(@class,'published')]/@title |
10 | date://meta[@name="DC.date.issued"]/@content | 12 | date://meta[@name="DC.date.issued"]/@content |
13 | date://span[contains(@class,"pb-timestamp")] | ||
14 | date://meta[@name="eomportal-lastUpdate"]/@content | ||
15 | |||
11 | strip://div[@class="relative primary-slot padding-top img-border gallery-container photo-wrapper"] | 16 | strip://div[@class="relative primary-slot padding-top img-border gallery-container photo-wrapper"] |
12 | strip://div[@id="wp-column six end"] | 17 | strip://div[@id="wp-column six end"] |
13 | strip://div[contains(@class,'hidden')] | 18 | strip://div[contains(@class,'hidden')] |
@@ -23,6 +28,7 @@ strip_id_or_class: module | |||
23 | 28 | ||
24 | # Change gJQAwdJG4U_story.html to gJQAwdJG4U_print.html | 29 | # Change gJQAwdJG4U_story.html to gJQAwdJG4U_print.html |
25 | single_page_link: concat(substring-before(//link[@rel="canonical"]/@href, "_story.html"), "_print.html") | 30 | single_page_link: concat(substring-before(//link[@rel="canonical"]/@href, "_story.html"), "_print.html") |
31 | if_page_contains: //link[@rel="canonical" and contains(@href, '_story.html')] | ||
26 | 32 | ||
27 | # [OLD] Change gJQAwdJG4U_story.html to gJQAwdJG4U_story_print.html | 33 | # [OLD] Change gJQAwdJG4U_story.html to gJQAwdJG4U_story_print.html |
28 | #single_page_link: concat(substring-before(//link[@rel="canonical"]/@href, "_story.html"), "_story_print.html") | 34 | #single_page_link: concat(substring-before(//link[@rel="canonical"]/@href, "_story.html"), "_story_print.html") |
diff --git a/inc/3rdparty/site_config/standard/watoday.com.au.txt b/inc/3rdparty/site_config/standard/watoday.com.au.txt new file mode 100755 index 00000000..4302ac5e --- /dev/null +++ b/inc/3rdparty/site_config/standard/watoday.com.au.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | author: //h3[@class="authorName"] | ||
2 | date: //dd[@class='updated dtstamp']//time | ||
3 | |||
4 | strip: //div[contains(@class, "adspot")] | ||
5 | strip: //noscript | ||
6 | strip: //p//small | ||
7 | |||
diff --git a/inc/3rdparty/site_config/standard/weeklytimesnow.com.au.txt b/inc/3rdparty/site_config/standard/weeklytimesnow.com.au.txt new file mode 100755 index 00000000..a79871f3 --- /dev/null +++ b/inc/3rdparty/site_config/standard/weeklytimesnow.com.au.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | body: //div[@class='main-col' or @class='article-image-wide'] | ||
2 | title: //h1[@class='article-title'] | ||
3 | author: substring-before(//span[@class='author'], "|") | ||
4 | date: //span[@class='date'] | ||
diff --git a/inc/3rdparty/site_config/standard/westernadvocate.com.au.txt b/inc/3rdparty/site_config/standard/westernadvocate.com.au.txt new file mode 100755 index 00000000..eb00f776 --- /dev/null +++ b/inc/3rdparty/site_config/standard/westernadvocate.com.au.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | title: //header[contains(@class, "news-article-title")]//h1 | ||
2 | date: //div[@class="news-article-byline"]//time | ||
3 | author: //h2[@class="news-article-author"]//cite | ||
4 | |||
5 | # Turns out that westernadvocate is doing funky things with the slide show images. :< | ||
6 | # body: //ul[@class="slides"]//img | //div[contains(@class, "news-article-body")] | ||
7 | body: //div[contains(@class, "news-article-body")] | ||
8 | |||
9 | strip: //div[contains(@class, "flexslider")] | ||
10 | |||
11 | test_url: http://www.westernadvocate.com.au/story/1532050/roos-accept-ziebell-ban-commentators-do-not/ | ||
diff --git a/inc/3rdparty/site_config/standard/wsj.com.txt b/inc/3rdparty/site_config/standard/wsj.com.txt new file mode 100755 index 00000000..467c39c2 --- /dev/null +++ b/inc/3rdparty/site_config/standard/wsj.com.txt | |||
@@ -0,0 +1,29 @@ | |||
1 | title: //meta[@property="og:title"]/@content | ||
2 | body: //div[@id='wsj-article-wrap'] | ||
3 | # is this still used? | ||
4 | body: //div[@id='article_story_body'] | ||
5 | |||
6 | author: //h3[@class='byline']/a | ||
7 | # for slide show content | ||
8 | body: //ul[@id='imageSlide']//li[@class='firstSlide']//img | (//div[@class='txt_body']//p)[1] | ||
9 | date: //li[@class='dateStamp']/small | ||
10 | |||
11 | strip_id_or_class: insetFullBracket | ||
12 | strip_id_or_class: insettipBox | ||
13 | #strip_id_or_class: legacyInset | ||
14 | strip_id_or_class: recipeACShopAndBuyText | ||
15 | |||
16 | strip: //div[contains(@class, 'insetContent')]//cite | ||
17 | strip: //*[contains(@style, 'visibility: hidden;')] | ||
18 | strip: //div[contains(@class, 'insetContent') and not(contains(@class, 'image'))] | ||
19 | strip: //div[contains(@class, 'carousel')] | ||
20 | |||
21 | prune: no | ||
22 | tidy: no | ||
23 | |||
24 | test_url: http://www.wsj.com/articles/airasia-flight-8501-tail-recovered-1420878809 | ||
25 | test_contains: Saturday evening that the black boxes | ||
26 | test_url: http://www.wsj.com/news/articles/SB10001424052702304626304579509100018004342 | ||
27 | test_url: http://www.wsj.com/article/SB10001424052970203363504577185322849515102.html | ||
28 | # slide show | ||
29 | test_url: http://www.wsj.com/article/SB10001424052970204791104577110550376458164.html | ||
diff --git a/inc/3rdparty/site_config/standard/yourerie.com.txt b/inc/3rdparty/site_config/standard/yourerie.com.txt index b46b09e8..46ee5ba1 100755 --- a/inc/3rdparty/site_config/standard/yourerie.com.txt +++ b/inc/3rdparty/site_config/standard/yourerie.com.txt | |||
@@ -1,2 +1,2 @@ | |||
1 | body: //div[@class="nxFullTextData"] | 1 | body: //div[@itemprop="articleBody"] |
2 | test_url: http://yourerie.com/fulltext?nxd_id=306552 | 2 | test_url: http://www.yourerie.com/news/news-article/d/story/cd-release-party-at-pi-downs/22898/G_gFL3mSQkWH_DW2wLuMOA |
diff --git a/inc/3rdparty/site_config/standard/zeit.de.txt b/inc/3rdparty/site_config/standard/zeit.de.txt index 9815d478..4e008946 100755 --- a/inc/3rdparty/site_config/standard/zeit.de.txt +++ b/inc/3rdparty/site_config/standard/zeit.de.txt | |||
@@ -1,3 +1,4 @@ | |||
1 | # 2015.07.08 [Marvin Dickhaus] fixed single_page_link | ||
1 | # 2013.10.30 [rezor92] fixed single_page_link | 2 | # 2013.10.30 [rezor92] fixed single_page_link |
2 | # 2012-12-23 [carlo@...] fixed half-assed headlines in articles, removed inline author profiles, adjusted picture captions | 3 | # 2012-12-23 [carlo@...] fixed half-assed headlines in articles, removed inline author profiles, adjusted picture captions |
3 | # 2012-03-17 [dkless@...] Cut metadata parts in the beginning and the ends of the content block; copyright entries for pictures removed; Author fixed, not sure if old entries still valid (I left them); Weird problems with some pages addressed (see last section for removing hidden section) | 4 | # 2012-03-17 [dkless@...] Cut metadata parts in the beginning and the ends of the content block; copyright entries for pictures removed; Author fixed, not sure if old entries still valid (I left them); Weird problems with some pages addressed (see last section for removing hidden section) |
@@ -5,8 +6,7 @@ | |||
5 | # 2011-08-23 [carlo@...] changed single page link to use print version: page works better, less ambiguity. Related cleanups and simplifications. | 6 | # 2011-08-23 [carlo@...] changed single page link to use print version: page works better, less ambiguity. Related cleanups and simplifications. |
6 | # 2011-08-20 [carlo@...] added author, fixed date | 7 | # 2011-08-20 [carlo@...] added author, fixed date |
7 | 8 | ||
8 | 9 | single_page_link: //a[contains(@href, 'komplettansicht')] | |
9 | single_page_link: //a[@title='Auf einer Seite'] | ||
10 | tidy: no | 10 | tidy: no |
11 | 11 | ||
12 | title: //title | 12 | title: //title |
@@ -24,6 +24,8 @@ strip: //p[@class="copyright"] | |||
24 | strip: //div[@class="copyright"] | 24 | strip: //div[@class="copyright"] |
25 | #Removes pagination links at the end | 25 | #Removes pagination links at the end |
26 | strip: //div[@class="pagination"] | 26 | strip: //div[@class="pagination"] |
27 | #Removes link to main page at the bottom of some articles (Zur Startseite) | ||
28 | strip: //a[@href='http://www.zeit.de'] | ||
27 | 29 | ||
28 | # Fix picture captions | 30 | # Fix picture captions |
29 | wrap_in(small): //p[@class="caption"]/text() | 31 | wrap_in(small): //p[@class="caption"]/text() |
@@ -43,3 +45,4 @@ strip_id_or_class:"pagination" | |||
43 | 45 | ||
44 | footnotes: no | 46 | footnotes: no |
45 | test_url: http://www.zeit.de/kultur/film/2012-12/Kurzfilmtag | 47 | test_url: http://www.zeit.de/kultur/film/2012-12/Kurzfilmtag |
48 | test_url: http://www.zeit.de/kultur/2015-07/kapitalismuskritik-selbstberuhigung-armin-nassehi | ||