aboutsummaryrefslogtreecommitdiffhomepage
path: root/inc/3rdparty
diff options
context:
space:
mode:
authorJeremy Benoist <j0k3r@users.noreply.github.com>2015-08-02 08:59:20 +0200
committerJeremy Benoist <j0k3r@users.noreply.github.com>2015-08-02 08:59:20 +0200
commite968ec6c2244aee600358b3c87648b2409c97945 (patch)
treea5246151bf8278a0e8053b9dd1ce9d770073c50b /inc/3rdparty
parent87b723d7620f9b4e1f3870c1d13e1415d1967f0d (diff)
parent3bb6a8ed2ab40b17d3f3b9925664c8da38ea2570 (diff)
downloadwallabag-e968ec6c2244aee600358b3c87648b2409c97945.tar.gz
wallabag-e968ec6c2244aee600358b3c87648b2409c97945.tar.zst
wallabag-e968ec6c2244aee600358b3c87648b2409c97945.zip
Merge pull request #1266 from wallabag/fix-site-config
Fix site config
Diffstat (limited to 'inc/3rdparty')
-rwxr-xr-xinc/3rdparty/site_config/standard/20min.ch.txt24
-rwxr-xr-xinc/3rdparty/site_config/standard/24.ae.txt8
-rwxr-xr-xinc/3rdparty/site_config/standard/9gag.com.txt6
-rwxr-xr-xinc/3rdparty/site_config/standard/ad.nl.txt7
-rwxr-xr-xinc/3rdparty/site_config/standard/albayan.ae.txt2
-rwxr-xr-xinc/3rdparty/site_config/standard/androidpolice.com.txt3
-rwxr-xr-xinc/3rdparty/site_config/standard/artofmanliness.com.txt6
-rwxr-xr-xinc/3rdparty/site_config/standard/au.businessinsider.com.txt12
-rwxr-xr-xinc/3rdparty/site_config/standard/au.news.yahoo.com.txt4
-rwxr-xr-xinc/3rdparty/site_config/standard/bbc.co.uk.txt6
-rwxr-xr-xinc/3rdparty/site_config/standard/bbc.com.txt6
-rwxr-xr-xinc/3rdparty/site_config/standard/blog.cloudflare.com.txt8
-rwxr-xr-xinc/3rdparty/site_config/standard/blogs.msdn.com.txt4
-rwxr-xr-xinc/3rdparty/site_config/standard/brandeins.de.txt14
-rwxr-xr-xinc/3rdparty/site_config/standard/brokernews.com.au.txt2
-rwxr-xr-xinc/3rdparty/site_config/standard/business.time.com.txt17
-rwxr-xr-xinc/3rdparty/site_config/standard/choice.com.au.txt4
-rwxr-xr-xinc/3rdparty/site_config/standard/cnet.com.au.txt17
-rwxr-xr-xinc/3rdparty/site_config/standard/computerbase.de.txt4
-rwxr-xr-xinc/3rdparty/site_config/standard/contrepoints.org.txt21
-rwxr-xr-xinc/3rdparty/site_config/standard/cooper.com.txt4
-rwxr-xr-xinc/3rdparty/site_config/standard/cwnp.com.txt14
-rwxr-xr-xinc/3rdparty/site_config/standard/dailymail.co.uk.txt9
-rwxr-xr-xinc/3rdparty/site_config/standard/dailytelegraph.com.au.txt5
-rwxr-xr-xinc/3rdparty/site_config/standard/deadspin.com.txt1
-rwxr-xr-xinc/3rdparty/site_config/standard/derbund.ch.txt13
-rwxr-xr-xinc/3rdparty/site_config/standard/designbuildsource.com.au.txt2
-rwxr-xr-xinc/3rdparty/site_config/standard/dilbert.com.txt12
-rwxr-xr-xinc/3rdparty/site_config/standard/dn.se.txt6
-rwxr-xr-xinc/3rdparty/site_config/standard/economie.gouv.fr.txt4
-rwxr-xr-xinc/3rdparty/site_config/standard/entwickler.de.txt7
-rwxr-xr-xinc/3rdparty/site_config/standard/explosm.net.txt4
-rwxr-xr-xinc/3rdparty/site_config/standard/facebook.com.txt6
-rwxr-xr-xinc/3rdparty/site_config/standard/fastcompany.com.txt34
-rwxr-xr-xinc/3rdparty/site_config/standard/fok.nl.txt4
-rwxr-xr-xinc/3rdparty/site_config/standard/forsvaret.no.txt3
-rwxr-xr-xinc/3rdparty/site_config/standard/france24.com.txt14
-rwxr-xr-xinc/3rdparty/site_config/standard/galwayindependent.com.txt3
-rwxr-xr-xinc/3rdparty/site_config/standard/gameblog.fr.txt4
-rwxr-xr-xinc/3rdparty/site_config/standard/gawker.com.txt2
-rwxr-xr-xinc/3rdparty/site_config/standard/getpocket.com.txt1
-rwxr-xr-xinc/3rdparty/site_config/standard/gist.github.com.txt8
-rwxr-xr-xinc/3rdparty/site_config/standard/gizmodo.com.au.txt8
-rwxr-xr-xinc/3rdparty/site_config/standard/gizmodo.com.txt2
-rwxr-xr-xinc/3rdparty/site_config/standard/globalgrind.com.txt6
-rwxr-xr-xinc/3rdparty/site_config/standard/gocomics.com.txt5
-rwxr-xr-xinc/3rdparty/site_config/standard/help.fivefilters.org.txt2
-rwxr-xr-xinc/3rdparty/site_config/standard/heraldsun.com.au.txt12
-rwxr-xr-xinc/3rdparty/site_config/standard/hiiraan.com.txt10
-rwxr-xr-xinc/3rdparty/site_config/standard/independent.co.uk.txt11
-rwxr-xr-xinc/3rdparty/site_config/standard/io9.com.txt1
-rwxr-xr-xinc/3rdparty/site_config/standard/ippmedia.com.txt4
-rwxr-xr-xinc/3rdparty/site_config/standard/itnews.com.au.txt5
-rwxr-xr-xinc/3rdparty/site_config/standard/jalopnik.com.txt3
-rwxr-xr-xinc/3rdparty/site_config/standard/jezebel.com.txt1
-rwxr-xr-xinc/3rdparty/site_config/standard/kotaku.com.txt3
-rwxr-xr-xinc/3rdparty/site_config/standard/lefigaro.fr.txt5
-rwxr-xr-xinc/3rdparty/site_config/standard/lifehacker.com.txt8
-rwxr-xr-xinc/3rdparty/site_config/standard/linuxjournal.com.txt6
-rwxr-xr-xinc/3rdparty/site_config/standard/m.bbc.co.uk.txt6
-rwxr-xr-xinc/3rdparty/site_config/standard/m.facebook.com.txt12
-rwxr-xr-xinc/3rdparty/site_config/standard/m.theregister.co.uk.txt4
-rwxr-xr-xinc/3rdparty/site_config/standard/marketingmag.com.au.txt1
-rwxr-xr-xinc/3rdparty/site_config/standard/medium.com.txt2
-rwxr-xr-xinc/3rdparty/site_config/standard/menshealth.com.sg.txt9
-rwxr-xr-xinc/3rdparty/site_config/standard/mitchellrepublic.com.txt3
-rwxr-xr-xinc/3rdparty/site_config/standard/mobile.nytimes.com.txt3
-rwxr-xr-xinc/3rdparty/site_config/standard/moneymanagement.com.au.txt2
-rwxr-xr-xinc/3rdparty/site_config/standard/nbnnews.com.au.txt3
-rwxr-xr-xinc/3rdparty/site_config/standard/news.com.au.txt3
-rwxr-xr-xinc/3rdparty/site_config/standard/news.menshealth.com.txt1
-rwxr-xr-xinc/3rdparty/site_config/standard/news.ninemsn.com.au.txt3
-rwxr-xr-xinc/3rdparty/site_config/standard/parool.nl.txt7
-rwxr-xr-xinc/3rdparty/site_config/standard/perthnow.com.au.txt12
-rwxr-xr-xinc/3rdparty/site_config/standard/planetsave.com.txt1
-rwxr-xr-xinc/3rdparty/site_config/standard/presseportal.de.txt11
-rw-r--r--inc/3rdparty/site_config/standard/quechoisir.org.txt10
-rwxr-xr-xinc/3rdparty/site_config/standard/quora.com.txt18
-rwxr-xr-xinc/3rdparty/site_config/standard/reddit.com.txt6
-rwxr-xr-xinc/3rdparty/site_config/standard/rockpapershotgun.com.txt4
-rwxr-xr-xinc/3rdparty/site_config/standard/saadaalnews.net.txt11
-rwxr-xr-xinc/3rdparty/site_config/standard/smh.com.au.txt14
-rwxr-xr-xinc/3rdparty/site_config/standard/smh.drive.com.au.txt13
-rwxr-xr-xinc/3rdparty/site_config/standard/smithsonianmag.com.txt5
-rwxr-xr-xinc/3rdparty/site_config/standard/snip.ly.txt3
-rwxr-xr-xinc/3rdparty/site_config/standard/soundcity.tv.txt3
-rwxr-xr-xinc/3rdparty/site_config/standard/spiegel.de.txt25
-rwxr-xr-xinc/3rdparty/site_config/standard/srf.ch.txt24
-rwxr-xr-xinc/3rdparty/site_config/standard/sueddeutsche.de.txt2
-rwxr-xr-xinc/3rdparty/site_config/standard/sunshinecoastdaily.com.au.txt10
-rwxr-xr-xinc/3rdparty/site_config/standard/sz.de.txt2
-rwxr-xr-xinc/3rdparty/site_config/standard/tagesanzeiger.ch.txt14
-rwxr-xr-xinc/3rdparty/site_config/standard/tagesschau.de.txt29
-rwxr-xr-xinc/3rdparty/site_config/standard/taz.de.txt3
-rwxr-xr-xinc/3rdparty/site_config/standard/theatlantic.com.txt9
-rwxr-xr-xinc/3rdparty/site_config/standard/theaustralian.com.au.txt6
-rwxr-xr-xinc/3rdparty/site_config/standard/thebostonchannel.com.txt4
-rwxr-xr-xinc/3rdparty/site_config/standard/theguardian.com.txt4
-rwxr-xr-xinc/3rdparty/site_config/standard/theregister.co.uk.txt15
-rwxr-xr-xinc/3rdparty/site_config/standard/theverge.com.txt2
-rwxr-xr-xinc/3rdparty/site_config/standard/titanic-magazin.de.txt8
-rw-r--r--inc/3rdparty/site_config/standard/tofugu.com.txt3
-rwxr-xr-xinc/3rdparty/site_config/standard/truongtx.me.txt9
-rwxr-xr-xinc/3rdparty/site_config/standard/utdailybeacon.com.txt7
-rwxr-xr-xinc/3rdparty/site_config/standard/volkskrant.nl.txt15
-rwxr-xr-xinc/3rdparty/site_config/standard/warnerbros.fr.txt5
-rwxr-xr-xinc/3rdparty/site_config/standard/washingtonpost.com.txt6
-rwxr-xr-xinc/3rdparty/site_config/standard/watoday.com.au.txt7
-rwxr-xr-xinc/3rdparty/site_config/standard/weeklytimesnow.com.au.txt4
-rwxr-xr-xinc/3rdparty/site_config/standard/westernadvocate.com.au.txt11
-rwxr-xr-xinc/3rdparty/site_config/standard/wsj.com.txt29
-rwxr-xr-xinc/3rdparty/site_config/standard/yourerie.com.txt4
-rwxr-xr-xinc/3rdparty/site_config/standard/zeit.de.txt7
113 files changed, 742 insertions, 119 deletions
diff --git a/inc/3rdparty/site_config/standard/20min.ch.txt b/inc/3rdparty/site_config/standard/20min.ch.txt
new file mode 100755
index 00000000..cd8e3fc0
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/20min.ch.txt
@@ -0,0 +1,24 @@
1# Author: cirnod@gmail.com
2
3tidy: no
4prune: no
5
6title: //h1
7date: /html/body/div[3]/div[1]/div[6]/div/div[1]/div[2]/div[1]/div/p
8body: //div[@class='published clearfix'] | //div[@class='story_titles']/h3 | //div[@class='story_text']
9
10# General Cleanup
11strip_id_or_class: info_panel
12strip_id_or_class: info_poll
13strip_id_or_class: teaser
14strip_id_or_class: panelbox
15strip_id_or_class: polls
16strip_id_or_class: warning
17strip_id_or_class: vplaceholder
18
19# visual removal only -> complete removal doesn't work
20replace_string(Print</a>): </a>
21
22# Try yourself
23test_url: http://www.20min.ch/wissen/news/story/31588952
24test_url: http://www.20min.ch/digital/dossier/apple/story/So-einfach-laesst-sich-das-iPhone-6-Plus-verbiegen-24651169
diff --git a/inc/3rdparty/site_config/standard/24.ae.txt b/inc/3rdparty/site_config/standard/24.ae.txt
new file mode 100755
index 00000000..6e515076
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/24.ae.txt
@@ -0,0 +1,8 @@
1title: //div[@id='DivTitle']
2body: //div[@id='divImages' or @id='Divkhabarcontent']
3author: //div[@id='DivAuthor']
4
5prune: no
6
7test_url: http://24.ae/article.aspx?ArticleId=123304
8test_url: http://24.ae/rss.aspx?pageId=30
diff --git a/inc/3rdparty/site_config/standard/9gag.com.txt b/inc/3rdparty/site_config/standard/9gag.com.txt
new file mode 100755
index 00000000..4ebb62ad
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/9gag.com.txt
@@ -0,0 +1,6 @@
1# Generated by FiveFilters.org's web-based selection tool
2# Place this file inside your site_config/custom/ folder
3# Source: http://siteconfig.fivefilters.org/grab.php?url=http%3A%2F%2F9gag.com%2Fgag%2FaDwQnO7
4
5body: //div[contains(concat(' ',normalize-space(@class),' '),' badge-post-container ')]
6test_url: http://9gag.com/gag/aDwQnO7
diff --git a/inc/3rdparty/site_config/standard/ad.nl.txt b/inc/3rdparty/site_config/standard/ad.nl.txt
new file mode 100755
index 00000000..422faa57
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/ad.nl.txt
@@ -0,0 +1,7 @@
1#bypass cookie check
2single_page_link: //a[contains(@href, '/acceptCookieCheck.do?url=')]
3
4test_url: http://www.ad.nl/ad/nl/10444/Offside/article/detail/4043834/2015/05/31/Dani-Alves-voetbalt-met-drol-op-zijn-hoofd.dhtml
5test_contains: De nieuwe coupe van Alves
6
7test_url: http://www.ad.nl/digitaal/rss.xml \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/albayan.ae.txt b/inc/3rdparty/site_config/standard/albayan.ae.txt
index f6c093d2..d52700b3 100755
--- a/inc/3rdparty/site_config/standard/albayan.ae.txt
+++ b/inc/3rdparty/site_config/standard/albayan.ae.txt
@@ -1,5 +1,7 @@
1body: //div[@id='main-column']//div[@class='content'] 1body: //div[@id='main-column']//div[@class='content']
2 2
3strip_id_or_class: social-buttons
4
3prune: no 5prune: no
4 6
5test_url: http://www.albayan.ae/across-the-uae/education/2013-08-29-1.1949645 7test_url: http://www.albayan.ae/across-the-uae/education/2013-08-29-1.1949645
diff --git a/inc/3rdparty/site_config/standard/androidpolice.com.txt b/inc/3rdparty/site_config/standard/androidpolice.com.txt
index 8f9b1a21..660f29d9 100755
--- a/inc/3rdparty/site_config/standard/androidpolice.com.txt
+++ b/inc/3rdparty/site_config/standard/androidpolice.com.txt
@@ -1,5 +1,6 @@
1body: //div[@class='post_content'] 1body: //div[@class='post_content']
2date: //div[@class='date_day'] | //div[@class='date_month'] 2date: //div[@class='date_day'] | //div[@class='date_month']
3strip_id_or_class: author-box
4author: //h2[@class='author-box-heading']/a
3 5
4test_url: http://www.androidpolice.com/2014/03/30/music-boss-for-pebble-can-now-control-playback-and-volume-on-chromecast-content-from-your-smartwatch/ 6test_url: http://www.androidpolice.com/2014/03/30/music-boss-for-pebble-can-now-control-playback-and-volume-on-chromecast-content-from-your-smartwatch/
5
diff --git a/inc/3rdparty/site_config/standard/artofmanliness.com.txt b/inc/3rdparty/site_config/standard/artofmanliness.com.txt
new file mode 100755
index 00000000..b29ea0db
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/artofmanliness.com.txt
@@ -0,0 +1,6 @@
1parser: html5php
2date: //article/p[contains(@class, 'single-date')]
3author: //article/p[contains(@class, 'byline')]
4
5test_url: http://www.artofmanliness.com/2013/01/31/relationship-red-flags/
6test_contains: It seems that once we get close to a person \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/au.businessinsider.com.txt b/inc/3rdparty/site_config/standard/au.businessinsider.com.txt
new file mode 100755
index 00000000..46bcddf2
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/au.businessinsider.com.txt
@@ -0,0 +1,12 @@
1title://div[@class="sl-layout-post"]/h1
2body: //div[@id='content_post']
3strip: //div[contains(@class, "post-sidebar")]
4strip: //div[@id='related-links']
5strip: //img[@class='size_xlarge']
6author://div[@class="byline"]/a
7date://div[@class="byline"]/span[@class="date"]
8prune: no
9tidy: no
10
11
12test_url: http://www.businessinsider.com/microsoft-just-put-one-of-its-hardcore-technical-geniuses-on-xbox-2012-1
diff --git a/inc/3rdparty/site_config/standard/au.news.yahoo.com.txt b/inc/3rdparty/site_config/standard/au.news.yahoo.com.txt
new file mode 100755
index 00000000..8e84cbbb
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/au.news.yahoo.com.txt
@@ -0,0 +1,4 @@
1strip: //a[contains(text(), "RELATED:")]
2author: //div[@class="info"]//span[@class="association printer-source"]
3date: //div[@class="info"]//span[@class="stamp printer-date"]
4
diff --git a/inc/3rdparty/site_config/standard/bbc.co.uk.txt b/inc/3rdparty/site_config/standard/bbc.co.uk.txt
index bad77654..7bef73ad 100755
--- a/inc/3rdparty/site_config/standard/bbc.co.uk.txt
+++ b/inc/3rdparty/site_config/standard/bbc.co.uk.txt
@@ -30,6 +30,12 @@ strip: //div[contains(@class, 'comment-introduction')]
30strip: //div[contains(@class, 'share-tools')] 30strip: //div[contains(@class, 'share-tools')]
31strip: //div[@id='also-related-links'] 31strip: //div[@id='also-related-links']
32 32
33find_string: http://ichef.bbci.co.uk/news/200/
34replace_string: http://ichef.bbci.co.uk/news/624/
35
36find_string: http://ichef.bbci.co.uk/news/304/
37replace_string: http://ichef.bbci.co.uk/news/624/
38
33strip_id_or_class: share-help 39strip_id_or_class: share-help
34strip_id_or_class: comments_module 40strip_id_or_class: comments_module
35 41
diff --git a/inc/3rdparty/site_config/standard/bbc.com.txt b/inc/3rdparty/site_config/standard/bbc.com.txt
index c04a683e..200dba63 100755
--- a/inc/3rdparty/site_config/standard/bbc.com.txt
+++ b/inc/3rdparty/site_config/standard/bbc.com.txt
@@ -33,6 +33,12 @@ strip: //div[@id='also-related-links']
33strip_id_or_class: share-help 33strip_id_or_class: share-help
34strip_id_or_class: comments_module 34strip_id_or_class: comments_module
35 35
36find_string: http://ichef.bbci.co.uk/news/200/
37replace_string: http://ichef.bbci.co.uk/news/624/
38
39find_string: http://ichef.bbci.co.uk/news/304/
40replace_string: http://ichef.bbci.co.uk/news/624/
41
36replace_string(<noscript>): <div> 42replace_string(<noscript>): <div>
37replace_string(</noscript>): </div> 43replace_string(</noscript>): </div>
38 44
diff --git a/inc/3rdparty/site_config/standard/blog.cloudflare.com.txt b/inc/3rdparty/site_config/standard/blog.cloudflare.com.txt
index 9b7cf25c..db80a35f 100755
--- a/inc/3rdparty/site_config/standard/blog.cloudflare.com.txt
+++ b/inc/3rdparty/site_config/standard/blog.cloudflare.com.txt
@@ -3,11 +3,7 @@ title: substring-before(//title, '-')
3 3
4author: //a[ contains(@href, '/people') ] 4author: //a[ contains(@href, '/people') ]
5 5
6body: //article[contains(concat(' ',normalize-space(@class),' '),' post ')] 6body: //div[ @class='post' ]
7
8strip_id_or_class: section learn-more
9strip_id_or_class: section comments
10strip_id_or_class: disqus_thread
11 7
12# Date is impossible to retrieve since they use those stupid "fuzzy" dates, inserted through javascript, at posterous. 8# Date is impossible to retrieve since they use those stupid "fuzzy" dates, inserted through javascript, at posterous.
13test_url: http://blog.cloudflare.com/understanding-analytics-when-is-a-page-view-n 9test_url: http://blog.cloudflare.com/understanding-analytics-when-is-a-page-view-n \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blogs.msdn.com.txt b/inc/3rdparty/site_config/standard/blogs.msdn.com.txt
index b2ff8332..11b8d42d 100755
--- a/inc/3rdparty/site_config/standard/blogs.msdn.com.txt
+++ b/inc/3rdparty/site_config/standard/blogs.msdn.com.txt
@@ -1,6 +1,6 @@
1title: //h3[@class="post-name"] 1title: //h3[@class="post-name"]
2author: //span[@class="user-name"] 2author: //span[@class="user-name"]
3date: //div[@class="post-date"] 3date: //div[@class="post-date"]/span[@class="value"]
4body: //div[@class="post-content user-defined-markup"] 4body: //div[@class="post-content user-defined-markup"]
5footnotes: no 5footnotes: no
6test_url: http://blogs.msdn.com/b/b8/archive/2011/10/04/designing-the-start-screen.aspx \ No newline at end of file 6test_url: http://blogs.msdn.com/b/b8/archive/2011/10/04/designing-the-start-screen.aspx
diff --git a/inc/3rdparty/site_config/standard/brandeins.de.txt b/inc/3rdparty/site_config/standard/brandeins.de.txt
index 36aa2efa..be326346 100755
--- a/inc/3rdparty/site_config/standard/brandeins.de.txt
+++ b/inc/3rdparty/site_config/standard/brandeins.de.txt
@@ -1,7 +1,9 @@
1# set body
2body: //div[@id='theContent']
3 1
4# set title 2body: //div[@class="articleTeaser"] | //section[@class="contentSection"]
5title: //div[@id='theContent']/h3 3
6strip: //div[@id='theContent']/h3 4strip: //section[@class="greenBox italic"]
7test_url: http://www.brandeins.de/archiv/magazin/gegessen-wird-immer/artikel/hunger.html \ No newline at end of file 5
6author: //div[@class="articleAuthor"]
7# no publish date on page (the articles are from a monthly periodical)
8
9test_url: http://www.brandeins.de/archiv/2015/fuehrung/ministry-group-mach-doch-mal-ne-ansage/
diff --git a/inc/3rdparty/site_config/standard/brokernews.com.au.txt b/inc/3rdparty/site_config/standard/brokernews.com.au.txt
new file mode 100755
index 00000000..814da38a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/brokernews.com.au.txt
@@ -0,0 +1,2 @@
1author: //span[@itemprop="author"]
2date: //span[@itemprop="datePublished"]
diff --git a/inc/3rdparty/site_config/standard/business.time.com.txt b/inc/3rdparty/site_config/standard/business.time.com.txt
new file mode 100755
index 00000000..5502beae
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/business.time.com.txt
@@ -0,0 +1,17 @@
1# 2011-10-25 - carlo@... - Initial setup.
2
3single_page_link: //li[@class='print']/a/@href
4
5title: //h1
6author: //meta[@name="byline"]/@content
7date: //meta[@name="date"]/@content
8
9strip: //span[@class="see"]
10strip: //div[@class="byline"]
11strip: //div[@id="date2"]
12strip: //h1
13strip: //div[@class='post-rail-ad']
14strip: //div[@class='post-rail-content']
15strip: //aside[@class='post-rail']
16
17test_url: http://www.time.com/time/specials/packages/article/0,28804,2094921_2094923_2094924,00.html
diff --git a/inc/3rdparty/site_config/standard/choice.com.au.txt b/inc/3rdparty/site_config/standard/choice.com.au.txt
new file mode 100755
index 00000000..02714755
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/choice.com.au.txt
@@ -0,0 +1,4 @@
1
2body: //div[@id='content']//div[@id='mainBlogContentWrapper']//*[self::p or self::img or self::ul] | //div[@class='mainArticleIntro']
3
4date: //span[@class='date']
diff --git a/inc/3rdparty/site_config/standard/cnet.com.au.txt b/inc/3rdparty/site_config/standard/cnet.com.au.txt
new file mode 100755
index 00000000..d5719d40
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/cnet.com.au.txt
@@ -0,0 +1,17 @@
1title: //meta[@property="og:title"]/@content
2body: //div[contains(@class, 'postBody')]
3date: //div[@id='nameAndTime']/time
4author: //div[@id='nameAndTime']/span[@class='author']
5
6strip_id_or_class: image-credit
7strip_id_or_class: noAutolink
8strip_id_or_class: related
9strip_id_or_class: cite
10
11prune: no
12tidy: no
13
14# early end
15replace_string(Download today's podcast</a>): Download today's podcast</a></div></body></html>
16
17test_url: http://www.cnet.com/8301-13952_1-57367607-81/the-404-981-where-the-world-is-a-vampire-podcast/
diff --git a/inc/3rdparty/site_config/standard/computerbase.de.txt b/inc/3rdparty/site_config/standard/computerbase.de.txt
index 5973c50b..214fcceb 100755
--- a/inc/3rdparty/site_config/standard/computerbase.de.txt
+++ b/inc/3rdparty/site_config/standard/computerbase.de.txt
@@ -2,7 +2,7 @@ title://h1
2 2
3author://div[@id="news-meta"]/a 3author://div[@id="news-meta"]/a
4 4
5body://*[@id="main"]/div[1] 5body: //div[contains(@class, 'text-content')]
6 6
7strip://*[@id="main"]/div[2] 7strip://*[@id="main"]/div[2]
8strip://*[@id="main"]/div[3] 8strip://*[@id="main"]/div[3]
@@ -15,4 +15,4 @@ strip://img
15 15
16#figures are not displayed in instapaper... 16#figures are not displayed in instapaper...
17strip://figure | //figcaption 17strip://figure | //figcaption
18test_url: http://www.computerbase.de/news/2012-06/verbraucherzentrale-mahnt-blizzard-fuer-diablo-3-ab/ \ No newline at end of file 18test_url: http://www.computerbase.de/news/2012-06/verbraucherzentrale-mahnt-blizzard-fuer-diablo-3-ab/
diff --git a/inc/3rdparty/site_config/standard/contrepoints.org.txt b/inc/3rdparty/site_config/standard/contrepoints.org.txt
new file mode 100755
index 00000000..8a6a1250
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/contrepoints.org.txt
@@ -0,0 +1,21 @@
1# Contrepoints.org
2# As of 2015-04, it's a wordpress-powered website.
3
4title: //h1[contains(concat(' ',normalize-space(@class),' '),' page-title ')]//span[contains(concat(' ',normalize-space(@class),' '),' inner-text ')]
5date: //time[contains(concat(' ',normalize-space(@class),' '),' art-date ')]
6author: //h1[contains(concat(' ',normalize-space(@class),' '),' author-name ')]
7body: //article[contains(concat(' ',normalize-space(@class),' '),' plain-art ')]
8
9# no toolbar, meta, etc, but misses excerpt
10# body: //div[contains(concat(' ',normalize-space(@class),' '),' entry ')]
11
12# Thus, we need to strip useless elements from the "plain-art"
13strip: //div[contains(concat(' ',normalize-space(@class),' '),' plain-post-topbar ')]
14strip: //div[contains(concat(' ',normalize-space(@class),' '),' single-type-block ')]
15strip: //header[contains(concat(' ',normalize-space(@class),' '),' entry-header ')]
16
17# And no pruning is needed because we stripped unwanted elements.
18prune: no
19
20test_url: http://www.contrepoints.org/2015/04/25/205709-leconomie-selon-ray-dalio
21test_url: http://www.contrepoints.org/2015/04/25/205734-huile-et-gaz-de-schiste-revolution-durable \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cooper.com.txt b/inc/3rdparty/site_config/standard/cooper.com.txt
index a4244097..fc156f7b 100755
--- a/inc/3rdparty/site_config/standard/cooper.com.txt
+++ b/inc/3rdparty/site_config/standard/cooper.com.txt
@@ -1,4 +1,4 @@
1body: //*[contains(@class,'body')] 1body: //div[contains(@class,'post-body')]
2date: //abbr[@class='published'] 2date: //abbr[@class='published']
3 3
4test_url: http://www.cooper.com/journal/2012/08/2-weeks-left-to-win-your-way-to-the-woodstock-of-ux-coopers-ux-boot-camp.html/ \ No newline at end of file 4test_url: http://www.cooper.com/journal/2015/6/creating-personas
diff --git a/inc/3rdparty/site_config/standard/cwnp.com.txt b/inc/3rdparty/site_config/standard/cwnp.com.txt
new file mode 100755
index 00000000..169fdf84
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/cwnp.com.txt
@@ -0,0 +1,14 @@
1title: //div[@class='entry-pad']//h2
2body: //div[contains(concat(' ',normalize-space(@class),' '),' entry-pad ')]
3strip: //h1
4strip: //p
5strip: //h2
6strip: //div[@class='clear']
7
8prune: no
9tidy: no
10
11autodetect_on_failure: no
12
13test_url: https://www.cwnp.com/wotd.php
14test_url: https://www.cwnp.com/qotd.php
diff --git a/inc/3rdparty/site_config/standard/dailymail.co.uk.txt b/inc/3rdparty/site_config/standard/dailymail.co.uk.txt
index cd29a4d4..8535b19f 100755
--- a/inc/3rdparty/site_config/standard/dailymail.co.uk.txt
+++ b/inc/3rdparty/site_config/standard/dailymail.co.uk.txt
@@ -7,6 +7,15 @@ strip_id_or_class: googleAds
7strip_id_or_class: digg-button 7strip_id_or_class: digg-button
8strip_id_or_class: article-icon-links-container 8strip_id_or_class: article-icon-links-container
9strip_id_or_class: clickToEnlarge 9strip_id_or_class: clickToEnlarge
10strip_id_or_class: articleIconLinksContainer
11strip_id_or_class: related-carousel
12strip_id_or_class: reader-comments
13strip_id_or_class: most-watched
14strip_id_or_class: most-read
15
16find_string:blkBorder img-share
17replace_string: nothing
18
10tidy: no 19tidy: no
11 20
12test_url: http://www.dailymail.co.uk/news/article-1375423/Royal-wedding-Texan-billionaire-Joe-Albritton-invited-Prince-Charles.html \ No newline at end of file 21test_url: http://www.dailymail.co.uk/news/article-1375423/Royal-wedding-Texan-billionaire-Joe-Albritton-invited-Prince-Charles.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dailytelegraph.com.au.txt b/inc/3rdparty/site_config/standard/dailytelegraph.com.au.txt
new file mode 100755
index 00000000..571e8111
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/dailytelegraph.com.au.txt
@@ -0,0 +1,5 @@
1title: //h1[@class="heading"]
2author: //cite[@class='author']
3date: //li[contains(@class, 'date-and-time')]
4
5
diff --git a/inc/3rdparty/site_config/standard/deadspin.com.txt b/inc/3rdparty/site_config/standard/deadspin.com.txt
new file mode 100755
index 00000000..e6ca16ae
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/deadspin.com.txt
@@ -0,0 +1 @@
http_header(user-agent): PHP/5.3 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/derbund.ch.txt b/inc/3rdparty/site_config/standard/derbund.ch.txt
new file mode 100755
index 00000000..1363eff6
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/derbund.ch.txt
@@ -0,0 +1,13 @@
1# Author: cirnod@gmail.com
2
3tidy: no
4prune: no
5
6body: //div[@id="article"]/h3 | //*[@id="mainContent"]
7
8# General Cleanup
9#strip_id_or_class: info_panel
10
11
12# Try yourself
13test_url: http://www.derbund.ch/bern/nachrichten/Fossilienforscher-stehen-auf-Heavy-Metal/story/20919522
diff --git a/inc/3rdparty/site_config/standard/designbuildsource.com.au.txt b/inc/3rdparty/site_config/standard/designbuildsource.com.au.txt
new file mode 100755
index 00000000..93d3507e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/designbuildsource.com.au.txt
@@ -0,0 +1,2 @@
1date: substring-after(//p[@class='post_date'], 'on')
2
diff --git a/inc/3rdparty/site_config/standard/dilbert.com.txt b/inc/3rdparty/site_config/standard/dilbert.com.txt
index 85cc78e5..b8788553 100755
--- a/inc/3rdparty/site_config/standard/dilbert.com.txt
+++ b/inc/3rdparty/site_config/standard/dilbert.com.txt
@@ -1,11 +1,9 @@
1#title: substring(substring-after(//title, ':'), 1, string-length(substring-after(//title, ':')) - 10) 1title: //a[@class="post-title"]/text()
2title: //div[contains(@class, 'SB_Title')]//a 2title: //meta[@name="twitter:title"]/@content
3body: //div[contains(@class, 'STR_Image')] 3body: //img[@class="img-responsive img-comic"]
4body: //*[contains(@class, 'SB_Content')]
5author: string('Scott Adams') 4author: string('Scott Adams')
6date: //*[contains(@class, 'SB_Detail')]/text()[1] 5date: //meta[@property="article:publish_date"]/@content
7
8 6
9test_url: http://dilbert.com/blog/entry/death_by_hypnosis_or_not/ 7test_url: http://dilbert.com/blog/entry/death_by_hypnosis_or_not/
10test_url: http://dilbert.com/strips/comic/2013-10-22 8test_url: http://dilbert.com/strips/comic/2013-10-22
11test_url: http://feed.dilbert.com/dilbert/daily_strip \ No newline at end of file 9test_url: http://feed.dilbert.com/dilbert/daily_strip
diff --git a/inc/3rdparty/site_config/standard/dn.se.txt b/inc/3rdparty/site_config/standard/dn.se.txt
index 5283a0cd..a2ad609b 100755
--- a/inc/3rdparty/site_config/standard/dn.se.txt
+++ b/inc/3rdparty/site_config/standard/dn.se.txt
@@ -15,6 +15,9 @@ strip_id_or_class: hook
15strip_id_or_class: right 15strip_id_or_class: right
16strip_id_or_class: footer 16strip_id_or_class: footer
17 17
18strip_id_or_class: ad-head
19strip_id_or_class: atc-share-title
20
18# Other news 21# Other news
19strip: //div[@id="mirrors"] 22strip: //div[@id="mirrors"]
20 23
@@ -25,4 +28,5 @@ author: //div[@id="byline"]/div/p/strong
25date: substring(substring-after(//p[@class="published"], 'Publicerad '), 0, 11) 28date: substring(substring-after(//p[@class="published"], 'Publicerad '), 0, 11)
26 29
27test_url: http://www.dn.se/nyheter/varlden/landade-flygplan-mitt-i-villaomrade 30test_url: http://www.dn.se/nyheter/varlden/landade-flygplan-mitt-i-villaomrade
28test_url: http://www.dn.se/m/rss/senaste-nytt \ No newline at end of file 31test_contains: Ett tekniskt haveri tvingade
32test_url: http://www.dn.se/rss/senaste-nytt
diff --git a/inc/3rdparty/site_config/standard/economie.gouv.fr.txt b/inc/3rdparty/site_config/standard/economie.gouv.fr.txt
new file mode 100755
index 00000000..b0db03c1
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/economie.gouv.fr.txt
@@ -0,0 +1,4 @@
1body: //div[contains(@class, 'txtVisu')]
2prune: no
3
4test_url: http://www.economie.gouv.fr/dgccrf/Publications/Vie-pratique/Fiches-pratiques/Assurance \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/entwickler.de.txt b/inc/3rdparty/site_config/standard/entwickler.de.txt
new file mode 100755
index 00000000..316f3991
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/entwickler.de.txt
@@ -0,0 +1,7 @@
1title: //h1[@class="post-title"]
2body: //section[@class="article-content"]
3author: //div[@class="post-bottom-meta"]/span[@class="post-author"]
4date: //div[@class="post-date"]/time/@datetime
5
6test_url: https://entwickler.de/online/mobile-welt-offline-welt-was-der-offline-first-ansatz-fuer-app-entwickler-heisst-140602.html
7test_url: https://entwickler.de/online/development/plex-docker-joomla-165345.html
diff --git a/inc/3rdparty/site_config/standard/explosm.net.txt b/inc/3rdparty/site_config/standard/explosm.net.txt
new file mode 100755
index 00000000..f2d0a20f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/explosm.net.txt
@@ -0,0 +1,4 @@
1body: //img[@id='main-comic']
2author: substring(//small[@class="author-credit-name"], 4)
3
4test_url: http://explosm.net/comics/3954/
diff --git a/inc/3rdparty/site_config/standard/facebook.com.txt b/inc/3rdparty/site_config/standard/facebook.com.txt
index 26d4f905..2641a0b2 100755
--- a/inc/3rdparty/site_config/standard/facebook.com.txt
+++ b/inc/3rdparty/site_config/standard/facebook.com.txt
@@ -1,12 +1,14 @@
1body: //div[@id='imagestage'] 1body: //div[@id='imagestage']
2body: //div[contains(@class, 'userContentWrapper')] 2body: //div[contains(@class, 'userContentWrapper')]
3 3body: //div[@id='m_story_permalink_view' or contains(@data-sigil, 'm-story-view')]
4strip_id_or_class: commentable 4strip_id_or_class: commentable
5strip: //div[contains(@data-sigil, 'm-mentions-expand')]
5 6
6prune: no 7prune: no
7tidy: no 8tidy: no
8 9
9# single_page_link: replace(substring-after(//noscript//meta[@http-equiv="refresh"]/@content, 'URL='), "&amp;", "&") 10single_page_link: concat("https://m.", substring-after(//link[@rel="alternate" and @media="handheld"]/@href, "//www."))
11if_page_contains: //link[@rel="alternate" and @media="handheld"]
10 12
11test_url: https://www.facebook.com/permalink.php?story_fbid=10154584776550183&id=294468630182 13test_url: https://www.facebook.com/permalink.php?story_fbid=10154584776550183&id=294468630182
12test_contains: holding an extraordinary session in Brussels this month 14test_contains: holding an extraordinary session in Brussels this month
diff --git a/inc/3rdparty/site_config/standard/fastcompany.com.txt b/inc/3rdparty/site_config/standard/fastcompany.com.txt
index a6417237..bf8375ee 100755
--- a/inc/3rdparty/site_config/standard/fastcompany.com.txt
+++ b/inc/3rdparty/site_config/standard/fastcompany.com.txt
@@ -1,16 +1,20 @@
1title: //h1 1author: //div[@class='byline']//a
2author: //h5[@class='byline']//a 2date: //meta[@property='article:published_time']/@content
3date: //h5[@class='date'] 3body: //figure[@class='jumbotron'] | //div[@itemprop='body']
4body: //figure[@class='node-poster'] | //div[contains(@class, "node-content")] 4
5strip_id_or_class: article-top-wrapper 5prune: no
6strip_id_or_class: footer-message 6
7strip_id_or_class: print-logo 7#strip_id_or_class: article-top-wrapper
8strip: //cite 8#strip_id_or_class: footer-message
9strip://*[@class='timestamp'] 9#strip_id_or_class: print-logo
10strip://div[@id='page_right'] 10#strip: //cite
11strip://section[@id='header_region'] 11#strip://*[@class='timestamp']
12strip://h1[@class='node-title'] 12#strip://div[@id='page_right']
13strip://div[@class='node-submitted'] 13#strip://section[@id='header_region']
14strip_id_or_class: skipnav 14#strip://h1[@class='node-title']
15#strip://div[@class='node-submitted']
16#strip_id_or_class: skipnav
17
15test_url: http://www.fastcompany.com/3000226/link-between-quietness-and-productivity 18test_url: http://www.fastcompany.com/3000226/link-between-quietness-and-productivity
16test_url: http://www.fastcompany.com/3003586/6-simple-rituals-reach-your-potential-every-day \ No newline at end of file 19test_contains: Some of you may have tried to reach me this morning
20test_url: http://www.fastcompany.com/3003586/6-simple-rituals-reach-your-potential-every-day
diff --git a/inc/3rdparty/site_config/standard/fok.nl.txt b/inc/3rdparty/site_config/standard/fok.nl.txt
new file mode 100755
index 00000000..012f07df
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/fok.nl.txt
@@ -0,0 +1,4 @@
1# skip cookie warning
2single_page_link: concat(//form/@action, '?allowcookies=yes')
3
4test_url: http://fok.nl/687116 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/forsvaret.no.txt b/inc/3rdparty/site_config/standard/forsvaret.no.txt
index c1bd2bac..ec9e5807 100755
--- a/inc/3rdparty/site_config/standard/forsvaret.no.txt
+++ b/inc/3rdparty/site_config/standard/forsvaret.no.txt
@@ -6,4 +6,5 @@ strip: //div[contains(@class,"aside")]
6# remove some SharePoint webpart label junk 6# remove some SharePoint webpart label junk
7strip: //div[@id="ctl00_PlaceHolderMain_ArticleLeadField_label"] 7strip: //div[@id="ctl00_PlaceHolderMain_ArticleLeadField_label"]
8strip: //div[@id="ctl00_PlaceHolderMain_PublishingPageContentField_label"] 8strip: //div[@id="ctl00_PlaceHolderMain_PublishingPageContentField_label"]
9test_url: http://forsvaret.no/aktuelt/publisert/nyheter/Sider/F5-fly-til-Skedsmo.aspx \ No newline at end of file 9test_url: https://forsvaret.no/aktuelt/historisk-medaljeutdeling
10test_contains: Samarbeidet med Marinen har vært en sann glede
diff --git a/inc/3rdparty/site_config/standard/france24.com.txt b/inc/3rdparty/site_config/standard/france24.com.txt
new file mode 100755
index 00000000..6356e048
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/france24.com.txt
@@ -0,0 +1,14 @@
1# Generated by FiveFilters.org's web-based selection tool
2# Place this file inside your site_config/custom/ folder
3# Source: http://siteconfig.fivefilters.org/grab.php?url=http%3A%2F%2Fwww.france24.com%2Fen%2F20150427-togo-gnassingbe-poised-extend-power-election%2F
4
5body: //article[contains(concat(' ',normalize-space(@class),' '),' article-long ')]//div[contains(concat(' ',normalize-space(@class),' '),' bd ')]
6title: //h1[@class="title"]
7author://p[@class="author"]
8date://p[@class="modification"]
9
10find_string: <p class="modification">Latest update :
11replace_string: <p class="modification">
12
13
14test_url: http://www.france24.com/en/20150427-togo-gnassingbe-poised-extend-power-election/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/galwayindependent.com.txt b/inc/3rdparty/site_config/standard/galwayindependent.com.txt
new file mode 100755
index 00000000..d45b7acf
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/galwayindependent.com.txt
@@ -0,0 +1,3 @@
1title: //div[@class='leftCol']/h1
2
3prune: no
diff --git a/inc/3rdparty/site_config/standard/gameblog.fr.txt b/inc/3rdparty/site_config/standard/gameblog.fr.txt
index 73f8342f..227d39ac 100755
--- a/inc/3rdparty/site_config/standard/gameblog.fr.txt
+++ b/inc/3rdparty/site_config/standard/gameblog.fr.txt
@@ -1,5 +1,7 @@
1title: //meta[@property="og:title"]/@content 1title: //meta[@property="og:title"]/@content
2body: //div[@id='GBTVPlayer'] | //div[contains(@class, 'col490')] 2body: //div[@id='GBTVPlayer'] | //div[contains(@class, 'col490')]
3author: //span[contains(concat(' ',normalize-space(@class),' '),' author ')]
4date: //header[@id='gbArticleHeader']//div//time/@datetime
3 5
4prune: no 6prune: no
5 7
@@ -7,4 +9,4 @@ strip_id_or_class: noprint
7strip: //div[@id='gbNewsTextContent']/following-sibling::* 9strip: //div[@id='gbNewsTextContent']/following-sibling::*
8 10
9test_url: http://www.gameblog.fr/news/26330-les-sims-3-showtime-s-annonce-en-video 11test_url: http://www.gameblog.fr/news/26330-les-sims-3-showtime-s-annonce-en-video
10test_url: http://www.gameblog.fr/news/26306-mise-a-jour-du-dashboard-de-la-xbox-360-disponible \ No newline at end of file 12test_url: http://www.gameblog.fr/news/26306-mise-a-jour-du-dashboard-de-la-xbox-360-disponible
diff --git a/inc/3rdparty/site_config/standard/gawker.com.txt b/inc/3rdparty/site_config/standard/gawker.com.txt
index 9bc5613a..27e4b4bb 100755
--- a/inc/3rdparty/site_config/standard/gawker.com.txt
+++ b/inc/3rdparty/site_config/standard/gawker.com.txt
@@ -3,4 +3,6 @@ body: //div[@class="post-body"]
3# Remove 'content is restricted' 3# Remove 'content is restricted'
4strip: //div[@id='agegate_IDHERE'] 4strip: //div[@id='agegate_IDHERE']
5 5
6http_header(user-agent): PHP/5.3
7
6test_url: http://gawker.com/#!5782070/russian-bomb-squad-successfully-defuses-sex-toy \ No newline at end of file 8test_url: http://gawker.com/#!5782070/russian-bomb-squad-successfully-defuses-sex-toy \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/getpocket.com.txt b/inc/3rdparty/site_config/standard/getpocket.com.txt
new file mode 100755
index 00000000..e6ca16ae
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/getpocket.com.txt
@@ -0,0 +1 @@
http_header(user-agent): PHP/5.3 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gist.github.com.txt b/inc/3rdparty/site_config/standard/gist.github.com.txt
index f11b7b42..90207862 100755
--- a/inc/3rdparty/site_config/standard/gist.github.com.txt
+++ b/inc/3rdparty/site_config/standard/gist.github.com.txt
@@ -1,4 +1,6 @@
1body: //div[@class="highlight"]/pre
1 2
2title: //div[contains(@class,'gist-description')] 3prune: no
3body: //div[contains(@class,'blob-wrapper')] 4tidy: no
4test_url: https://gist.github.com/staltz/868e7e9bc2a7b8c1f754 5
6test_url: https://gist.github.com/1258908 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gizmodo.com.au.txt b/inc/3rdparty/site_config/standard/gizmodo.com.au.txt
new file mode 100755
index 00000000..9dbfc152
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/gizmodo.com.au.txt
@@ -0,0 +1,8 @@
1body: //div[@id='content_post' or @class="post-body" or contains(@class, 'illustration top')]
2author: (//cite//span[@class="plus-icon"])[1]
3date: //span[@class="date"]
4date: //time
5
6prune: no
7
8test_url: http://gizmodo.com/5880147/kuhn-rikon-improves-their-spice-grinder-with-grade-school-science
diff --git a/inc/3rdparty/site_config/standard/gizmodo.com.txt b/inc/3rdparty/site_config/standard/gizmodo.com.txt
index e73ec9d2..535041cd 100755
--- a/inc/3rdparty/site_config/standard/gizmodo.com.txt
+++ b/inc/3rdparty/site_config/standard/gizmodo.com.txt
@@ -6,6 +6,8 @@ date: //span[@class="date"]
6 6
7prune: no 7prune: no
8 8
9http_header(user-agent): PHP/5.3
10
9test_url: http://gizmodo.com/5880147/kuhn-rikon-improves-their-spice-grinder-with-grade-school-science 11test_url: http://gizmodo.com/5880147/kuhn-rikon-improves-their-spice-grinder-with-grade-school-science
10test_url: http://gizmodo.com/what-van-goghs-paintings-would-look-like-if-they-came-874035680 12test_url: http://gizmodo.com/what-van-goghs-paintings-would-look-like-if-they-came-874035680
11test_url: http://gizmodo.com/vip.xml \ No newline at end of file 13test_url: http://gizmodo.com/vip.xml \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/globalgrind.com.txt b/inc/3rdparty/site_config/standard/globalgrind.com.txt
new file mode 100755
index 00000000..e2f4e233
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/globalgrind.com.txt
@@ -0,0 +1,6 @@
1body: //div[contains(@class, 'content-body')]
2
3prune: no
4
5test_url: http://globalgrind.com/2015/04/26/listen-jeremih-featuring-chance-the-rapper-the-social-experiment-planes-remix-new-music/
6test_contains: The Chicago rapper has made a name for himself \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gocomics.com.txt b/inc/3rdparty/site_config/standard/gocomics.com.txt
new file mode 100755
index 00000000..212c02d5
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/gocomics.com.txt
@@ -0,0 +1,5 @@
1body: //a[@class="photo"]/img[@class="strip"]
2author: //meta[@name="author"]/@content
3date: //meta[@property="gocomics:publish_date"]/@content
4
5test_url: http://www.gocomics.com/garfield/2015/06/13
diff --git a/inc/3rdparty/site_config/standard/help.fivefilters.org.txt b/inc/3rdparty/site_config/standard/help.fivefilters.org.txt
new file mode 100755
index 00000000..70a7d156
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/help.fivefilters.org.txt
@@ -0,0 +1,2 @@
1title: //div[@class="title"]/h3
2date: substring-after(//div[@class="meta"], ": ")
diff --git a/inc/3rdparty/site_config/standard/heraldsun.com.au.txt b/inc/3rdparty/site_config/standard/heraldsun.com.au.txt
new file mode 100755
index 00000000..b0ce56c5
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/heraldsun.com.au.txt
@@ -0,0 +1,12 @@
1#body: //div[@class='story-body']
2body: //div[contains(@class, 'story-body')]
3title: //div[@class='story-headline']//h1
4author: //cite[contains(@class, 'author')]
5date: //span[@class='datestamp']
6
7strip_id_or_class: story-info
8strip: //div[contains(@class, 'story-promo')]
9strip: //div[contains(@class, 'story-related')]
10
11prune: no
12tidy: no
diff --git a/inc/3rdparty/site_config/standard/hiiraan.com.txt b/inc/3rdparty/site_config/standard/hiiraan.com.txt
new file mode 100755
index 00000000..cf1f7942
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/hiiraan.com.txt
@@ -0,0 +1,10 @@
1# Generated by FiveFilters.org's web-based selection tool
2# Place this file inside your site_config/custom/ folder
3# Source: http://siteconfig.fivefilters.org/grab.php?url=http%3A%2F%2Fwww.hiiraan.com%2Fnews%2F2014%2FDec%2Fwararka_maanta20-89428.htm
4
5body: //div[contains(concat(' ',normalize-space(@class),' '),' single ')]//div[contains(concat(' ',normalize-space(@class),' '),' description ')]
6
7prune: no
8
9test_url: http://www.hiiraan.com/news/2014/Dec/wararka_maanta20-89428.htm
10test_url: http://rss.hiiraan.com/wararka_maanta_rss.xml \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/independent.co.uk.txt b/inc/3rdparty/site_config/standard/independent.co.uk.txt
index af742209..6711a0a2 100755
--- a/inc/3rdparty/site_config/standard/independent.co.uk.txt
+++ b/inc/3rdparty/site_config/standard/independent.co.uk.txt
@@ -1,9 +1,16 @@
1title: //meta[@property='og:title']/@content 1title: //meta[@property='og:title']/@content
2body: //div[contains(@class, 'articleContent')] 2body: //img[contains(@class, 'FirstImage')] | //div[contains(@class, 'articleContent')]
3date: //meta[@property='article:published_time']/@content 3date: //meta[@property='article:published_time']/@content
4author: //div[@id='main']//div[@class='byline']//span[@class='authorName'] 4author: //div[@id='main']//div[@class='byline']//span[@class='authorName']
5 5
6strip_id_or_class: RelatedArtTag 6strip_id_or_class: RelatedArtTag
7 7
8strip: //h5[contains(., 'READ MORE:')]
9strip: //h5[contains(., 'Read more:')]
10
8tidy: no 11tidy: no
9test_url: http://www.independent.co.uk/news/world/middle-east/syria-could-face-human-rights-probe-2274326.html \ No newline at end of file 12test_url: http://www.independent.co.uk/news/world/middle-east/syria-could-face-human-rights-probe-2274326.html
13test_url: http://www.independent.co.uk/voices/comment/robert-fisk-on-the-cia-torture-report-once-again-language-is-distorted-in-order-to-hide-us-state-wrongdoing-9924501.html
14test_contains: Thank God for Noam Chomsky.
15
16test_url: http://www.independent.co.uk/news/uk/rss \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/io9.com.txt b/inc/3rdparty/site_config/standard/io9.com.txt
new file mode 100755
index 00000000..e6ca16ae
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/io9.com.txt
@@ -0,0 +1 @@
http_header(user-agent): PHP/5.3 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ippmedia.com.txt b/inc/3rdparty/site_config/standard/ippmedia.com.txt
new file mode 100755
index 00000000..99f25dc0
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/ippmedia.com.txt
@@ -0,0 +1,4 @@
1title: //div[@class="content_title"]//h2
2author: substring-after(//div[@class="byline"], "By ")
3date: //div[@class="publish_date"]
4strip: //div[@class="read_image_box"]
diff --git a/inc/3rdparty/site_config/standard/itnews.com.au.txt b/inc/3rdparty/site_config/standard/itnews.com.au.txt
new file mode 100755
index 00000000..47cbb0f3
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/itnews.com.au.txt
@@ -0,0 +1,5 @@
1title: //h1[@class='article-header']
2body: //div[@class='body-content']
3author: //span[@class='author-byline']/a[contains(@id, 'Author')]
4
5strip: //span[contains(@id, 'Article_SourceLabel')]
diff --git a/inc/3rdparty/site_config/standard/jalopnik.com.txt b/inc/3rdparty/site_config/standard/jalopnik.com.txt
index fc2eef8e..7823dbd7 100755
--- a/inc/3rdparty/site_config/standard/jalopnik.com.txt
+++ b/inc/3rdparty/site_config/standard/jalopnik.com.txt
@@ -1,2 +1,5 @@
1author: //span[@class='plus-icon'] 1author: //span[@class='plus-icon']
2
3http_header(user-agent): PHP/5.3
4
2test_url: http://jalopnik.com/5892124/1955-porsche-550-spyder-sells-for-record-3685-million/ \ No newline at end of file 5test_url: http://jalopnik.com/5892124/1955-porsche-550-spyder-sells-for-record-3685-million/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/jezebel.com.txt b/inc/3rdparty/site_config/standard/jezebel.com.txt
new file mode 100755
index 00000000..e6ca16ae
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/jezebel.com.txt
@@ -0,0 +1 @@
http_header(user-agent): PHP/5.3 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/kotaku.com.txt b/inc/3rdparty/site_config/standard/kotaku.com.txt
index be439d75..61ccbc46 100755
--- a/inc/3rdparty/site_config/standard/kotaku.com.txt
+++ b/inc/3rdparty/site_config/standard/kotaku.com.txt
@@ -1,2 +1,5 @@
1author: //span[@class="plus-icon"] 1author: //span[@class="plus-icon"]
2
3http_header(user-agent): PHP/5.3
4
2test_url: http://kotaku.com/5920211/save-the-furries-on-your-wii-in-this-weeks-nintendo-download \ No newline at end of file 5test_url: http://kotaku.com/5920211/save-the-furries-on-your-wii-in-this-weeks-nintendo-download \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/lefigaro.fr.txt b/inc/3rdparty/site_config/standard/lefigaro.fr.txt
index e720e377..7e1d12d7 100755
--- a/inc/3rdparty/site_config/standard/lefigaro.fr.txt
+++ b/inc/3rdparty/site_config/standard/lefigaro.fr.txt
@@ -2,7 +2,8 @@ title: //meta[@name='title']/@content
2author: //span[@class='sign']//a[@class='journaliste'] 2author: //span[@class='sign']//a[@class='journaliste']
3author: //meta[@name='author']/@content 3author: //meta[@name='author']/@content
4body: //*[@id='article']/div[@class='photo'] | //*[@id='article']/h2 | //*[@id='article']/div[@class='texte'] 4body: //*[@id='article']/div[@class='photo'] | //*[@id='article']/h2 | //*[@id='article']/div[@class='texte']
5date: //time[@pubdate]/@datetime 5date: //li[contains(concat(' ',normalize-space(@class),' '),' fig-date-pub ')]//time
6prune: no 6prune: no
7test_url: http://www.lefigaro.fr/environnement/2011/11/10/01029-20111110ARTFIG00801-la-chine-confrontee-a-un-immense-defi-ecologique.php 7test_url: http://www.lefigaro.fr/environnement/2011/11/10/01029-20111110ARTFIG00801-la-chine-confrontee-a-un-immense-defi-ecologique.php
8test_url: http://www.lefigaro.fr/conjoncture/2012/11/20/20002-20121120ARTFIG00609-l-usager-devrait-payer-plus-pour-financer-les-transports.php \ No newline at end of file 8test_url: http://www.lefigaro.fr/conjoncture/2012/11/20/20002-20121120ARTFIG00609-l-usager-devrait-payer-plus-pour-financer-les-transports.php
9test_url: http://www.lefigaro.fr/social/2015/03/10/09010-20150310ARTFIG00312-encore-une-annee-noire-pour-l-emploi-salarie.php
diff --git a/inc/3rdparty/site_config/standard/lifehacker.com.txt b/inc/3rdparty/site_config/standard/lifehacker.com.txt
index ec97f06c..330c4e78 100755
--- a/inc/3rdparty/site_config/standard/lifehacker.com.txt
+++ b/inc/3rdparty/site_config/standard/lifehacker.com.txt
@@ -42,6 +42,12 @@ strip: //p[@class="arrow"]
42 42
43# Remove "track" image from article body 43# Remove "track" image from article body
44strip: //img[@alt="track"] 44strip: //img[@alt="track"]
45
46# Remove hidden URLs
47strip: //a[@x-inset="hidden"]
48
49http_header(user-agent): PHP/5.3
50
45test_url: http://lifehacker.com/5925801/how-can-i-turn-vague-goals-into-actionable-to+dos 51test_url: http://lifehacker.com/5925801/how-can-i-turn-vague-goals-into-actionable-to+dos
46test_url: http://lifehacker.com/5941600/hack-an-old-computer-mouse-into-a-wireless-bluetooth-mouse 52test_url: http://lifehacker.com/5941600/hack-an-old-computer-mouse-into-a-wireless-bluetooth-mouse
47test_url: http://lifehacker.com/what-happens-to-the-brain-when-you-meditate-and-how-it-1202533314 \ No newline at end of file 53test_url: http://lifehacker.com/what-happens-to-the-brain-when-you-meditate-and-how-it-1202533314
diff --git a/inc/3rdparty/site_config/standard/linuxjournal.com.txt b/inc/3rdparty/site_config/standard/linuxjournal.com.txt
new file mode 100755
index 00000000..c5e64463
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/linuxjournal.com.txt
@@ -0,0 +1,6 @@
1body: //div[@class='content-area']
2next_page_link: //a[@title='Go to next page']
3author: //a[@title='View user profile.']
4strip_id_or_class: comments
5
6test_url: http://www.linuxjournal.com/content/be-mechanicwith-android-and-linux
diff --git a/inc/3rdparty/site_config/standard/m.bbc.co.uk.txt b/inc/3rdparty/site_config/standard/m.bbc.co.uk.txt
index d1ff0b43..7037c64b 100755
--- a/inc/3rdparty/site_config/standard/m.bbc.co.uk.txt
+++ b/inc/3rdparty/site_config/standard/m.bbc.co.uk.txt
@@ -3,6 +3,12 @@ body: //div[@class="story-body"]
3date: //p[@class='date']/strong 3date: //p[@class='date']/strong
4author: substring-after(//div[@class="story-inner"]/div[@class="byline"]//span[@class='name'], 'By') 4author: substring-after(//div[@class="story-inner"]/div[@class="byline"]//span[@class='name'], 'By')
5 5
6find_string: http://ichef.bbci.co.uk/news/200/
7replace_string: http://ichef.bbci.co.uk/news/624/
8
9find_string: http://ichef.bbci.co.uk/news/304/
10replace_string: http://ichef.bbci.co.uk/news/624/
11
6strip: //div[@class="story-inner"]/div[@class="byline"] 12strip: //div[@class="story-inner"]/div[@class="byline"]
7 13
8test_url: http://m.bbc.co.uk/news/science-environment-19144464 \ No newline at end of file 14test_url: http://m.bbc.co.uk/news/science-environment-19144464 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/m.facebook.com.txt b/inc/3rdparty/site_config/standard/m.facebook.com.txt
new file mode 100755
index 00000000..1b9c1b34
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/m.facebook.com.txt
@@ -0,0 +1,12 @@
1body: //div[@id='m_story_permalink_view' or contains(@data-sigil, 'm-story-view')]
2
3title: //div[@id='m_story_permalink_view' or contains(@data-sigil, 'm-story-view')]//h3
4
5strip_id_or_class: commentable
6strip: //*[contains(@data-sigil, 'm-mentions-expand') or contains(@data-sigil, 'story-popup-context') or contains(@data-sigil, 'share') or contains(@data-sigil, 'translate')]
7
8prune: no
9tidy: no
10
11test_url: https://www.facebook.com/permalink.php?story_fbid=10154584776550183&id=294468630182
12test_contains: holding an extraordinary session in Brussels this month
diff --git a/inc/3rdparty/site_config/standard/m.theregister.co.uk.txt b/inc/3rdparty/site_config/standard/m.theregister.co.uk.txt
new file mode 100755
index 00000000..64cb1c32
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/m.theregister.co.uk.txt
@@ -0,0 +1,4 @@
1strip: //div[@class='wptl btm']
2body: //div[@id='article']//h2 | //div[@id='body']
3
4test_url: http://m.theregister.co.uk/2015/07/06/geeks_guide_spaceguard_center/
diff --git a/inc/3rdparty/site_config/standard/marketingmag.com.au.txt b/inc/3rdparty/site_config/standard/marketingmag.com.au.txt
new file mode 100755
index 00000000..910741f3
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/marketingmag.com.au.txt
@@ -0,0 +1 @@
strip: //h3[@class="related-posts"]
diff --git a/inc/3rdparty/site_config/standard/medium.com.txt b/inc/3rdparty/site_config/standard/medium.com.txt
index 9e9c6895..5ab3ac5e 100755
--- a/inc/3rdparty/site_config/standard/medium.com.txt
+++ b/inc/3rdparty/site_config/standard/medium.com.txt
@@ -1,4 +1,4 @@
1body: //div[contains(@class, 'postContent-inner')] 1body: //div[contains(@class, 'postArticle-content')]
2strip_id_or_class: supplementalPostContent 2strip_id_or_class: supplementalPostContent
3 3
4prune: no 4prune: no
diff --git a/inc/3rdparty/site_config/standard/menshealth.com.sg.txt b/inc/3rdparty/site_config/standard/menshealth.com.sg.txt
index 6a669253..af450b5e 100755
--- a/inc/3rdparty/site_config/standard/menshealth.com.sg.txt
+++ b/inc/3rdparty/site_config/standard/menshealth.com.sg.txt
@@ -3,10 +3,5 @@ body: //div[@style="float:left;width:740px;"]
3 3
4tidy: no 4tidy: no
5 5
6test_url: http://www.menshealth.com.sg/fitness/mh-picks-under-armour-clutchfit-nitro-mid-cleats 6# broken feed?
7test_contains: These cleats are made for one thing 7test_url: http://www.menshealth.com.sg/fitness/feed
8
9test_url: http://www.menshealth.com.sg/fitness/top-10-fat-burning-bodyweight-moves-you-can-do-10-minutes
10test_contains: let this workout fool you
11
12test_url: http://www.menshealth.com.sg/fitness/feed \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mitchellrepublic.com.txt b/inc/3rdparty/site_config/standard/mitchellrepublic.com.txt
new file mode 100755
index 00000000..fae858a3
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/mitchellrepublic.com.txt
@@ -0,0 +1,3 @@
1body: //div[@class='section']
2strip_id_or_class: mediumtxt
3strip: //strong[contains
diff --git a/inc/3rdparty/site_config/standard/mobile.nytimes.com.txt b/inc/3rdparty/site_config/standard/mobile.nytimes.com.txt
index c60252ef..ef1ce98d 100755
--- a/inc/3rdparty/site_config/standard/mobile.nytimes.com.txt
+++ b/inc/3rdparty/site_config/standard/mobile.nytimes.com.txt
@@ -1,4 +1,7 @@
1title: //h1[contains(@class, 'headline')] 1title: //h1[contains(@class, 'headline')]
2body: //article[contains(@class, 'full-art')] 2body: //article[contains(@class, 'full-art')]
3date: //meta[@name="pdate"]/@content
4author: //meta[@name="byl"]/@content
5
3strip_id_or_class: image-credit 6strip_id_or_class: image-credit
4test_url: http://mobile.nytimes.com/2014/06/19/opinion/gail-collins-romney-and-the-2016-contenders-huddle.html \ No newline at end of file 7test_url: http://mobile.nytimes.com/2014/06/19/opinion/gail-collins-romney-and-the-2016-contenders-huddle.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/moneymanagement.com.au.txt b/inc/3rdparty/site_config/standard/moneymanagement.com.au.txt
new file mode 100755
index 00000000..9892f662
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/moneymanagement.com.au.txt
@@ -0,0 +1,2 @@
1date: //span[@class="publishdate"]//time
2author: //span[@class="byline"]
diff --git a/inc/3rdparty/site_config/standard/nbnnews.com.au.txt b/inc/3rdparty/site_config/standard/nbnnews.com.au.txt
new file mode 100755
index 00000000..a2409878
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/nbnnews.com.au.txt
@@ -0,0 +1,3 @@
1body: //div[contains(concat(' ',normalize-space(@class),' '),' entry ') or contains(@class, 'single-post-thumb')]
2test_url: http://www.nbnnews.com.au/2015/03/24/lismore-man-will-attempt-to-run-around-australia/
3test_url: http://www.nbnnews.com.au/category/nthn-rivers-sport/feed/
diff --git a/inc/3rdparty/site_config/standard/news.com.au.txt b/inc/3rdparty/site_config/standard/news.com.au.txt
new file mode 100755
index 00000000..57b89a54
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/news.com.au.txt
@@ -0,0 +1,3 @@
1body: //div[@class='story-body']
2prune: no
3tidy: no
diff --git a/inc/3rdparty/site_config/standard/news.menshealth.com.txt b/inc/3rdparty/site_config/standard/news.menshealth.com.txt
new file mode 100755
index 00000000..a07fdacc
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/news.menshealth.com.txt
@@ -0,0 +1 @@
strip: //span[@style="color: #cf1206;"]
diff --git a/inc/3rdparty/site_config/standard/news.ninemsn.com.au.txt b/inc/3rdparty/site_config/standard/news.ninemsn.com.au.txt
new file mode 100755
index 00000000..ddd64065
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/news.ninemsn.com.au.txt
@@ -0,0 +1,3 @@
1strip: //a[@class="contact"]
2strip: //div[@class="article-media video-item"]
3date: //div[@class='display-date']
diff --git a/inc/3rdparty/site_config/standard/parool.nl.txt b/inc/3rdparty/site_config/standard/parool.nl.txt
new file mode 100755
index 00000000..2ceef940
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/parool.nl.txt
@@ -0,0 +1,7 @@
1#bypass cookie check
2single_page_link: //a[contains(@href, '/acceptCookieCheck.do?url=')]
3
4test_url: http://www.parool.nl/parool/nl/4/AMSTERDAM/article/detail/4042734/2015/05/29/MRSA-bacterie-niet-verder-verspreid-in-Bijlmerbajes.dhtml
5test_contains: De twee gevangenen die
6
7test_url: http://www.parool.nl/amsterdam/rss.xml \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/perthnow.com.au.txt b/inc/3rdparty/site_config/standard/perthnow.com.au.txt
new file mode 100755
index 00000000..b0ce56c5
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/perthnow.com.au.txt
@@ -0,0 +1,12 @@
1#body: //div[@class='story-body']
2body: //div[contains(@class, 'story-body')]
3title: //div[@class='story-headline']//h1
4author: //cite[contains(@class, 'author')]
5date: //span[@class='datestamp']
6
7strip_id_or_class: story-info
8strip: //div[contains(@class, 'story-promo')]
9strip: //div[contains(@class, 'story-related')]
10
11prune: no
12tidy: no
diff --git a/inc/3rdparty/site_config/standard/planetsave.com.txt b/inc/3rdparty/site_config/standard/planetsave.com.txt
new file mode 100755
index 00000000..d6f34e22
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/planetsave.com.txt
@@ -0,0 +1 @@
strip_id_or_class: author-bio-box
diff --git a/inc/3rdparty/site_config/standard/presseportal.de.txt b/inc/3rdparty/site_config/standard/presseportal.de.txt
new file mode 100755
index 00000000..703806d8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/presseportal.de.txt
@@ -0,0 +1,11 @@
1body: //div[contains(concat(' ',normalize-space(@class),' '),' story-text ')]
2
3strip_id_or_class: news-bodycopy
4
5parser: html5php
6tidy: no
7
8test_url: http://www.presseportal.de/pm/103258/2930232/felix-neureuther-vor-der-ski-wm-ich-denke-von-rennen-zu-rennen
9test_url: http://www.presseportal.de/pm/66749/2933779/koelner-stadt-anzeiger-bahnmitarbeiter-werden-in-nrw-immer-haeufiger-angegriffen-zahl-der/rss
10test_contains: kleineren Bahnhöfen installieren und erhofft
11test_url: http://www.presseportal.de/rss/presseportal.rss2
diff --git a/inc/3rdparty/site_config/standard/quechoisir.org.txt b/inc/3rdparty/site_config/standard/quechoisir.org.txt
new file mode 100644
index 00000000..a8bacdb7
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/quechoisir.org.txt
@@ -0,0 +1,10 @@
1title: //h1[@id='titre']
2body://h2[@id="surtitre"]|//div[@id="ctn_introarticle"]|//div[@class="ctn_globalcontent"]
3
4strip_id_or_class: qc-container-main
5strip_id_or_class: article_footer
6
7tidy: no
8prune: no
9
10test_url: http://www.quechoisir.org/alimentation/securite-hygiene/actualite-acrylamide-un-contaminant-trop-present-dans-nos-assiettes
diff --git a/inc/3rdparty/site_config/standard/quora.com.txt b/inc/3rdparty/site_config/standard/quora.com.txt
index 732d12d7..f2b75a99 100755
--- a/inc/3rdparty/site_config/standard/quora.com.txt
+++ b/inc/3rdparty/site_config/standard/quora.com.txt
@@ -1,8 +1,10 @@
1tidy: no 1tidy: no
2prune: no 2prune: no
3body: //div[contains(@class, 'main_col')] 3body: //div[contains(concat(' ',normalize-space(@class),' '),' Answer ')] | //div[contains(concat(' ',normalize-space(@class),' '),' header ')] | //div[contains(concat(' ',normalize-space(@class),' '),' AnswerWikiArea ')] | //hr
4title: //h1 4#body: //div[contains(@class, 'main_col')]
5 5
6strip_id_or_class: AnswerFooter
7strip_id_or_class: ActionBar
6strip_id_or_class: hidden 8strip_id_or_class: hidden
7strip_id_or_class: item_action_bar 9strip_id_or_class: item_action_bar
8strip_id_or_class: answer_voters 10strip_id_or_class: answer_voters
@@ -13,5 +15,15 @@ strip_id_or_class: view_tag
13strip_id_or_class: include_details 15strip_id_or_class: include_details
14strip_id_or_class: sig_edit 16strip_id_or_class: sig_edit
15strip_id_or_class: profile_photo_img 17strip_id_or_class: profile_photo_img
18strip_id_or_class: question_text_icons
16 19
17test_url: http://www.quora.com/What-everyday-habit-do-you-wish-you-had-developed-earlier-in-life \ No newline at end of file 20# insert hr between answers
21find_string: <div class="Answer"
22replace_string: <hr /><div class="Answer"
23
24test_url: http://www.quora.com/What-everyday-habit-do-you-wish-you-had-developed-earlier-in-life
25test_contains: Please provide a specific practical/measurable action-based everyday
26test_contains: Exercise every day
27
28test_url: http://www.quora.com/What-is-the-greatest-illusion-in-life
29test_contains: What is the greatest illusion in life?
diff --git a/inc/3rdparty/site_config/standard/reddit.com.txt b/inc/3rdparty/site_config/standard/reddit.com.txt
index c3f2d3e5..ba342c7c 100755
--- a/inc/3rdparty/site_config/standard/reddit.com.txt
+++ b/inc/3rdparty/site_config/standard/reddit.com.txt
@@ -7,9 +7,7 @@ author: //p[@class="tagline"]/a
7# this doesn't work for some reason...? 7# this doesn't work for some reason...?
8date: //p[@class="tagline"]//@datetime 8date: //p[@class="tagline"]//@datetime
9 9
10#body: (//div[contains(@class, 'noncollapsed')]//div[contains(@class, 'usertext-body')])[1] 10body: (//div[contains(@class, 'noncollapsed')]//div[contains(@class, 'usertext-body')])[1]
11
12body: //div[contains(concat(' ',normalize-space(@class),' '),' usertext-body ') and (contains(concat(' ',normalize-space(@class),' '),' may-blank-within ')) and (contains(concat(' ',normalize-space(@class),' '),' md-container '))]//div[contains(concat(' ',normalize-space(@class),' '),' md ')]
13 11
14strip_id_or_class: tagline 12strip_id_or_class: tagline
15strip_id_or_class: unvotable-message 13strip_id_or_class: unvotable-message
@@ -20,4 +18,4 @@ single_page_link: //p[@class="title"]/a[contains(@href, 'http://')]
20 18
21test_url: http://www.reddit.com/r/truegaming/comments/wfe7r/i_wrote_about_the_problems_i_honestly_feel_that/ 19test_url: http://www.reddit.com/r/truegaming/comments/wfe7r/i_wrote_about_the_problems_i_honestly_feel_that/
22test_url: http://www.reddit.com/r/worldnews/comments/1as37r/twelve_north_korean_soldiers_attempting_to_defect/ 20test_url: http://www.reddit.com/r/worldnews/comments/1as37r/twelve_north_korean_soldiers_attempting_to_defect/
23test_url: http://www.reddit.com/r/WritingPrompts/comments/2786lw/wp_in_a_world_where_puns_are_illegal_one_man/chybk8e 21test_url: http://www.reddit.com/r/WritingPrompts/comments/2786lw/wp_in_a_world_where_puns_are_illegal_one_man/chybk8e \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/rockpapershotgun.com.txt b/inc/3rdparty/site_config/standard/rockpapershotgun.com.txt
index f8c9541f..83342cb7 100755
--- a/inc/3rdparty/site_config/standard/rockpapershotgun.com.txt
+++ b/inc/3rdparty/site_config/standard/rockpapershotgun.com.txt
@@ -1,8 +1,10 @@
1title: //h2 1title: //h2
2 2
3strip: //div[ contains(@class, 'respond') ] | //h2 | //h1 3strip: //div[ contains(@class, 'respond') ] | //h2 | //h1
4strip_id_or_class: social
5strip_id_or_class: dd_post_share
4 6
5date: substring-after(//p[@class='info'], ' on ') 7date: substring-after(//p[@class='info'], ' on ')
6 8
7author: //p[@class='info']//a 9author: //p[@class='info']//a
8test_url: http://www.rockpapershotgun.com/2010/07/29/rps-half-verdict-starcraft-2/ \ No newline at end of file 10test_url: http://www.rockpapershotgun.com/2010/07/29/rps-half-verdict-starcraft-2/
diff --git a/inc/3rdparty/site_config/standard/saadaalnews.net.txt b/inc/3rdparty/site_config/standard/saadaalnews.net.txt
new file mode 100755
index 00000000..b9ce04e5
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/saadaalnews.net.txt
@@ -0,0 +1,11 @@
1body: //div[contains(@class, 'section-content-left')]
2
3strip_id_or_class: related
4strip_id_or_class: nocontent
5strip_id_or_class: comment
6strip_id_or_class: widget
7strip_id_or_class: respond
8strip: //h3[.='Comments']
9strip: //p[.='comments']
10
11test_url: http://saadaalnews.net/?p=42624
diff --git a/inc/3rdparty/site_config/standard/smh.com.au.txt b/inc/3rdparty/site_config/standard/smh.com.au.txt
new file mode 100755
index 00000000..f647f812
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/smh.com.au.txt
@@ -0,0 +1,14 @@
1body: //div[@id='content']
2title: //h1[@class='cN-headingPage']
3author: //h3[@class='authorName']
4date: //dd[@class='updated dtstamp']
5
6strip: //ul[@class='social sponsored cfix']
7strip: //div[contains(@class, 'hiddenVisually')]
8strip: //dd[@class='updated dtstamp']
9strip: //h3[@class='authorName']
10strip: //ul[@class='social cfix']
11strip: //div[contains(@id, 'adspot')]
12
13strip: //div[contains(@class, 'overlayPlayCountdown')]
14strip: //div[@class='fdVideoWof']//span[@class='gone']
diff --git a/inc/3rdparty/site_config/standard/smh.drive.com.au.txt b/inc/3rdparty/site_config/standard/smh.drive.com.au.txt
new file mode 100755
index 00000000..463fd88b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/smh.drive.com.au.txt
@@ -0,0 +1,13 @@
1body: //div[@id='content']
2title: //h1[@class='cN-headingPage']
3author: //h3[@class='authorName']
4date: //dd[@class='updated dtstamp']
5
6strip: //ul[@class='social sponsored cfix']
7strip: //div[contains(@class, 'hiddenVisually')]
8strip: //dd[@class='updated dtstamp']
9strip: //h3[@class='authorName']
10strip: //ul[@class='social cfix']
11strip: //div[contains(@id, 'adspot')]
12
13test_url: http://smh.drive.com.au/roads-and-traffic/driver-distraction-responsible-for-more-car-crashes-than-alcohol-20130503-2iyg0.html
diff --git a/inc/3rdparty/site_config/standard/smithsonianmag.com.txt b/inc/3rdparty/site_config/standard/smithsonianmag.com.txt
index 3e8fee95..fc479c2a 100755
--- a/inc/3rdparty/site_config/standard/smithsonianmag.com.txt
+++ b/inc/3rdparty/site_config/standard/smithsonianmag.com.txt
@@ -7,6 +7,9 @@ body://div[@id = 'article-body']
7# full content 7# full content
8single_page_link://td/li[@class = 'article-singlepage']/a 8single_page_link://td/li[@class = 'article-singlepage']/a
9 9
10# continue link
11single_page_link: //a[@id='continue-btn']
12
10# caption clean up 13# caption clean up
11wrap_in(i)://span[@class='articleImageCaptionwide'] 14wrap_in(i)://span[@class='articleImageCaptionwide']
12move_into (//span[@class='articleImageCaptionwide'])://div[@id = 'articleImage']/p 15move_into (//span[@class='articleImageCaptionwide'])://div[@id = 'articleImage']/p
@@ -17,4 +20,4 @@ strip://p[@id = 'articlePaginationWrapper']
17strip://ul[contains(@class, 'cat-breadcrumb')] 20strip://ul[contains(@class, 'cat-breadcrumb')]
18strip://div [@class= 'viewMorePhotos'] 21strip://div [@class= 'viewMorePhotos']
19 22
20test_url: http://www.smithsonianmag.com/history-archaeology/The-Goddess-Goes-Home.html \ No newline at end of file 23test_url: http://www.smithsonianmag.com/history-archaeology/The-Goddess-Goes-Home.html
diff --git a/inc/3rdparty/site_config/standard/snip.ly.txt b/inc/3rdparty/site_config/standard/snip.ly.txt
new file mode 100755
index 00000000..4e80fcae
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/snip.ly.txt
@@ -0,0 +1,3 @@
1single_page_link: //meta[@property="og:url"]/@content
2
3test_url: http://snip.ly/qa1R \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/soundcity.tv.txt b/inc/3rdparty/site_config/standard/soundcity.tv.txt
new file mode 100755
index 00000000..c26b9f95
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/soundcity.tv.txt
@@ -0,0 +1,3 @@
1strip_id_or_class: sharing
2
3test_url: http://soundcity.tv/feed/
diff --git a/inc/3rdparty/site_config/standard/spiegel.de.txt b/inc/3rdparty/site_config/standard/spiegel.de.txt
index 413e0155..7b7b1752 100755
--- a/inc/3rdparty/site_config/standard/spiegel.de.txt
+++ b/inc/3rdparty/site_config/standard/spiegel.de.txt
@@ -8,6 +8,9 @@
8# - Fixed single_page_link 8# - Fixed single_page_link
9# - Included intro text in single page view 9# - Included intro text in single page view
10# - Added body in default view 10# - Added body in default view
11# stesie@
12# - removed copyright box
13# - removed "print more" box
11 14
12# set body 15# set body
13tidy: no 16tidy: no
@@ -15,6 +18,7 @@ tidy: no
15body: //div[@id="spArticleContent"] 18body: //div[@id="spArticleContent"]
16# body in default view 19# body in default view
17body: //div[@id="spArticleSection"] 20body: //div[@id="spArticleSection"]
21body: //div[contains(@class, 'article-section')] | //div[@id='js-article-top-wide-asset'] | //p[contains(@class, 'article-intro')] | //div[contains(@class, 'js-module-box-image')]
18# body in "Fotostrecke" 22# body in "Fotostrecke"
19body: //div[@id="spBigaContent"] 23body: //div[@id="spBigaContent"]
20 24
@@ -25,6 +29,8 @@ strip: //div[@id="spArticleContent"]/h3
25# set date in "Fotostrecke" 29# set date in "Fotostrecke"
26date: //div[@id="spBigaDatum"] 30date: //div[@id="spBigaDatum"]
27 31
32# title in default view
33title: //h2[contains(@class, 'article-title')]
28#set title in single page view 34#set title in single page view
29title: //div[@id='spArticleContent']/h2 35title: //div[@id='spArticleContent']/h2
30# strip title 36# strip title
@@ -49,7 +55,7 @@ strip: //*/div[@class='spCredit']/following-sibling::p
49strip: //div[@class='spMInline'] 55strip: //div[@class='spMInline']
50 56
51# remove photogalleries and extras 57# remove photogalleries and extras
52strip: //div[@class='spPhotoGallery'] 58strip: //div[contains(@class, 'spPhotoGallery')]
53strip: //div[@class='spPhotoGallery']/following-sibling::br 59strip: //div[@class='spPhotoGallery']/following-sibling::br
54strip: //div[@class='spAssetAlignleft'] 60strip: //div[@class='spAssetAlignleft']
55strip: //div[contains(@class,'spAsset')] 61strip: //div[contains(@class,'spAsset')]
@@ -67,9 +73,24 @@ strip: //div[@id='spBigaLatestEntries']
67strip: //div[contains(@class, 'spBigaNavi')] 73strip: //div[contains(@class, 'spBigaNavi')]
68strip: //div[@class='spDottedLine'] 74strip: //div[@class='spDottedLine']
69 75
76strip: //div[@class='asset-box article-print-more']
77strip: //div[@class='article-copyright']
78strip: //span[@class='image-buttons']
79
70# Use link to print article for single page view 80# Use link to print article for single page view
71single_page_link: //a[contains(@href, '-druck')] 81single_page_link: //a[contains(@href, '-druck')]
82if_page_contains: //div[contains(@class, 'multi-pager-control')]
83
84# Clean up title in print view
85find_string: <title>Druckversion -
86replace_string: <title>
72 87
73# use next link in "Fotostrecke" 88# use next link in "Fotostrecke"
74next_page_link: //a[@class='spBigaControlForw'] 89next_page_link: //a[@class='spBigaControlForw']
75test_url: http://www.spiegel.de/politik/deutschland/0,1518,787602,00.html \ No newline at end of file 90test_url: http://www.spiegel.de/politik/deutschland/0,1518,787602,00.html
91
92# regular article
93test_url: http://www.spiegel.de/wirtschaft/soziales/griechenland-was-den-griechischen-buergern-nun-droht-a-1042682.html
94
95# multipage article
96test_url: http://www.spiegel.de/spiegel/a-710880.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/srf.ch.txt b/inc/3rdparty/site_config/standard/srf.ch.txt
new file mode 100755
index 00000000..d07a9050
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/srf.ch.txt
@@ -0,0 +1,24 @@
1# Author: cirnod@gmail.com
2
3tidy: no
4prune: no
5
6body: //div[@id="article-content"]/p | //div[@class="main-article-content clearfix"]
7
8# General Cleanup
9strip_id_or_class: offscreen
10strip_id_or_class: video-description
11strip_id_or_class: v2 big-video
12strip_id_or_class: module smb freetext
13strip_id_or_class: asset span3
14strip_id_or_class: module smb related-links
15
16# fix image-galleries
17strip_id_or_class: module lightbox-gallery image hide
18replace_string(width="624"): width="100%"
19replace_string(height="468"): height="%"
20
21# Try yourself
22test_url: http://www.srf.ch/news/wirtschaft/weltbank-korrigiert-konjunktur-erwartungen-nach-unten
23test_url: http://www.srf.ch/news/wirtschaft/ural-statt-alpen-russische-touristen-bleiben-zuhause
24test_url: http://www.srf.ch/news/international/zwei-schweizer-bei-blutigem-attentat-in-mali-verletzt \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sueddeutsche.de.txt b/inc/3rdparty/site_config/standard/sueddeutsche.de.txt
index 74b8d451..26e05605 100755
--- a/inc/3rdparty/site_config/standard/sueddeutsche.de.txt
+++ b/inc/3rdparty/site_config/standard/sueddeutsche.de.txt
@@ -1,6 +1,6 @@
1# 2012-12-04: complete rewrite after Süddeutsche.de relaunch - carlo@... 1# 2012-12-04: complete rewrite after Süddeutsche.de relaunch - carlo@...
2 2
3single_page_link: //a[ contains( @href, "/2.220/" ) ] 3single_page_link: //li[@id="article-sidebar-action-print"]/@data-clickurl
4 4
5body: //article[@id="sitecontent"]/section[@class="body"] 5body: //article[@id="sitecontent"]/section[@class="body"]
6author: //address[@class="author"] 6author: //address[@class="author"]
diff --git a/inc/3rdparty/site_config/standard/sunshinecoastdaily.com.au.txt b/inc/3rdparty/site_config/standard/sunshinecoastdaily.com.au.txt
new file mode 100755
index 00000000..bf5e9189
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/sunshinecoastdaily.com.au.txt
@@ -0,0 +1,10 @@
1body: //section//article//p
2
3strip: //aside
4strip: //div[@class='margin-top-15']
5strip: //p[@class='tags']
6
7author: //span[@class='byline']//ul[@class='piped']//li[1]
8date: //span[@class='byline']//ul[@class='piped']//li[2]
9
10parser: html5lib
diff --git a/inc/3rdparty/site_config/standard/sz.de.txt b/inc/3rdparty/site_config/standard/sz.de.txt
index f67637d2..f194271f 100755
--- a/inc/3rdparty/site_config/standard/sz.de.txt
+++ b/inc/3rdparty/site_config/standard/sz.de.txt
@@ -1,6 +1,6 @@
1# 2012-12-04: complete rewrite after Süddeutsche.de relaunch - carlo@... 1# 2012-12-04: complete rewrite after Süddeutsche.de relaunch - carlo@...
2 2
3single_page_link: //a[ contains( @href, "/2.220/" ) ] 3single_page_link: //li[@id="article-sidebar-action-print"]/@data-clickurl
4 4
5body: //article[@id="sitecontent"]/section[@class="body"] 5body: //article[@id="sitecontent"]/section[@class="body"]
6author: //address[@class="author"] 6author: //address[@class="author"]
diff --git a/inc/3rdparty/site_config/standard/tagesanzeiger.ch.txt b/inc/3rdparty/site_config/standard/tagesanzeiger.ch.txt
new file mode 100755
index 00000000..45c5cd02
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/tagesanzeiger.ch.txt
@@ -0,0 +1,14 @@
1# Author: cirnod@gmail.com
2
3tidy: no
4prune: no
5
6body: //div[@id="article"]/h3 | //*[@id="mainContent"]
7
8# General Cleanup
9#strip_id_or_class: info_panel
10
11
12# Try yourself
13test_url: http://www.tagesanzeiger.ch/zuerich/stadt/Nach-spektakulaerer-Abseilaktion-verhaftet/story/18039895
14test_url: http://www.tagesanzeiger.ch/ausland/naher-osten-und-afrika/IS-zerstoert-auch-das-antike-Hatra/story/19865699
diff --git a/inc/3rdparty/site_config/standard/tagesschau.de.txt b/inc/3rdparty/site_config/standard/tagesschau.de.txt
index be76cd05..ba3b1d3b 100755
--- a/inc/3rdparty/site_config/standard/tagesschau.de.txt
+++ b/inc/3rdparty/site_config/standard/tagesschau.de.txt
@@ -1,23 +1,14 @@
1title://h1[1] 1body: //div[contains(@class, 'sectionArticle')]//div[contains(concat(' ',normalize-space(@class),' '),' box ')]
2 2
3author: substring-after(//em, 'Von ') 3strip_id_or_class: infokasten
4author:string('tagesschau.de') 4strip_id_or_class: teaserImTeaser
5strip_id_or_class: Comments
6strip_id_or_class: mediaInfo
7strip: //div[contains(@class, 'mediaCon')]//iframe
5 8
6date:substring-after(//div[@class='standDatum'], 'Stand: ') 9prune: no
7 10
8body://div[contains(@class, 'article')] | //div[contains(@class, 'centerCol')] 11test_url: http://www.tagesschau.de/ausland/snowden-dateien-entschluesselung-101.html
12test_contains: Snowden hatte zunächst für
9 13
10strip://h1[1] 14test_url: http://www.tagesschau.de/xml/rss2
11strip: //div[contains(@class, 'directLinks')]
12strip: //div[contains(@class, 'zitatBox')]
13strip: //div[contains(@class, 'teaserBox metaBlock')]
14strip: //*[contains(@class, 'inv')]
15strip: //span[@class='imgSubline']
16strip: //*[contains(@class, 'topline')][1]
17strip: //div[@id='rightCol'][1]
18strip: //div[@id="footer"][1]
19strip: //div[@class="fPlayer"]
20strip: //div[@id='seitenanfang']
21strip: //div[@class='standDatum']
22strip: //em
23test_url: http://www.tagesschau.de/ausland/wahlkampffrankreich102.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/taz.de.txt b/inc/3rdparty/site_config/standard/taz.de.txt
index cf853662..a3368568 100755
--- a/inc/3rdparty/site_config/standard/taz.de.txt
+++ b/inc/3rdparty/site_config/standard/taz.de.txt
@@ -1,8 +1,9 @@
1date: //div[@class='secthead'] 1date: //div[@class='secthead']
2body: //div[@class='sectbody'] 2body: (//div[@class='sectbody'])[1]
3title: concat(//div[@class='sectbody']/h4,': ',//div[@class='sectbody']/h1) 3title: concat(//div[@class='sectbody']/h4,': ',//div[@class='sectbody']/h1)
4author: //span[@class='author'] 4author: //span[@class='author']
5strip: //p[@class='caption'] 5strip: //p[@class='caption']
6strip_id_or_class: ad_bin
6strip_id_or_class: rack 7strip_id_or_class: rack
7 8
8test_url: http://www.taz.de/Protestbewegung-Occupy/!80188/ \ No newline at end of file 9test_url: http://www.taz.de/Protestbewegung-Occupy/!80188/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/theatlantic.com.txt b/inc/3rdparty/site_config/standard/theatlantic.com.txt
index 3fc5611b..36864197 100755
--- a/inc/3rdparty/site_config/standard/theatlantic.com.txt
+++ b/inc/3rdparty/site_config/standard/theatlantic.com.txt
@@ -1,5 +1,6 @@
1title: //div[contains(@class, 'articleHead')]//h1 1title: //div[contains(@class, 'articleHead')]//h1
2 2
3body: //div[@itemprop='articleBody']
3body: //div[@class='articleText'] 4body: //div[@class='articleText']
4body: //div[@class='articleContent'] 5body: //div[@class='articleContent']
5body: //div[@id='article'] 6body: //div[@id='article']
@@ -13,10 +14,14 @@ strip: //p[contains(., 'This article available online at:')]
13strip: //p[contains(., 'This article available online at:')]/following::* 14strip: //p[contains(., 'This article available online at:')]/following::*
14strip: //div[@class='earthbox'] 15strip: //div[@class='earthbox']
15 16
16single_page_link: //article//a[contains(@class, 'print')] 17single_page_link: //div[contains(@class, 'article-tools')]//a[contains(@class, 'print')]
17 18
18native_ad_clue: //meta[@property="og:url" and contains(@content, '/sponsored/')] 19native_ad_clue: //meta[@property="og:url" and contains(@content, '/sponsored/')]
19 20
21#multi-page article
22test_url: http://www.theatlantic.com/magazine/archive/2014/12/the-real-roots-of-midlife-crisis/382235/
23test_contains: The curve tends to evince itself
24
20test_url: http://www.theatlantic.com/technology/archive/2011/04/want-to-see-how-crazy-a-bot-run-market-can-be/237773/ 25test_url: http://www.theatlantic.com/technology/archive/2011/04/want-to-see-how-crazy-a-bot-run-market-can-be/237773/
21test_url: http://www.theatlantic.com/magazine/archive/2007/11/the-autumn-of-the-multitaskers/6342/ 26test_url: http://www.theatlantic.com/magazine/archive/2007/11/the-autumn-of-the-multitaskers/6342/
22test_url: http://www.theatlantic.com/entertainment/archive/2012/04/30-rock-live-a-funny-reminder-of-why-sitcoms-arent-shot-live-anymore/256447/ \ No newline at end of file 27test_url: http://www.theatlantic.com/entertainment/archive/2012/04/30-rock-live-a-funny-reminder-of-why-sitcoms-arent-shot-live-anymore/256447/
diff --git a/inc/3rdparty/site_config/standard/theaustralian.com.au.txt b/inc/3rdparty/site_config/standard/theaustralian.com.au.txt
new file mode 100755
index 00000000..1245efca
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/theaustralian.com.au.txt
@@ -0,0 +1,6 @@
1body: //div[contains(@class, 'story-body')]
2author: //cite[contains(@class, 'author')]
3date: //span[@class='datestamp']
4
5strip: //div[@class='story-info']
6
diff --git a/inc/3rdparty/site_config/standard/thebostonchannel.com.txt b/inc/3rdparty/site_config/standard/thebostonchannel.com.txt
index b74442de..808876da 100755
--- a/inc/3rdparty/site_config/standard/thebostonchannel.com.txt
+++ b/inc/3rdparty/site_config/standard/thebostonchannel.com.txt
@@ -1,7 +1,7 @@
1title: //meta[@name='og:title']/@content 1title: //meta[@name='og:title']/@content
2date: //meta[@name='created']/@content 2date: //meta[@name='created']/@content
3body: //div[@class="StoryBody" or @class="storyTeaser"] 3body: //div[contains(@class, "article-body")]
4 4
5replace_string(<p></p>): <br /><br /> 5replace_string(<p></p>): <br /><br />
6 6
7test_url: http://www.thebostonchannel.com/slideshow/news/28210648/detail.html \ No newline at end of file 7test_url: http://www.wcvb.com/news/2-teens-arrested-in-fatal-dorchester-shooting-of-16yearold-boy/33564886
diff --git a/inc/3rdparty/site_config/standard/theguardian.com.txt b/inc/3rdparty/site_config/standard/theguardian.com.txt
index 88e2ecf4..c8b70e6f 100755
--- a/inc/3rdparty/site_config/standard/theguardian.com.txt
+++ b/inc/3rdparty/site_config/standard/theguardian.com.txt
@@ -1,5 +1,5 @@
1title: //div[@id='main-article-info']//h1 1title: //div[@id='main-article-info']//h1
2body: //div[@id='article-wrapper'] 2body: //figure[contains(@itemprop, "associatedMedia")] | //div[contains(@itemprop, "articleBody")]
3date: //li[@class='publication']//time[@pubdate] | //li[@class='publication']//data[@pubdate] 3date: //li[@class='publication']//time[@pubdate] | //li[@class='publication']//data[@pubdate]
4strip: //div[contains(@class, 'email-subscription')] 4strip: //div[contains(@class, 'email-subscription')]
5strip: //div[contains(@class, 'kindleWidget')] 5strip: //div[contains(@class, 'kindleWidget')]
@@ -11,6 +11,8 @@ native_ad_clue: //meta[@property="video:tag" and contains(@content, "Partner zon
11prune: no 11prune: no
12tidy: no 12tidy: no
13 13
14strip_id_or_class: -expand-
15
14test_url: http://www.theguardian.com/world/2013/oct/04/nsa-gchq-attack-tor-network-encryption 16test_url: http://www.theguardian.com/world/2013/oct/04/nsa-gchq-attack-tor-network-encryption
15test_contains: The National Security Agency has made repeated attempts to develop 17test_contains: The National Security Agency has made repeated attempts to develop
16test_contains: The agency did not directly address those questions, instead providing a statement. 18test_contains: The agency did not directly address those questions, instead providing a statement.
diff --git a/inc/3rdparty/site_config/standard/theregister.co.uk.txt b/inc/3rdparty/site_config/standard/theregister.co.uk.txt
index 5d30230d..70d3d437 100755
--- a/inc/3rdparty/site_config/standard/theregister.co.uk.txt
+++ b/inc/3rdparty/site_config/standard/theregister.co.uk.txt
@@ -1,8 +1,9 @@
1# Updated 25-Jan-2014 1single_page_link: //link[contains(@href, 'm.theregister')]
2single_page_link: //a[contains(@href, '/Print/')] 2if_page_contains: //div[@id='nextpage']
3strip: //div[@class='wptl btm']
4body: //div[contains(@class,'article_head')]//h2 | //div[@id='body']
3 5
4title: //div[@id="article"]/h2 6#multipage
5author: //p[@class="byline"]/a 7test_url: http://www.theregister.co.uk/2015/07/06/geeks_guide_spaceguard_center/
6date: //p[@class="dateline"]/a[last()] 8#singlepage
7 9test_url: http://www.theregister.co.uk/2015/07/06/us_japan_massive_robots_in_the_ring/
8test_url: http://www.theregister.co.uk/2014/01/24/thirty_years_of_the_apple_macintosh_part_2/
diff --git a/inc/3rdparty/site_config/standard/theverge.com.txt b/inc/3rdparty/site_config/standard/theverge.com.txt
index 78f8654a..cee50c9b 100755
--- a/inc/3rdparty/site_config/standard/theverge.com.txt
+++ b/inc/3rdparty/site_config/standard/theverge.com.txt
@@ -33,6 +33,8 @@ strip_id_or_class: m-ad
33strip_id_or_class: social-sharing 33strip_id_or_class: social-sharing
34strip_id_or_class: m-video-entry__excerpt 34strip_id_or_class: m-video-entry__excerpt
35strip_id_or_class: hidden 35strip_id_or_class: hidden
36strip_id_or_class: m-article__follow-bar
37strip_id_or_class: m-article__share-buttons
36 38
37replace_string(<noscript>): <div> 39replace_string(<noscript>): <div>
38replace_string(</noscript>): </div> 40replace_string(</noscript>): </div>
diff --git a/inc/3rdparty/site_config/standard/titanic-magazin.de.txt b/inc/3rdparty/site_config/standard/titanic-magazin.de.txt
new file mode 100755
index 00000000..70108e36
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/titanic-magazin.de.txt
@@ -0,0 +1,8 @@
1body: //div[contains(@class, 'tt_news-bodytext')]
2
3# cut html short
4find_string: <!--TYPO3SEARCH_end-->
5replace_string: </div></body></html>
6
7test_url: http://www.titanic-magazin.de/ich.war.bei.der.waffen.rss
8test_url: http://www.titanic-magazin.de/news/wenig-bekannte-fakten-ueber-2014-6986/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tofugu.com.txt b/inc/3rdparty/site_config/standard/tofugu.com.txt
new file mode 100644
index 00000000..5ac9d6a0
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/tofugu.com.txt
@@ -0,0 +1,3 @@
1body://div[@class='entry-content']
2
3test_url: http://www.tofugu.com/2015/07/20/interview-with-toriena-japanese-chiptune/
diff --git a/inc/3rdparty/site_config/standard/truongtx.me.txt b/inc/3rdparty/site_config/standard/truongtx.me.txt
new file mode 100755
index 00000000..6b10adce
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/truongtx.me.txt
@@ -0,0 +1,9 @@
1prune: false
2tidy: false
3
4body: //div[@class='col-md-9']
5author: //meta[@name='author']
6date: //i[@class='fa fa-calendar']/../span
7title: //div[@class='page-header']/h1
8
9test_url: https://truongtx.me/2014/04/20/emacs-javascript-completion-and-refactoring/
diff --git a/inc/3rdparty/site_config/standard/utdailybeacon.com.txt b/inc/3rdparty/site_config/standard/utdailybeacon.com.txt
index d37911bc..c4593d55 100755
--- a/inc/3rdparty/site_config/standard/utdailybeacon.com.txt
+++ b/inc/3rdparty/site_config/standard/utdailybeacon.com.txt
@@ -1,5 +1,2 @@
1title: //h1 1body: //div[@id='blox-story-text']
2author: //*[@class='byline'] 2test_url: http://www.utdailybeacon.com/news/article_ccf6d024-0f15-11e5-ae29-9f63598deb81.html
3date: substring-after(//*[@class='pubdatetime'], 'Published: ')
4body: //*[@class='body-block']
5test_url: http://utdailybeacon.com/news/2012/oct/8/energy-forum-continues/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/volkskrant.nl.txt b/inc/3rdparty/site_config/standard/volkskrant.nl.txt
new file mode 100755
index 00000000..c2770349
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/volkskrant.nl.txt
@@ -0,0 +1,15 @@
1#bypass cookie check
2single_page_link: //a[contains(@href, '/cookiewall/accept.do?')]
3
4title: //h1[@itemprop="headline"]
5body: //figure[contains(@class, 'article__top-image')] | //div[@itemprop="articleBody"]
6
7strip: //div[contains(@class, 'media-container') and contains(@class, 'pull-right')]
8
9tidy: no
10prune: no
11
12test_url: http://www.volkskrant.nl/sport/dossier-wereldvoetbalbond-fifa-wankelt~a4042695/
13test_contains: De belangrijkste spil in het
14
15test_url: http://www.volkskrant.nl/nieuws/rss.xml \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/warnerbros.fr.txt b/inc/3rdparty/site_config/standard/warnerbros.fr.txt
index 21f56352..6215b727 100755
--- a/inc/3rdparty/site_config/standard/warnerbros.fr.txt
+++ b/inc/3rdparty/site_config/standard/warnerbros.fr.txt
@@ -1,3 +1,2 @@
1title: //h3 1body: //div[@class="article-body"]
2body: //div[@class="content_wysiwyg"] 2test_url: https://www.warnerbros.fr/articles/magic-mike-xxl-adam-rodriguez-portrait
3test_url: http://www.warnerbros.fr/game-of-thrones-un-junket-vu-de-l-interieur-268.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/washingtonpost.com.txt b/inc/3rdparty/site_config/standard/washingtonpost.com.txt
index 0aa9f1d8..c29af00f 100755
--- a/inc/3rdparty/site_config/standard/washingtonpost.com.txt
+++ b/inc/3rdparty/site_config/standard/washingtonpost.com.txt
@@ -5,9 +5,14 @@ body: //div[contains(@class, "article_body")]
5body: //div[@id='print_facet']//div[@id='body'] 5body: //div[@id='print_facet']//div[@id='body']
6 6
7author://meta[@name='DC.creator']/@content 7author://meta[@name='DC.creator']/@content
8author://span[@class="pb-byline"]
9author://h3[@property='dc.creator']//a[@rel='author']
8title://meta[@name='title']/@content 10title://meta[@name='title']/@content
9date://div[contains(@class,'byline')]//span[contains(@class,'published')]/@title 11date://div[contains(@class,'byline')]//span[contains(@class,'published')]/@title
10date://meta[@name="DC.date.issued"]/@content 12date://meta[@name="DC.date.issued"]/@content
13date://span[contains(@class,"pb-timestamp")]
14date://meta[@name="eomportal-lastUpdate"]/@content
15
11strip://div[@class="relative primary-slot padding-top img-border gallery-container photo-wrapper"] 16strip://div[@class="relative primary-slot padding-top img-border gallery-container photo-wrapper"]
12strip://div[@id="wp-column six end"] 17strip://div[@id="wp-column six end"]
13strip://div[contains(@class,'hidden')] 18strip://div[contains(@class,'hidden')]
@@ -23,6 +28,7 @@ strip_id_or_class: module
23 28
24# Change gJQAwdJG4U_story.html to gJQAwdJG4U_print.html 29# Change gJQAwdJG4U_story.html to gJQAwdJG4U_print.html
25single_page_link: concat(substring-before(//link[@rel="canonical"]/@href, "_story.html"), "_print.html") 30single_page_link: concat(substring-before(//link[@rel="canonical"]/@href, "_story.html"), "_print.html")
31if_page_contains: //link[@rel="canonical" and contains(@href, '_story.html')]
26 32
27# [OLD] Change gJQAwdJG4U_story.html to gJQAwdJG4U_story_print.html 33# [OLD] Change gJQAwdJG4U_story.html to gJQAwdJG4U_story_print.html
28#single_page_link: concat(substring-before(//link[@rel="canonical"]/@href, "_story.html"), "_story_print.html") 34#single_page_link: concat(substring-before(//link[@rel="canonical"]/@href, "_story.html"), "_story_print.html")
diff --git a/inc/3rdparty/site_config/standard/watoday.com.au.txt b/inc/3rdparty/site_config/standard/watoday.com.au.txt
new file mode 100755
index 00000000..4302ac5e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/watoday.com.au.txt
@@ -0,0 +1,7 @@
1author: //h3[@class="authorName"]
2date: //dd[@class='updated dtstamp']//time
3
4strip: //div[contains(@class, "adspot")]
5strip: //noscript
6strip: //p//small
7
diff --git a/inc/3rdparty/site_config/standard/weeklytimesnow.com.au.txt b/inc/3rdparty/site_config/standard/weeklytimesnow.com.au.txt
new file mode 100755
index 00000000..a79871f3
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/weeklytimesnow.com.au.txt
@@ -0,0 +1,4 @@
1body: //div[@class='main-col' or @class='article-image-wide']
2title: //h1[@class='article-title']
3author: substring-before(//span[@class='author'], "|")
4date: //span[@class='date']
diff --git a/inc/3rdparty/site_config/standard/westernadvocate.com.au.txt b/inc/3rdparty/site_config/standard/westernadvocate.com.au.txt
new file mode 100755
index 00000000..eb00f776
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/westernadvocate.com.au.txt
@@ -0,0 +1,11 @@
1title: //header[contains(@class, "news-article-title")]//h1
2date: //div[@class="news-article-byline"]//time
3author: //h2[@class="news-article-author"]//cite
4
5# Turns out that westernadvocate is doing funky things with the slide show images. :<
6# body: //ul[@class="slides"]//img | //div[contains(@class, "news-article-body")]
7body: //div[contains(@class, "news-article-body")]
8
9strip: //div[contains(@class, "flexslider")]
10
11test_url: http://www.westernadvocate.com.au/story/1532050/roos-accept-ziebell-ban-commentators-do-not/
diff --git a/inc/3rdparty/site_config/standard/wsj.com.txt b/inc/3rdparty/site_config/standard/wsj.com.txt
new file mode 100755
index 00000000..467c39c2
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/wsj.com.txt
@@ -0,0 +1,29 @@
1title: //meta[@property="og:title"]/@content
2body: //div[@id='wsj-article-wrap']
3# is this still used?
4body: //div[@id='article_story_body']
5
6author: //h3[@class='byline']/a
7# for slide show content
8body: //ul[@id='imageSlide']//li[@class='firstSlide']//img | (//div[@class='txt_body']//p)[1]
9date: //li[@class='dateStamp']/small
10
11strip_id_or_class: insetFullBracket
12strip_id_or_class: insettipBox
13#strip_id_or_class: legacyInset
14strip_id_or_class: recipeACShopAndBuyText
15
16strip: //div[contains(@class, 'insetContent')]//cite
17strip: //*[contains(@style, 'visibility: hidden;')]
18strip: //div[contains(@class, 'insetContent') and not(contains(@class, 'image'))]
19strip: //div[contains(@class, 'carousel')]
20
21prune: no
22tidy: no
23
24test_url: http://www.wsj.com/articles/airasia-flight-8501-tail-recovered-1420878809
25test_contains: Saturday evening that the black boxes
26test_url: http://www.wsj.com/news/articles/SB10001424052702304626304579509100018004342
27test_url: http://www.wsj.com/article/SB10001424052970203363504577185322849515102.html
28# slide show
29test_url: http://www.wsj.com/article/SB10001424052970204791104577110550376458164.html
diff --git a/inc/3rdparty/site_config/standard/yourerie.com.txt b/inc/3rdparty/site_config/standard/yourerie.com.txt
index b46b09e8..46ee5ba1 100755
--- a/inc/3rdparty/site_config/standard/yourerie.com.txt
+++ b/inc/3rdparty/site_config/standard/yourerie.com.txt
@@ -1,2 +1,2 @@
1body: //div[@class="nxFullTextData"] 1body: //div[@itemprop="articleBody"]
2test_url: http://yourerie.com/fulltext?nxd_id=306552 2test_url: http://www.yourerie.com/news/news-article/d/story/cd-release-party-at-pi-downs/22898/G_gFL3mSQkWH_DW2wLuMOA
diff --git a/inc/3rdparty/site_config/standard/zeit.de.txt b/inc/3rdparty/site_config/standard/zeit.de.txt
index 9815d478..4e008946 100755
--- a/inc/3rdparty/site_config/standard/zeit.de.txt
+++ b/inc/3rdparty/site_config/standard/zeit.de.txt
@@ -1,3 +1,4 @@
1# 2015.07.08 [Marvin Dickhaus] fixed single_page_link
1# 2013.10.30 [rezor92] fixed single_page_link 2# 2013.10.30 [rezor92] fixed single_page_link
2# 2012-12-23 [carlo@...] fixed half-assed headlines in articles, removed inline author profiles, adjusted picture captions 3# 2012-12-23 [carlo@...] fixed half-assed headlines in articles, removed inline author profiles, adjusted picture captions
3# 2012-03-17 [dkless@...] Cut metadata parts in the beginning and the ends of the content block; copyright entries for pictures removed; Author fixed, not sure if old entries still valid (I left them); Weird problems with some pages addressed (see last section for removing hidden section) 4# 2012-03-17 [dkless@...] Cut metadata parts in the beginning and the ends of the content block; copyright entries for pictures removed; Author fixed, not sure if old entries still valid (I left them); Weird problems with some pages addressed (see last section for removing hidden section)
@@ -5,8 +6,7 @@
5# 2011-08-23 [carlo@...] changed single page link to use print version: page works better, less ambiguity. Related cleanups and simplifications. 6# 2011-08-23 [carlo@...] changed single page link to use print version: page works better, less ambiguity. Related cleanups and simplifications.
6# 2011-08-20 [carlo@...] added author, fixed date 7# 2011-08-20 [carlo@...] added author, fixed date
7 8
8 9single_page_link: //a[contains(@href, 'komplettansicht')]
9single_page_link: //a[@title='Auf einer Seite']
10tidy: no 10tidy: no
11 11
12title: //title 12title: //title
@@ -24,6 +24,8 @@ strip: //p[@class="copyright"]
24strip: //div[@class="copyright"] 24strip: //div[@class="copyright"]
25#Removes pagination links at the end 25#Removes pagination links at the end
26strip: //div[@class="pagination"] 26strip: //div[@class="pagination"]
27#Removes link to main page at the bottom of some articles (Zur Startseite)
28strip: //a[@href='http://www.zeit.de']
27 29
28# Fix picture captions 30# Fix picture captions
29wrap_in(small): //p[@class="caption"]/text() 31wrap_in(small): //p[@class="caption"]/text()
@@ -43,3 +45,4 @@ strip_id_or_class:"pagination"
43 45
44footnotes: no 46footnotes: no
45test_url: http://www.zeit.de/kultur/film/2012-12/Kurzfilmtag 47test_url: http://www.zeit.de/kultur/film/2012-12/Kurzfilmtag
48test_url: http://www.zeit.de/kultur/2015-07/kapitalismuskritik-selbstberuhigung-armin-nassehi