aboutsummaryrefslogtreecommitdiffhomepage
path: root/inc
diff options
context:
space:
mode:
Diffstat (limited to 'inc')
-rw-r--r--inc/3rdparty/FlattrItem.class.php36
-rw-r--r--inc/3rdparty/Session.class.php34
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/24ways.org.txt8
-rwxr-xr-xinc/3rdparty/site_config/standard/36kr.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/37signals.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/3quarksdaily.com.txt16
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/3voor12.vpro.nl.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/43folders.com.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/500px.com.txt50
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/512pixels.net.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/5by5.tv.txt14
-rwxr-xr-xinc/3rdparty/site_config/standard/7newsbelize.com.txt7
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/944.com.txt14
-rwxr-xr-xinc/3rdparty/site_config/standard/README.md38
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/aachener-nachrichten.de.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/aachener-zeitung.de.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/abc.es.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/abc.net.au.txt26
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/abcnews.go.com.txt52
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/accesstoinsight.org.txt16
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/acidcow.com.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/acquia.com.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/acroswing.fr.txt6
-rwxr-xr-xinc/3rdparty/site_config/standard/aftenposten.no.txt5
-rwxr-xr-xinc/3rdparty/site_config/standard/aftonbladet.se.txt13
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/aht.seriouseats.com.txt26
-rwxr-xr-xinc/3rdparty/site_config/standard/albayan.ae.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/alex.mullr.net.txt0
-rwxr-xr-xinc/3rdparty/site_config/standard/alexduner.com.txt4
-rwxr-xr-xinc/3rdparty/site_config/standard/alexduner.squarespace.com.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/alistapart.com.txt20
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/aljazeera.com.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/allrecipes.com.txt24
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/allthingsd.com.txt21
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/allyou.com.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/alphabeta.argaam.com.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/alriyadh.com.txt16
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/alseraj.net.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/alt1040.com.txt0
-rwxr-xr-xinc/3rdparty/site_config/standard/alternet.org.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/altfoto.com.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/alumni.stanford.edu.txt16
-rwxr-xr-xinc/3rdparty/site_config/standard/amandala.com.bz.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/amazon.com.txt36
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/americandrink.net.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/americascup.com.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/americastestkitchenfeed.com.txt6
-rwxr-xr-xinc/3rdparty/site_config/standard/amptoons.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/anandtech.com.txt20
-rwxr-xr-xinc/3rdparty/site_config/standard/androidpolice.com.txt5
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/andyrutledge.com.txt16
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/annatravelling.wordpress.com.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/applature.com.txt34
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/apple.com.txt12
-rwxr-xr-xinc/3rdparty/site_config/standard/appledaily.com.tw.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/appleinsider.com.txt34
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/appleweblog.com.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/archdaily.com.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/archiveofourown.org.txt38
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/arstechnica.com.txt33
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/articles.boston.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/articles.courant.com.txt18
-rwxr-xr-xinc/3rdparty/site_config/standard/articles.washingtonpost.com.txt11
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/asahi.com.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/ascarter.net.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/astronews.com.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/asymco.com.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/autoblog.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/avclub.com.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/baltimoresun.com.txt20
-rwxr-xr-xinc/3rdparty/site_config/standard/baseballprospectus.com.txt13
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/basicthinking.de.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/bb.is.txt22
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/bbc.co.uk.txt74
-rwxr-xr-xinc/3rdparty/site_config/standard/bbcgoodfood.com.txt16
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/benoitmaison.org.txt28
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/berlingske.dk.txt2
-rwxr-xr-xinc/3rdparty/site_config/standard/bernama.com.txt5
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/betabeat.com.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/betanews.com.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/biography.com.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/bitelia.com.txt0
-rwxr-xr-xinc/3rdparty/site_config/standard/bizjournals.com.txt13
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/bjango.com.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/blog.arsln.org.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/blog.asmartbear.com.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/blog.cloudflare.com.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/blog.fefe.de.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/blog.instagram.com.txt18
-rwxr-xr-xinc/3rdparty/site_config/standard/blog.instapaper.com.txt9
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/blog.jaysalvat.com.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/blog.kaelig.fr.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/blog.naver.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/blog.pchome.net.txt20
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/blog.pinboard.in.txt8
-rwxr-xr-xinc/3rdparty/site_config/standard/blog.renren.com.txt11
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/blog.sina.com.cn.txt50
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/blog.spu.edu.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/blog.wells.ee.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/blogs.aljazeera.net.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/blogs.forbes.com.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/blogs.hbr.org.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/blogs.msdn.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/blogs.reuters.com.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/blogs.scientificamerican.com.txt28
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/blogs.smithsonianmag.com.txt26
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/blogs.technet.com.txt13
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/bluetouff.com.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/boagworld.com.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/boingboing.net.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/boldizsar.palotas.eu.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/book.douban.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/bookforum.com.txt34
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/borderhouseblog.com.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/bostonglobe.com.txt28
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/bostonreview.net.txt26
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/boundlessline.org.txt6
-rwxr-xr-xinc/3rdparty/site_config/standard/bowdoinorient.com.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/brainfacts.org.txt16
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/brandeins.de.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/brandingstrategyinsider.com.txt2
-rwxr-xr-xinc/3rdparty/site_config/standard/brasil.elpais.com.txt23
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/brettterpstra.com.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/brisbanetimes.com.au.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/brookings.edu.txt22
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/brooksreview.net.txt10
-rwxr-xr-xinc/3rdparty/site_config/standard/bt.no.txt12
-rwxr-xr-xinc/3rdparty/site_config/standard/buffed.de.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/buquad.com.txt12
-rwxr-xr-xinc/3rdparty/site_config/standard/business2community.com.txt5
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/businessinsider.com.txt28
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/businessnews.com.tn.txt22
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/businessweek.com.txt58
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/buzzfeed.com.txt26
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/bygonebureau.com.txt8
-rwxr-xr-xinc/3rdparty/site_config/standard/cable.co.uk.txt11
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/cardboardconnection.com.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/carpeaqua.com.txt8
-rwxr-xr-xinc/3rdparty/site_config/standard/cars.com.txt7
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/catb.org.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/cbc.ca.txt6
-rwxr-xr-xinc/3rdparty/site_config/standard/cbn.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/cbsnews.com.txt29
-rwxr-xr-xinc/3rdparty/site_config/standard/cedarrepublican.com.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/chareidi.org.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/chinamining.org.txt16
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/chomsky.info.txt8
-rwxr-xr-xinc/3rdparty/site_config/standard/chrisltd.com.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/christianitytoday.com.txt22
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/christianpf.com.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/christies.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/chrome.google.com.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/chronicle.com.txt30
-rwxr-xr-xinc/3rdparty/site_config/standard/ciaosamin.com.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/cicero.de.txt62
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/ciperchile.cl.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/cjr.org.txt10
-rwxr-xr-xinc/3rdparty/site_config/standard/classyllama.com.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/clientk.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/clubic.com.txt20
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/cmswire.com.txt8
-rwxr-xr-xinc/3rdparty/site_config/standard/cn.engadget.com.txt5
-rwxr-xr-xinc/3rdparty/site_config/standard/cn.reuters.com.txt5
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/cnet.com.txt30
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/cnn.com.txt42
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/cnnsi.com.txt50
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/code.activestate.com.txt18
-rwxr-xr-xinc/3rdparty/site_config/standard/code.fivefilters.org.txt1
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/code.google.com.txt8
-rwxr-xr-xinc/3rdparty/site_config/standard/codeproject.com.txt3
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/codinghorror.com.txt26
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/collegehumor.com.txt24
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/communities-dominate.blogs.com.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/community.service-now.com.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/computer.org.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/computerbase.de.txt34
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/computerworld.com.txt42
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/computerworld.dk.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/contemporist.com.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/conversaciones.nokia.com.txt12
-rwxr-xr-xinc/3rdparty/site_config/standard/cooper.com.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/core77.com.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/counterpunch.org.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/crazybutable.com.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/crimemagazine.com.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/crimethinc.com.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/crn.de.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/csmonitor.com.txt32
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/csnbayarea.com.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/csnphilly.com.txt42
-rwxr-xr-xinc/3rdparty/site_config/standard/css-tricks.com.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/cucharasonica.com.txt0
-rwxr-xr-xinc/3rdparty/site_config/standard/cw.com.tw.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/da.feedsportal.com.txt6
-rwxr-xr-xinc/3rdparty/site_config/standard/dagogtid.no.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/dailydot.com.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/dailykos.com.txt20
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/dailymail.co.uk.txt22
-rwxr-xr-xinc/3rdparty/site_config/standard/dailystar.com.lb.txt6
-rwxr-xr-xinc/3rdparty/site_config/standard/danleech.com.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/dansdata.com.txt6
-rwxr-xr-xinc/3rdparty/site_config/standard/dantri.com.vn.txt7
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/daringfireball.net.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/datanami.com.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/dcurt.is.txt14
-rwxr-xr-xinc/3rdparty/site_config/standard/defomicron.net.txt9
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/delong.typepad.com.txt4
-rwxr-xr-xinc/3rdparty/site_config/standard/democracynow.org.txt5
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/derstandard.at.txt22
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/designtagebuch.de.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/desitvforum.net.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/details.com.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/developers.facebook.com.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/devlinsangle.blogspot.co.at.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/dictionary.reference.com.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/diepresse.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/digiphoto.techbang.com.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/digital-photography-school.com.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/digitalspy.co.uk.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/dilbert.com.txt17
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/dinamalar.com.txt34
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/dn.se.txt54
-rwxr-xr-xinc/3rdparty/site_config/standard/dobreprogramy.pl.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/doctac.com.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/domusweb.it.txt38
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/dou.ua.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/douban.com.txt40
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/dpreview.com.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/dr.dk.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/dramasonline.com.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/drdobbs.com.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/drive2.ru.txt20
-rwxr-xr-xinc/3rdparty/site_config/standard/dropbox.com.txt1
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/drupal.org.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/dukebasketballreport.com.txt18
-rwxr-xr-xinc/3rdparty/site_config/standard/dushumashang.com.txt17
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/dvice.com.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/eamesinerudition.com.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/eandt.theiet.org.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/eastoftheweb.com.txt32
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/ebay.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/ecetia.com.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/econlog.econlib.org.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/economia.estadao.com.br.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/economist.com.txt16
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/edge-online.com.txt24
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/edge.org.txt8
-rwxr-xr-xinc/3rdparty/site_config/standard/edition.channel5belize.com.txt9
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/edition.cnn.com.txt25
-rwxr-xr-xinc/3rdparty/site_config/standard/eetimes.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/ekultura.hu.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/elance.com.txt2
-rwxr-xr-xinc/3rdparty/site_config/standard/elderscrollsonline.com.txt22
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/elektroniknet.de.txt50
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/elmalpensante.com.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/elpais.com.txt40
-rwxr-xr-xinc/3rdparty/site_config/standard/emaratalyoum.com.txt7
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/en.espnf1.com.txt16
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/engadget.com.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/engineering.tumblr.com.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/english.aljazeera.net.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/enikos.gr.txt16
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/entertainment.timesonline.co.uk.txt16
-rwxr-xr-xinc/3rdparty/site_config/standard/ericsuh.com.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/es.hu.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/escapistmagazine.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/espn.go.com.txt20
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/esquire.com.txt21
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/essentialpublicradio.org.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/etc.se.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/eternabuenosaires.com.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/eurogamer.net.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/evo.co.uk.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/expressen.se.txt19
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/extracine.com.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/f1actual.com.txt0
-rwxr-xr-xinc/3rdparty/site_config/standard/facebook.com.txt5
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/facta.co.jp.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/falter.at.txt32
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/fanfiction.net.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/fastcompany.com.txt30
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/faz.net.txt66
-rwxr-xr-xinc/3rdparty/site_config/standard/fertigung.de.txt23
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/fictionpress.com.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/ficwad.com.txt20
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/finance.yahoo.com.txt22
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/findtheswagger.tumblr.com.txt16
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/firstthings.com.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/fivechapters.com.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/fivefilters.org.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/fivethirtyeight.com.txt10
-rwxr-xr-xinc/3rdparty/site_config/standard/flyingmachinestudios.com.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/fm4.orf.at.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/fnal.gov.txt26
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/focus.de.txt34
-rwxr-xr-xinc/3rdparty/site_config/standard/folklore.org.txt4
-rwxr-xr-xinc/3rdparty/site_config/standard/food.com.txt11
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/fool.com.txt20
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/forbes.com.txt43
-rwxr-xr-xinc/3rdparty/site_config/standard/foreignaffairs.com.txt34
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/foreignpolicy.com.txt26
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/forsvaret.no.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/foxnews.com.txt16
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/freelancer.com.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/freytag-film.com.txt6
-rwxr-xr-xinc/3rdparty/site_config/standard/fria.nu.txt8
-rwxr-xr-xinc/3rdparty/site_config/standard/friatidningen.se.txt7
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/friendskorner.com.txt20
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/ft.com.txt6
-rwxr-xr-xinc/3rdparty/site_config/standard/ftchinese.com.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/ftd.de.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/fubiz.net.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/futurezone.at.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/gamasutra.com.txt36
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/gameblog.fr.txt18
-rwxr-xr-xinc/3rdparty/site_config/standard/gamechurch.com.txt10
-rwxr-xr-xinc/3rdparty/site_config/standard/gamer.no.txt11
-rwxr-xr-xinc/3rdparty/site_config/standard/gamereactor.no.txt11
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/garythink.com.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/gasteroprod.com.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/gatopardo.com.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/gawker.com.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/geeksofdoom.com.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/geenstijl.nl.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/getnews.jp.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/giantbomb.com.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/giga.de.txt36
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/gigaom.com.txt29
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/gihyo.jp.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/gist.github.com.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/givemesomethingtoread.com.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/gizmodo.co.uk.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/gizmodo.com.txt18
-rwxr-xr-xinc/3rdparty/site_config/standard/gizmodo.uol.com.br.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/gizmologia.com.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/gizmovil.com.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/global.txt22
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/globalissues.org.txt28
-rwxr-xr-xinc/3rdparty/site_config/standard/globoesporte.globo.com.txt25
-rwxr-xr-xinc/3rdparty/site_config/standard/gloswielkopolski.pl.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/goal.com.txt30
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/golem.de.txt48
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/good.is.txt6
-rwxr-xr-xinc/3rdparty/site_config/standard/goodfil.ms.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/gossip-tv.gr.txt26
-rwxr-xr-xinc/3rdparty/site_config/standard/goteborgsfria.se.txt7
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/gothamist.com.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/gotomanager.com.txt38
-rwxr-xr-xinc/3rdparty/site_config/standard/gov.ky.txt4
-rwxr-xr-xinc/3rdparty/site_config/standard/gp.se.txt11
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/gq.com.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/grantland.com.txt38
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/greatergreaterwashington.org.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/groups.drupal.org.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/gulfnews.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/guokr.com.txt42
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/haberler.com.txt6
-rwxr-xr-xinc/3rdparty/site_config/standard/habrahabr.ru.txt21
-rwxr-xr-xinc/3rdparty/site_config/standard/hackmake.org.txt7
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/halo.bungie.org.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/hammers.theoffside.com.txt10
-rwxr-xr-xinc/3rdparty/site_config/standard/handelsblatt.com.txt31
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/hanselman.com.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/hardware.fr.txt8
-rwxr-xr-xinc/3rdparty/site_config/standard/hardware.no.txt16
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/hbr.org.txt13
-rwxr-xr-xinc/3rdparty/site_config/standard/headrush.typepad.com.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/heise-online.mobi.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/heise.de.txt12
-rwxr-xr-xinc/3rdparty/site_config/standard/hemmings.com.txt9
-rwxr-xr-xinc/3rdparty/site_config/standard/heroturko.me.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/hespress.com.txt12
-rwxr-xr-xinc/3rdparty/site_config/standard/hiamag.com.txt3
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/highscalability.com.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/hiperpop.com.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/hiphopleeft.nl.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/historytoday.com.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/hmercer.com.txt6
-rwxr-xr-xinc/3rdparty/site_config/standard/hollywoodlife.com.txt22
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/hometheaterreview.com.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/hosted.ap.org.txt8
-rwxr-xr-xinc/3rdparty/site_config/standard/howtogeek.com.txt11
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/hs.fi.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/ht.ly.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/huffingtonpost.com.txt37
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/humantransit.org.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/hurriyet.com.tr.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/hvg.hu.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/hypebeast.com.txt18
-rwxr-xr-xinc/3rdparty/site_config/standard/icannabis.tumblr.com.txt9
-rwxr-xr-xinc/3rdparty/site_config/standard/idealog.co.nz.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/idlewords.com.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/igeneration.fr.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/ignoredbydinosaurs.com.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/ilounge.com.txt22
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/ilyabirman.ru.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/inc.com.txt40
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/independent.co.uk.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/indiatimes.com.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/inessential.com.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/info.abril.com.br.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/infoq.com.txt24
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/informador.com.mx.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/information.dk.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/informationarchitects.net.txt16
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/informationclearinghouse.info.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/informit.com.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/infoworld.com.txt20
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/infzm.com.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/inhabitat.com.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/instagr.am.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/interest.co.nz.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/iolanguage.com.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/ipadclub.nl.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/ipadplanet.nl.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/iphoneclub.nl.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/iphonehacks.com.txt16
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/iplaysoft.com.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/isource.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/itavisen.no.txt8
-rwxr-xr-xinc/3rdparty/site_config/standard/itmedia.co.jp.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/itstactical.com.txt20
-rwxr-xr-xinc/3rdparty/site_config/standard/itwire.com.txt5
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/itworld.com.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/izismile.com.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/jalopnik.com.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/jandan.net.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/jetzt.sueddeutsche.de.txt40
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/jjahnke.net.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/jobbank.gc.ca.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/joelonsoftware.com.txt38
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/jouire.com.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/joystiq.com.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/juedische-allgemeine.de.txt36
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/juppy.org.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/kachestvo.ru.txt2
-rwxr-xr-xinc/3rdparty/site_config/standard/kachiblog.com.txt7
-rwxr-xr-xinc/3rdparty/site_config/standard/kathimerini.gr.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/kenrockwell.com.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/kicker.de.txt38
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/kickstarter.com.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/kingarthurflour.com.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/kotaku.com.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/kottke.org.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/kumailplus.com.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/kumb.com.txt16
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/kwerfeldein.de.txt14
-rwxr-xr-xinc/3rdparty/site_config/standard/landetsfria.se.txt7
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/laphamsquarterly.org.txt24
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/laprensagrafica.com.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/laquadrature.net.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/lareviewofbooks.org.txt24
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/latimes.com.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/laughingsquid.com.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/leancrew.com.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/lefigaro.fr.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/lemonde.fr.txt31
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/lesnumeriques.com.txt16
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/letemps.ch.txt2
-rwxr-xr-xinc/3rdparty/site_config/standard/libcom.org.txt7
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/lifeandculture.fr.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/lifehacker.com.txt89
-rwxr-xr-xinc/3rdparty/site_config/standard/lifestyle.inquirer.net.txt7
-rwxr-xr-xinc/3rdparty/site_config/standard/lifeweek.com.cn.txt23
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/linkedin.com.txt0
-rwxr-xr-xinc/3rdparty/site_config/standard/livescience.com.txt20
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/longform.org.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/loopinsight.com.txt16
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/lostgarden.com.txt2
-rwxr-xr-xinc/3rdparty/site_config/standard/lovefm.com.txt6
-rwxr-xr-xinc/3rdparty/site_config/standard/lovetv.com.bz.txt9
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/lrb.co.uk.txt20
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/luminous-landscape.com.txt8
-rwxr-xr-xinc/3rdparty/site_config/standard/luxuo.com.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/m.bbc.co.uk.txt12
-rwxr-xr-xinc/3rdparty/site_config/standard/m.douban.com.txt13
-rwxr-xr-xinc/3rdparty/site_config/standard/m.vanityfair.com.txt11
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/mac4ever.com.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/macdrifter.com.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/macformat.techradar.com.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/macgeneration.com.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/macmagazine.com.br.txt38
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/macrumors.com.txt20
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/macstories.net.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/mactalk.com.au.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/mactechnews.de.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/macworld.com.txt46
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/mainichi.jp.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/mainpost.de.txt52
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/makeuseof.com.txt10
-rwxr-xr-xinc/3rdparty/site_config/standard/manager.co.th.txt26
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/marco.org.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/marksdailyapple.com.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/martinfowler.com.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/mashable.com.txt13
-rwxr-xr-xinc/3rdparty/site_config/standard/matt.might.net.txt5
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/mattcutts.com.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/mbl.is.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/medialens.org.txt4
-rwxr-xr-xinc/3rdparty/site_config/standard/medium.com.txt7
-rwxr-xr-xinc/3rdparty/site_config/standard/megamp3.eu.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/menshealth.com.txt30
-rwxr-xr-xinc/3rdparty/site_config/standard/metafilter.com.txt8
-rwxr-xr-xinc/3rdparty/site_config/standard/mforum.cari.com.my.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/mikeash.com.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/mikeindustries.com.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/minnesota.publicradio.org.txt16
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/minnpost.com.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/mirrorfootball.co.uk.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/mises.org.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/mlb.mlb.com.txt26
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/mlb.sbnation.com.txt24
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/mlssoccer.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/mmo-champion.com.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/mnn.com.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/mno.hu.txt24
-rwxr-xr-xinc/3rdparty/site_config/standard/mobile.nytimes.com.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/mobile.slate.com.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/mobileopportunity.blogspot.com.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/modernghana.com.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/money.cnn.com.txt46
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/monkeyzen.com.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/moonsault.de.txt22
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/moreintelligentlife.com.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/motherboard.vice.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/mothering.com.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/motherjones.com.txt26
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/motorfull.com.txt0
-rwxr-xr-xinc/3rdparty/site_config/standard/movie.douban.com.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/msdn.microsoft.com.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/msnbc.msn.com.txt38
-rwxr-xr-xinc/3rdparty/site_config/standard/myfoxatlanta.com.txt5
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/myfoxboston.com.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/myrecipes.com.txt22
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/narenji.ir.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/nasa.gov.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/nbweekly.com.txt16
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/neh.gov.txt30
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/neomoney.co.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/net-security.org.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/netmagazine.com.txt28
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/netzpolitik.org.txt8
-rwxr-xr-xinc/3rdparty/site_config/standard/newleftproject.org.txt3
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/newmatilda.com.txt14
-rwxr-xr-xinc/3rdparty/site_config/standard/newrepublic.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/news-gazette.com.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/news.cnet.com.txt20
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/news.detik.com.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/news.kanaloco.jp.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/news.mynavi.jp.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/news.orf.at.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/news.rambler.ru.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/news.techmeme.com.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/news.yahoo.com.txt20
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/news.ycombinator.com.txt2
-rwxr-xr-xinc/3rdparty/site_config/standard/news.zing.vn.txt3
-rwxr-xr-xinc/3rdparty/site_config/standard/news247.gr.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/newsbomb.gr.txt16
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/newsle.com.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/newsmill.se.txt22
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/newsunspun.org.txt18
-rwxr-xr-xinc/3rdparty/site_config/standard/newsweek.com.txt6
-rwxr-xr-xinc/3rdparty/site_config/standard/newswise.com.txt17
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/newyorker.com.txt21
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/next-gen.biz.txt28
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/nfl.com.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/ngm.nationalgeographic.com.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/nhk.or.jp.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/nintendoworldreport.com.txt22
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/nojesguiden.se.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/northumberlandview.ca.txt20
-rwxr-xr-xinc/3rdparty/site_config/standard/nosalty.hu.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/nplusonemag.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/npr.org.txt66
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/nybooks.com.txt24
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/nymag.com.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/nyteknik.se.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/nytimes.com.txt85
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/nzz.ch.txt22
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/observer.com.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/off.net.mk.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/omaha.com.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/omiliya.org.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/on.net.mk.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/online.wsj.com.txt48
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/onlinewelten.com.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/onstartups.com.txt0
-rwxr-xr-xinc/3rdparty/site_config/standard/ontologicalgeek.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/opensource.org.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/openthemagazine.com.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/openwebx.org.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/orf.at.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/origo.hu.txt32
-rwxr-xr-xinc/3rdparty/site_config/standard/oschina.net.txt3
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/pakistantvdekho.com.txt20
-rwxr-xr-xinc/3rdparty/site_config/standard/pakmedia.tv.txt17
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/pandagon.net.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/pandodaily.com.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/panic.com.txt2
-rwxr-xr-xinc/3rdparty/site_config/standard/papodehomem.com.br.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/parislemon.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/parliament.uk.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/pastebin.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/pastepad.fivefilters.org.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/pathawks.com.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/pcast.me.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/pcmag.com.txt16
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/pcworld.com.txt36
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/penny-arcade.com.txt44
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/pentaxforums.com.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/philadelphiaeagles.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/philly.com.txt16
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/photo.tutsplus.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/php.net.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/physicstoday.org.txt10
-rwxr-xr-xinc/3rdparty/site_config/standard/pinterest.com.txt5
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/pitchfork.com.txt28
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/pittnews.com.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/pittsburgh.pirates.mlb.com.txt26
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/pittsburghlive.com.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/pittsburghmagazine.com.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/pittsburghpanthers.com.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/pittscriptblog.com.txt12
-rwxr-xr-xinc/3rdparty/site_config/standard/planetvita.de.txt5
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/playboy.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/plus.google.com.txt32
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/plzkthxbai.com.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/pogue.blogs.nytimes.com.txt4
-rwxr-xr-xinc/3rdparty/site_config/standard/politico.com.txt26
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/politifact.com.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/politiken.dk.txt22
-rwxr-xr-xinc/3rdparty/site_config/standard/polygon.com.txt34
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/popularmechanics.com.txt12
-rwxr-xr-xinc/3rdparty/site_config/standard/portertech.ca.txt3
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/positioningmag.com.txt34
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/post-gazette.com.txt50
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/posta.com.tr.txt26
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/prb.org.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/prog21.dadgum.com.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/prolost.com.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/propublica.org.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/prosa.dk.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/prospectmagazine.co.uk.txt50
-rwxr-xr-xinc/3rdparty/site_config/standard/protothema.gr.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/psychologytoday.com.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/publications.parliament.uk.txt4
-rwxr-xr-xinc/3rdparty/site_config/standard/publico.pt.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/purpleplanetmedia.com.txt4
-rwxr-xr-xinc/3rdparty/site_config/standard/qctimes.com.txt5
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/quantumdiaries.org.txt24
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/queerty.com.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/quepasa.cl.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/quora.com.txt30
-rwxr-xr-xinc/3rdparty/site_config/standard/racjonalista.pl.txt5
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/radar.oreilly.com.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/radionz.co.nz.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/randsinrepose.com.txt20
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/readability.com.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/readwriteweb.com.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/real.gr.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/recipe.com.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/red-hot-girls.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/reddit.com.txt36
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/redmondpie.com.txt24
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/redtape.msnbc.msn.com.txt36
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/reflets.info.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/renenekuda.cz.txt2
-rwxr-xr-xinc/3rdparty/site_config/standard/resume.se.txt9
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/retrieverweekly.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/reuters.com.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/revistapiaui.estadao.com.br.txt16
-rw-r--r--inc/3rdparty/site_config/standard/rezeptwelt.de.txt5
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/richardmuscat.wordpress.com.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/ritemail.blogspot.com.txt8
-rwxr-xr-xinc/3rdparty/site_config/standard/ritholtz.com.txt5
-rwxr-xr-xinc/3rdparty/site_config/standard/robertsspaceindustries.com.txt4
-rwxr-xr-xinc/3rdparty/site_config/standard/robots.thoughtbot.com.txt5
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/rockpapershotgun.com.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/rodrigo.sharpcube.com.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/rogerebert.com.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/rolfinjapan.nl.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/rollingstone.com.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/rottentomatoes.com.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/roughtype.com.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/roy.gbiv.com.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/rpgsite.net.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/rubysfera.pl.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/ruhlman.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/ruttloff.org.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/salon.com.txt20
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/salzburg.com.txt10
-rwxr-xr-xinc/3rdparty/site_config/standard/sanpedrosun.com.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/saveyourself.ca.txt46
-rwxr-xr-xinc/3rdparty/site_config/standard/sayidaty.net.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/sbnation.com.txt52
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/schneier.com.txt48
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/science.orf.at.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/scienceblogs.de.txt20
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/scienceticker.info.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/scientificamerican.com.txt48
-rwxr-xr-xinc/3rdparty/site_config/standard/scilogs.de.txt15
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/scotusblog.com.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/scraplab.net.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/scripting.com.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/sct.temple.edu.txt6
-rwxr-xr-xinc/3rdparty/site_config/standard/searchenginejournal.com.txt5
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/searchengineland.com.txt36
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/seattletransitblog.com.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/sebbo.net.txt4
-rwxr-xr-xinc/3rdparty/site_config/standard/select.yeeyan.org.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/seriouseats.com.txt26
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/sf.curbed.com.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/sf.eater.com.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/sfgate.com.txt20
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/sfweekly.com.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/shabayek.com.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/shawnblanc.net.txt20
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/shifteleven.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/siasat.pk.txt20
-rwxr-xr-xinc/3rdparty/site_config/standard/signalscv.com.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/simonwillison.net.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/singaporeanstocksinvestor.blogspot.com.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/singularityhub.com.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/sintagoulis.gr.txt10
-rwxr-xr-xinc/3rdparty/site_config/standard/sivers.org.txt6
-rwxr-xr-xinc/3rdparty/site_config/standard/skanesfria.se.txt7
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/slashfilm.com.txt28
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/slate.com.txt36
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/slice.seriouseats.com.txt26
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/slog.thestranger.com.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/smartinvestor.de.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/sme.sk.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/smithsonianmag.com.txt36
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/smokingapples.com.txt8
-rwxr-xr-xinc/3rdparty/site_config/standard/somethingawful.com.txt17
-rwxr-xr-xinc/3rdparty/site_config/standard/songshuhui.net.txt7
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/sourcebooks.com.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/spectator.co.uk.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/spectrum.ieee.org.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/speirs.org.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/spiegel.de.txt148
-rwxr-xr-xinc/3rdparty/site_config/standard/spiked-online.com.txt7
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/spin.com.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/splatf.com.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/splitsider.com.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/sport.detik.com.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/sport.orf.at.txt18
-rwxr-xr-xinc/3rdparty/site_config/standard/sport365.fr.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/sports.espn.go.com.txt22
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/sports.yahoo.com.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/sportschau.de.txt40
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/sportsillustrated.cnn.com.txt50
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/sprengsatz.de.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/sqlite.org.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/squashed.tumblr.com.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/stackoverflow.com.txt24
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/stalbansreview.co.uk.txt26
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/standard.co.uk.txt28
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/staradvertiser.com.txt20
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/stephenfry.com.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/stlbeacon.org.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/stockholm.etc.se.txt6
-rwxr-xr-xinc/3rdparty/site_config/standard/stockholmsfria.nu.txt7
-rwxr-xr-xinc/3rdparty/site_config/standard/straightdope.com.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/streetsblog.net.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/stuff.co.nz.txt42
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/stumbleupon.com.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/subtraction.com.txt32
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/sueddeutsche.de.txt34
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/summify.com.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/suntimes.com.txt24
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/svd.se.txt16
-rwxr-xr-xinc/3rdparty/site_config/standard/svt.se.txt16
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/sydsvenskan.se.txt29
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/symmetrymagazine.org.txt20
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/sz-magazin.sueddeutsche.de.txt26
-rwxr-xr-xinc/3rdparty/site_config/standard/sz.de.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/tagesschau.de.txt42
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/tampabay.com.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/taptaptap.com.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/tasteofhome.com.txt26
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/taz.de.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/tbray.org.txt6
-rwxr-xr-xinc/3rdparty/site_config/standard/tcmanila.tk.txt7
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/tcng.org.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/tech.fortune.cnn.com.txt4
-rwxr-xr-xinc/3rdparty/site_config/standard/tech.gilt.com.txt5
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/tech.sina.com.cn.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/techcrunch.com.txt34
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/techdirt.com.txt20
-rwxr-xr-xinc/3rdparty/site_config/standard/techhive.com.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/techmeme.com.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/technicallyjordan.tumblr.com.txt14
-rwxr-xr-xinc/3rdparty/site_config/standard/technologizer.com.txt5
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/technologyreview.com.txt30
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/techpinions.com.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/techradar.com.txt20
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/telegraaf.nl.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/telegraph.co.uk.txt16
-rwxr-xr-xinc/3rdparty/site_config/standard/thanhnien.com.vn.txt4
-rwxr-xr-xinc/3rdparty/site_config/standard/the-magazine.org.txt3
-rwxr-xr-xinc/3rdparty/site_config/standard/theage.com.au.txt5
-rwxr-xr-xinc/3rdparty/site_config/standard/theamericanscholar.org.txt13
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/theappleblog.com.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/theatlantic.com.txt36
-rwxr-xr-xinc/3rdparty/site_config/standard/theatlanticcities.com.txt17
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/thebostonchannel.com.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/thebrowser.com.txt16
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/thecarton.net.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/thedaily.com.txt46
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/thedailybeast.com.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/thedailymash.co.uk.txt26
-rwxr-xr-xinc/3rdparty/site_config/standard/thedisneyblog.com.txt7
-rwxr-xr-xinc/3rdparty/site_config/standard/theeuropean-magazine.com.txt17
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/thefilmexperience.net.txt0
-rwxr-xr-xinc/3rdparty/site_config/standard/thegamedesignforum.com.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/theglobalmail.org.txt78
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/theglobeandmail.com.txt6
-rwxr-xr-xinc/3rdparty/site_config/standard/thegreatdiscontent.com.txt6
-rwxr-xr-xinc/3rdparty/site_config/standard/theguardian.com.txt13
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/theindychannel.com.txt22
-rwxr-xr-xinc/3rdparty/site_config/standard/themarker.com.txt11
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/themillions.com.txt16
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/themuseumofinnocence.com.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/thenation.com.txt22
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/thenetworkgarden.blogs.com.txt4
-rwxr-xr-xinc/3rdparty/site_config/standard/thenextgeneration.org.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/thenextweb.com.txt22
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/theoaklandpress.com.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/theonion.com.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/thepioneerwoman.com.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/theregister.co.uk.txt13
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/theroot.com.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/therumpus.net.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/thesiasat.com.txt20
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/thesimpledollar.com.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/thespoiler.co.uk.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/thespoof.com.txt16
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/thestranger.com.txt20
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/thestreet.com.txt48
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/thethaovanhoa.vn.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/theverge.com.txt79
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/theweek.com.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/thinkprogress.org.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/thisdaylive.com.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/thisismynext.com.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/tidbits.com.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/time.com.txt26
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/timeshighereducation.co.uk.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/tipb.com.txt16
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/tnr.com.txt32
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/tomdispatch.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/tomshardware.com.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/tomshardware.de.txt20
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/toolsandtoys.net.txt8
-rwxr-xr-xinc/3rdparty/site_config/standard/tracks.ranea.org.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/trailer.web-view.net.txt0
-rwxr-xr-xinc/3rdparty/site_config/standard/trailerzone.de.txt9
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/traningslara.se.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/triblive.com.txt24
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/truthdig.com.txt22
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/tthfanfic.org.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/tthor.com.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/tuaw.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/tuckreview.com.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/tvtropes.org.txt36
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/twitter.com.txt16
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/uefa.com.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/uk.xbox360.ign.com.txt42
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/uni-watch.com.txt30
-rwxr-xr-xinc/3rdparty/site_config/standard/unwinnable.com.txt9
-rwxr-xr-xinc/3rdparty/site_config/standard/uppsalafria.se.txt7
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/urbandictionary.com.txt6
-rwxr-xr-xinc/3rdparty/site_config/standard/usatoday.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/usccb.org.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/useit.com.txt12
-rwxr-xr-xinc/3rdparty/site_config/standard/usfirst.org.txt6
-rwxr-xr-xinc/3rdparty/site_config/standard/utdailybeacon.com.txt5
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/ux.artu.tv.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/uzivatelsketestovani.cz.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/vanityfair.com.txt58
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/varingen.no.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/varsity.co.uk.txt4
-rwxr-xr-xinc/3rdparty/site_config/standard/vea.gov.vn.txt7
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/vedomosti.ru.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/veggbilder.no.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/vemedio.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/venturebeat.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/version2.dk.txt20
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/verybestbaking.com.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/vg.no.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/video.forbes.com.txt16
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/videogum.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/villagevoice.com.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/vimeo.com.txt32
-rwxr-xr-xinc/3rdparty/site_config/standard/viply.de.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/visir.is.txt24
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/vitispr.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/vivirmexico.com.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/vnexpress.net.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/voices.washingtonpost.com.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/vworker.com.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/waffle.wootest.net.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/walrusmagazine.com.txt24
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/warnerbros.fr.txt2
-rwxr-xr-xinc/3rdparty/site_config/standard/washingtoninstitute.org.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/washingtonmonthly.com.txt16
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/washingtonpost.com.txt51
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/web-libre.org.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/weblog.bignerdranch.com.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/weblogs.asp.net.txt14
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/webpaper.nzz.ch.txt12
-rwxr-xr-xinc/3rdparty/site_config/standard/webwereld.nl.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/welt.de.txt42
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/westhamtillidie.com.txt8
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/what-if.xkcd.com.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/whatever.scalzi.com.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/wheelyric.com.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/wiki.guildwars.com.txt12
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/wiki.guildwars2.com.txt12
-rwxr-xr-xinc/3rdparty/site_config/standard/wikihow.com.txt15
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/wikitravel.org.txt24
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/will-self.com.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/williampfaff.com.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/winfuture.de.txt20
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/winrumors.com.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/winsupersite.com.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/wired.com.txt47
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/wmnf.org.txt22
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/wmpoweruser.com.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/worldpoultry.net.txt6
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/worldwidewords.org.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/wow.joystiq.com.txt8
-rwxr-xr-xinc/3rdparty/site_config/standard/wpmayor.com.txt8
-rwxr-xr-xinc/3rdparty/site_config/standard/wtatennis.com.txt7
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/www1.folha.uol.com.br.txt28
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/www3.imperial.ac.uk.txt0
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/wyborcza.pl.txt20
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/wyctim.com.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/wz-newsline.de.txt6
-rwxr-xr-xinc/3rdparty/site_config/standard/xfgjls.com.txt11
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/xoeb.us.txt4
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/yated.com.txt0
-rwxr-xr-xinc/3rdparty/site_config/standard/ynet.co.il.txt26
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/yostivanich.com.txt6
-rwxr-xr-xinc/3rdparty/site_config/standard/yourerie.com.txt2
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/youtube.com.txt28
-rwxr-xr-xinc/3rdparty/site_config/standard/zcommunications.org.txt7
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/zdnet.com.txt18
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/zeit.de.txt89
-rwxr-xr-xinc/3rdparty/site_config/standard/zerohedge.com.txt10
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/zerokspot.com.txt2
-rwxr-xr-xinc/3rdparty/site_config/standard/zhihu.com.txt19
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/zingtrain.com.txt2
-rwxr-xr-xinc/poche/Database.class.php175
-rw-r--r--inc/poche/Language.class.php113
-rwxr-xr-xinc/poche/Poche.class.php950
-rwxr-xr-xinc/poche/Routing.class.php153
-rw-r--r--inc/poche/Template.class.php235
-rwxr-xr-xinc/poche/Tools.class.php187
-rw-r--r--inc/poche/Url.class.php2
-rw-r--r--inc/poche/User.class.php11
-rw-r--r--inc/poche/WallabagEpub.class.php137
-rwxr-xr-xinc/poche/config.inc.default.php4
-rwxr-xr-xinc/poche/global.inc.php30
-rw-r--r--inc/poche/pochePictures.php262
966 files changed, 8942 insertions, 6659 deletions
diff --git a/inc/3rdparty/FlattrItem.class.php b/inc/3rdparty/FlattrItem.class.php
index 711b4ee0..ef8c62f7 100644
--- a/inc/3rdparty/FlattrItem.class.php
+++ b/inc/3rdparty/FlattrItem.class.php
@@ -1,28 +1,35 @@
1<?php 1<?php
2/* 2/**
3* Class for Flattr querying 3 * wallabag, self hostable application allowing you to not miss any content anymore
4*/ 4 *
5class FlattrItem { 5 * @category wallabag
6 * @author Nicolas Lœuillet <nicolas@loeuillet.org>
7 * @copyright 2013
8 * @license http://opensource.org/licenses/MIT see COPYING file
9 */
6 10
11class FlattrItem
12{
7 public $status; 13 public $status;
8 public $urltoflattr; 14 public $urlToFlattr;
9 public $flattrItemURL; 15 public $flattrItemURL;
10 public $numflattrs; 16 public $numFlattrs;
11 17
12 public function checkItem($urltoflattr,$id) { 18 public function checkItem($urlToFlattr, $id)
13 $this->cacheflattrfile($urltoflattr, $id); 19 {
20 $this->_cacheFlattrFile($urlToFlattr, $id);
14 $flattrResponse = file_get_contents(CACHE . "/flattr/".$id.".cache"); 21 $flattrResponse = file_get_contents(CACHE . "/flattr/".$id.".cache");
15 if($flattrResponse != FALSE) { 22 if($flattrResponse != FALSE) {
16 $result = json_decode($flattrResponse); 23 $result = json_decode($flattrResponse);
17 if (isset($result->message)){ 24 if (isset($result->message)) {
18 if ($result->message == "flattrable") { 25 if ($result->message == "flattrable") {
19 $this->status = FLATTRABLE; 26 $this->status = FLATTRABLE;
20 } 27 }
21 } 28 }
22 elseif (is_object($result) && $result->link) { 29 elseif (is_object($result) && $result->link) {
23 $this->status = FLATTRED; 30 $this->status = FLATTRED;
24 $this->flattrItemURL = $result->link; 31 $this->flattrItemURL = $result->link;
25 $this->numflattrs = $result->flattrs; 32 $this->numFlattrs = $result->flattrs;
26 } 33 }
27 else { 34 else {
28 $this->status = NOT_FLATTRABLE; 35 $this->status = NOT_FLATTRABLE;
@@ -33,17 +40,18 @@ class FlattrItem {
33 } 40 }
34 } 41 }
35 42
36 private function cacheflattrfile($urltoflattr, $id) { 43 private function _cacheFlattrFile($urlToFlattr, $id)
44 {
37 if (!is_dir(CACHE . '/flattr')) { 45 if (!is_dir(CACHE . '/flattr')) {
38 mkdir(CACHE . '/flattr', 0777); 46 mkdir(CACHE . '/flattr', 0777);
39 } 47 }
40 48
41 // if a cache flattr file for this url already exists and it's been less than one day than it have been updated, see in /cache 49 // if a cache flattr file for this url already exists and it's been less than one day than it have been updated, see in /cache
42 if ((!file_exists(CACHE . "/flattr/".$id.".cache")) || (time() - filemtime(CACHE . "/flattr/".$id.".cache") > 86400)) { 50 if ((!file_exists(CACHE . "/flattr/".$id.".cache")) || (time() - filemtime(CACHE . "/flattr/".$id.".cache") > 86400)) {
43 $askForFlattr = Tools::getFile(FLATTR_API . $urltoflattr); 51 $askForFlattr = Tools::getFile(FLATTR_API . $urlToFlattr);
44 $flattrCacheFile = fopen(CACHE . "/flattr/".$id.".cache", 'w+'); 52 $flattrCacheFile = fopen(CACHE . "/flattr/".$id.".cache", 'w+');
45 fwrite($flattrCacheFile, $askForFlattr); 53 fwrite($flattrCacheFile, $askForFlattr);
46 fclose($flattrCacheFile); 54 fclose($flattrCacheFile);
47 } 55 }
48 } 56 }
49} \ No newline at end of file 57}
diff --git a/inc/3rdparty/Session.class.php b/inc/3rdparty/Session.class.php
index 59dfbe67..b56e4c54 100644
--- a/inc/3rdparty/Session.class.php
+++ b/inc/3rdparty/Session.class.php
@@ -309,4 +309,38 @@ class Session
309 309
310 return true; // User is not banned. 310 return true; // User is not banned.
311 } 311 }
312
313
314 /**
315 * Tells if a param exists in session
316 *
317 * @param $name name of the param to test
318 * @return bool
319 */
320 public static function isInSession($name)
321 {
322 return (isset($_SESSION[$name]) ? : FALSE);
323 }
324
325 /**
326 * Returns param in session
327 *
328 * @param $name name of the param to return
329 * @return mixed param or null
330 */
331 public static function getParam($name)
332 {
333 return (self::isInSession($name) ? $_SESSION[$name] : NULL);
334 }
335
336 /**
337 * Store value in session
338 *
339 * @param $name name of the variable to store
340 * @param $value value to store
341 */
342 public static function setParam($name, $value)
343 {
344 $_SESSION[$name] = $value;
345 }
312} 346}
diff --git a/inc/3rdparty/site_config/standard/24ways.org.txt b/inc/3rdparty/site_config/standard/24ways.org.txt
index 03bd1950..86c9e077 100644..100755
--- a/inc/3rdparty/site_config/standard/24ways.org.txt
+++ b/inc/3rdparty/site_config/standard/24ways.org.txt
@@ -1,6 +1,6 @@
1title: //div[@class='meta']/h2/a 1title: //div[@class='meta']/h2/a
2author: //div[@class='meta']/h2/following-sibling::p/a/text() 2author: //div[@class='meta']/h2/following-sibling::p/a/text()
3date://div[@class='meta']/h2/strong 3date://div[@class='meta']/h2/strong
4body: //div[@id='article'] 4body: //div[@id='article']
5strip: //div[@class='domore'] 5strip: //div[@class='domore']
6test_url: http://24ways.org/2011/composing-the-new-canon \ No newline at end of file 6test_url: http://24ways.org/2011/composing-the-new-canon \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/36kr.com.txt b/inc/3rdparty/site_config/standard/36kr.com.txt
new file mode 100755
index 00000000..d73d7de5
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/36kr.com.txt
@@ -0,0 +1,8 @@
1title: //h1[contains(@class, 'entry-title')]
2date: //meta[@name='weibo: article:create_at']/@content
3body: //div[contains(@class, 'mainContent')]
4strip_id_or_class: related_topics
5
6prune: no
7
8test_url: http://www.36kr.com/p/207879.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/37signals.com.txt b/inc/3rdparty/site_config/standard/37signals.com.txt
index 43a10ae5..531cac1e 100644..100755
--- a/inc/3rdparty/site_config/standard/37signals.com.txt
+++ b/inc/3rdparty/site_config/standard/37signals.com.txt
@@ -1,6 +1,6 @@
1title: //div[@class='post_header']//h2/a 1title: //div[@class='post_header']//h2/a
2author: //span[@class='author'] 2author: //span[@class='author']
3date: //span[@class='date'] 3date: //span[@class='date']
4body: //div[@id='Content'] 4body: //div[@id='Content']
5 5
6test_url: http://37signals.com/svn/posts/2785-the-end-of-the-it-department \ No newline at end of file 6test_url: http://37signals.com/svn/posts/2785-the-end-of-the-it-department \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/3quarksdaily.com.txt b/inc/3rdparty/site_config/standard/3quarksdaily.com.txt
index c4e7940f..80a3958f 100644..100755
--- a/inc/3rdparty/site_config/standard/3quarksdaily.com.txt
+++ b/inc/3rdparty/site_config/standard/3quarksdaily.com.txt
@@ -1,9 +1,9 @@
1body: //div[@class='content'] 1body: //div[@class='content']
2date: //div[@class='content']/h2 2date: //div[@class='content']/h2
3strip: //div[@class='content']/h2 3strip: //div[@class='content']/h2
4title: //div[@class='content']/h3 4title: //div[@class='content']/h3
5 5
6strip: //div[@id='postmenu'] 6strip: //div[@id='postmenu']
7strip: //div[@class='trackback'] 7strip: //div[@class='trackback']
8tidy: no 8tidy: no
9test_url: http://www.3quarksdaily.com/3quarksdaily/2012/01/martin-luther-king-i-have-a-dream.html \ No newline at end of file 9test_url: http://www.3quarksdaily.com/3quarksdaily/2012/01/martin-luther-king-i-have-a-dream.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/3voor12.vpro.nl.txt b/inc/3rdparty/site_config/standard/3voor12.vpro.nl.txt
index b846b050..b846b050 100644..100755
--- a/inc/3rdparty/site_config/standard/3voor12.vpro.nl.txt
+++ b/inc/3rdparty/site_config/standard/3voor12.vpro.nl.txt
diff --git a/inc/3rdparty/site_config/standard/43folders.com.txt b/inc/3rdparty/site_config/standard/43folders.com.txt
index e8073f6f..3777c66f 100644..100755
--- a/inc/3rdparty/site_config/standard/43folders.com.txt
+++ b/inc/3rdparty/site_config/standard/43folders.com.txt
@@ -1,4 +1,4 @@
1body: //*[@class = 'content'] 1body: //*[@class = 'content']
2author: //*[@class = 'submitted']/a 2author: //*[@class = 'submitted']/a
3date: substring-after(//*[@class = 'submitted']/text(), '|') 3date: substring-after(//*[@class = 'submitted']/text(), '|')
4test_url: http://www.43folders.com/2011/04/22/cranking \ No newline at end of file 4test_url: http://www.43folders.com/2011/04/22/cranking \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/500px.com.txt b/inc/3rdparty/site_config/standard/500px.com.txt
index 68e6b2d0..b9b7e9dd 100644..100755
--- a/inc/3rdparty/site_config/standard/500px.com.txt
+++ b/inc/3rdparty/site_config/standard/500px.com.txt
@@ -1,27 +1,27 @@
1# very loose setup for both 500px.com/photo/* and 500px.com/blog/* 1# very loose setup for both 500px.com/photo/* and 500px.com/blog/*
2# photo page example: http://500px.com/photo/4181666 2# photo page example: http://500px.com/photo/4181666
3# blog page example: http://500px.com/blog/110 3# blog page example: http://500px.com/blog/110
4 4
5# avoid "no text" error 5# avoid "no text" error
6tidy:no 6tidy:no
7prune:no 7prune:no
8 8
9# reorganize photo page elements 9# reorganize photo page elements
10#body://div[contains(@class,'container')] 10#body://div[contains(@class,'container')]
11move_into(body)://div[contains(@id,'thephoto')] 11move_into(body)://div[contains(@id,'thephoto')]
12move_into(body)://div[contains(@id,'description')] 12move_into(body)://div[contains(@id,'description')]
13move_into(body)://div[contains(@id,'tags')] 13move_into(body)://div[contains(@id,'tags')]
14move_into(body)://div[contains(@id,'photo-info')] 14move_into(body)://div[contains(@id,'photo-info')]
15 15
16# clean photo page info 16# clean photo page info
17strip://span[contains(@id,'copyright')] 17strip://span[contains(@id,'copyright')]
18strip://*[contains(@id,'store')] 18strip://*[contains(@id,'store')]
19strip://*[contains(@id,'user-info')] 19strip://*[contains(@id,'user-info')]
20strip://*[contains(@id,'photo-stats')] 20strip://*[contains(@id,'photo-stats')]
21strip://*[contains(@id,'voting_controls_container')] 21strip://*[contains(@id,'voting_controls_container')]
22strip://*[contains(@id,'more-photos')] 22strip://*[contains(@id,'more-photos')]
23strip://*[contains(@id,'embed-photo')] 23strip://*[contains(@id,'embed-photo')]
24 24
25# clean blog page side bar 25# clean blog page side bar
26strip://*[contains(@class,'col d3 clearafter')] 26strip://*[contains(@class,'col d3 clearafter')]
27test_url: http://500px.com/photo/3641041?from=editors \ No newline at end of file 27test_url: http://500px.com/photo/3641041?from=editors \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/512pixels.net.txt b/inc/3rdparty/site_config/standard/512pixels.net.txt
index e458980f..e458980f 100644..100755
--- a/inc/3rdparty/site_config/standard/512pixels.net.txt
+++ b/inc/3rdparty/site_config/standard/512pixels.net.txt
diff --git a/inc/3rdparty/site_config/standard/5by5.tv.txt b/inc/3rdparty/site_config/standard/5by5.tv.txt
index dce0df4e..59b70a99 100644..100755
--- a/inc/3rdparty/site_config/standard/5by5.tv.txt
+++ b/inc/3rdparty/site_config/standard/5by5.tv.txt
@@ -1,9 +1,9 @@
1body: //*[@id="episode"] 1body: //*[@id="episode"]
2prune: no 2prune: no
3tidy: no 3tidy: no
4 4
5autodetect_next_page: no 5autodetect_next_page: no
6strip_id_or_class: player 6strip_id_or_class: player
7 7
8strip://*[@id="header"] 8strip://*[@id="header"]
9test_url: http://5by5.tv/buildanalyze/60 \ No newline at end of file 9test_url: http://5by5.tv/buildanalyze/60 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/7newsbelize.com.txt b/inc/3rdparty/site_config/standard/7newsbelize.com.txt
new file mode 100755
index 00000000..46d09f8e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/7newsbelize.com.txt
@@ -0,0 +1,7 @@
1title: //*[@id='sstitle']
2body: //div[@id='sstory']
3strip_id_or_class: newsoptions
4prune: no
5
6test_url: http://www.7newsbelize.com/sstory.php?nid=25654
7test_url: http://www.7newsbelize.com/7news.xml \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/944.com.txt b/inc/3rdparty/site_config/standard/944.com.txt
index 84380e79..8bf6a4c2 100644..100755
--- a/inc/3rdparty/site_config/standard/944.com.txt
+++ b/inc/3rdparty/site_config/standard/944.com.txt
@@ -1,9 +1,9 @@
1title: //h2[@class='border'] 1title: //h2[@class='border']
2body: //div[@class='padding'] 2body: //div[@class='padding']
3 3
4convert_double_br_tags: yes 4convert_double_br_tags: yes
5 5
6strip: //div[@id='social_sharing'] 6strip: //div[@id='social_sharing']
7strip: //div[@class='socialLinks'] 7strip: //div[@class='socialLinks']
8 8
9test_url: http://www.944.com/articles/mild-obsessions-frock-la-get-to-know-victoria-tik-s-haute-sustainable-fashion-line/ \ No newline at end of file 9test_url: http://www.944.com/articles/mild-obsessions-frock-la-get-to-know-victoria-tik-s-haute-sustainable-fashion-line/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/README.md b/inc/3rdparty/site_config/standard/README.md
new file mode 100755
index 00000000..9040ba85
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/README.md
@@ -0,0 +1,38 @@
1Full-Text RSS site config files
2================
3
4[Full-Text RSS](http://fivefilters.org/content-only/), our article extraction tool, makes use of site-specific extraction rules to improve results. Each time a URL is processed, it checks to see if there are extraction rules for the site being processed. If there are no site patterns, it tries to detect the content block automatically.
5
6This repository contains the site config files we use in Full-Text RSS.
7
8### Contributing changes
9
10We chose GitHub for this set of files because they offer one feature which we hope will make contributing changes easier: [file editing](https://github.com/blog/844-forking-with-the-edit-button) through the web interface.
11
12You can now make changes to any of our site config files and request that your changes be pulled into the main set we maintain. This is what GitHub calls the Fork and Pull model:
13
14> The Fork & Pull Model lets anyone fork an existing repository and push changes to their personal fork without requiring access be granted to the source repository. The changes must then be pulled into the source repository by the project maintainer. This model reduces the amount of friction for new contributors and is popular with open source projects because it allows people to work independently without upfront coordination.
15
16When we receive a pull request we'll review the changes and if everything's okay we'll update our copy.
17
18If a site is not in our set, you can create a file for it in the same way. See [Creating files on GitHub](https://github.com/blog/1327-creating-files-on-github).
19
20### How to write a site config file
21
22The quickest and simplest way is to use our [point-and-click interface](http://siteconfig.fivefilters.org). It's a simple tool only intended to create a rule to extract the correct content block.
23
24For further refinements, e.g. selecting the title, stripping elements, dealing with multi-page articles, please see our [help page](http://help.fivefilters.org/customer/portal/articles/223153-site-patterns).
25
26### Instapaper
27
28When we introduced site patterns, we chose to adopt the [same format](http://blog.instapaper.com/post/730281947) used by Instapaper. This allows us to make use of the existing extraction rules contributed by Instapaper users.
29
30Marco, Instapaper's creator, graciously opened up the database of contributions to everyone:
31
32> And, recognizing that your efforts could be useful to a wide range of other tools and services, I'll make the list of all of these site-specific configurations available to the public, free, with no strings attached.
33
34Most of the extraction rules in our set are borrowed from Instapaper. You can see the list maintained by Instapaper at [instapaper.com/bodytext/](http://instapaper.com/bodytext/) (login required).
35
36### Testing site config files
37
38Currently you will have to have a copy of Full-Text RSS to test changes to the site config files. In the future we will try to make this process easier.
diff --git a/inc/3rdparty/site_config/standard/aachener-nachrichten.de.txt b/inc/3rdparty/site_config/standard/aachener-nachrichten.de.txt
index 379592e0..b60c15de 100644..100755
--- a/inc/3rdparty/site_config/standard/aachener-nachrichten.de.txt
+++ b/inc/3rdparty/site_config/standard/aachener-nachrichten.de.txt
@@ -1,10 +1,10 @@
1title: //meta[@property='og:title']/@content 1title: //meta[@property='og:title']/@content
2body: //*[@class='fliesstext_detail' or @class='detail_fliesstext'] | //img[@itemprop="image" and starts-with(@src, "/sixcms/media.php/")] 2body: //*[@class='fliesstext_detail' or @class='detail_fliesstext'] | //img[@itemprop="image" and starts-with(@src, "/sixcms/media.php/")]
3 3
4strip_id_or_class: socialshareprivacy1 4strip_id_or_class: socialshareprivacy1
5strip_id_or_class: zvaFacebookButton 5strip_id_or_class: zvaFacebookButton
6 6
7tidy: no 7tidy: no
8prune: no 8prune: no
9 9
10test_url: http://www.aachener-nachrichten.de/lokales/aachen-detail-an/2517757 \ No newline at end of file 10test_url: http://www.aachener-nachrichten.de/lokales/aachen-detail-an/2517757 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/aachener-zeitung.de.txt b/inc/3rdparty/site_config/standard/aachener-zeitung.de.txt
index 4d76fac7..013afa4c 100644..100755
--- a/inc/3rdparty/site_config/standard/aachener-zeitung.de.txt
+++ b/inc/3rdparty/site_config/standard/aachener-zeitung.de.txt
@@ -1,10 +1,10 @@
1title: //meta[@property='og:title']/@content 1title: //meta[@property='og:title']/@content
2body: //*[@class='fliesstext_detail' or @class='detail_fliesstext'] | //img[@itemprop="image" and starts-with(@src, "/sixcms/media.php/")] 2body: //*[@class='fliesstext_detail' or @class='detail_fliesstext'] | //img[@itemprop="image" and starts-with(@src, "/sixcms/media.php/")]
3 3
4strip_id_or_class: socialshareprivacy1 4strip_id_or_class: socialshareprivacy1
5strip_id_or_class: zvaFacebookButton 5strip_id_or_class: zvaFacebookButton
6 6
7tidy: no 7tidy: no
8prune: no 8prune: no
9 9
10test_url: http://www.aachener-zeitung.de/sixcms/detail.php?template=az_detail&id=2552718 \ No newline at end of file 10test_url: http://www.aachener-zeitung.de/sixcms/detail.php?template=az_detail&id=2552718 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/abc.es.txt b/inc/3rdparty/site_config/standard/abc.es.txt
index a99833de..43aadc49 100644..100755
--- a/inc/3rdparty/site_config/standard/abc.es.txt
+++ b/inc/3rdparty/site_config/standard/abc.es.txt
@@ -1,7 +1,7 @@
1title: //meta[@property='og:title']/@content 1title: //meta[@property='og:title']/@content
2body: //div[@class='datosi' or @class='date' or @class='photo-alt1' or @class='text'] 2body: //div[@class='datosi' or @class='date' or @class='photo-alt1' or @class='text' or @itemprop='articleBody']
3strip_id_or_class: colB 3strip_id_or_class: colB
4 4
5prune: no 5prune: no
6 6
7test_url: http://www.abc.es/20120209/tv-series/abci-house-ultima-temporada-201202090936.html \ No newline at end of file 7test_url: http://www.abc.es/20120209/tv-series/abci-house-ultima-temporada-201202090936.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/abc.net.au.txt b/inc/3rdparty/site_config/standard/abc.net.au.txt
index 5e6269cb..22b3a0f4 100644..100755
--- a/inc/3rdparty/site_config/standard/abc.net.au.txt
+++ b/inc/3rdparty/site_config/standard/abc.net.au.txt
@@ -1,10 +1,18 @@
1title: //h1 1title: //div[@class='article section']//h1
2author: //div[@class="byline"]/a 2author: //div[@class="byline"]/a
3date: //span[@class="timestamp"] 3date: //span[@class="timestamp"]
4 4body: //div[@class="page section"]
5strip: //p[@class="topics"] 5
6strip: //h1 6strip: //a[@class="inline-caption"]
7strip: //div[@class="byline"] 7strip: //p[@class="ticker section noprint"]
8strip: //p[@class="published"] 8strip: //p[@class="topics"]
9strip: //h1
10strip: //div[@class="byline"]
11strip: //p[@class="published"]
9strip: //div[contains(@class,"featured-scroller")] 12strip: //div[contains(@class,"featured-scroller")]
10test_url: http://www.abc.net.au/news/2011-11-08/crabb-carbon-legislation-abbott-demolition/3652544 \ No newline at end of file 13strip_id_or_class: footer
14
15tidy: no
16
17test_url: http://www.abc.net.au/news/2013-03-27/open-speed-highways-change-clp-giles/4597892
18test_url: http://www.abc.net.au/news/2013-04-30/credit-growth-remains-subdued/4660054?section=business
diff --git a/inc/3rdparty/site_config/standard/abcnews.go.com.txt b/inc/3rdparty/site_config/standard/abcnews.go.com.txt
index c515d3e4..8d367351 100644..100755
--- a/inc/3rdparty/site_config/standard/abcnews.go.com.txt
+++ b/inc/3rdparty/site_config/standard/abcnews.go.com.txt
@@ -1,27 +1,27 @@
1title: //h1[@class='headline'] 1title: //h1[@class='headline']
2body: //div[@id='storyText'] 2body: //div[@id='storyText']
3# for video entries 3# for video entries
4body: //img[@id='ff-img'] | //div[@id='meta']//div[contains(@class, 'overview')] 4body: //img[@id='ff-img'] | //div[@id='meta']//div[contains(@class, 'overview')]
5author: //div[@class='byline'] 5author: //div[@class='byline']
6date: //div[@class='date'] 6date: //div[@class='date']
7strip: //*[@id='date_partner'] 7strip: //*[@id='date_partner']
8 8
9strip: //div[@class='breadcrumb'] 9strip: //div[@class='breadcrumb']
10strip: //div[contains(@class,'show_tools')] 10strip: //div[contains(@class,'show_tools')]
11strip: //div[@id='sponsoredByAd'] 11strip: //div[@id='sponsoredByAd']
12strip: //div[contains(@class,'rel_container')] 12strip: //div[contains(@class,'rel_container')]
13strip: //p[a[starts-with(@href, 'http://www.twitter.com')]] 13strip: //p[a[starts-with(@href, 'http://www.twitter.com')]]
14strip: //p[a[starts-with(@href, 'http://www.facebook.com')]] 14strip: //p[a[starts-with(@href, 'http://www.facebook.com')]]
15strip: //p[contains(., 'Click here to return to')] 15strip: //p[contains(., 'Click here to return to')]
16#strip_id_or_class: media 16#strip_id_or_class: media
17strip_id_or_class: mediaplayer 17strip_id_or_class: mediaplayer
18 18
19replace_string(<link rel="image_src" href="http): <img id="ff-img" src="http 19replace_string(<link rel="image_src" href="http): <img id="ff-img" src="http
20 20
21prune: no 21prune: no
22 22
23single_page_link: concat(//li[@class='pager']//a/@href, '&singlePage=true') 23single_page_link: concat(//li[@class='pager']//a/@href, '&singlePage=true')
24 24
25test_url: http://abcnews.go.com/Politics/newt-gingrich-rocky-rollout-presidential-campaign-recover/story?id=13632744 25test_url: http://abcnews.go.com/Politics/newt-gingrich-rocky-rollout-presidential-campaign-recover/story?id=13632744
26# multi-page 26# multi-page
27test_url: http://abcnews.go.com/Blotter/family-freed-american-hostage-somalia-seals-obama/story?id=15439544 \ No newline at end of file 27test_url: http://abcnews.go.com/Blotter/family-freed-american-hostage-somalia-seals-obama/story?id=15439544 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/accesstoinsight.org.txt b/inc/3rdparty/site_config/standard/accesstoinsight.org.txt
index b5d85079..45d66533 100644..100755
--- a/inc/3rdparty/site_config/standard/accesstoinsight.org.txt
+++ b/inc/3rdparty/site_config/standard/accesstoinsight.org.txt
@@ -1,9 +1,9 @@
1title: //div[@id='H_docTitle'] 1title: //div[@id='H_docTitle']
2 2
3body: //div[@id='H_meta' or @id='H_content' or @id='F_footer'] 3body: //div[@id='H_meta' or @id='H_content' or @id='F_footer']
4 4
5strip_id_or_class: F_toenail 5strip_id_or_class: F_toenail
6 6
7prune: no 7prune: no
8 8
9test_url: http://www.accesstoinsight.org/lib/authors/nyanaponika/wheel026.html \ No newline at end of file 9test_url: http://www.accesstoinsight.org/lib/authors/nyanaponika/wheel026.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/acidcow.com.txt b/inc/3rdparty/site_config/standard/acidcow.com.txt
index 60ede6a6..21958651 100644..100755
--- a/inc/3rdparty/site_config/standard/acidcow.com.txt
+++ b/inc/3rdparty/site_config/standard/acidcow.com.txt
@@ -1,3 +1,3 @@
1body: //div[starts-with(@id, 'news-id-')] 1body: //div[starts-with(@id, 'news-id-')]
2 2
3test_url: http://acidcow.com/fun/20933-acid-picdump-83-pics.html \ No newline at end of file 3test_url: http://acidcow.com/fun/20933-acid-picdump-83-pics.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/acquia.com.txt b/inc/3rdparty/site_config/standard/acquia.com.txt
index 5ddf542e..2803611f 100644..100755
--- a/inc/3rdparty/site_config/standard/acquia.com.txt
+++ b/inc/3rdparty/site_config/standard/acquia.com.txt
@@ -1,9 +1,9 @@
1title://h1[@class="title"] 1title://h1[@class="title"]
2author://div[@class="submitted"]/span/a 2author://div[@class="submitted"]/span/a
3date://div[@class="submitted"]/span 3date://div[@class="submitted"]/span
4body://div[@class="content-wrapper"] 4body://div[@class="content-wrapper"]
5 5
6strip://div[@id="skip-link"] 6strip://div[@id="skip-link"]
7strip://div[@id="region-content-3-3"] 7strip://div[@id="region-content-3-3"]
8strip://div[@id="section-footer"] 8strip://div[@id="section-footer"]
9test_url: https://www.acquia.com/blog/drupals-long-warmth-toward-third-party-code \ No newline at end of file 9test_url: https://www.acquia.com/blog/drupals-long-warmth-toward-third-party-code \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/acroswing.fr.txt b/inc/3rdparty/site_config/standard/acroswing.fr.txt
index 57d86d2f..6b1d67fe 100644..100755
--- a/inc/3rdparty/site_config/standard/acroswing.fr.txt
+++ b/inc/3rdparty/site_config/standard/acroswing.fr.txt
@@ -1,5 +1,5 @@
1tidy:no 1tidy:no
2date: //time[@class='updated'] 2date: //time[@class='updated']
3dissolve: //ul[@class='video-gallery']/li 3dissolve: //ul[@class='video-gallery']/li
4dissolve: //ul[@class='video-gallery'] 4dissolve: //ul[@class='video-gallery']
5test_url: http://www.acroswing.fr/actualites/competition_rock/selectif_bellegarde_sur_valserine__2012-02-26.php \ No newline at end of file 5test_url: http://www.acroswing.fr/actualites/competition_rock/selectif_bellegarde_sur_valserine__2012-02-26.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/aftenposten.no.txt b/inc/3rdparty/site_config/standard/aftenposten.no.txt
new file mode 100755
index 00000000..8a69c357
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/aftenposten.no.txt
@@ -0,0 +1,5 @@
1title: //h1[@class='articleTitle ']
2body: //div[@class='bodyText widget storyContent']
3strip: //p/span[@class='quote']/..
4strip_id_or_class: 'pull1'
5test_url: https://www.aftenposten.no/meninger/spaltister/Portrett-av-scenekunstneren-som-ung-mann-7167959.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/aftonbladet.se.txt b/inc/3rdparty/site_config/standard/aftonbladet.se.txt
new file mode 100755
index 00000000..b6c576a8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/aftonbladet.se.txt
@@ -0,0 +1,13 @@
1author: //article//address[contains(@class, 'author')]
2body: //article[.//div[contains(@class, 'abBodyText')]]//*[contains(@class, 'abLeadText') or contains(@class, 'abBodyText') or contains(@class, 'abImageBlock') or contains(@class, 'abIGSatellite')]
3
4strip: //address//img
5strip: //footer
6strip_id_or_class: abSticky
7
8prune: no
9
10test_url: http://www.aftonbladet.se/sportbladet/hockey/sverige/allsvenskan/article17498194.ab
11test_url: http://www.aftonbladet.se/debatt/article16207536.ab
12test_url: http://www.aftonbladet.se/debatt/debattamnen/politik/article17483377.ab
13test_url: http://www.aftonbladet.se/rss.xml \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/aht.seriouseats.com.txt b/inc/3rdparty/site_config/standard/aht.seriouseats.com.txt
index 408e9099..b2d88a05 100644..100755
--- a/inc/3rdparty/site_config/standard/aht.seriouseats.com.txt
+++ b/inc/3rdparty/site_config/standard/aht.seriouseats.com.txt
@@ -1,15 +1,15 @@
1body: //div[@id='content'] 1body: //div[@id='content']
2 2
3# clean up recipe pages 3# clean up recipe pages
4strip: //h2[@class='fn'] | //h2[@class='double-lined'] | //h3 | //div[@id='threeColumn2'] | //div[@id='threeColumn3'] 4strip: //h2[@class='fn'] | //h2[@class='double-lined'] | //h3 | //div[@id='threeColumn2'] | //div[@id='threeColumn3']
5 5
6#recipe pages 6#recipe pages
7strip_id_or_class: "recipe-feedback" 7strip_id_or_class: "recipe-feedback"
8strip_id_or_class: "comments" 8strip_id_or_class: "comments"
9strip_id_or_class: "procedure-number" 9strip_id_or_class: "procedure-number"
10strip_id_or_class: "more-with-author" 10strip_id_or_class: "more-with-author"
11 11
12#slice 12#slice
13strip_id_or_class: "inner" 13strip_id_or_class: "inner"
14 14
15test_url: http://aht.seriouseats.com/archives/2009/12/the-burger-lab-salting-ground-beef.html \ No newline at end of file 15test_url: http://aht.seriouseats.com/archives/2009/12/the-burger-lab-salting-ground-beef.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/albayan.ae.txt b/inc/3rdparty/site_config/standard/albayan.ae.txt
new file mode 100755
index 00000000..f6c093d2
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/albayan.ae.txt
@@ -0,0 +1,6 @@
1body: //div[@id='main-column']//div[@class='content']
2
3prune: no
4
5test_url: http://www.albayan.ae/across-the-uae/education/2013-08-29-1.1949645
6test_url: http://www.albayan.ae/1.448?ot=ot.AjaxPageLayout \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/alex.mullr.net.txt b/inc/3rdparty/site_config/standard/alex.mullr.net.txt
index c5f15370..c5f15370 100644..100755
--- a/inc/3rdparty/site_config/standard/alex.mullr.net.txt
+++ b/inc/3rdparty/site_config/standard/alex.mullr.net.txt
diff --git a/inc/3rdparty/site_config/standard/alexduner.com.txt b/inc/3rdparty/site_config/standard/alexduner.com.txt
new file mode 100755
index 00000000..bd9de9d7
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/alexduner.com.txt
@@ -0,0 +1,4 @@
1body: //section[@class='content']
2date: //span[1]
3author: //h1[@id='sitetitle']
4test_url: https://alexduner.com/blog/2013/1/something-i-learned-today \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/alexduner.squarespace.com.txt b/inc/3rdparty/site_config/standard/alexduner.squarespace.com.txt
new file mode 100755
index 00000000..875405e4
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/alexduner.squarespace.com.txt
@@ -0,0 +1,4 @@
1body: //section[@class='content']
2date: //span[1]
3author: //h1[@id='sitetitle']
4test_url: https://alexduner.squarespace.com/blog/2013/1/tech-culture-from-the-outside-looking-in \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/alistapart.com.txt b/inc/3rdparty/site_config/standard/alistapart.com.txt
index 090f7eb1..7a7096e2 100644..100755
--- a/inc/3rdparty/site_config/standard/alistapart.com.txt
+++ b/inc/3rdparty/site_config/standard/alistapart.com.txt
@@ -1,12 +1,12 @@
1title: //h1[@class='title'] 1title: //h1[@class='title']
2author: //h3[@class='byline']/a 2author: //h3[@class='byline']/a
3date: //div[@class='ishinfo'] 3date: //div[@class='ishinfo']
4 4
5body: //*[@id='articletext'] 5body: //*[@id='articletext']
6strip_id_or_class: 'ishinfo' 6strip_id_or_class: 'ishinfo'
7strip_id_or_class: 'metastuff' 7strip_id_or_class: 'metastuff'
8strip_id_or_class: 'learnmore' 8strip_id_or_class: 'learnmore'
9strip_id_or_class: 'discuss' 9strip_id_or_class: 'discuss'
10 10
11prune: no 11prune: no
12test_url: http://www.alistapart.com/articles/organizing-mobile/ \ No newline at end of file 12test_url: http://www.alistapart.com/articles/organizing-mobile/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/aljazeera.com.txt b/inc/3rdparty/site_config/standard/aljazeera.com.txt
index 4f0148f4..d3bf4014 100644..100755
--- a/inc/3rdparty/site_config/standard/aljazeera.com.txt
+++ b/inc/3rdparty/site_config/standard/aljazeera.com.txt
@@ -1,8 +1,8 @@
1title: //span[@id='DetailedTitle'] 1title: //span[@id='DetailedTitle']
2body: //td[@id='tdTextContent'] 2body: //td[@id='tdTextContent']
3strip_id_or_class: Skyscrapper_Body 3strip_id_or_class: Skyscrapper_Body
4date: //span[@id='ctl00_cphBody_lblDate'] 4date: //span[@id='ctl00_cphBody_lblDate']
5author: //div[@id="dvAuthorInfo"]//a/text() 5author: //div[@id="dvAuthorInfo"]//a/text()
6strip: //table[ tbody/tr/td/object ] 6strip: //table[ tbody/tr/td/object ]
7prune: no 7prune: no
8test_url: http://www.aljazeera.com/indepth/opinion/2012/01/2012114121925380575.html \ No newline at end of file 8test_url: http://www.aljazeera.com/indepth/opinion/2012/01/2012114121925380575.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/allrecipes.com.txt b/inc/3rdparty/site_config/standard/allrecipes.com.txt
index e9767bda..85dc2a5a 100644..100755
--- a/inc/3rdparty/site_config/standard/allrecipes.com.txt
+++ b/inc/3rdparty/site_config/standard/allrecipes.com.txt
@@ -1,14 +1,14 @@
1title: //h1[@id='itemTitle'] 1title: //h1[@id='itemTitle']
2body: //img[@id="ctl00_CenterColumnPlaceHolder_recipe_photoStuff_imgPhoto"] | //div[@id='ctl00_CenterColumnPlaceHolder_recipe_divSubmitter'] | //div[contains(@class, 'recipe-details-content')] 2body: //img[@id="ctl00_CenterColumnPlaceHolder_recipe_photoStuff_imgPhoto"] | //div[@id='ctl00_CenterColumnPlaceHolder_recipe_divSubmitter'] | //div[contains(@class, 'recipe-details-content')]
3strip: //div[@class='top-left' or @class='top-right' or @class='bot-left' or @class='bot-right'] 3strip: //div[@class='top-left' or @class='top-right' or @class='bot-left' or @class='bot-right']
4strip: //div[contains(@class, 'rightcoltoolsdiv')] 4strip: //div[contains(@class, 'rightcoltoolsdiv')]
5strip: //div[contains(@class, 'servings-form')] 5strip: //div[contains(@class, 'servings-form')]
6strip: //p[@class='nutritional-information'] 6strip: //p[@class='nutritional-information']
7strip: //a[contains(@class, 'nutritional-information') or contains(@class, 'nutritionanchor')] 7strip: //a[contains(@class, 'nutritional-information') or contains(@class, 'nutritionanchor')]
8strip: //div[@id='nutri-info']/div[contains(@class, 'title')] 8strip: //div[@id='nutri-info']/div[contains(@class, 'title')]
9strip: //img[@id='ctl00_CenterColumnPlaceHolder_recipe_imgSubmitter'] 9strip: //img[@id='ctl00_CenterColumnPlaceHolder_recipe_imgSubmitter']
10strip_id_or_class: eshaAttribute 10strip_id_or_class: eshaAttribute
11strip_id_or_class: eshaParagraph 11strip_id_or_class: eshaParagraph
12prune: no 12prune: no
13 13
14test_url: http://allrecipes.com/Recipe/Taco-Pie/Detail.aspx?src=rotd \ No newline at end of file 14test_url: http://allrecipes.com/Recipe/Taco-Pie/Detail.aspx?src=rotd \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/allthingsd.com.txt b/inc/3rdparty/site_config/standard/allthingsd.com.txt
index cd52498f..f8c67d02 100644..100755
--- a/inc/3rdparty/site_config/standard/allthingsd.com.txt
+++ b/inc/3rdparty/site_config/standard/allthingsd.com.txt
@@ -1,10 +1,13 @@
1title://div[@class="article-title"]/h1[@class="title"] 1title://div[@class="article-title"]/h1[@class="title"]
2date: //p[@class="article-date"] 2date: //p[@class="article-date"]
3body://*[@class="article-body article-text"] 3body://div[contains(@class, "article-body")]
4# Trim out related posts at bottom of article 4# Trim out related posts at bottom of article
5strip://blockquote[@class="memo"] 5strip://blockquote[@class="memo"]
6 6
7# Yup, no idea why author won't work... 7tidy: no
8author://div[@class="page-header article-header clearfix"]/p[@class="title"] 8
9# Yup, no idea why author won't work...
10author://div[@class="page-header article-header clearfix"]/p[@class="title"]
9# [Marco:] Author won't work here because the page defines the "home" link under the author's name as rel="author", which always gets priority if the page has defined it. 11# [Marco:] Author won't work here because the page defines the "home" link under the author's name as rel="author", which always gets priority if the page has defined it.
10test_url: http://allthingsd.com/20120513/exclusive-yahoos-thompson-out-levinsohn-in-board-settlement-with-loeb-nears-completion/ \ No newline at end of file 12test_url: http://allthingsd.com/20120513/exclusive-yahoos-thompson-out-levinsohn-in-board-settlement-with-loeb-nears-completion/
13test_url: http://allthingsd.com/20131010/google-cio-ben-fried-on-how-google-works/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/allyou.com.txt b/inc/3rdparty/site_config/standard/allyou.com.txt
index 3c26c682..a13a7252 100644..100755
--- a/inc/3rdparty/site_config/standard/allyou.com.txt
+++ b/inc/3rdparty/site_config/standard/allyou.com.txt
@@ -1,8 +1,8 @@
1title: //div[@id='pageHdr']//h1 1title: //div[@id='pageHdr']//h1
2body: //div[@id='pageHdr']/*[@class='dek'] | //div[@id='printArticle' or @id='slideShowPrint'] 2body: //div[@id='pageHdr']/*[@class='dek'] | //div[@id='printArticle' or @id='slideShowPrint']
3strip: //div[contains(@class, 'infoBox') or @id='infoBox'] 3strip: //div[contains(@class, 'infoBox') or @id='infoBox']
4single_page_link: //li[@id='print']/a 4single_page_link: //li[@id='print']/a
5 5
6prune: no 6prune: no
7 7
8test_url: http://www.allyou.com/budget-home/money-shopping/freebies-online-00400000066392/ \ No newline at end of file 8test_url: http://www.allyou.com/budget-home/money-shopping/freebies-online-00400000066392/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/alphabeta.argaam.com.txt b/inc/3rdparty/site_config/standard/alphabeta.argaam.com.txt
index f5865f89..da1a67bc 100644..100755
--- a/inc/3rdparty/site_config/standard/alphabeta.argaam.com.txt
+++ b/inc/3rdparty/site_config/standard/alphabeta.argaam.com.txt
@@ -1,11 +1,11 @@
1body: //div[@class = 'entry'] 1body: //div[@class = 'entry']
2date: substring-after(//p[@class="date"],'بتاريخ ') 2date: substring-after(//p[@class="date"],'بتاريخ ')
3strip_id_or_class: date 3strip_id_or_class: date
4strip_id_or_class: follow-single 4strip_id_or_class: follow-single
5strip_id_or_class: ratingblock 5strip_id_or_class: ratingblock
6strip_id_or_class: newRatingHolder 6strip_id_or_class: newRatingHolder
7strip_id_or_class: postmetadata 7strip_id_or_class: postmetadata
8strip_id_or_class: addthis_toolbox 8strip_id_or_class: addthis_toolbox
9strip_id_or_class: addthis_default_style 9strip_id_or_class: addthis_default_style
10strip_id_or_class: size-full 10strip_id_or_class: size-full
11test_url: http://alphabeta.argaam.com/?p=35657 \ No newline at end of file 11test_url: http://alphabeta.argaam.com/?p=35657 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/alriyadh.com.txt b/inc/3rdparty/site_config/standard/alriyadh.com.txt
index d0060000..be7c43d5 100644..100755
--- a/inc/3rdparty/site_config/standard/alriyadh.com.txt
+++ b/inc/3rdparty/site_config/standard/alriyadh.com.txt
@@ -1,9 +1,9 @@
1body: //div[@id = "article-view"] 1body: //div[@id = "article-view"]
2body: //div[contains(@class, 'article')]//div[contains(@class, 'photo_bg')] 2body: //div[contains(@class, 'article')]//div[contains(@class, 'photo_bg')]
3author: //p[@class = "author"] 3author: //p[@class = "author"]
4strip: //h1 4strip: //h1
5strip: //h2 5strip: //h2
6strip_id_or_class: author 6strip_id_or_class: author
7prune: no 7prune: no
8test_url: http://www.alriyadh.com/2011/10/10/article674357.html 8test_url: http://www.alriyadh.com/2011/10/10/article674357.html
9test_url: http://www.alriyadh.com/net/article/780935 \ No newline at end of file 9test_url: http://www.alriyadh.com/net/article/780935 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/alseraj.net.txt b/inc/3rdparty/site_config/standard/alseraj.net.txt
index 107d82d6..107d82d6 100644..100755
--- a/inc/3rdparty/site_config/standard/alseraj.net.txt
+++ b/inc/3rdparty/site_config/standard/alseraj.net.txt
diff --git a/inc/3rdparty/site_config/standard/alt1040.com.txt b/inc/3rdparty/site_config/standard/alt1040.com.txt
index 4fd45719..4fd45719 100644..100755
--- a/inc/3rdparty/site_config/standard/alt1040.com.txt
+++ b/inc/3rdparty/site_config/standard/alt1040.com.txt
diff --git a/inc/3rdparty/site_config/standard/alternet.org.txt b/inc/3rdparty/site_config/standard/alternet.org.txt
new file mode 100755
index 00000000..e92252eb
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/alternet.org.txt
@@ -0,0 +1,4 @@
1single_page_link: //div[contains(@class, 'story_tools')]//a[contains(@href, '/print/')]
2
3test_url: http://www.alternet.org/civil-liberties/noam-chomsky-surveillance-state-beyond-imagination-being-created-one-freest
4test_url: http://feeds.feedblitz.com/alternet \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/altfoto.com.txt b/inc/3rdparty/site_config/standard/altfoto.com.txt
index d974cf4a..d974cf4a 100644..100755
--- a/inc/3rdparty/site_config/standard/altfoto.com.txt
+++ b/inc/3rdparty/site_config/standard/altfoto.com.txt
diff --git a/inc/3rdparty/site_config/standard/alumni.stanford.edu.txt b/inc/3rdparty/site_config/standard/alumni.stanford.edu.txt
index 7fd47193..a5bd03bf 100644..100755
--- a/inc/3rdparty/site_config/standard/alumni.stanford.edu.txt
+++ b/inc/3rdparty/site_config/standard/alumni.stanford.edu.txt
@@ -1,10 +1,10 @@
1title: //h1 1title: //h1
2 2
3author: substring-after(//div[@class="enableBullets"]/preceding-sibling::p[1], "By ") 3author: substring-after(//div[@class="enableBullets"]/preceding-sibling::p[1], "By ")
4 4
5date: //div/a[contains (@href, "issue")] 5date: //div/a[contains (@href, "issue")]
6 6
7move_into(//div[@class="enableBullets"]/p): (//div[@id="content"]//img)[1] 7move_into(//div[@class="enableBullets"]/p): (//div[@id="content"]//img)[1]
8 8
9body: //div[@class="enableBullets"] 9body: //div[@class="enableBullets"]
10test_url: http://alumni.stanford.edu/get/page/magazine/article/?article_id=54819 \ No newline at end of file 10test_url: http://alumni.stanford.edu/get/page/magazine/article/?article_id=54819 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/amandala.com.bz.txt b/inc/3rdparty/site_config/standard/amandala.com.bz.txt
new file mode 100755
index 00000000..fb0e21b8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/amandala.com.bz.txt
@@ -0,0 +1,6 @@
1body: //div[@id='content']//div[contains(@class, 'content')]
2strip_id_or_class: widget
3strip: //a[contains(@href, 'upm_export=')]
4
5test_url: http://amandala.com.bz/news/feed/
6test_url: http://amandala.com.bz/news/poor-pse-results-30-raise/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/amazon.com.txt b/inc/3rdparty/site_config/standard/amazon.com.txt
index 1a23c4b7..cd7ad159 100644..100755
--- a/inc/3rdparty/site_config/standard/amazon.com.txt
+++ b/inc/3rdparty/site_config/standard/amazon.com.txt
@@ -1,19 +1,19 @@
1title: //span[@id = 'btAsinTitle'] 1title: //span[@id = 'btAsinTitle']
2body: (//*[@id='prodImageCell']//a)[1] | //div[@id = 'ps-content'] | //span[@id='actualPriceValue'] | //h2[.='Product Details']/following-sibling::div | //div[@class='h2' and .='Product Description']/following-sibling::div 2body: (//*[@id='prodImageCell']//a)[1] | //div[@id = 'ps-content'] | //span[@id='actualPriceValue'] | //h2[.='Product Details']/following-sibling::div | //div[@class='h2' and .='Product Description']/following-sibling::div
3#strip_id_or_class: quantityDropdownDiv 3#strip_id_or_class: quantityDropdownDiv
4#strip_id_or_class: addToCartSpan 4#strip_id_or_class: addToCartSpan
5#strip_id_or_class: oneClickDiv 5#strip_id_or_class: oneClickDiv
6strip_id_or_class: nocontent 6strip_id_or_class: nocontent
7strip_id_or_class: masDynamicConten 7strip_id_or_class: masDynamicConten
8strip_id_or_class: dynamic-content 8strip_id_or_class: dynamic-content
9prune: no 9prune: no
10 10
11find_string: <span id="actualPriceValue"> 11find_string: <span id="actualPriceValue">
12replace_string: <span id="actualPriceValue"><br />Price: 12replace_string: <span id="actualPriceValue"><br />Price:
13 13
14strip_id_or_class: collapsePS 14strip_id_or_class: collapsePS
15strip_id_or_class: expandPS 15strip_id_or_class: expandPS
16strip_id_or_class: psPlaceHolde 16strip_id_or_class: psPlaceHolde
17strip: //li[contains(., 'update product info') or contains(., 'give feedback on images')] 17strip: //li[contains(., 'update product info') or contains(., 'give feedback on images')]
18 18
19test_url: http://www.amazon.com/Common-Sense-Forestry-Living-Mother/dp/1931498210/ \ No newline at end of file 19test_url: http://www.amazon.com/Common-Sense-Forestry-Living-Mother/dp/1931498210/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/americandrink.net.txt b/inc/3rdparty/site_config/standard/americandrink.net.txt
index dee0e868..7145f3ff 100644..100755
--- a/inc/3rdparty/site_config/standard/americandrink.net.txt
+++ b/inc/3rdparty/site_config/standard/americandrink.net.txt
@@ -1,6 +1,6 @@
1title: //div[@class='head']/h2/a 1title: //div[@class='head']/h2/a
2author: //div[@class='head']/a 2author: //div[@class='head']/a
3date: //div[@class='head']/p[@class='date']/a 3date: //div[@class='head']/p[@class='date']/a
4body: //div[@class='copy'] 4body: //div[@class='copy']
5strip: //p[@class='meta'] 5strip: //p[@class='meta']
6test_url: http://americandrink.net/post/10567188712/free-the-hooch \ No newline at end of file 6test_url: http://americandrink.net/post/10567188712/free-the-hooch \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/americascup.com.txt b/inc/3rdparty/site_config/standard/americascup.com.txt
index b1673b6a..31723f81 100644..100755
--- a/inc/3rdparty/site_config/standard/americascup.com.txt
+++ b/inc/3rdparty/site_config/standard/americascup.com.txt
@@ -1,10 +1,10 @@
1title: //div[@class="editorial-content"]/h3 1title: //div[@class="editorial-content"]/h3
2body: //div[@class="hero-image" or @class="editorial-content"] 2body: //div[@class="hero-image" or @class="editorial-content"]
3 3
4strip: //ul[@class="hero-caption"] 4strip: //ul[@class="hero-caption"]
5strip_id_or_class: footer 5strip_id_or_class: footer
6 6
7prune: no 7prune: no
8tidy: no 8tidy: no
9 9
10test_url: http://www.americascup.com/en/Latest/News/2012/3/Coutts-and-Peyron-tell-transformative-tale-at-Global-Sports-Forum/ \ No newline at end of file 10test_url: http://www.americascup.com/en/Latest/News/2012/3/Coutts-and-Peyron-tell-transformative-tale-at-Global-Sports-Forum/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/americastestkitchenfeed.com.txt b/inc/3rdparty/site_config/standard/americastestkitchenfeed.com.txt
index 8bf31ec2..c2b62b5a 100644..100755
--- a/inc/3rdparty/site_config/standard/americastestkitchenfeed.com.txt
+++ b/inc/3rdparty/site_config/standard/americastestkitchenfeed.com.txt
@@ -1,5 +1,5 @@
1title: //h1[@class="post-title"] 1title: //h1[@class="post-title"]
2author: //span[@class="author"]/a 2author: //span[@class="author"]/a
3date: //span[@class="date"] 3date: //span[@class="date"]
4body: //div[@class="post-content main"] 4body: //div[@class="post-content main"]
5test_url: http://www.americastestkitchenfeed.com/gadgets-and-gear/2012/07/chill-out-with-tovolos-king-cube-silicone-ice-cube-tray/ \ No newline at end of file 5test_url: http://www.americastestkitchenfeed.com/gadgets-and-gear/2012/07/chill-out-with-tovolos-king-cube-silicone-ice-cube-tray/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/amptoons.com.txt b/inc/3rdparty/site_config/standard/amptoons.com.txt
new file mode 100755
index 00000000..87547c63
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/amptoons.com.txt
@@ -0,0 +1,8 @@
1title: //title
2
3body: //div[@class="entry-content"]
4
5author: //span[@class="author vcard"]
6
7date: //span[@class="entry-date"]
8test_url: http://www.amptoons.com/blog/2013/03/14/open-thread-and-link-farm-i-hate-being-sick-edition/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/anandtech.com.txt b/inc/3rdparty/site_config/standard/anandtech.com.txt
index 8067e03c..7d804918 100644..100755
--- a/inc/3rdparty/site_config/standard/anandtech.com.txt
+++ b/inc/3rdparty/site_config/standard/anandtech.com.txt
@@ -1,11 +1,11 @@
1author: //a[@class='b'][1] 1author: //a[@class='b'][1]
2date: substring-after(substring-before(//div, 'Posted in'), ' on ') 2date: substring-after(substring-before(//div, 'Posted in'), ' on ')
3strip_image_src: /content/images/globals/ 3strip_image_src: /content/images/globals/
4strip: //h2[. = 'Page 1']/preceding::p 4strip: //h2[. = 'Page 1']/preceding::p
5strip: //h2 5strip: //h2
6 6
7prune: no 7prune: no
8 8
9single_page_link: concat('http://www.anandtech.com/print/', substring-after(//meta[@property='og:url']/@content, '/show/')) 9single_page_link: concat('http://www.anandtech.com/print/', substring-after(//meta[@property='og:url']/@content, '/show/'))
10 10
11test_url: http://www.anandtech.com/show/5812/eurocom-monster-10-clevos-little-monster/ \ No newline at end of file 11test_url: http://www.anandtech.com/show/5812/eurocom-monster-10-clevos-little-monster/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/androidpolice.com.txt b/inc/3rdparty/site_config/standard/androidpolice.com.txt
new file mode 100755
index 00000000..8f9b1a21
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/androidpolice.com.txt
@@ -0,0 +1,5 @@
1body: //div[@class='post_content']
2date: //div[@class='date_day'] | div[@class='date_month']
3
4test_url: http://www.androidpolice.com/2014/03/30/music-boss-for-pebble-can-now-control-playback-and-volume-on-chromecast-content-from-your-smartwatch/
5
diff --git a/inc/3rdparty/site_config/standard/andyrutledge.com.txt b/inc/3rdparty/site_config/standard/andyrutledge.com.txt
index f9ffd3c3..ce31fcf5 100644..100755
--- a/inc/3rdparty/site_config/standard/andyrutledge.com.txt
+++ b/inc/3rdparty/site_config/standard/andyrutledge.com.txt
@@ -1,9 +1,9 @@
1title: //h2 1title: //h2
2author: string('Andy Rutledge') 2author: string('Andy Rutledge')
3date: //div[@class='articledate'] 3date: //div[@class='articledate']
4body: //div[@class='copybody'] 4body: //div[@class='copybody']
5 5
6strip: //*[@class='space'] 6strip: //*[@class='space']
7strip: //*[@class='articleFoot'] 7strip: //*[@class='articleFoot']
8 8
9test_url: http://www.andyrutledge.com/hungry-for-a-better-menu.php \ No newline at end of file 9test_url: http://www.andyrutledge.com/hungry-for-a-better-menu.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/annatravelling.wordpress.com.txt b/inc/3rdparty/site_config/standard/annatravelling.wordpress.com.txt
index a5c7c08a..2d8937f7 100644..100755
--- a/inc/3rdparty/site_config/standard/annatravelling.wordpress.com.txt
+++ b/inc/3rdparty/site_config/standard/annatravelling.wordpress.com.txt
@@ -1,9 +1,9 @@
1title: //h1[@class="title"] 1title: //h1[@class="title"]
2 2
3author: ("Anna Manasova") 3author: ("Anna Manasova")
4# is ignored, unfortunately 4# is ignored, unfortunately
5 5
6date: //p[@class="date"] 6date: //p[@class="date"]
7 7
8body: //div[@class="entry"] 8body: //div[@class="entry"]
9test_url: http://annatravelling.wordpress.com/2011/11/07/a-day-of-cooking-thai/ \ No newline at end of file 9test_url: http://annatravelling.wordpress.com/2011/11/07/a-day-of-cooking-thai/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/applature.com.txt b/inc/3rdparty/site_config/standard/applature.com.txt
index a78a6150..a820bba4 100644..100755
--- a/inc/3rdparty/site_config/standard/applature.com.txt
+++ b/inc/3rdparty/site_config/standard/applature.com.txt
@@ -1,18 +1,18 @@
1title: //h1[contains(@class, 'title')# 1title: //h1[contains(@class, 'title')#
2body: //div[@id='mainContent']//div[contains(@class, 'section_content')] | //ul[@class='section_footer'] 2body: //div[@id='mainContent']//div[contains(@class, 'section_content')] | //ul[@class='section_footer']
3date: //div[@class='date'] 3date: //div[@class='date']
4 4
5strip_id_or_class: sharethis 5strip_id_or_class: sharethis
6strip_id_or_class: stats 6strip_id_or_class: stats
7strip_id_or_class: apply_form 7strip_id_or_class: apply_form
8strip_id_or_class: job_map 8strip_id_or_class: job_map
9strip_id_or_class: respond 9strip_id_or_class: respond
10strip: //h1//span[@class='type'] 10strip: //h1//span[@class='type']
11strip: //li[@class='print' or @class='map'] 11strip: //li[@class='print' or @class='map']
12 12
13replace_string(<ul class="section_footer" style="display): <ul class="section_footer" style="display-bla 13replace_string(<ul class="section_footer" style="display): <ul class="section_footer" style="display-bla
14 14
15prune: no 15prune: no
16tidy: no 16tidy: no
17 17
18test_url: http://applature.com/mining-jobs/jobs/nickel-west-leinster-analytical-laboratory-technician/ \ No newline at end of file 18test_url: http://applature.com/mining-jobs/jobs/nickel-west-leinster-analytical-laboratory-technician/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/apple.com.txt b/inc/3rdparty/site_config/standard/apple.com.txt
index 4c483955..a54dccc8 100644..100755
--- a/inc/3rdparty/site_config/standard/apple.com.txt
+++ b/inc/3rdparty/site_config/standard/apple.com.txt
@@ -1,7 +1,7 @@
1strip: //p[@class='sosumi'] 1strip: //p[@class='sosumi']
2# Aren't they witty? 2# Aren't they witty?
3 3
4# I can't work out what causes the  before the title. 4# I can't work out what causes the  before the title.
5title: //h1[@class='title'] 5title: //h1[@class='title']
6strip: //h1[@class='title'] 6strip: //h1[@class='title']
7test_url: http://www.apple.com/pr/library/2011/02/15appstore.html \ No newline at end of file 7test_url: http://www.apple.com/pr/library/2011/02/15appstore.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/appledaily.com.tw.txt b/inc/3rdparty/site_config/standard/appledaily.com.tw.txt
new file mode 100755
index 00000000..82d6f376
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/appledaily.com.tw.txt
@@ -0,0 +1,4 @@
1body: //div[contains(@class, 'articulum')]
2
3test_url: http://www.appledaily.com.tw/realtimenews/article/new/20140120/330479
4test_url: http://www.appledaily.com.tw/rss/create/kind/rnews/type/new/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/appleinsider.com.txt b/inc/3rdparty/site_config/standard/appleinsider.com.txt
index 279fbce1..5ae1050b 100644..100755
--- a/inc/3rdparty/site_config/standard/appleinsider.com.txt
+++ b/inc/3rdparty/site_config/standard/appleinsider.com.txt
@@ -1,11 +1,23 @@
1title: //p[@class='title'] 1title: //h1[@class="art-head"]
2 2
3author: //p[text() = 'By ']/a/text() 3author: //p[contains(@class, 'byline')]/a
4strip: //p[text() = 'By '] 4#author: //p[text() = 'By ']/a/text()
5 5#strip: //p[text() = 'By ']
6body: //td[@class='bod'] 6
7strip_id_or_class: title 7date: //p[contains(@class, 'date-header')]
8strip_id_or_class: minor 8
9 9body: //div[@class="article"]
10strip_id_or_class: multipagefooter 10strip_id_or_class: lazy
11test_url: http://www.appleinsider.com/articles/12/02/29/inside_os_x_108_mountain_lion_safari_52_gets_a_simplified_user_interface_with_new_sharing_features.html \ No newline at end of file 11#strip_id_or_class: minor
12strip_id_or_class: multipagefooter
13strip_id_or_class: date-header
14strip_id_or_class: byline
15
16find_string: <noscript>
17replace_string: <div>
18find_string: </noscript>
19replace_string: </div>
20
21test_url: http://www.appleinsider.com/articles/12/02/29/inside_os_x_108_mountain_lion_safari_52_gets_a_simplified_user_interface_with_new_sharing_features.html
22test_url: http://appleinsider.com/articles/13/10/03/goldee-companion-app-for-philips-hue-bulbs-offers-shifting-dynamic-light-scenes
23test_url: http://appleinsider.com/appleinsider.rss \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/appleweblog.com.txt b/inc/3rdparty/site_config/standard/appleweblog.com.txt
index 023c9ccb..023c9ccb 100644..100755
--- a/inc/3rdparty/site_config/standard/appleweblog.com.txt
+++ b/inc/3rdparty/site_config/standard/appleweblog.com.txt
diff --git a/inc/3rdparty/site_config/standard/archdaily.com.txt b/inc/3rdparty/site_config/standard/archdaily.com.txt
index 9476cf56..0178639e 100644..100755
--- a/inc/3rdparty/site_config/standard/archdaily.com.txt
+++ b/inc/3rdparty/site_config/standard/archdaily.com.txt
@@ -1,5 +1,5 @@
1date: //div[@class='post_date'] 1date: //div[@class='post_date']
2 2
3body: //div[@class='post_content'] 3body: //div[@class='post_content']
4 4
5test_url: http://www.archdaily.com/185325/p10-mixed-use-building-studio-up \ No newline at end of file 5test_url: http://www.archdaily.com/185325/p10-mixed-use-building-studio-up \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/archiveofourown.org.txt b/inc/3rdparty/site_config/standard/archiveofourown.org.txt
index 50ff632d..579de517 100644..100755
--- a/inc/3rdparty/site_config/standard/archiveofourown.org.txt
+++ b/inc/3rdparty/site_config/standard/archiveofourown.org.txt
@@ -1,18 +1,22 @@
1# Description: Fix XPaths to include ALL chapters on 'view_full_work' pages. 1# Description: Fix XPaths to include ALL chapters on 'view_full_work' pages.
2# Include: work meta, summary, chapter information, and notes which Instapaper strips out on default. 2# Include: work meta, summary, chapter information, and notes which Instapaper strips out on default.
3# Exclude: header, footer, navigation, comments. 3# Exclude: header, footer, navigation, comments.
4# Notes: User is a newbie with XPaths. 4# Notes: User is a newbie with XPaths.
5 5
6title: //h2[@class='title'] 6title: //h2[@class='title']
7author: //h3[@class='byline'] 7author: //h3[@class='byline']
8author: //a[@class='login author'] 8author: //a[@class='login author']
9 9
10strip_id_or_class:header 10strip_id_or_class:header
11strip_id_or_class:navigation 11strip_id_or_class:navigation
12strip_id_or_class:feedback 12strip_id_or_class:feedback
13strip_id_or_class:kudos 13strip_id_or_class:kudos
14strip_id_or_class:add_comment_placeholder 14strip_id_or_class:add_comment_placeholder
15strip_id_or_class:add_comment 15strip_id_or_class:add_comment
16strip_id_or_class:globalize 16strip_id_or_class:globalize
17strip_id_or_class:footer 17strip_id_or_class:footer
18test_url: http://archiveofourown.org/works/229402?view_full_work=true \ No newline at end of file 18
19single_page_link: //div[@id='main']//a[contains(@href, 'view_adult=true')]
20
21test_url: http://archiveofourown.org/works/229402?view_full_work=true
22test_url: http://archiveofourown.org/works/750111/chapters/1399929 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/arstechnica.com.txt b/inc/3rdparty/site_config/standard/arstechnica.com.txt
index 49bb3dbc..767f6800 100644..100755
--- a/inc/3rdparty/site_config/standard/arstechnica.com.txt
+++ b/inc/3rdparty/site_config/standard/arstechnica.com.txt
@@ -1,16 +1,17 @@
1author: //p[@class='byline']/a 1author: //p[@class='byline']/a
2body: //div[contains(@class,'article-content')] 2body: //div[contains(@class,'article-content')]
3strip: //h2[@class='title'] 3strip: //h2[@class='title']
4strip_id_or_class: byline 4strip_id_or_class: byline
5prune: no 5strip_id_or_class: story-sidebar
6 6prune: no
7date: //div[@class='byline']/span[@class='posted']//abbr/@original-title 7
8date: //div[@class='byline']/span[@class='posted']//abbr 8date: //div[@class='byline']/span[@class='posted']//abbr/@original-title
9 9date: //div[@class='byline']/span[@class='posted']//abbr
10title: //div[@id='story']//h2[@class='title'] 10
11 11title: //div[@id='story']//h2[@class='title']
12strip: //div[@class='pager'] 12
13next_page_link: //nav//a[span/@class='next']/@href 13strip: //div[@class='pager']
14 14next_page_link: //nav//a[span/@class='next']/@href
15test_url: http://arstechnica.com/tech-policy/news/2012/02/gigabit-internet-for-80-the-unlikely-success-of-californias-sonicnet.ars 15
16test_url: http://arstechnica.com/apple/2005/04/macosx-10-4/ \ No newline at end of file 16test_url: http://arstechnica.com/tech-policy/news/2012/02/gigabit-internet-for-80-the-unlikely-success-of-californias-sonicnet.ars
17test_url: http://arstechnica.com/apple/2005/04/macosx-10-4/
diff --git a/inc/3rdparty/site_config/standard/articles.boston.com.txt b/inc/3rdparty/site_config/standard/articles.boston.com.txt
index e54423be..73bcdb4e 100644..100755
--- a/inc/3rdparty/site_config/standard/articles.boston.com.txt
+++ b/inc/3rdparty/site_config/standard/articles.boston.com.txt
@@ -1,6 +1,6 @@
1title: //div[@class="mod-bostonarticleheader mod-articleheader"]/h1 1title: //div[@class="mod-bostonarticleheader mod-articleheader"]/h1
2author: substring-after(//div[@class="mod-bostonarticlebyline mod-articlebyline"]/span[3],"By ") 2author: substring-after(//div[@class="mod-bostonarticlebyline mod-articlebyline"]/span[3],"By ")
3date: //div[@class="mod-bostonarticlebyline mod-articlebyline"]/span[@class="pubdate"] 3date: //div[@class="mod-bostonarticlebyline mod-articlebyline"]/span[@class="pubdate"]
4 4
5strip_id_or_class: mod-pagination 5strip_id_or_class: mod-pagination
6test_url: http://articles.boston.com/2011-10-23/news/30313691_1_bigfoot-free-speech-monadnock-state-park \ No newline at end of file 6test_url: http://articles.boston.com/2011-10-23/news/30313691_1_bigfoot-free-speech-monadnock-state-park \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/articles.courant.com.txt b/inc/3rdparty/site_config/standard/articles.courant.com.txt
index a08f2041..984d81de 100644..100755
--- a/inc/3rdparty/site_config/standard/articles.courant.com.txt
+++ b/inc/3rdparty/site_config/standard/articles.courant.com.txt
@@ -1,11 +1,11 @@
1title: //div[@class="mod-courantarticleheader mod-articleheader"]/h1 1title: //div[@class="mod-courantarticleheader mod-articleheader"]/h1
2date: //div[@class="mod-courantarticlebyline mod-articlebyline"]/span[@class="pubdate"] 2date: //div[@class="mod-courantarticlebyline mod-articlebyline"]/span[@class="pubdate"]
3author: //div[@class="mod-courantarticlebyline mod-articlebyline"]/span[3] 3author: //div[@class="mod-courantarticlebyline mod-articlebyline"]/span[3]
4 4
5strip_id_or_class: mod-article-byline 5strip_id_or_class: mod-article-byline
6strip_id_or_class: mod-article-header 6strip_id_or_class: mod-article-header
7strip_id_or_class: mod-article-subtitle 7strip_id_or_class: mod-article-subtitle
8#This leaves some crud after the article, but it's better than nothing. 8#This leaves some crud after the article, but it's better than nothing.
9#It would be ideal if we could set the body to every element matching //div[contains(@class, "mod-articletext")]/p, but it seems like body only takes the first matching element. 9#It would be ideal if we could set the body to every element matching //div[contains(@class, "mod-articletext")]/p, but it seems like body only takes the first matching element.
10 10
11test_url: http://articles.courant.com/2011-10-22/news/hc-green-drugsearch--1022-20111022_1_drugs-in-student-lockers-police-dogs-lockdown \ No newline at end of file 11test_url: http://articles.courant.com/2011-10-22/news/hc-green-drugsearch--1022-20111022_1_drugs-in-student-lockers-police-dogs-lockdown \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/articles.washingtonpost.com.txt b/inc/3rdparty/site_config/standard/articles.washingtonpost.com.txt
new file mode 100755
index 00000000..a76c2d02
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/articles.washingtonpost.com.txt
@@ -0,0 +1,11 @@
1body: //div[contains(@class, "article_body")]
2# print view
3body: //div[@id='print_facet']//div[@id='body']
4
5tidy: no
6prune: no
7
8single_page_link: concat(substring-before(//div[@id="echo_container_a"]/@guid, '_story.html'), '_print.html')
9
10test_url: http://articles.washingtonpost.com/2011-10-22/world/35279694_1_germany-acts-german-leaders-chancellor-angela-merkel
11test_url: http://articles.washingtonpost.com/2013-05-31/opinions/39658000_1_chemical-weapons-mass-destruction-cartels \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/asahi.com.txt b/inc/3rdparty/site_config/standard/asahi.com.txt
index 2562edb9..b4eec7bd 100644..100755
--- a/inc/3rdparty/site_config/standard/asahi.com.txt
+++ b/inc/3rdparty/site_config/standard/asahi.com.txt
@@ -1,3 +1,3 @@
1body: //div[@id='HeadLine'] 1body: //div[@id='HeadLine']
2strip: //div[@id='utility_right'] 2strip: //div[@id='utility_right']
3test_url: http://www.asahi.com/culture/update/0520/TKY201105200321.html \ No newline at end of file 3test_url: http://www.asahi.com/culture/update/0520/TKY201105200321.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ascarter.net.txt b/inc/3rdparty/site_config/standard/ascarter.net.txt
index 5236d09e..0327e846 100644..100755
--- a/inc/3rdparty/site_config/standard/ascarter.net.txt
+++ b/inc/3rdparty/site_config/standard/ascarter.net.txt
@@ -1,5 +1,5 @@
1title: //h1[@class='article_title'] 1title: //h1[@class='article_title']
2author: //span[@class='author'] 2author: //span[@class='author']
3date: //h2[@class='dateline'] 3date: //h2[@class='dateline']
4body: //div[@class='article_body'] 4body: //div[@class='article_body']
5test_url: http://ascarter.net/2012/02/20/enough-is-enough.html \ No newline at end of file 5test_url: http://ascarter.net/2012/02/20/enough-is-enough.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/astronews.com.txt b/inc/3rdparty/site_config/standard/astronews.com.txt
index 33e8153d..8de22270 100644..100755
--- a/inc/3rdparty/site_config/standard/astronews.com.txt
+++ b/inc/3rdparty/site_config/standard/astronews.com.txt
@@ -1,7 +1,7 @@
1title: //span[@class='titel'] 1title: //span[@class='titel']
2author: //span[@class='metadaten_C']/a//span[@class='metadaten_C'] 2author: //span[@class='metadaten_C']/a//span[@class='metadaten_C']
3date: substring-after(//span[@class='metadaten_C'],'astronews.com') 3date: substring-after(//span[@class='metadaten_C'],'astronews.com')
4strip: //span[@class='bu'] 4strip: //span[@class='bu']
5strip_image_src: '/_images/' 5strip_image_src: '/_images/'
6 6
7test_url: http://www.astronews.com/news/artikel/2011/10/1110-021.shtml \ No newline at end of file 7test_url: http://www.astronews.com/news/artikel/2011/10/1110-021.shtml \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/asymco.com.txt b/inc/3rdparty/site_config/standard/asymco.com.txt
index adad5f18..f639b048 100644..100755
--- a/inc/3rdparty/site_config/standard/asymco.com.txt
+++ b/inc/3rdparty/site_config/standard/asymco.com.txt
@@ -1,8 +1,8 @@
1# Johannes Sthler 1# Johannes Stühler
2 2
3title://h2 3title://h2
4author://span[@class='meta-content'] 4author://span[@class='meta-content']
5date://abbr[@class='date published']/@title 5date://abbr[@class='date published']/@title
6body://div[@class='entry-content'] 6body://div[@class='entry-content']
7 7
8test_url: http://www.asymco.com/2011/01/14/is-android-more-efficient-than-ios-at-generating-search-revenue/ \ No newline at end of file 8test_url: http://www.asymco.com/2011/01/14/is-android-more-efficient-than-ios-at-generating-search-revenue/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/autoblog.com.txt b/inc/3rdparty/site_config/standard/autoblog.com.txt
index 58681bf9..291db992 100644..100755
--- a/inc/3rdparty/site_config/standard/autoblog.com.txt
+++ b/inc/3rdparty/site_config/standard/autoblog.com.txt
@@ -1,6 +1,6 @@
1prune: no 1prune: no
2body: //div[@class='post-body'] 2body: //div[@class='post-body']
3author: //p[@class='byline']//a 3author: //p[@class='byline']//a
4date: substring-after(//div[@class='about']/p[2], 'Posted') 4date: substring-after(//div[@class='about']/p[2], 'Posted')
5strip: //div[@class='body']/div[@class='meta'] 5strip: //div[@class='body']/div[@class='meta']
6test_url: http://www.autoblog.com/2012/01/17/next-gen-bmw-x5-caught-again/ \ No newline at end of file 6test_url: http://www.autoblog.com/2012/01/17/next-gen-bmw-x5-caught-again/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/avclub.com.txt b/inc/3rdparty/site_config/standard/avclub.com.txt
index 776ee108..c365a7aa 100644..100755
--- a/inc/3rdparty/site_config/standard/avclub.com.txt
+++ b/inc/3rdparty/site_config/standard/avclub.com.txt
@@ -1,4 +1,4 @@
1author: //*[@id="article_wrapper"]/div[1]/a[1] 1author: //*[@id="article_wrapper"]/div[1]/a[1]
2body: //*[@id="article_wrapper"]/div[2] 2body: //*[@id="article_wrapper"]/div[2]
3date: //*[@id="article_wrapper"]/div[1]/text()[2] 3date: //*[@id="article_wrapper"]/div[1]/text()[2]
4test_url: http://www.avclub.com/articles/forgetmenot,70904 \ No newline at end of file 4test_url: http://www.avclub.com/articles/forgetmenot,70904 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/baltimoresun.com.txt b/inc/3rdparty/site_config/standard/baltimoresun.com.txt
index 32adff8d..35b62427 100644..100755
--- a/inc/3rdparty/site_config/standard/baltimoresun.com.txt
+++ b/inc/3rdparty/site_config/standard/baltimoresun.com.txt
@@ -1,12 +1,12 @@
1single_page_link: //div[@class='toppaginate']//a[@rel='nofollow'] 1single_page_link: //div[@class='toppaginate']//a[@rel='nofollow']
2convert_double_br_tags: yes 2convert_double_br_tags: yes
3 3
4title: //div[@class="story"]/h1 4title: //div[@class="story"]/h1
5body: //div[@id="story-body-text"] 5body: //div[@id="story-body-text"]
6author: //span[@class="byline"] 6author: //span[@class="byline"]
7date: //p[@class="date"] 7date: //p[@class="date"]
8 8
9strip: //*[@class='all'] 9strip: //*[@class='all']
10strip: //*[@class='articlerail'] 10strip: //*[@class='articlerail']
11 11
12test_url: http://www.baltimoresun.com/news/maryland/bs-md-omalley-budget-2-20120116,0,5340585.story \ No newline at end of file 12test_url: http://www.baltimoresun.com/news/maryland/bs-md-omalley-budget-2-20120116,0,5340585.story \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/baseballprospectus.com.txt b/inc/3rdparty/site_config/standard/baseballprospectus.com.txt
new file mode 100755
index 00000000..1207b343
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/baseballprospectus.com.txt
@@ -0,0 +1,13 @@
1title: //h1[@class='title']
2author: //p[@class="author"]/a[1]
3body: //div[@class="article"]
4date: //p[@class="date"]
5
6# remove user tools
7strip: //div[@class='tools']
8strip: //h1
9strip: //h2[@class='subtitle']
10strip: //p[@class='author']
11strip: //p[@class='date']
12
13test_url: http://www.baseballprospectus.com/article.php?articleid=18463 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/basicthinking.de.txt b/inc/3rdparty/site_config/standard/basicthinking.de.txt
index ab583145..f08c1f26 100644..100755
--- a/inc/3rdparty/site_config/standard/basicthinking.de.txt
+++ b/inc/3rdparty/site_config/standard/basicthinking.de.txt
@@ -1,7 +1,7 @@
1title: //h2 1title: //h2
2date: //span[@class='date'] 2date: //span[@class='date']
3body: //div[@class='entry'] 3body: //div[@class='entry']
4 4
5strip: //div[@class='zusatz'] 5strip: //div[@class='zusatz']
6 6
7test_url: http://www.basicthinking.de/blog/2011/12/13/sagt-social-networks-adieu-begrust-private-networks/ \ No newline at end of file 7test_url: http://www.basicthinking.de/blog/2011/12/13/sagt-social-networks-adieu-begrust-private-networks/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bb.is.txt b/inc/3rdparty/site_config/standard/bb.is.txt
index eaafaf18..57f7fdfa 100644..100755
--- a/inc/3rdparty/site_config/standard/bb.is.txt
+++ b/inc/3rdparty/site_config/standard/bb.is.txt
@@ -1,13 +1,13 @@
1author: substring(//h3[@class='headlines']/span[@class='dates'],0,string-length(//h3[@class='headlines']/span[@class='dates'])-20) 1author: substring(//h3[@class='headlines']/span[@class='dates'],0,string-length(//h3[@class='headlines']/span[@class='dates'])-20)
2 2
3 3
4date: substring((//h3[@class='headlines']/span[@class='dates']),string-length(//h3[@class='headlines']/span[@class='dates'])-18,12) 4date: substring((//h3[@class='headlines']/span[@class='dates']),string-length(//h3[@class='headlines']/span[@class='dates'])-18,12)
5 5
6 6
7body: //div[@class='first-article-big'] 7body: //div[@class='first-article-big']
8strip: //table[@class='newsimagecontainer'] 8strip: //table[@class='newsimagecontainer']
9strip: //h3[@class='headlines'] 9strip: //h3[@class='headlines']
10strip: //iframe[@class='headlines'] 10strip: //iframe[@class='headlines']
11strip: //a[@class='newslink'] 11strip: //a[@class='newslink']
12convert_double_br_tags: yes 12convert_double_br_tags: yes
13test_url: http://bb.is/Pages/82?NewsID=174119 \ No newline at end of file 13test_url: http://bb.is/Pages/82?NewsID=174119 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bbc.co.uk.txt b/inc/3rdparty/site_config/standard/bbc.co.uk.txt
index 9c5c3419..ef1f491a 100644..100755
--- a/inc/3rdparty/site_config/standard/bbc.co.uk.txt
+++ b/inc/3rdparty/site_config/standard/bbc.co.uk.txt
@@ -1,32 +1,42 @@
1body: //div[@class="story-body"] 1body: //div[@class="story-body"]
2title: //h1[@class="story-header"] 2# for video entries
3date: //span[@class="story-date"]/span[@class='date'] 3body: //div[contains(@class, "videoInStory") or @id="meta-information"]
4 4title: //h1[@class="story-header"]
5# recipes, e.g. http://www.bbc.co.uk/food/recipes/mymincepies_71055 5date: //span[@class="story-date"]/span[@class='date']
6body: //div[contains(@class, 'hrecipe')]//div[@id='subcolumn-1'] 6# for sport site
7 7date: //meta[@name='DCTERMS.created']/@content
8#strip: //div[@class="story-feature narrow"] 8author: //div[@id='headline']//span[@class='byline-name']
9#strip: //div[@class="story-feature wide"] 9
10#strip: //div[@class="story-feature dslideshow-enclosure"] 10# recipes, e.g. http://www.bbc.co.uk/food/recipes/mymincepies_71055
11strip: //div[contains(@class, "story-feature")] 11body: //div[contains(@class, 'hrecipe')]//div[@id='subcolumn-1']
12strip: //span[@class="story-date"] 12
13#strip: //div[@class="caption body-narrow-width"] 13#strip: //div[@class="story-feature narrow"]
14strip: //div[@class="warning"]//p 14#strip: //div[@class="story-feature wide"]
15strip: //div[@id='page-bookmark-links-head'] 15#strip: //div[@class="story-feature dslideshow-enclosure"]
16strip: //object 16strip: //div[contains(@class, "story-feature")]
17strip: //div[contains(@class, "bbccom_advert_placeholder")] 17strip: //span[@class="story-date"]
18strip: //div[contains(@class, "embedded-hyper")] 18#strip: //div[@class="caption body-narrow-width"]
19strip: //div[contains(@class, 'market-data')] 19strip: //div[@class="warning"]//p
20strip: //a[contains(@class, 'hidden')] 20strip: //div[@id='page-bookmark-links-head']
21strip: //div[contains(@class, 'hypertabs')] 21strip: //object
22strip: //div[contains(@class, 'related')] 22strip: //div[contains(@class, "bbccom_advert_placeholder")]
23strip: //form[@id='comment-form'] 23strip: //div[contains(@class, "embedded-hyper")]
24strip: //div[contains(@class, 'comment-introduction')] 24strip: //div[contains(@class, 'market-data')]
25 25strip: //a[contains(@class, 'hidden')]
26replace_string(<noscript>): <div> 26strip: //div[contains(@class, 'hypertabs')]
27replace_string(</noscript>): </div> 27strip: //div[contains(@class, 'related')]
28 28strip: //form[@id='comment-form']
29prune: no 29strip: //div[contains(@class, 'comment-introduction')]
30 30strip: //div[contains(@class, 'share-tools')]
31dissolve: //h2 31strip: //div[@id='also-related-links']
32test_url: http://www.bbc.co.uk/news/business-15060862 \ No newline at end of file 32
33replace_string(<noscript>): <div>
34replace_string(</noscript>): </div>
35
36prune: no
37
38dissolve: //h2
39test_url: http://www.bbc.co.uk/sport/0/football/23224017
40test_url: http://www.bbc.co.uk/news/business-15060862
41# video entry
42test_url: http://www.bbc.co.uk/news/world-asia-22056933 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bbcgoodfood.com.txt b/inc/3rdparty/site_config/standard/bbcgoodfood.com.txt
new file mode 100755
index 00000000..1547d625
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/bbcgoodfood.com.txt
@@ -0,0 +1,16 @@
1title: //header//h1
2#body: //article[contains(@class, 'node-full')]
3body: //div[contains(@class, 'recipe-details') or contains(@class, 'tips-carousel')] | //section[@id='recipe-ingredients' or @id='recipe-method']
4
5strip_id_or_class: recipe-rating-wrapper
6strip_id_or_class: magazine-subcribe-header
7strip_id_or_class: hide
8strip_id_or_class: recipe-actions
9strip_id_or_class: buy-ingredients
10strip_id_or_class: related-content
11strip_id_or_class: recipe-magazine-ad
12strip_id_or_class: copy-right
13
14prune: no
15
16test_url: http://www.bbcgoodfood.com/recipes/1131634/minced-beef-wellington \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/benoitmaison.org.txt b/inc/3rdparty/site_config/standard/benoitmaison.org.txt
index f341d593..72c1baed 100644..100755
--- a/inc/3rdparty/site_config/standard/benoitmaison.org.txt
+++ b/inc/3rdparty/site_config/standard/benoitmaison.org.txt
@@ -1,16 +1,16 @@
1body: //div[@class="entry-content"] 1body: //div[@class="entry-content"]
2 2
3# Remove text &lsquo;Tweet&rsquo; 3# Remove text &lsquo;Tweet&rsquo;
4strip: //div[@class="entry-content"]/div[last()] 4strip: //div[@class="entry-content"]/div[last()]
5 5
6title: h1[@class="entry-title"] 6title: h1[@class="entry-title"]
7 7
8# If the Instapaper text parser worked with HTML5 tags, we would use: 8# If the Instapaper text parser worked with HTML5 tags, we would use:
9date: //time[@class="entry-date"] 9date: //time[@class="entry-date"]
10 10
11# But since it does not, use this more complicated rule: 11# But since it does not, use this more complicated rule:
12date: //div[@class="entry-meta"]/a[@rel="bookmark"] 12date: //div[@class="entry-meta"]/a[@rel="bookmark"]
13 13
14# Unfortunately, the following rule is overridden by the automatically found author. 14# Unfortunately, the following rule is overridden by the automatically found author.
15author: ("Benoit Maison") 15author: ("Benoit Maison")
16test_url: http://www.benoitmaison.org/2011/12/06/why-siri-had-to-start-in-beta/ \ No newline at end of file 16test_url: http://www.benoitmaison.org/2011/12/06/why-siri-had-to-start-in-beta/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/berlingske.dk.txt b/inc/3rdparty/site_config/standard/berlingske.dk.txt
index 607c998d..9f8c41c6 100644..100755
--- a/inc/3rdparty/site_config/standard/berlingske.dk.txt
+++ b/inc/3rdparty/site_config/standard/berlingske.dk.txt
@@ -1,3 +1,3 @@
1title: //h1[@class='headline'] 1title: //h1[@class='headline']
2body: //div[contains(@class, 'article-wrapper')] 2body: //div[contains(@class, 'article-wrapper')]
3test_url: http://www.berlingske.dk/danmark/festen-er-flyttet-nordpaa \ No newline at end of file 3test_url: http://www.berlingske.dk/danmark/festen-er-flyttet-nordpaa \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bernama.com.txt b/inc/3rdparty/site_config/standard/bernama.com.txt
new file mode 100755
index 00000000..fdc04b7f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/bernama.com.txt
@@ -0,0 +1,5 @@
1body: //div[contains(@class, "NewsText")]
2prune: no
3
4test_url: http://www.bernama.com/bernama/v7/rss/english.php
5test_url: http://www.bernama.com/bernama/v7/newsindex.php?id=943513 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/betabeat.com.txt b/inc/3rdparty/site_config/standard/betabeat.com.txt
index 7815cf26..7815cf26 100644..100755
--- a/inc/3rdparty/site_config/standard/betabeat.com.txt
+++ b/inc/3rdparty/site_config/standard/betabeat.com.txt
diff --git a/inc/3rdparty/site_config/standard/betanews.com.txt b/inc/3rdparty/site_config/standard/betanews.com.txt
index 0eaf085e..90a54a23 100644..100755
--- a/inc/3rdparty/site_config/standard/betanews.com.txt
+++ b/inc/3rdparty/site_config/standard/betanews.com.txt
@@ -1,7 +1,7 @@
1# some articles at this site like this one doesn't 1# some articles at this site like this one doesn't
2# seem to pick up the article body via normal 2# seem to pick up the article body via normal
3# processing, other articles come through fine 3# processing, other articles come through fine
4# http://www.betanews.com/joewilcox/article 4# http://www.betanews.com/joewilcox/article
5# /Google-is-a-marketing-sensation/1309708375 5# /Google-is-a-marketing-sensation/1309708375
6body: //*[@id="article"] 6body: //*[@id="article"]
7test_url: http://www.betanews.com/joewilcox/article/Google-is-a-marketing-sensation/1309708375 \ No newline at end of file 7test_url: http://www.betanews.com/joewilcox/article/Google-is-a-marketing-sensation/1309708375 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/biography.com.txt b/inc/3rdparty/site_config/standard/biography.com.txt
index dc071299..e431037a 100644..100755
--- a/inc/3rdparty/site_config/standard/biography.com.txt
+++ b/inc/3rdparty/site_config/standard/biography.com.txt
@@ -1,8 +1,8 @@
1title: //div[contains(@class, 'main-content')]//h1 1title: //div[contains(@class, 'main-content')]//h1
2body: //div[@class='summary-column'] | //div[contains(@class, 'main-content')] 2body: //div[@class='summary-column'] | //div[contains(@class, 'main-content')]
3 3
4prune: no 4prune: no
5 5
6single_page_link: //div[@id='biography-action-links']//a[contains(@href, '/print/')] 6single_page_link: //div[@id='biography-action-links']//a[contains(@href, '/print/')]
7 7
8test_url: http://www.biography.com/print/profile/martin-luther-9389283 \ No newline at end of file 8test_url: http://www.biography.com/print/profile/martin-luther-9389283 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bitelia.com.txt b/inc/3rdparty/site_config/standard/bitelia.com.txt
index 7bffae93..7bffae93 100644..100755
--- a/inc/3rdparty/site_config/standard/bitelia.com.txt
+++ b/inc/3rdparty/site_config/standard/bitelia.com.txt
diff --git a/inc/3rdparty/site_config/standard/bizjournals.com.txt b/inc/3rdparty/site_config/standard/bizjournals.com.txt
new file mode 100755
index 00000000..cfba766f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/bizjournals.com.txt
@@ -0,0 +1,13 @@
1date: //meta[@name='publish-date']/@content
2body: //div[contains(@class, 'articleContentWrapper')]
3prune: no
4
5strip: //div[contains(@class, 'staff_info')]//dd[contains(., 'Twitter')]
6
7strip_id_or_class: related_content
8strip_id_or_class: enlarge
9strip_id_or_class: photoBy
10strip_id_or_class: older
11
12test_url: http://www.bizjournals.com/cincinnati/news/2013/10/03/harris-teeter-shareholders-vote-on.html
13test_url: http://feeds.bizjournals.com/industry_20?format=xml \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bjango.com.txt b/inc/3rdparty/site_config/standard/bjango.com.txt
index 6cb04631..0fed5526 100644..100755
--- a/inc/3rdparty/site_config/standard/bjango.com.txt
+++ b/inc/3rdparty/site_config/standard/bjango.com.txt
@@ -1,7 +1,7 @@
1title: //h1[@class='articlehead'] 1title: //h1[@class='articlehead']
2body: //div[@class='column'] 2body: //div[@class='column']
3strip: //h1 3strip: //h1
4strip: //div[@class='help'] 4strip: //div[@class='help']
5 5
6#no author or date/time provided in current layout 6#no author or date/time provided in current layout
7test_url: http://bjango.com/articles/actions/ \ No newline at end of file 7test_url: http://bjango.com/articles/actions/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.arsln.org.txt b/inc/3rdparty/site_config/standard/blog.arsln.org.txt
index 1f43f490..7ac8cc11 100644..100755
--- a/inc/3rdparty/site_config/standard/blog.arsln.org.txt
+++ b/inc/3rdparty/site_config/standard/blog.arsln.org.txt
@@ -1,8 +1,8 @@
1tidy: no 1tidy: no
2prune: no 2prune: no
3date: //article/header/h6/time 3date: //article/header/h6/time
4title: //article/header/h3 4title: //article/header/h3
5author: //meta[@name='author']/@content 5author: //meta[@name='author']/@content
6body: //article//post 6body: //article//post
7 7
8test_url: http://blog.arsln.org/aska-ayip-oluyor/ \ No newline at end of file 8test_url: http://blog.arsln.org/aska-ayip-oluyor/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.asmartbear.com.txt b/inc/3rdparty/site_config/standard/blog.asmartbear.com.txt
index 81c3bda6..78d7f516 100644..100755
--- a/inc/3rdparty/site_config/standard/blog.asmartbear.com.txt
+++ b/inc/3rdparty/site_config/standard/blog.asmartbear.com.txt
@@ -1,7 +1,7 @@
1title: //title 1title: //title
2author: //span[@class='author vcard']/a 2author: //span[@class='author vcard']/a
3date: //p[@class='headline_meta']/abbr[@class='published'] 3date: //p[@class='headline_meta']/abbr[@class='published']
4body: //div[@class='format_text entry-content'] 4body: //div[@class='format_text entry-content']
5 5
6strip: //div[@id='dd_ajax_float'] 6strip: //div[@id='dd_ajax_float']
7test_url: http://blog.asmartbear.com/how-to-get-quality-freelance-graphics-design-work-on-a-budget.html \ No newline at end of file 7test_url: http://blog.asmartbear.com/how-to-get-quality-freelance-graphics-design-work-on-a-budget.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.cloudflare.com.txt b/inc/3rdparty/site_config/standard/blog.cloudflare.com.txt
index a4c5aaea..db80a35f 100644..100755
--- a/inc/3rdparty/site_config/standard/blog.cloudflare.com.txt
+++ b/inc/3rdparty/site_config/standard/blog.cloudflare.com.txt
@@ -1,9 +1,9 @@
1# Instapaper gets this back to front and only gets the blog title instead of the article title. 1# Instapaper gets this back to front and only gets the blog title instead of the article title.
2title: substring-before(//title, '-') 2title: substring-before(//title, '-')
3 3
4author: //a[ contains(@href, '/people') ] 4author: //a[ contains(@href, '/people') ]
5 5
6body: //div[ @class='post' ] 6body: //div[ @class='post' ]
7 7
8# Date is impossible to retrieve since they use those stupid "fuzzy" dates, inserted through javascript, at posterous. 8# Date is impossible to retrieve since they use those stupid "fuzzy" dates, inserted through javascript, at posterous.
9test_url: http://blog.cloudflare.com/understanding-analytics-when-is-a-page-view-n \ No newline at end of file 9test_url: http://blog.cloudflare.com/understanding-analytics-when-is-a-page-view-n \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.fefe.de.txt b/inc/3rdparty/site_config/standard/blog.fefe.de.txt
index 92272b70..97e48e69 100644..100755
--- a/inc/3rdparty/site_config/standard/blog.fefe.de.txt
+++ b/inc/3rdparty/site_config/standard/blog.fefe.de.txt
@@ -1,5 +1,5 @@
1title: //h2 1title: //h2
2date: //h3 2date: //h3
3body: //ul 3body: //ul
4 4
5test_url: http://blog.fefe.de/?ts=b063bf55 \ No newline at end of file 5test_url: http://blog.fefe.de/?ts=b063bf55 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.instagram.com.txt b/inc/3rdparty/site_config/standard/blog.instagram.com.txt
index 3065dd80..13d1d44a 100644..100755
--- a/inc/3rdparty/site_config/standard/blog.instagram.com.txt
+++ b/inc/3rdparty/site_config/standard/blog.instagram.com.txt
@@ -1,11 +1,11 @@
1# clean Instagram blog a little bit 1# clean Instagram blog a little bit
2 2
3tidy:no 3tidy:no
4prune:no 4prune:no
5 5
6body://div[contains(@id,'content')] 6body://div[contains(@id,'content')]
7 7
8strip_id_or_class:meta 8strip_id_or_class:meta
9strip_id_or_class:notes 9strip_id_or_class:notes
10strip_id_or_class:pagination 10strip_id_or_class:pagination
11test_url: http://blog.instagram.com/post/8757832007/fromwhereistand \ No newline at end of file 11test_url: http://blog.instagram.com/post/8757832007/fromwhereistand \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.instapaper.com.txt b/inc/3rdparty/site_config/standard/blog.instapaper.com.txt
new file mode 100755
index 00000000..fda01b15
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blog.instapaper.com.txt
@@ -0,0 +1,9 @@
1author: //a[@href="http://www.marco.org/about"]
2date: //span[@class="date"]
3
4# Remove the date from article body.
5strip: //span[@class="date"]
6
7# Remove pagination links from article body.
8strip: //div[@id="pagination"]
9test_url: http://blog.instapaper.com/post/31303984531 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.jaysalvat.com.txt b/inc/3rdparty/site_config/standard/blog.jaysalvat.com.txt
index 4e467fe9..e89ad3a5 100644..100755
--- a/inc/3rdparty/site_config/standard/blog.jaysalvat.com.txt
+++ b/inc/3rdparty/site_config/standard/blog.jaysalvat.com.txt
@@ -1,4 +1,4 @@
1date: //span[contains(@class, 'date-links')] 1date: //span[contains(@class, 'date-links')]
2author: //span[contains(@class, 'author-links')] 2author: //span[contains(@class, 'author-links')]
3body: //div[contains(@class, 'entry-content')] 3body: //div[contains(@class, 'entry-content')]
4test_url: http://blog.jaysalvat.com/article/celui-qui-avait-refait-son-site-web \ No newline at end of file 4test_url: http://blog.jaysalvat.com/article/celui-qui-avait-refait-son-site-web \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.kaelig.fr.txt b/inc/3rdparty/site_config/standard/blog.kaelig.fr.txt
index ac18ad15..bcd3bdc9 100644..100755
--- a/inc/3rdparty/site_config/standard/blog.kaelig.fr.txt
+++ b/inc/3rdparty/site_config/standard/blog.kaelig.fr.txt
@@ -1,5 +1,5 @@
1body: //*[contains(@class, 'post_content')] 1body: //*[contains(@class, 'post_content')]
2author: string('Kaelig Deloumeau-Prigent') 2author: string('Kaelig Deloumeau-Prigent')
3title: //h1[@class='title'] 3title: //h1[@class='title']
4date: //span[@class='date'] 4date: //span[@class='date']
5test_url: http://blog.kaelig.fr/post/24877648508/preprocesseurs-css-renoncer-par-choix-ou-par \ No newline at end of file 5test_url: http://blog.kaelig.fr/post/24877648508/preprocesseurs-css-renoncer-par-choix-ou-par \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.naver.com.txt b/inc/3rdparty/site_config/standard/blog.naver.com.txt
index 702789ad..73c30c47 100644..100755
--- a/inc/3rdparty/site_config/standard/blog.naver.com.txt
+++ b/inc/3rdparty/site_config/standard/blog.naver.com.txt
@@ -1,6 +1,6 @@
1title: //span[@class='pcol1 itemSubjectBoldfont'] 1title: //span[@class='pcol1 itemSubjectBoldfont']
2body: //div[@id='postListBody'] 2body: //div[@id='postListBody']
3date: //p[@class='date fil5 pcol2'] 3date: //p[@class='date fil5 pcol2']
4single_page_link: /html/frameset/frame[1]/attribute::src 4single_page_link: /html/frameset/frame[1]/attribute::src
5strip: //div[@class='post-btn'] 5strip: //div[@class='post-btn']
6test_url: http://blog.naver.com/how2invest/110135068757 \ No newline at end of file 6test_url: http://blog.naver.com/how2invest/110135068757 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.pchome.net.txt b/inc/3rdparty/site_config/standard/blog.pchome.net.txt
index 3089001e..de81beba 100644..100755
--- a/inc/3rdparty/site_config/standard/blog.pchome.net.txt
+++ b/inc/3rdparty/site_config/standard/blog.pchome.net.txt
@@ -1,12 +1,12 @@
1# PCHOME blog, a popular Chinese blog host 1# PCHOME blog, a popular Chinese blog host
2# Oct 15, 2011 2# Oct 15, 2011
3# 3#
4 4
5title://*[contains(@class,'imp')]/h2 5title://*[contains(@class,'imp')]/h2
6 6
7date://*[contains(@class,'imp')]/span 7date://*[contains(@class,'imp')]/span
8body://div[contains(@id,'blog_content')] 8body://div[contains(@id,'blog_content')]
9 9
10 10
11 11
12test_url: http://blog.pchome.net/article/462502.html \ No newline at end of file 12test_url: http://blog.pchome.net/article/462502.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.pinboard.in.txt b/inc/3rdparty/site_config/standard/blog.pinboard.in.txt
index b7afe455..40f0c560 100644..100755
--- a/inc/3rdparty/site_config/standard/blog.pinboard.in.txt
+++ b/inc/3rdparty/site_config/standard/blog.pinboard.in.txt
@@ -1,6 +1,6 @@
1title: //a[@class="blog_title"] 1title: //a[@class="blog_title"]
2date: //p[@class="when"]/a 2date: //p[@class="when"]/a
3body: //div[@class="blog_entry"] 3body: //div[@class="blog_entry"]
4strip_id_or_class:blog_title 4strip_id_or_class:blog_title
5strip_id_or_class:when 5strip_id_or_class:when
6test_url: http://blog.pinboard.in/2011/11/the_social_graph_is_neither/ \ No newline at end of file 6test_url: http://blog.pinboard.in/2011/11/the_social_graph_is_neither/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.renren.com.txt b/inc/3rdparty/site_config/standard/blog.renren.com.txt
new file mode 100755
index 00000000..401d31e5
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blog.renren.com.txt
@@ -0,0 +1,11 @@
1# This filter is tested on:
2# http://blog.renren.com/share/224959024/14260739544
3# http://blog.renren.com/share/231323504/14261768898
4# http://blog.renren.com/share/230305019/1502806705
5
6title://h1[contains(@class, 'title-article')]
7author://span[contains(@class, 'name')]
8body://div[contains(@class, 'content-body')]
9
10convert_double_br_tags:yes
11test_url: http://blog.renren.com/share/230305019/1502806705 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.sina.com.cn.txt b/inc/3rdparty/site_config/standard/blog.sina.com.cn.txt
index acb9ce81..4895272a 100644..100755
--- a/inc/3rdparty/site_config/standard/blog.sina.com.cn.txt
+++ b/inc/3rdparty/site_config/standard/blog.sina.com.cn.txt
@@ -1,26 +1,26 @@
1# Sina blog, the most popular blog host in China. 1# Sina blog, the most popular blog host in China.
2# Its source code is horrible. 2# Its source code is horrible.
3# 3#
4# Issue: 4# Issue:
5# Only the first image in the article is displayed. 5# Only the first image in the article is displayed.
6# The remaining images are replaced by a 1x1 transparent gif by sina blog host. 6# The remaining images are replaced by a 1x1 transparent gif by sina blog host.
7# 7#
8 8
9title://*[contains(@class,'titName SG_txta')] 9title://*[contains(@class,'titName SG_txta')]
10author://*[contains(@id,'ownernick')] 10author://*[contains(@id,'ownernick')]
11date://*[contains(@class,'time SG_txtc')] 11date://*[contains(@class,'time SG_txtc')]
12body://div[contains(@class,'articalContent')] 12body://div[contains(@class,'articalContent')]
13 13
14# Remove redundant spans whose class starts with "MASS" 14# Remove redundant spans whose class starts with "MASS"
15# Example <span class="MASSf21674ffeef7"></span> 15# Example <span class="MASSf21674ffeef7"></span>
16strip://span[contains(@class,'MASS')] 16strip://span[contains(@class,'MASS')]
17 17
18# Remove comment 18# Remove comment
19strip://div[contains(@class,'allComm')] 19strip://div[contains(@class,'allComm')]
20 20
21# Remove hidden text and links 21# Remove hidden text and links
22strip://ins 22strip://ins
23 23
24tidy:no 24tidy:no
25convert_double_br_tags:yes 25convert_double_br_tags:yes
26test_url: http://blog.sina.com.cn/s/blog_5054769e0102dtja.html \ No newline at end of file 26test_url: http://blog.sina.com.cn/s/blog_5054769e0102dtja.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.spu.edu.txt b/inc/3rdparty/site_config/standard/blog.spu.edu.txt
index 68bd4e39..68bd4e39 100644..100755
--- a/inc/3rdparty/site_config/standard/blog.spu.edu.txt
+++ b/inc/3rdparty/site_config/standard/blog.spu.edu.txt
diff --git a/inc/3rdparty/site_config/standard/blog.wells.ee.txt b/inc/3rdparty/site_config/standard/blog.wells.ee.txt
index 8c8b3838..eae6982b 100644..100755
--- a/inc/3rdparty/site_config/standard/blog.wells.ee.txt
+++ b/inc/3rdparty/site_config/standard/blog.wells.ee.txt
@@ -1,6 +1,6 @@
1title: //h2/a[@class="no-link title"] 1title: //h2/a[@class="no-link title"]
2author: //h2[@id="blog_owner"] 2author: //h2[@id="blog_owner"]
3date: //time 3date: //time
4strip: //h2/a[@class="no-link title"] 4strip: //h2/a[@class="no-link title"]
5test_url: http://blog.wells.ee/retina 5test_url: http://blog.wells.ee/retina
6test_url: http://blog.wells.ee/skeuomorphism \ No newline at end of file 6test_url: http://blog.wells.ee/skeuomorphism \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blogs.aljazeera.net.txt b/inc/3rdparty/site_config/standard/blogs.aljazeera.net.txt
index f630127b..2a66952b 100644..100755
--- a/inc/3rdparty/site_config/standard/blogs.aljazeera.net.txt
+++ b/inc/3rdparty/site_config/standard/blogs.aljazeera.net.txt
@@ -1,8 +1,8 @@
1# 2011-08-23 [carlo@...] Initial version. 1# 2011-08-23 [carlo@...] Initial version.
2 2
3author: //div[@id="blogauthordatebox-node"]//a[@title="View user profile."]/text() 3author: //div[@id="blogauthordatebox-node"]//a[@title="View user profile."]/text()
4 4
5# why yes, I do feel a bit dirty 5# why yes, I do feel a bit dirty
6date: substring-before( substring-after( substring-after( //div[@id="blogauthordatebox-node"]//td[3], "on " ), ", "), " " ) 6date: substring-before( substring-after( substring-after( //div[@id="blogauthordatebox-node"]//td[3], "on " ), ", "), " " )
7 7
8test_url: http://blogs.aljazeera.net/asia/2011/08/22/peoples-hero \ No newline at end of file 8test_url: http://blogs.aljazeera.net/asia/2011/08/22/peoples-hero \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blogs.forbes.com.txt b/inc/3rdparty/site_config/standard/blogs.forbes.com.txt
index 86580d21..86580d21 100644..100755
--- a/inc/3rdparty/site_config/standard/blogs.forbes.com.txt
+++ b/inc/3rdparty/site_config/standard/blogs.forbes.com.txt
diff --git a/inc/3rdparty/site_config/standard/blogs.hbr.org.txt b/inc/3rdparty/site_config/standard/blogs.hbr.org.txt
index 3664d16c..d47c3520 100644..100755
--- a/inc/3rdparty/site_config/standard/blogs.hbr.org.txt
+++ b/inc/3rdparty/site_config/standard/blogs.hbr.org.txt
@@ -1,4 +1,4 @@
1title: //div[@id='pageFeature']/h1 1title: //div[@id='pageFeature']/h1
2body: //div[@id='articleBody'] 2body: //div[@id='articleBody']
3strip: //div[@class='module wide'] 3strip: //div[@class='module wide']
4test_url: http://blogs.hbr.org/bregman/2011/04/the-1-killer-of-meetings-and-w.html?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+harvardbusiness+%28HBR.org%29 \ No newline at end of file 4test_url: http://blogs.hbr.org/bregman/2011/04/the-1-killer-of-meetings-and-w.html?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+harvardbusiness+%28HBR.org%29 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blogs.msdn.com.txt b/inc/3rdparty/site_config/standard/blogs.msdn.com.txt
index 3d3ec020..b2ff8332 100644..100755
--- a/inc/3rdparty/site_config/standard/blogs.msdn.com.txt
+++ b/inc/3rdparty/site_config/standard/blogs.msdn.com.txt
@@ -1,6 +1,6 @@
1title: //h3[@class="post-name"] 1title: //h3[@class="post-name"]
2author: //span[@class="user-name"] 2author: //span[@class="user-name"]
3date: //div[@class="post-date"] 3date: //div[@class="post-date"]
4body: //div[@class="post-content user-defined-markup"] 4body: //div[@class="post-content user-defined-markup"]
5footnotes: no 5footnotes: no
6test_url: http://blogs.msdn.com/b/b8/archive/2011/10/04/designing-the-start-screen.aspx \ No newline at end of file 6test_url: http://blogs.msdn.com/b/b8/archive/2011/10/04/designing-the-start-screen.aspx \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blogs.reuters.com.txt b/inc/3rdparty/site_config/standard/blogs.reuters.com.txt
index 6907bcb2..d3eb9966 100644..100755
--- a/inc/3rdparty/site_config/standard/blogs.reuters.com.txt
+++ b/inc/3rdparty/site_config/standard/blogs.reuters.com.txt
@@ -1,3 +1,3 @@
1title: //div[@id='single']/h1 1title: //div[@id='single']/h1
2body: //div[@id='postcontent'] 2body: //div[@id='postcontent']
3test_url: http://blogs.reuters.com/felix-salmon/2010/07/16/the-value-of-a-strong-brand-apple-edition/ \ No newline at end of file 3test_url: http://blogs.reuters.com/felix-salmon/2010/07/16/the-value-of-a-strong-brand-apple-edition/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blogs.scientificamerican.com.txt b/inc/3rdparty/site_config/standard/blogs.scientificamerican.com.txt
index a7d15081..2102015d 100644..100755
--- a/inc/3rdparty/site_config/standard/blogs.scientificamerican.com.txt
+++ b/inc/3rdparty/site_config/standard/blogs.scientificamerican.com.txt
@@ -1,16 +1,16 @@
1# meta data 1# meta data
2title://h1[@class = 'postTitle'] 2title://h1[@class = 'postTitle']
3author:substring-before(substring-after(//span[@class = 'byline'],'By '),'|') 3author:substring-before(substring-after(//span[@class = 'byline'],'By '),'|')
4date://span[@class = 'datestamp'] 4date://span[@class = 'datestamp']
5 5
6#body content 6#body content
7body://div[@id = 'singleBlogPost'] 7body://div[@id = 'singleBlogPost']
8 8
9#reclaim author info 9#reclaim author info
10move_into(//div[@id = 'singleBlogPost'])://div[@id = 'aboutAuthorDiv'] 10move_into(//div[@id = 'singleBlogPost'])://div[@id = 'aboutAuthorDiv']
11strip://p[@class = 'moreLink mobileHide'] 11strip://p[@class = 'moreLink mobileHide']
12 12
13#cleanup comments, there might be some open <div> sections 13#cleanup comments, there might be some open <div> sections
14strip://div[@id = 'comments2'] 14strip://div[@id = 'comments2']
15strip://h3[a[@href = '#add-comment']] 15strip://h3[a[@href = '#add-comment']]
16test_url: http://blogs.scientificamerican.com/a-blog-around-the-clock/2012/07/10/science-blogs-definition-and-a-history/ \ No newline at end of file 16test_url: http://blogs.scientificamerican.com/a-blog-around-the-clock/2012/07/10/science-blogs-definition-and-a-history/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blogs.smithsonianmag.com.txt b/inc/3rdparty/site_config/standard/blogs.smithsonianmag.com.txt
index ba8bc6e7..1bc65e77 100644..100755
--- a/inc/3rdparty/site_config/standard/blogs.smithsonianmag.com.txt
+++ b/inc/3rdparty/site_config/standard/blogs.smithsonianmag.com.txt
@@ -1,15 +1,15 @@
1# metadata 1# metadata
2author://div[@class = 'post']/div[@class='meta']/a[1] 2author://div[@class = 'post']/div[@class='meta']/a[1]
3date://div[@id = 'rap']/h2[1] 3date://div[@id = 'rap']/h2[1]
4body://div[@class = 'post'] 4body://div[@class = 'post']
5 5
6# wrapping caption and image 6# wrapping caption and image
7wrap_in(fieldset)://div[contains(@class, 'wp-caption')] 7wrap_in(fieldset)://div[contains(@class, 'wp-caption')]
8 8
9 9
10# clean up 10# clean up
11strip://div[@class = 'post']/h3[@class = 'storytitle'] 11strip://div[@class = 'post']/h3[@class = 'storytitle']
12strip://div[@class = 'post']/div[@class = 'social'] 12strip://div[@class = 'post']/div[@class = 'social']
13strip://img[@style = 'display:none;'] 13strip://img[@style = 'display:none;']
14strip://img[@height='0' and @width='0'] 14strip://img[@height='0' and @width='0']
15test_url: http://blogs.smithsonianmag.com/adventure/2011/10/tips-for-women-traveling-in-turkey/ \ No newline at end of file 15test_url: http://blogs.smithsonianmag.com/adventure/2011/10/tips-for-women-traveling-in-turkey/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blogs.technet.com.txt b/inc/3rdparty/site_config/standard/blogs.technet.com.txt
index a2909fd1..3d0fbadc 100644..100755
--- a/inc/3rdparty/site_config/standard/blogs.technet.com.txt
+++ b/inc/3rdparty/site_config/standard/blogs.technet.com.txt
@@ -1,6 +1,9 @@
1title: //h3[@class="post-name"] 1title: //h3[@class="post-name"]
2author: //span[@class="user-name"] 2author: //span[@class="user-name"]
3date: //div[@class="post-date"] 3date: //div[@class="post-date"]
4body: //div[@class="post-content user-defined-markup"] 4body: //div[@class="post-content user-defined-markup"]
5strip_id_or_class: log-feedback-list
6tidy: no
5footnotes: no 7footnotes: no
6test_url: http://blogs.technet.com/b/dlemson/archive/2004/03/03/83304.aspx \ No newline at end of file 8test_url: http://blogs.technet.com/b/dlemson/archive/2004/03/03/83304.aspx
9test_url: http://blogs.technet.com/b/isablog/archive/2009/01/07/a-pptp-client-might-fail-to-connect-to-a-vpn-server-on-the-internet-through-an-isa-server-2006.aspx \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bluetouff.com.txt b/inc/3rdparty/site_config/standard/bluetouff.com.txt
index fbe7a5c6..543d3920 100644..100755
--- a/inc/3rdparty/site_config/standard/bluetouff.com.txt
+++ b/inc/3rdparty/site_config/standard/bluetouff.com.txt
@@ -1,4 +1,4 @@
1body://div[@class='entry'] 1body://div[@class='entry']
2date://div[@class='meta'] 2date://div[@class='meta']
3strip://a[@class='FlattrButton'] 3strip://a[@class='FlattrButton']
4test_url: http://bluetouff.com/2012/03/02/polemique-google-vie-privee/ \ No newline at end of file 4test_url: http://bluetouff.com/2012/03/02/polemique-google-vie-privee/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/boagworld.com.txt b/inc/3rdparty/site_config/standard/boagworld.com.txt
index 91e48fdb..3b3da991 100644..100755
--- a/inc/3rdparty/site_config/standard/boagworld.com.txt
+++ b/inc/3rdparty/site_config/standard/boagworld.com.txt
@@ -1,8 +1,8 @@
1title: //h1[@class="entry-title"][2] 1title: //h1[@class="entry-title"][2]
2author: string("Paul Boag") 2author: string("Paul Boag")
3date: substring(//span[@class="meta"], 11) 3date: substring(//span[@class="meta"], 11)
4body: //article 4body: //article
5strip: //h2 5strip: //h2
6strip: //h1 6strip: //h1
7strip: //div[@id="callsToAction"] 7strip: //div[@id="callsToAction"]
8test_url: http://boagworld.com/working-in-web-design/dealing-with-the-dickheads/ \ No newline at end of file 8test_url: http://boagworld.com/working-in-web-design/dealing-with-the-dickheads/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/boingboing.net.txt b/inc/3rdparty/site_config/standard/boingboing.net.txt
index 9169e8fb..4f39661b 100644..100755
--- a/inc/3rdparty/site_config/standard/boingboing.net.txt
+++ b/inc/3rdparty/site_config/standard/boingboing.net.txt
@@ -1,11 +1,11 @@
1# This is far from perfect, but so is BoingBoing's markup 1# This is far from perfect, but so is BoingBoing's markup
2title: //h2[@class="headline"] 2title: //h2[@class="headline"]
3single_page_link: //h2[@class="headline"]/a 3single_page_link: //h2[@class="headline"]/a
4#date: //p[@class="byline"] 4#date: //p[@class="byline"]
5body: //div[@class="post"] 5body: //div[@class="post"]
6 6
7strip_id_or_class: shareMe 7strip_id_or_class: shareMe
8strip_id_or_class: authorbox 8strip_id_or_class: authorbox
9strip_id_or_class: byline 9strip_id_or_class: byline
10 10
11test_url: http://boingboing.net/2011/10/23/understanding-the-hyperrich-through-the-lens-of-tomorrows-history.html \ No newline at end of file 11test_url: http://boingboing.net/2011/10/23/understanding-the-hyperrich-through-the-lens-of-tomorrows-history.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/boldizsar.palotas.eu.txt b/inc/3rdparty/site_config/standard/boldizsar.palotas.eu.txt
index 4cc49043..3616bbf2 100644..100755
--- a/inc/3rdparty/site_config/standard/boldizsar.palotas.eu.txt
+++ b/inc/3rdparty/site_config/standard/boldizsar.palotas.eu.txt
@@ -1,3 +1,3 @@
1title: //h2[@class='entry-title'] 1title: //h2[@class='entry-title']
2body: //div[@class='entry-content'] 2body: //div[@class='entry-content']
3test_url: http://boldizsar.palotas.eu/blog/?p=1394 \ No newline at end of file 3test_url: http://boldizsar.palotas.eu/blog/?p=1394 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/book.douban.com.txt b/inc/3rdparty/site_config/standard/book.douban.com.txt
index 8b958562..fe2d2cbf 100644..100755
--- a/inc/3rdparty/site_config/standard/book.douban.com.txt
+++ b/inc/3rdparty/site_config/standard/book.douban.com.txt
@@ -1,6 +1,6 @@
1body: //span[@property='v:description'] 1body: //span[@property='v:description']
2date: //span[@property='v:dtreviewed'] 2date: //span[@property='v:dtreviewed']
3author: //span[@property='v:reviewer'] 3author: //span[@property='v:reviewer']
4prune: no 4prune: no
5 5
6test_url: http://book.douban.com/review/2422662/ \ No newline at end of file 6test_url: http://book.douban.com/review/2422662/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bookforum.com.txt b/inc/3rdparty/site_config/standard/bookforum.com.txt
index 331f415e..03b60039 100644..100755
--- a/inc/3rdparty/site_config/standard/bookforum.com.txt
+++ b/inc/3rdparty/site_config/standard/bookforum.com.txt
@@ -1,19 +1,19 @@
1#metadata 1#metadata
2title://div[@class = 'Topper']/h1 2title://div[@class = 'Topper']/h1
3author://div[@class = 'Topper']/h3 3author://div[@class = 'Topper']/h3
4date://div[@class = 'Topper']/h6 4date://div[@class = 'Topper']/h6
5body://div[@class = 'Core'] 5body://div[@class = 'Core']
6 6
7 7
8 8
9# clean up 9# clean up
10strip://div[@class = 'Topper']/h1 10strip://div[@class = 'Topper']/h1
11strip://div[@class = 'Topper']/h3 11strip://div[@class = 'Topper']/h3
12strip://div[@class = 'Topper']/h4 12strip://div[@class = 'Topper']/h4
13strip://div[@class = 'Topper']/h5 13strip://div[@class = 'Topper']/h5
14strip://div[@class = 'Topper']/h6 14strip://div[@class = 'Topper']/h6
15strip://br[@clear = 'all'] 15strip://br[@clear = 'all']
16strip://div[@class = 'adCore'] 16strip://div[@class = 'adCore']
17strip://div[@class = 'BookR'] 17strip://div[@class = 'BookR']
18strip://div[@class = 'InfoBox'] 18strip://div[@class = 'InfoBox']
19test_url: http://bookforum.com/inprint/018_04/8595 \ No newline at end of file 19test_url: http://bookforum.com/inprint/018_04/8595 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/borderhouseblog.com.txt b/inc/3rdparty/site_config/standard/borderhouseblog.com.txt
index 190738d5..b4e116fe 100644..100755
--- a/inc/3rdparty/site_config/standard/borderhouseblog.com.txt
+++ b/inc/3rdparty/site_config/standard/borderhouseblog.com.txt
@@ -1,7 +1,7 @@
1title://h1 1title://h1
2author://div[@class="meta"]/span/a 2author://div[@class="meta"]/span/a
3date://div[@class="date"] 3date://div[@class="date"]
4body://div[@class="content article"] 4body://div[@class="content article"]
5strip://div[@class="content article"]/h1 5strip://div[@class="content article"]/h1
6 6
7test_url: http://borderhouseblog.com/?p=7832 \ No newline at end of file 7test_url: http://borderhouseblog.com/?p=7832 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bostonglobe.com.txt b/inc/3rdparty/site_config/standard/bostonglobe.com.txt
index d3e6f43f..4c74a34e 100644..100755
--- a/inc/3rdparty/site_config/standard/bostonglobe.com.txt
+++ b/inc/3rdparty/site_config/standard/bostonglobe.com.txt
@@ -1,16 +1,16 @@
1# NOTE: If testing this configuration yields bad results, including junk text like "Try BostonGlobe.com today" and "THIS STORY APPEARED IN", please replace the Test URL with a current-day headline link from bostonglobe.com. 1# NOTE: If testing this configuration yields bad results, including junk text like "Try BostonGlobe.com today" and "THIS STORY APPEARED IN", please replace the Test URL with a current-day headline link from bostonglobe.com.
2 2
3title: //div[@class="header"]/h1 3title: //div[@class="header"]/h1
4author: substring-after(//div[@class="byline"]/h2[@class="author"],"By ") 4author: substring-after(//div[@class="byline"]/h2[@class="author"],"By ")
5date: //div[@class="byline"]/p[last()] 5date: //div[@class="byline"]/p[last()]
6body: //div[@class="article-body"] 6body: //div[@class="article-body"]
7 7
8strip_id_or_class: aside 8strip_id_or_class: aside
9strip_id_or_class: promo 9strip_id_or_class: promo
10strip_id_or_class: skip-nav 10strip_id_or_class: skip-nav
11strip_id_or_class: article-more 11strip_id_or_class: article-more
12strip_id_or_class: article-bar 12strip_id_or_class: article-bar
13 13
14# This removes image captions. If the parser starts saving images from bostonglobe.com (currently, it does not), then this directive should be removed. 14# This removes image captions. If the parser starts saving images from bostonglobe.com (currently, it does not), then this directive should be removed.
15strip_id_or_class: figure 15strip_id_or_class: figure
16test_url: http://bostonglobe.com/news/nation/2012/03/17/illinois-primary-could-pivotal/PsDzFZqvhEYyXbOcF9FOkO/story.html \ No newline at end of file 16test_url: http://bostonglobe.com/news/nation/2012/03/17/illinois-primary-could-pivotal/PsDzFZqvhEYyXbOcF9FOkO/story.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bostonreview.net.txt b/inc/3rdparty/site_config/standard/bostonreview.net.txt
index 68567012..64e04a1c 100644..100755
--- a/inc/3rdparty/site_config/standard/bostonreview.net.txt
+++ b/inc/3rdparty/site_config/standard/bostonreview.net.txt
@@ -1,15 +1,15 @@
1#basics 1#basics
2title://h3[@class = 'article_title'] 2title://h3[@class = 'article_title']
3date://span[@class = 'article_date'] 3date://span[@class = 'article_date']
4body://div[@id = 'center_column_article'] 4body://div[@id = 'center_column_article']
5#correct, but author not being picked up in preview 5#correct, but author not being picked up in preview
6author://span[@class = 'article_author'] 6author://span[@class = 'article_author']
7 7
8#strips basics from article 8#strips basics from article
9strip_id_or_class:article_title 9strip_id_or_class:article_title
10strip_id_or_class:article_date 10strip_id_or_class:article_date
11strip_id_or_class:article_author 11strip_id_or_class:article_author
12 12
13#strips pull quotes 13#strips pull quotes
14strip_id_or_class:pull_quote 14strip_id_or_class:pull_quote
15test_url: http://www.bostonreview.net/BR36.4/megan_pugh_agnes_de_mille_dance.php \ No newline at end of file 15test_url: http://www.bostonreview.net/BR36.4/megan_pugh_agnes_de_mille_dance.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/boundlessline.org.txt b/inc/3rdparty/site_config/standard/boundlessline.org.txt
index bfc3f3d1..a836e1e2 100644..100755
--- a/inc/3rdparty/site_config/standard/boundlessline.org.txt
+++ b/inc/3rdparty/site_config/standard/boundlessline.org.txt
@@ -1,5 +1,5 @@
1title: substring-before(//title, '|') 1title: substring-before(//title, '|')
2body: //div[@class="entry"] 2body: //div[@class="entry"]
3# Remove the author's picture 3# Remove the author's picture
4strip: //div[@class="entry"]/a[1] 4strip: //div[@class="entry"]/a[1]
5test_url: http://www.boundlessline.org/2011/06/the-nyts-on-gender-over-the-weekend.html \ No newline at end of file 5test_url: http://www.boundlessline.org/2011/06/the-nyts-on-gender-over-the-weekend.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bowdoinorient.com.txt b/inc/3rdparty/site_config/standard/bowdoinorient.com.txt
new file mode 100755
index 00000000..932143d1
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/bowdoinorient.com.txt
@@ -0,0 +1,6 @@
1title: //*[@class='articletitle']
2body: //*[(@id='articlebody')]
3date: //*[(@class='articledate')]
4author: //*[(@class='articleauthor')]
5autodetect_next_page: no
6test_url: http://bowdoinorient.com/article/8045 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/brainfacts.org.txt b/inc/3rdparty/site_config/standard/brainfacts.org.txt
index 94b0f56d..9705f621 100644..100755
--- a/inc/3rdparty/site_config/standard/brainfacts.org.txt
+++ b/inc/3rdparty/site_config/standard/brainfacts.org.txt
@@ -1,10 +1,10 @@
1title: //div[@class="standard"]/h1 1title: //div[@class="standard"]/h1
2author: string("BrainFacts.org") 2author: string("BrainFacts.org")
3date: //div[@class="meta"]/strong 3date: //div[@class="meta"]/strong
4 4
5strip: //p[@class="skip"] 5strip: //p[@class="skip"]
6strip: //div[@class="meta"] 6strip: //div[@class="meta"]
7strip: //div[@class="standard"]/h1 7strip: //div[@class="standard"]/h1
8strip: //div[@class="modal"] 8strip: //div[@class="modal"]
9strip: //div[@class="columnRight"] 9strip: //div[@class="columnRight"]
10test_url: http://brainfacts.org/diseases-disorders/childhood-disorders/articles/2011/autism-the-pervasive-developmental-disorder/ \ No newline at end of file 10test_url: http://brainfacts.org/diseases-disorders/childhood-disorders/articles/2011/autism-the-pervasive-developmental-disorder/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/brandeins.de.txt b/inc/3rdparty/site_config/standard/brandeins.de.txt
index 3753ce67..36aa2efa 100644..100755
--- a/inc/3rdparty/site_config/standard/brandeins.de.txt
+++ b/inc/3rdparty/site_config/standard/brandeins.de.txt
@@ -1,7 +1,7 @@
1# set body 1# set body
2body: //div[@id='theContent'] 2body: //div[@id='theContent']
3 3
4# set title 4# set title
5title: //div[@id='theContent']/h3 5title: //div[@id='theContent']/h3
6strip: //div[@id='theContent']/h3 6strip: //div[@id='theContent']/h3
7test_url: http://www.brandeins.de/archiv/magazin/gegessen-wird-immer/artikel/hunger.html \ No newline at end of file 7test_url: http://www.brandeins.de/archiv/magazin/gegessen-wird-immer/artikel/hunger.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/brandingstrategyinsider.com.txt b/inc/3rdparty/site_config/standard/brandingstrategyinsider.com.txt
index 19504844..fc020539 100644..100755
--- a/inc/3rdparty/site_config/standard/brandingstrategyinsider.com.txt
+++ b/inc/3rdparty/site_config/standard/brandingstrategyinsider.com.txt
@@ -1,3 +1,3 @@
1date://h2[@class="date-header"] 1date://h2[@class="date-header"]
2body://div[@class="entry-content"] 2body://div[@class="entry-content"]
3test_url: http://www.brandingstrategyinsider.com/2011/12/top-twelve-branding-keys-for-2012.html \ No newline at end of file 3test_url: http://www.brandingstrategyinsider.com/2011/12/top-twelve-branding-keys-for-2012.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/brasil.elpais.com.txt b/inc/3rdparty/site_config/standard/brasil.elpais.com.txt
new file mode 100755
index 00000000..0b8feb6a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/brasil.elpais.com.txt
@@ -0,0 +1,23 @@
1title: //meta[@name='DC.title']/@content
2title: //div[contains(@class, 'cabecera_noticia')]//h1
3date: //meta[@name='DC.date']/@content
4date: //meta[@name='date']/@content
5body: //div[@class='columna_texto']
6body: //div[@id='cuerpo_noticia']
7body: //div[@class='estructura_2col_1zq']//div[@class='margen_n']
8
9prune: no
10
11strip_id_or_class: disposicion_vertical
12strip_id_or_class: ampliar_foto
13strip_id_or_class: utilidades
14strip_id_or_class: info_relacionada
15strip_id_or_class: m-kiosko
16strip_id_or_class: info_complementa
17
18strip: //p[@class='nota_pie']
19strip: //div[starts-with(@id, 'sumario') and contains(., 'más información')]
20strip: //div[@id='coment' or @id='foros_not']
21
22test_url: http://elpais.com/elpais/2012/02/06/gente/1328526783_491687.html
23test_url: http://www.elpais.com/articulo/cultura/mano/retrato/materia/elpepicul/20120207elpepicul_2/Tes
diff --git a/inc/3rdparty/site_config/standard/brettterpstra.com.txt b/inc/3rdparty/site_config/standard/brettterpstra.com.txt
index f6f73778..55da1787 100644..100755
--- a/inc/3rdparty/site_config/standard/brettterpstra.com.txt
+++ b/inc/3rdparty/site_config/standard/brettterpstra.com.txt
@@ -1,5 +1,5 @@
1body: //div[@class='post full'] 1body: //div[@class='post full']
2title: //h1 2title: //h1
3author: substring-after(//title, '- ') 3author: substring-after(//title, '- ')
4date: //span[@class='date'] 4date: //span[@class='date']
5test_url: http://brettterpstra.com/byword-for-ios/ \ No newline at end of file 5test_url: http://brettterpstra.com/byword-for-ios/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/brisbanetimes.com.au.txt b/inc/3rdparty/site_config/standard/brisbanetimes.com.au.txt
index 27e6b70c..27e6b70c 100644..100755
--- a/inc/3rdparty/site_config/standard/brisbanetimes.com.au.txt
+++ b/inc/3rdparty/site_config/standard/brisbanetimes.com.au.txt
diff --git a/inc/3rdparty/site_config/standard/brookings.edu.txt b/inc/3rdparty/site_config/standard/brookings.edu.txt
index 9f4fc4e3..17a47605 100644..100755
--- a/inc/3rdparty/site_config/standard/brookings.edu.txt
+++ b/inc/3rdparty/site_config/standard/brookings.edu.txt
@@ -1,13 +1,13 @@
1title: //div[@id='contentheader']/h1 1title: //div[@id='contentheader']/h1
2author: //p[@class='attribution']/span[@class='author']/* 2author: //p[@class='attribution']/span[@class='author']/*
3# Is there a way to pull multiple authors? My XPath here is just grabbing the first 3# Is there a way to pull multiple authors? My XPath here is just grabbing the first
4 4
5date: /html/head/meta[@name="date"]/@content 5date: /html/head/meta[@name="date"]/@content
6body: //div[@class='main-content'] 6body: //div[@class='main-content']
7 7
8strip: //p[@class='byline'] 8strip: //p[@class='byline']
9strip: //div[@class='img-gallery'] 9strip: //div[@class='img-gallery']
10strip: //div[@class='callout'] 10strip: //div[@class='callout']
11strip: //div[@class='add-your-view'] 11strip: //div[@class='add-your-view']
12convert_double_br_tags: yes 12convert_double_br_tags: yes
13test_url: http://www.brookings.edu/opinions/2011/1018_cyberattack_libya_goldsmith.aspx \ No newline at end of file 13test_url: http://www.brookings.edu/opinions/2011/1018_cyberattack_libya_goldsmith.aspx \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/brooksreview.net.txt b/inc/3rdparty/site_config/standard/brooksreview.net.txt
index 71cafcdb..d33d7d4e 100644..100755
--- a/inc/3rdparty/site_config/standard/brooksreview.net.txt
+++ b/inc/3rdparty/site_config/standard/brooksreview.net.txt
@@ -1,6 +1,6 @@
1title: //h1 1title: //h1
2body: //div[@class='article'] 2body: //div[@class='article']
3body: //div[@class='post'] 3body: //div[@class='post']
4date: //*[@id='single']/span 4date: //*[@id='single']/span
5prune: no 5prune: no
6test_url: http://brooksreview.net/2011/11/readability-agency/ \ No newline at end of file 6test_url: http://brooksreview.net/2011/11/readability-agency/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bt.no.txt b/inc/3rdparty/site_config/standard/bt.no.txt
new file mode 100755
index 00000000..200c2e4e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/bt.no.txt
@@ -0,0 +1,12 @@
1title: //h1[contains(@class,'articleTitle')]
2author: //span[@itemprop='name']
3date: //time[@class='published']
4body: //div[contains(@class,'bodyText')]
5
6strip_id_or_class: 'pull1'
7strip_id_or_class: 'relationArticle'
8strip: //span[@class='quote']
9
10# strip h2 if at end of article (typically a request for comments)
11strip: //div[contains(@class,'bodyText')]/node()[last()-1]/self::h2
12test_url: http://www.bt.no/meninger/debatt/Typisk-norsk-a-vare-god-nok-2884108.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/buffed.de.txt b/inc/3rdparty/site_config/standard/buffed.de.txt
new file mode 100755
index 00000000..3dd36ce6
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/buffed.de.txt
@@ -0,0 +1,14 @@
1date: //meta[@itemProp='datePublished']/@content
2body: //div[@class='intro' or contains(@class, 'article_text')]
3prune: no
4strip_id_or_class: embedcode
5strip_id_or_class: EmbedSwitch
6strip_id_or_class: EmbedText
7strip_id_or_class: bildergalerie
8strip_id_or_class: subline_seohour_image
9strip_id_or_class: ova-player
10strip_id_or_class: jcarouseloutput
11strip_id_or_class: cbox_embedded
12
13test_url: http://www.buffed.de/SWTOR-Star-Wars-The-Old-Republic-PC-218697/News/SWTOR-Ab-Patch-24-Lore-Klamotten-faerben-1090051/
14test_url: http://www.buffed.de/feed.cfm?menu_alias=home \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/buquad.com.txt b/inc/3rdparty/site_config/standard/buquad.com.txt
index a75fa046..f0fd08db 100644..100755
--- a/inc/3rdparty/site_config/standard/buquad.com.txt
+++ b/inc/3rdparty/site_config/standard/buquad.com.txt
@@ -1,8 +1,8 @@
1title: //h1 1title: //h1
2author: //h2/a 2author: //h2/a
3date: substring-after(//h2, '|') 3date: substring-after(//h2, '|')
4strip_id_or_class: 'attachment' 4strip_id_or_class: 'attachment'
5strip: //h3 5strip: //h3
6 6
7body: //div[@class='entry'] 7body: //div[@class='entry']
8test_url: http://buquad.com/2012/04/09/paul-ryan/ \ No newline at end of file 8test_url: http://buquad.com/2012/04/09/paul-ryan/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/business2community.com.txt b/inc/3rdparty/site_config/standard/business2community.com.txt
new file mode 100755
index 00000000..0dcc7ff8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/business2community.com.txt
@@ -0,0 +1,5 @@
1date: substring-after(//p[@class='byline'],'Published')
2
3strip: //div[@class='article-meta']
4
5test_url: http://www.business2community.com/social-media/funky-ways-to-print-instagram-photos-0485340
diff --git a/inc/3rdparty/site_config/standard/businessinsider.com.txt b/inc/3rdparty/site_config/standard/businessinsider.com.txt
index c773db8b..39eb7426 100644..100755
--- a/inc/3rdparty/site_config/standard/businessinsider.com.txt
+++ b/inc/3rdparty/site_config/standard/businessinsider.com.txt
@@ -1,12 +1,16 @@
1title://div[@class="sl-layout-post"]/h1 1title://div[@class="sl-layout-post"]/h1
2body: //div[contains(@class, 'post-content') or contains(@class, 'KonaBody')] 2body: //div[contains(@class, 'post-content') or contains(@class, 'slide-module') or contains(@class, 'KonaBody')]
3strip: //div[contains(@class, "post-sidebar")] 3strip: //div[contains(@class, "post-sidebar")]
4strip: //div[@id='related-links'] 4strip: //div[@id='related-links']
5author://div[@class="byline"]/a 5strip: //div[@class='related-links-container']
6date://div[@class="byline"]/span[@class="date"] 6strip: //p[@class='source']
7prune: no 7author://div[@class="byline"]/a
8 8date://div[@class="byline"]/span[@class="date"]
9strip://*[contains(@class,'sponsored-text')] 9prune: no
10strip: //div[@id='post_footer'] 10
11 11single_page_link: //a[contains(text(), 'View as one page')]
12test_url: http://www.businessinsider.com/microsoft-just-put-one-of-its-hardcore-technical-geniuses-on-xbox-2012-1 \ No newline at end of file 12
13strip://*[contains(@class,'sponsored-text')]
14strip: //div[@id='post_footer']
15
16test_url: http://www.businessinsider.com/microsoft-just-put-one-of-its-hardcore-technical-geniuses-on-xbox-2012-1
diff --git a/inc/3rdparty/site_config/standard/businessnews.com.tn.txt b/inc/3rdparty/site_config/standard/businessnews.com.tn.txt
index 714cfc90..6502b8e1 100644..100755
--- a/inc/3rdparty/site_config/standard/businessnews.com.tn.txt
+++ b/inc/3rdparty/site_config/standard/businessnews.com.tn.txt
@@ -1,12 +1,12 @@
1body: //div[@id='article_detail'] 1body: //div[@id='article_detail']
2title: //meta[@property='og:title']/@content 2title: //meta[@property='og:title']/@content
3date: //div[@id='date_com_art']//a[@class='date'] 3date: //div[@id='date_com_art']//a[@class='date']
4author: //div[@id='article_detail']//font[@class='auteur'] 4author: //div[@id='article_detail']//font[@class='auteur']
5 5
6strip_id_or_class: porte_titre_theme 6strip_id_or_class: porte_titre_theme
7strip_id_or_class: cont_param 7strip_id_or_class: cont_param
8strip_id_or_class: date_com_art 8strip_id_or_class: date_com_art
9 9
10prune: no 10prune: no
11 11
12test_url: http://www.businessnews.com.tn/details_article.php?a=31073&t=522&lang=fr&temp=1 \ No newline at end of file 12test_url: http://www.businessnews.com.tn/details_article.php?a=31073&t=522&lang=fr&temp=1 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/businessweek.com.txt b/inc/3rdparty/site_config/standard/businessweek.com.txt
index 7b3d063b..03085593 100644..100755
--- a/inc/3rdparty/site_config/standard/businessweek.com.txt
+++ b/inc/3rdparty/site_config/standard/businessweek.com.txt
@@ -1,30 +1,30 @@
1# story has several pages, should be detected 1# story has several pages, should be detected
2body: //div[@id='storyBody'] 2body: //div[@id='storyBody']
3body: //div[@id='article_body'] 3body: //div[@id='article_body']
4body: //div[@id='story_body'] 4body: //div[@id='story_body']
5 5
6title://h1[@id='article_headline'] 6title://h1[@id='article_headline']
7 7
8# article author 8# article author
9author: //p[@class='author']/a 9author: //p[@class='author']/a
10# story author(s) 10# story author(s)
11author: substring-after(//p[@class='byline'], 'By ') 11author: substring-after(//p[@class='byline'], 'By ')
12 12
13# article date 13# article date
14date: //span[@class='published_date'] 14date: //span[@class='published_date']
15# story date 15# story date
16date: //span[@class='date'] 16date: //span[@class='date']
17 17
18date: substring-after(//div[contains(@class,'attributor')],'on') 18date: substring-after(//div[contains(@class,'attributor')],'on')
19strip_id_or_class: inset 19strip_id_or_class: inset
20strip: //p/span[@class='photoCredit'] 20strip: //p/span[@class='photoCredit']
21strip: //h1 21strip: //h1
22 22
23strip_id_or_class: page_count 23strip_id_or_class: page_count
24strip_id_or_class: tools 24strip_id_or_class: tools
25strip_id_or_class: pagination 25strip_id_or_class: pagination
26 26
27single_page_link: //li[@id='stPrint']/a 27single_page_link: //li[@id='stPrint']/a
28 28
29test_url: http://www.businessweek.com/magazine/buyback-insurance-a-good-deal-for-retailers-07282011.html 29test_url: http://www.businessweek.com/magazine/buyback-insurance-a-good-deal-for-retailers-07282011.html
30test_url: http://www.businessweek.com/articles/2012-06-06/american-pain-the-largest-u-dot-s-dot-pill-mills-rise-and-fall \ No newline at end of file 30test_url: http://www.businessweek.com/articles/2012-06-06/american-pain-the-largest-u-dot-s-dot-pill-mills-rise-and-fall \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/buzzfeed.com.txt b/inc/3rdparty/site_config/standard/buzzfeed.com.txt
index 6df8bc47..97dddaee 100644..100755
--- a/inc/3rdparty/site_config/standard/buzzfeed.com.txt
+++ b/inc/3rdparty/site_config/standard/buzzfeed.com.txt
@@ -1,15 +1,15 @@
1# Creator: Greg Leuch <greg@...> 1# Creator: Greg Leuch <greg@...>
2 2
3# It can be messy. 3# It can be messy.
4tidy:no 4tidy:no
5 5
6# The basic template. 6# The basic template.
7title: //h1[@data-print='title'] 7title: //h1[@data-print='title']
8author: //a[@data-print='author'] 8author: //a[@data-print='author']
9date: //time[@data-print='date'] 9date: //time[@data-print='date']
10body: //div[@data-print='body'] 10body: //div[@data-print='body']
11body: //section[@data-print='body'] 11body: //section[@data-print='body']
12 12
13# For various things... 13# For various things...
14strip: //*[@data-print="ignore"] 14strip: //*[@data-print="ignore"]
15test_url: http://www.buzzfeed.com/hgrant/35-reasons-why-dogs-hate-the-holidays \ No newline at end of file 15test_url: http://www.buzzfeed.com/hgrant/35-reasons-why-dogs-hate-the-holidays \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bygonebureau.com.txt b/inc/3rdparty/site_config/standard/bygonebureau.com.txt
index 0abb6436..63c82130 100644..100755
--- a/inc/3rdparty/site_config/standard/bygonebureau.com.txt
+++ b/inc/3rdparty/site_config/standard/bygonebureau.com.txt
@@ -1,6 +1,6 @@
1title: //h1 1title: //h1
2author: //a[contains(@href, '/author/')] 2author: //a[contains(@href, '/author/')]
3date: //*[@class='post-date'] 3date: //*[@class='post-date']
4strip: //*[@class='post-date'] 4strip: //*[@class='post-date']
5strip: //h1 5strip: //h1
6test_url: http://bygonebureau.com/2011/06/20/an-existential-psychoanalysis/ \ No newline at end of file 6test_url: http://bygonebureau.com/2011/06/20/an-existential-psychoanalysis/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cable.co.uk.txt b/inc/3rdparty/site_config/standard/cable.co.uk.txt
new file mode 100755
index 00000000..435bf3b5
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/cable.co.uk.txt
@@ -0,0 +1,11 @@
1title: //div[@class='page-content']//h1
2body: //div[@class='page-content']
3strip_id_or_class: editorial-bar-top
4strip_id_or_class: social-bottom
5strip_id_or_class: comment-form
6strip_id_or_class: pc-why
7
8prune: no
9tidy: no
10
11test_url: http://www.cable.co.uk/news/bt-vision-unveils-interactive-guide-application-800734218/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cardboardconnection.com.txt b/inc/3rdparty/site_config/standard/cardboardconnection.com.txt
index 3adc7a35..49f34302 100644..100755
--- a/inc/3rdparty/site_config/standard/cardboardconnection.com.txt
+++ b/inc/3rdparty/site_config/standard/cardboardconnection.com.txt
@@ -1,8 +1,8 @@
1title: //h1[@class='producttabbed-title'] 1title: //h1[@class='producttabbed-title']
2body: //div[@class='postTabs_divs postTabs_curr_div'] 2body: //div[@class='postTabs_divs postTabs_curr_div']
3strip: //div[@class='ratingblock2'] 3strip: //div[@class='ratingblock2']
4strip: //p[@id='breadcrumbs'] 4strip: //p[@id='breadcrumbs']
5strip: //div[@style='display: none'] 5strip: //div[@style='display: none']
6 6
7 7
8test_url: http://www.cardboardconnection.com/2012-topps-archives-baseball-cards \ No newline at end of file 8test_url: http://www.cardboardconnection.com/2012-topps-archives-baseball-cards \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/carpeaqua.com.txt b/inc/3rdparty/site_config/standard/carpeaqua.com.txt
index 7ba1ed78..5ea302e0 100644..100755
--- a/inc/3rdparty/site_config/standard/carpeaqua.com.txt
+++ b/inc/3rdparty/site_config/standard/carpeaqua.com.txt
@@ -1,6 +1,6 @@
1title: //h2 1title: //h2
2body: //div[@class='entry'] 2body: //div[@class='entry']
3 3
4prune: no 4prune: no
5# otherwise the footnotes are removed 5# otherwise the footnotes are removed
6test_url: http://carpeaqua.com/2011/03/27/the-intersection-of-power-and-portability/ \ No newline at end of file 6test_url: http://carpeaqua.com/2011/03/27/the-intersection-of-power-and-portability/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cars.com.txt b/inc/3rdparty/site_config/standard/cars.com.txt
new file mode 100755
index 00000000..71c5c050
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/cars.com.txt
@@ -0,0 +1,7 @@
1title: //div[contains(@class, 'basicInfo')]//h1
2
3body: //img[@id='chosenPhotoIMG'] | //div[@id='aboutThisVehicleBox']
4
5prune: no
6
7test_url: http://www.cars.com/go/search/detail.jsp?listingId=115364779 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/catb.org.txt b/inc/3rdparty/site_config/standard/catb.org.txt
index 8908292c..2cd197fb 100644..100755
--- a/inc/3rdparty/site_config/standard/catb.org.txt
+++ b/inc/3rdparty/site_config/standard/catb.org.txt
@@ -1,7 +1,7 @@
1body: //div[@class='article'] 1body: //div[@class='article']
2strip: //div[@class='revhistory'] 2strip: //div[@class='revhistory']
3strip: //div[@class='toc'] 3strip: //div[@class='toc']
4tidy: no 4tidy: no
5prune: no 5prune: no
6 6
7test_url: http://catb.org/~esr/faqs/smart-questions.html \ No newline at end of file 7test_url: http://catb.org/~esr/faqs/smart-questions.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cbc.ca.txt b/inc/3rdparty/site_config/standard/cbc.ca.txt
index 25305109..ba5faf3f 100644..100755
--- a/inc/3rdparty/site_config/standard/cbc.ca.txt
+++ b/inc/3rdparty/site_config/standard/cbc.ca.txt
@@ -1,5 +1,5 @@
1title: //div[contains(@class, 'headline')]/h1 1title: //div[contains(@class, 'headline')]/h1
2author: //h5[contains(@class, 'byline')] 2author: //h5[contains(@class, 'byline')]
3date: substring-after(//h4[contains(@class, 'posted')], 'Posted: ') 3date: substring-after(//h4[contains(@class, 'posted')], 'Posted: ')
4body: //div[@id="storyboard"] 4body: //div[@id="storyboard"]
5test_url: http://www.cbc.ca/news/world/story/2012/01/16/cruise-ship-monday.html \ No newline at end of file 5test_url: http://www.cbc.ca/news/world/story/2012/01/16/cruise-ship-monday.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cbn.com.txt b/inc/3rdparty/site_config/standard/cbn.com.txt
new file mode 100755
index 00000000..de8d8839
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/cbn.com.txt
@@ -0,0 +1,8 @@
1body: //div[contains(@class, 'articleText')]
2date: //div[contains(@class, 'articleDate')]
3author: //a[contains(@id, 'articleDetails_lnkByLine')]
4prune: no
5
6test_url: http://www.cbn.com/cbnnews/world/2013/June/Chilly-G-8-Obama-Putin-Agree-to-Disagree-on-Syria/
7test_url: http://www.cbn.com/cbnnews/world/2013/June/UK-Agency-Accused-of-Hacking-Foreign-Diplomats/
8test_url: http://www.cbn.com/cbnnews/feed/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cbsnews.com.txt b/inc/3rdparty/site_config/standard/cbsnews.com.txt
index 4ba3da19..04d20230 100644..100755
--- a/inc/3rdparty/site_config/standard/cbsnews.com.txt
+++ b/inc/3rdparty/site_config/standard/cbsnews.com.txt
@@ -1,14 +1,15 @@
1date: //meta[@name="published"]/@content 1date: //meta[@name="published"]/@content
2date: //div[@class="timeLine"] 2date: //div[@class="timeLine"]
3title: //div[@id='contentBody']//h1 3title: //div[@id='contentBody']//h1
4author: //dl[@class="storyBlogByline"]/dd/a 4author: //dl[@class="storyBlogByline"]/dd/a
5body: //div[@id='storyMediaBox'] | //div[contains(@class, 'storyText')] 5body: //div[@id='storyMediaBox'] | //div[contains(@class, 'storyText')]
6 6
7# Content Pruning 7# Content Pruning
8strip: //div[@class="scrollingArrows"] 8strip: //div[@class="scrollingArrows"]
9strip: //div[@class="timeLine"] 9strip: //div[@class="timeLine"]
10strip: //dl[@class="storyBlogByline"] 10strip: //dl[@class="storyBlogByline"]
11 11strip: //span[@class='image-credit']
12prune: no 12
13 13prune: no
14test_url: http://www.cbsnews.com/8301-201_162-57366361/rescued-americans-dad-proud-of-the-u.s/ \ No newline at end of file 14
15test_url: http://www.cbsnews.com/8301-201_162-57366361/rescued-americans-dad-proud-of-the-u.s/
diff --git a/inc/3rdparty/site_config/standard/cedarrepublican.com.txt b/inc/3rdparty/site_config/standard/cedarrepublican.com.txt
new file mode 100755
index 00000000..42faa521
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/cedarrepublican.com.txt
@@ -0,0 +1,2 @@
1body: //div[@class='frame']//img[@class='horizontal'] | //div[@class='content']
2test_url: http://cedarrepublican.com/online_features/gift_ideas/sending-mother-s-day-flowers-how-to-be-sure-they/article_b69af9b8-1f05-5352-8621-16ce007e5623.html
diff --git a/inc/3rdparty/site_config/standard/chareidi.org.txt b/inc/3rdparty/site_config/standard/chareidi.org.txt
index de34a7d8..de34a7d8 100644..100755
--- a/inc/3rdparty/site_config/standard/chareidi.org.txt
+++ b/inc/3rdparty/site_config/standard/chareidi.org.txt
diff --git a/inc/3rdparty/site_config/standard/chinamining.org.txt b/inc/3rdparty/site_config/standard/chinamining.org.txt
index ea0df2a3..d00d65de 100644..100755
--- a/inc/3rdparty/site_config/standard/chinamining.org.txt
+++ b/inc/3rdparty/site_config/standard/chinamining.org.txt
@@ -1,10 +1,10 @@
1title: //*[@id='Content']/span[1] 1title: //*[@id='Content']/span[1]
2author: substring-after(substring-before(//*[@id='Content']/span[2], ')'), '(') 2author: substring-after(substring-before(//*[@id='Content']/span[2], ')'), '(')
3date: substring-before(substring-after(//*[@id='Content']/span[2], 'Updated: '), 'Counter') 3date: substring-before(substring-after(//*[@id='Content']/span[2], 'Updated: '), 'Counter')
4 4
5strip: //*[@id='Content']/span[1] 5strip: //*[@id='Content']/span[1]
6strip: //*[@id='Content']/span[2] 6strip: //*[@id='Content']/span[2]
7 7
8body: //*[@id='Content'] 8body: //*[@id='Content']
9 9
10test_url: http://www.chinamining.org/News/2011-07-22/1311319069d48087.html \ No newline at end of file 10test_url: http://www.chinamining.org/News/2011-07-22/1311319069d48087.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/chomsky.info.txt b/inc/3rdparty/site_config/standard/chomsky.info.txt
index 1d294109..31440538 100644..100755
--- a/inc/3rdparty/site_config/standard/chomsky.info.txt
+++ b/inc/3rdparty/site_config/standard/chomsky.info.txt
@@ -1,5 +1,5 @@
1title: //div[@class='title'] 1title: //div[@class='title']
2author: //div[@class='author'] 2author: //div[@class='author']
3prune: no 3prune: no
4 4
5test_url: http://www.chomsky.info/onchomsky/2002----.htm \ No newline at end of file 5test_url: http://www.chomsky.info/onchomsky/2002----.htm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/chrisltd.com.txt b/inc/3rdparty/site_config/standard/chrisltd.com.txt
new file mode 100755
index 00000000..86d0f5db
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/chrisltd.com.txt
@@ -0,0 +1,6 @@
1title: //header/h1/b[contains(@class, 'title')]
2author: substring-after(//article/header/div, 'By ')
3date: //header/h1/span[contains(@class, 'date')]
4body: //div[@id='main']/article
5strip: //header
6test_url: http://chrisltd.com/blog/2012/03/fix-widows-indesign/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/christianitytoday.com.txt b/inc/3rdparty/site_config/standard/christianitytoday.com.txt
index 44288a46..86be14ce 100644..100755
--- a/inc/3rdparty/site_config/standard/christianitytoday.com.txt
+++ b/inc/3rdparty/site_config/standard/christianitytoday.com.txt
@@ -1,13 +1,13 @@
1title://div[@class='title'] 1title://div[@class='title']
2author://div[@class='byline']/b 2author://div[@class='byline']/b
3date:substring-after(//div[@class='byline'], 'posted') 3date:substring-after(//div[@class='byline'], 'posted')
4body://div[@id='body'] 4body://div[@id='body']
5wrap_in(h2)://span[@class='subhead'] 5wrap_in(h2)://span[@class='subhead']
6wrap_in(i)://p[@class='bio'] 6wrap_in(i)://p[@class='bio']
7wrap_in(i)://p[@class='copyright'] 7wrap_in(i)://p[@class='copyright']
8strip://div[@class='title'] 8strip://div[@class='title']
9strip://div[@class='deck'] 9strip://div[@class='deck']
10strip://div[@class='byline'] 10strip://div[@class='byline']
11strip://div[@class='copyright'] 11strip://div[@class='copyright']
12strip://br 12strip://br
13test_url: http://www.christianitytoday.com/ct/2012/aprilweb-only/my-god-forsaken-me.html \ No newline at end of file 13test_url: http://www.christianitytoday.com/ct/2012/aprilweb-only/my-god-forsaken-me.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/christianpf.com.txt b/inc/3rdparty/site_config/standard/christianpf.com.txt
index 7f089c55..fb5f342d 100644..100755
--- a/inc/3rdparty/site_config/standard/christianpf.com.txt
+++ b/inc/3rdparty/site_config/standard/christianpf.com.txt
@@ -1,5 +1,5 @@
1title: //h1[@class="entry-title"] 1title: //h1[@class="entry-title"]
2author: //*[@class="author vcard fn"] 2author: //*[@class="author vcard fn"]
3date: //*[@class="published"] 3date: //*[@class="published"]
4body: //div[(@class = "dd_content_wrap")] 4body: //div[(@class = "dd_content_wrap")]
5test_url: http://christianpf.com/do-ibuys-lead-to-more-buying/ \ No newline at end of file 5test_url: http://christianpf.com/do-ibuys-lead-to-more-buying/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/christies.com.txt b/inc/3rdparty/site_config/standard/christies.com.txt
index 5c5889a2..b3c76519 100644..100755
--- a/inc/3rdparty/site_config/standard/christies.com.txt
+++ b/inc/3rdparty/site_config/standard/christies.com.txt
@@ -1,6 +1,6 @@
1tidy: no 1tidy: no
2prune: no 2prune: no
3date: //article//time[@pubdate] 3date: //article//time[@pubdate]
4title: //article/header/h2 4title: //article/header/h2
5body: //article 5body: //article
6test_url: http://www.christies.com/LotFinder/custom/lot_details_MultiLanguage.aspx?from=salesummary&intObjectID=5556662&sid=e536ed1a-b763-41c4-afcf-c94815ec6eee&LID=3 \ No newline at end of file 6test_url: http://www.christies.com/LotFinder/custom/lot_details_MultiLanguage.aspx?from=salesummary&intObjectID=5556662&sid=e536ed1a-b763-41c4-afcf-c94815ec6eee&LID=3 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/chrome.google.com.txt b/inc/3rdparty/site_config/standard/chrome.google.com.txt
index d4cc8581..5a1d043d 100644..100755
--- a/inc/3rdparty/site_config/standard/chrome.google.com.txt
+++ b/inc/3rdparty/site_config/standard/chrome.google.com.txt
@@ -1,9 +1,9 @@
1body: //pre[@id='cx-desc-text'] 1body: //pre[@id='cx-desc-text']
2body: //div[contains(@class, 'overview-tab-right-bar-info')] 2body: //div[contains(@class, 'overview-tab-right-bar-info')]
3title: //h1[contains(@class, 'detail-dialog-title')] 3title: //h1[contains(@class, 'detail-dialog-title')]
4tidy: no 4tidy: no
5prune: no 5prune: no
6replace_string(<noscript>): <div> 6replace_string(<noscript>): <div>
7replace_string(</noscript>): </div> 7replace_string(</noscript>): </div>
8 8
9test_url: https://chrome.google.com/webstore/detail/pnaiinchjaonopoejhknmgjingcnaloc \ No newline at end of file 9test_url: https://chrome.google.com/webstore/detail/pnaiinchjaonopoejhknmgjingcnaloc \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/chronicle.com.txt b/inc/3rdparty/site_config/standard/chronicle.com.txt
index 0c6c11ed..e86d3eca 100644..100755
--- a/inc/3rdparty/site_config/standard/chronicle.com.txt
+++ b/inc/3rdparty/site_config/standard/chronicle.com.txt
@@ -1,17 +1,17 @@
1title: //h1[contains(@class, "entry-title")] 1title: //h1[contains(@class, "entry-title")]
2author: //p[contains(@class, "byline")] 2author: //p[contains(@class, "byline")]
3 3
4# blog articles (chronicle.com/blogs/*) 4# blog articles (chronicle.com/blogs/*)
5body: //div[contains(@class, "abstract")] 5body: //div[contains(@class, "abstract")]
6date: //p[contains(@class, "time")] 6date: //p[contains(@class, "time")]
7 7
8# all (?) other articles 8# all (?) other articles
9body: //div[@id="article-body"] 9body: //div[@id="article-body"]
10date: //p[contains(@class, "dateline")] 10date: //p[contains(@class, "dateline")]
11 11
12# remove sidebars containing images (I assume this is desired for Instapaper) 12# remove sidebars containing images (I assume this is desired for Instapaper)
13strip: //div[@id="related"] 13strip: //div[@id="related"]
14strip: //div[contains(@class, "image")] 14strip: //div[contains(@class, "image")]
15 15
16# note that if you're not a Chronicle subscriber (personally or institutionally), you'll only see the first couple of paragraphs of the article, and Instapaper will display that with some crap above and below. thank goodness for that bookmarklet 16# note that if you're not a Chronicle subscriber (personally or institutionally), you'll only see the first couple of paragraphs of the article, and Instapaper will display that with some crap above and below. thank goodness for that bookmarklet
17test_url: http://chronicle.com/article/In-a-Land-of-Second-Chances/128375/ \ No newline at end of file 17test_url: http://chronicle.com/article/In-a-Land-of-Second-Chances/128375/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ciaosamin.com.txt b/inc/3rdparty/site_config/standard/ciaosamin.com.txt
new file mode 100755
index 00000000..02fd3434
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/ciaosamin.com.txt
@@ -0,0 +1,4 @@
1body://div[contains(@class, 'entry-content')]
2date://h2[contains(@class, 'date-header')]
3title://h3[contains(@class, 'post-title')]
4test_url: http://www.ciaosamin.com/2013/04/how-this-happened.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cicero.de.txt b/inc/3rdparty/site_config/standard/cicero.de.txt
index b9f9a12b..b8913639 100644..100755
--- a/inc/3rdparty/site_config/standard/cicero.de.txt
+++ b/inc/3rdparty/site_config/standard/cicero.de.txt
@@ -1,33 +1,33 @@
1# fforst@... 1# fforst@...
2 2
3# Use link to print article for single page view 3# Use link to print article for single page view
4single_page_link: //a[@class="print"] 4single_page_link: //a[@class="print"]
5 5
6# set body 6# set body
7tidy: no 7tidy: no
8body: //div[@class='artikel-content'] 8body: //div[@class='artikel-content']
9 9
10# strip title and subtitle since we got it already 10# strip title and subtitle since we got it already
11strip: //div[@class='issue'] 11strip: //div[@class='issue']
12strip: //div[@class='artikel-content']/h2 12strip: //div[@class='artikel-content']/h2
13 13
14# some authors are known and have a link, others don't 14# some authors are known and have a link, others don't
15author: //a[contains(@href, 'autor?')] 15author: //a[contains(@href, 'autor?')]
16 16
17#date 17#date
18date: //span[@class='article-date'] 18date: //span[@class='article-date']
19 19
20# Strip author since we got him 20# Strip author since we got him
21strip_id_or_class: author 21strip_id_or_class: author
22 22
23#strip captions 23#strip captions
24strip_id_or_class: field-name-field-image-credit 24strip_id_or_class: field-name-field-image-credit
25strip_id_or_class: field-name-field-article-image-subtitle 25strip_id_or_class: field-name-field-article-image-subtitle
26 26
27# remove community functions 27# remove community functions
28strip: //div[@class='meta'] 28strip: //div[@class='meta']
29strip: //div[@id='comments'] 29strip: //div[@id='comments']
30 30
31# remove "continue on the next page" text 31# remove "continue on the next page" text
32strip: //p[text()="[SEITE]"] 32strip: //p[text()="[SEITE]"]
33test_url: http://www.cicero.de/weltbuehne/ihre-wut-ist-global-krise-jugend-revolten-aufstaende-zelte/43049 \ No newline at end of file 33test_url: http://www.cicero.de/weltbuehne/ihre-wut-ist-global-krise-jugend-revolten-aufstaende-zelte/43049 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ciperchile.cl.txt b/inc/3rdparty/site_config/standard/ciperchile.cl.txt
index 4d3ac804..d7e9b762 100644..100755
--- a/inc/3rdparty/site_config/standard/ciperchile.cl.txt
+++ b/inc/3rdparty/site_config/standard/ciperchile.cl.txt
@@ -1,4 +1,4 @@
1body: //*[(@id = "articlebody")] 1body: //*[(@id = "articlebody")]
2strip_id_or_class: rotulo 2strip_id_or_class: rotulo
3 3
4test_url: http://ciperchile.cl/2011/04/18/las-operaciones-secretas-que-ordenaba-karadima-para-aniquilar-a-su-competencia/ \ No newline at end of file 4test_url: http://ciperchile.cl/2011/04/18/las-operaciones-secretas-que-ordenaba-karadima-para-aniquilar-a-su-competencia/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cjr.org.txt b/inc/3rdparty/site_config/standard/cjr.org.txt
index a0c3ea5d..df4c7cc4 100644..100755
--- a/inc/3rdparty/site_config/standard/cjr.org.txt
+++ b/inc/3rdparty/site_config/standard/cjr.org.txt
@@ -1,6 +1,6 @@
1body: //p[@class='subhead' or @class='attribution'] | //div[@class='article-body'] 1body: //p[@class='subhead' or @class='attribution'] | //div[@class='article-body']
2prune: no 2prune: no
3 3
4single_page_link: //li[@class='print']/a 4single_page_link: //li[@class='print']/a
5 5
6test_url: http://www.cjr.org/behind_the_news/from_breaking_news_to_baseless.php \ No newline at end of file 6test_url: http://www.cjr.org/behind_the_news/from_breaking_news_to_baseless.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/classyllama.com.txt b/inc/3rdparty/site_config/standard/classyllama.com.txt
new file mode 100755
index 00000000..1864eee8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/classyllama.com.txt
@@ -0,0 +1,6 @@
1date: //div[@id='content']//p[contains(@class, 'date')]/span
2author: substring-after(//div[@id='content']//div[contains(@class, 'over-under-bars')]/p[last()]/text(), 'Posted by ')
3body: //div[@id='content']//div[@class='pane-content']
4strip_id_or_class: trackback-url
5strip_id_or_class: over-under-bars
6test_url: http://www.classyllama.com/content/layout-caching \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/clientk.com.txt b/inc/3rdparty/site_config/standard/clientk.com.txt
index 369e88ad..d5a22ccb 100644..100755
--- a/inc/3rdparty/site_config/standard/clientk.com.txt
+++ b/inc/3rdparty/site_config/standard/clientk.com.txt
@@ -1,6 +1,6 @@
1title://div[@class="entrytitle"]/a 1title://div[@class="entrytitle"]/a
2author:substring-after(substring-before(//div[@class="entrytime"], "|"), "By ") 2author:substring-after(substring-before(//div[@class="entrytime"], "|"), "By ")
3date:substring-before(substring-after(//div[@class="entrytime"], "|"), "- Posted") 3date:substring-before(substring-after(//div[@class="entrytime"], "|"), "- Posted")
4body://div[@class="entrybody"] 4body://div[@class="entrybody"]
5strip://div[@class="entrybody"]//p[@class="singleinfo"] 5strip://div[@class="entrybody"]//p[@class="singleinfo"]
6test_url: http://clientk.com/2011/12/19/the-impact-of-more/ \ No newline at end of file 6test_url: http://clientk.com/2011/12/19/the-impact-of-more/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/clubic.com.txt b/inc/3rdparty/site_config/standard/clubic.com.txt
index b356bbdf..0148e54c 100644..100755
--- a/inc/3rdparty/site_config/standard/clubic.com.txt
+++ b/inc/3rdparty/site_config/standard/clubic.com.txt
@@ -1,11 +1,11 @@
1title: //h1 1title: //h1
2author: //a[@class='auteur'] 2author: //a[@class='auteur']
3body: //div[@class='editorial'] 3body: //div[@class='editorial']
4next_page_link: //a[contains(text(),'Page suivante')] 4next_page_link: //a[contains(text(),'Page suivante')]
5strip: //a[contains(text(),'Page suivante')] 5strip: //a[contains(text(),'Page suivante')]
6strip: //a[contains(text(),'Page précédente')] 6strip: //a[contains(text(),'Page précédente')]
7strip_id_or_class: slideshow 7strip_id_or_class: slideshow
8 8
9prune: no 9prune: no
10 10
11test_url: http://www.clubic.com/carte-graphique/carte-graphique-amd/radeon-hd-7770/article-478936-1-radeon-hd-7750-7770.html \ No newline at end of file 11test_url: http://www.clubic.com/carte-graphique/carte-graphique-amd/radeon-hd-7770/article-478936-1-radeon-hd-7750-7770.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cmswire.com.txt b/inc/3rdparty/site_config/standard/cmswire.com.txt
index 2bc96d2e..0b76377a 100644..100755
--- a/inc/3rdparty/site_config/standard/cmswire.com.txt
+++ b/inc/3rdparty/site_config/standard/cmswire.com.txt
@@ -1,6 +1,6 @@
1body: //div[contains(@id,'article-body')] 1body: //div[contains(@id,'article-body')]
2strip://div[contains(@id,'disqus_count_block')] 2strip://div[contains(@id,'disqus_count_block')]
3strip://div[contains(@id,'col-left')] 3strip://div[contains(@id,'col-left')]
4strip://div[contains(@id,'col-right')] 4strip://div[contains(@id,'col-right')]
5 5
6test_url: http://www.cmswire.com/cms/customer-experience/for-apps-and-appstores-the-singularity-is-approaching-014888.php \ No newline at end of file 6test_url: http://www.cmswire.com/cms/customer-experience/for-apps-and-appstores-the-singularity-is-approaching-014888.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cn.engadget.com.txt b/inc/3rdparty/site_config/standard/cn.engadget.com.txt
new file mode 100755
index 00000000..63f6f7ea
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/cn.engadget.com.txt
@@ -0,0 +1,5 @@
1title: //h2[@class="posttitle"]
2body: //div[@class="postbody"]
3prune: no
4
5test_url: http://cn.engadget.com/2013/06/29/google-play-music-all-access/
diff --git a/inc/3rdparty/site_config/standard/cn.reuters.com.txt b/inc/3rdparty/site_config/standard/cn.reuters.com.txt
new file mode 100755
index 00000000..b3878662
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/cn.reuters.com.txt
@@ -0,0 +1,5 @@
1title: //div[@id='maincontent']//h1
2body: //div[@id='resizeableText']
3
4test_url: http://cn.reuters.com/article/CNAnalysesNews/idCNKBS0FF0NM20140710
5test_url: http://cn.reuters.feedsportal.com/CNAnalysesNews \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cnet.com.txt b/inc/3rdparty/site_config/standard/cnet.com.txt
index 74f46ba9..eac08aaa 100644..100755
--- a/inc/3rdparty/site_config/standard/cnet.com.txt
+++ b/inc/3rdparty/site_config/standard/cnet.com.txt
@@ -1,16 +1,16 @@
1title: //meta[@property="og:title"]/@content 1title: //meta[@property="og:title"]/@content
2body: //div[contains(@class, 'postBody')] 2body: //div[contains(@class, 'postBody')]
3date: //div[@id='nameAndTime']/time 3date: //div[@id='nameAndTime']/time
4author: //div[@id='nameAndTime']/span[@class='author'] 4author: //div[@id='nameAndTime']/span[@class='author']
5 5
6strip_id_or_class: image-credit 6strip_id_or_class: image-credit
7strip_id_or_class: noAutolink 7strip_id_or_class: noAutolink
8strip_id_or_class: related 8strip_id_or_class: related
9 9
10prune: no 10prune: no
11tidy: no 11tidy: no
12 12
13# early end 13# early end
14replace_string(Download today's podcast</a>): Download today's podcast</a></div></body></html> 14replace_string(Download today's podcast</a>): Download today's podcast</a></div></body></html>
15 15
16test_url: http://www.cnet.com/8301-13952_1-57367607-81/the-404-981-where-the-world-is-a-vampire-podcast/ \ No newline at end of file 16test_url: http://www.cnet.com/8301-13952_1-57367607-81/the-404-981-where-the-world-is-a-vampire-podcast/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cnn.com.txt b/inc/3rdparty/site_config/standard/cnn.com.txt
index 995e2c79..6f69e4e8 100644..100755
--- a/inc/3rdparty/site_config/standard/cnn.com.txt
+++ b/inc/3rdparty/site_config/standard/cnn.com.txt
@@ -1,19 +1,23 @@
1title: //div[@class="cnn_storyarea"]/h1 1body: //div[@id='cnnContentContainer']//div[contains(@class, 'cnn_strycntntlft')]
2author: //div[@class="cnnByline"]/strong 2title: //div[@class="cnn_storyarea"]/h1
3date: substring-after(//div[@class="cnn_strytmstmp"], 'Sun') 3author: //div[@class="cnnByline"]/strong
4date: substring-after(//div[@class="cnn_strytmstmp"], 'Mon') 4date: substring-after(//div[@class="cnn_strytmstmp"], 'Sun')
5date: substring-after(//div[@class="cnn_strytmstmp"], 'Tue') 5date: substring-after(//div[@class="cnn_strytmstmp"], 'Mon')
6date: substring-after(//div[@class="cnn_strytmstmp"], 'Wed') 6date: substring-after(//div[@class="cnn_strytmstmp"], 'Tue')
7date: substring-after(//div[@class="cnn_strytmstmp"], 'Thu') 7date: substring-after(//div[@class="cnn_strytmstmp"], 'Wed')
8date: substring-after(//div[@class="cnn_strytmstmp"], 'Fri') 8date: substring-after(//div[@class="cnn_strytmstmp"], 'Thu')
9date: substring-after(//div[@class="cnn_strytmstmp"], 'Sat') 9date: substring-after(//div[@class="cnn_strytmstmp"], 'Fri')
10strip: //div[@class="cnn_storyarea"]/h1 10date: substring-after(//div[@class="cnn_strytmstmp"], 'Sat')
11strip_id_or_class: cnnByline 11strip: //div[@class="cnn_storyarea"]/h1
12strip_id_or_class: cnn_strytmstmp 12strip_id_or_class: cnnByline
13strip_id_or_class: cnn_strycaptiontxt 13strip_id_or_class: cnn_strytmstmp
14strip_id_or_class: cnn_strybtntoolsbttm 14strip_id_or_class: cnn_strycaptiontxt
15strip_id_or_class: cnn_strybtntools 15strip_id_or_class: cnn_strybtntoolsbttm
16strip_id_or_class: cnn_strybtmcntnt 16strip_id_or_class: cnn_strybtntools
17strip_id_or_class: cnn_containerwht 17strip_id_or_class: cnn_strybtmcntnt
18strip_id_or_class: cnn_stryathrtmp 18strip_id_or_class: sharebar
19test_url: http://www.cnn.com/2012/05/13/us/new-york-police-policy/index.html?eref=rss_topstories \ No newline at end of file 19#strip_id_or_class: cnn_containerwht
20strip_id_or_class: cnn_stryathrtmp
21replace_string(<a name="em0"></a>): <!-- a name -->
22test_url: http://www.cnn.com/2012/05/13/us/new-york-police-policy/index.html?eref=rss_topstories
23test_url: http://rss.cnn.com/rss/edition.rss \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cnnsi.com.txt b/inc/3rdparty/site_config/standard/cnnsi.com.txt
index 6a2c2b80..ac49aef9 100644..100755
--- a/inc/3rdparty/site_config/standard/cnnsi.com.txt
+++ b/inc/3rdparty/site_config/standard/cnnsi.com.txt
@@ -1,26 +1,26 @@
1# main sportsillustrated.com articles 1# main sportsillustrated.com articles
2 2
3body: //div[@id="cnnStoryContent"] 3body: //div[@id="cnnStoryContent"]
4title: //div[@id="cnnStoryHeadline"]//h1 4title: //div[@id="cnnStoryHeadline"]//h1
5author: //div[@id="cnnSubBanner"]//strong 5author: //div[@id="cnnSubBanner"]//strong
6date: substring-after(//div[@id="cnnTimeStamp"], "Updated: ") 6date: substring-after(//div[@id="cnnTimeStamp"], "Updated: ")
7date: substring-after(//div[@id="cnnTimeStamp"], "Posted: ") 7date: substring-after(//div[@id="cnnTimeStamp"], "Posted: ")
8 8
9# kill ugly font buttons 9# kill ugly font buttons
10strip: //div[@id="cnnSCFontButtons"] 10strip: //div[@id="cnnSCFontButtons"]
11 11
12# kill misc filler videos & etc 12# kill misc filler videos & etc
13strip: //div[@class="cnnDivideContent"] 13strip: //div[@class="cnnDivideContent"]
14strip: //*[@class="cnnTMbox"] 14strip: //*[@class="cnnTMbox"]
15 15
16# si vault articles 16# si vault articles
17# ------------- 17# -------------
18body: //div[@class="siv_artPara"] 18body: //div[@class="siv_artPara"]
19title: //div[@class="siv_artHeader"]//h1 19title: //div[@class="siv_artHeader"]//h1
20author: //div[@class="byline"] 20author: //div[@class="byline"]
21date: //div[@class="date"] 21date: //div[@class="date"]
22 22
23next_page_link: //div[@id='cnnStoryContinue']/a 23next_page_link: //div[@id='cnnStoryContinue']/a
24strip_id_or_class: cnnstorypagination 24strip_id_or_class: cnnstorypagination
25 25
26test_url: http://cnnsi.com/2012/writers/peter_king/01/08/wild.card.round/index.html \ No newline at end of file 26test_url: http://cnnsi.com/2012/writers/peter_king/01/08/wild.card.round/index.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/code.activestate.com.txt b/inc/3rdparty/site_config/standard/code.activestate.com.txt
index 6cf72e23..83a21e19 100644..100755
--- a/inc/3rdparty/site_config/standard/code.activestate.com.txt
+++ b/inc/3rdparty/site_config/standard/code.activestate.com.txt
@@ -1,10 +1,10 @@
1body: //div[@id='content'] 1body: //div[@id='content']
2title: //div[@id='page_header']/h1 2title: //div[@id='page_header']/h1
3 3
4strip_id_or_class: 'lineno' 4strip_id_or_class: 'lineno'
5strip_id_or_class: 'block-toolbar-button' 5strip_id_or_class: 'block-toolbar-button'
6strip_id_or_class: 'recipe_score' 6strip_id_or_class: 'recipe_score'
7strip: //div[@id='recipe_tools'] 7strip: //div[@id='recipe_tools']
8strip: //div[@id='addcomment'] 8strip: //div[@id='addcomment']
9 9
10test_url: http://code.activestate.com/recipes/500261-named-tuples/ \ No newline at end of file 10test_url: http://code.activestate.com/recipes/500261-named-tuples/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/code.fivefilters.org.txt b/inc/3rdparty/site_config/standard/code.fivefilters.org.txt
new file mode 100755
index 00000000..269fb547
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/code.fivefilters.org.txt
@@ -0,0 +1 @@
body: //div[@id='content']
diff --git a/inc/3rdparty/site_config/standard/code.google.com.txt b/inc/3rdparty/site_config/standard/code.google.com.txt
index 40a16209..6e9c00a7 100644..100755
--- a/inc/3rdparty/site_config/standard/code.google.com.txt
+++ b/inc/3rdparty/site_config/standard/code.google.com.txt
@@ -1,5 +1,5 @@
1body: //div[@id="gc-pagecontent"] 1body: //div[@id="gc-pagecontent"]
2strip: //a[@class="backtotop"] 2strip: //a[@class="backtotop"]
3prune: no 3prune: no
4 4
5test_url: http://code.google.com/apis/analytics/docs/tracking/gaTrackingEcommerce.html \ No newline at end of file 5test_url: http://code.google.com/apis/analytics/docs/tracking/gaTrackingEcommerce.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/codeproject.com.txt b/inc/3rdparty/site_config/standard/codeproject.com.txt
new file mode 100755
index 00000000..d1191acc
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/codeproject.com.txt
@@ -0,0 +1,3 @@
1body: //div[@id="contentdiv"]
2date: //span[@class="date"]
3test_url: http://www.codeproject.com/Articles/499902/Profiling-Entity-Framework-5-in-code \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/codinghorror.com.txt b/inc/3rdparty/site_config/standard/codinghorror.com.txt
index 9c95f107..adf6e5a0 100644..100755
--- a/inc/3rdparty/site_config/standard/codinghorror.com.txt
+++ b/inc/3rdparty/site_config/standard/codinghorror.com.txt
@@ -1,15 +1,15 @@
1body: //div[@class='blogbody'] 1body: //div[@class='blogbody']
2strip: //h3[@class='title'] 2strip: //h3[@class='title']
3date: //h2[@class='date'] 3date: //h2[@class='date']
4#Should Atwood just be a literal? 4#Should Atwood just be a literal?
5author: substring-before( substring-after(//div[@class='posted'], 'y'), 'V') 5author: substring-before( substring-after(//div[@class='posted'], 'y'), 'V')
6 6
7# tim.kingman@... 2011-07-26 7# tim.kingman@... 2011-07-26
8# Prune:no to retain all-link ULs that are part of the body content like 8# Prune:no to retain all-link ULs that are part of the body content like
9# http://www.codinghorror.com/blog/2011/07/building-a-pc-part-vii-rebooting.html 9# http://www.codinghorror.com/blog/2011/07/building-a-pc-part-vii-rebooting.html
10# Then explicitly strip the "Posted By" and prev/next links that Prune:yes would have removed. 10# Then explicitly strip the "Posted By" and prev/next links that Prune:yes would have removed.
11 11
12prune: no 12prune: no
13strip: //div[@class='posted']/following-sibling::* 13strip: //div[@class='posted']/following-sibling::*
14strip: //div[@class='posted'] 14strip: //div[@class='posted']
15test_url: http://www.codinghorror.com/blog/2011/07/building-a-pc-part-vii-rebooting.html \ No newline at end of file 15test_url: http://www.codinghorror.com/blog/2011/07/building-a-pc-part-vii-rebooting.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/collegehumor.com.txt b/inc/3rdparty/site_config/standard/collegehumor.com.txt
index 9d75d641..318e6ff4 100644..100755
--- a/inc/3rdparty/site_config/standard/collegehumor.com.txt
+++ b/inc/3rdparty/site_config/standard/collegehumor.com.txt
@@ -1,14 +1,14 @@
1title: //h1[@class='title'] 1title: //h1[@class='title']
2author: //p[@class='byline']/a[1] 2author: //p[@class='byline']/a[1]
3date: //*[@class='date'] 3date: //*[@class='date']
4 4
5body: //div[@class='article_body'] 5body: //div[@class='article_body']
6strip: //p[@class='ca_intro'] 6strip: //p[@class='ca_intro']
7strip: //div[@id='action_bar'] 7strip: //div[@id='action_bar']
8strip: //div[@class='below_content'] 8strip: //div[@class='below_content']
9strip: //div[@id='announcement'] 9strip: //div[@id='announcement']
10strip: //div[@id='leftovers'] 10strip: //div[@id='leftovers']
11strip: //div[@class='form'] 11strip: //div[@class='form']
12strip: //div[@id='email_overlay'] 12strip: //div[@id='email_overlay']
13strip: //a[@class='close'] 13strip: //a[@class='close']
14test_url: http://www.collegehumor.com/article/6599562/how-it-happened-the-necktie \ No newline at end of file 14test_url: http://www.collegehumor.com/article/6599562/how-it-happened-the-necktie \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/communities-dominate.blogs.com.txt b/inc/3rdparty/site_config/standard/communities-dominate.blogs.com.txt
index 800a907d..800a907d 100644..100755
--- a/inc/3rdparty/site_config/standard/communities-dominate.blogs.com.txt
+++ b/inc/3rdparty/site_config/standard/communities-dominate.blogs.com.txt
diff --git a/inc/3rdparty/site_config/standard/community.service-now.com.txt b/inc/3rdparty/site_config/standard/community.service-now.com.txt
index 10fd2516..c9854b43 100644..100755
--- a/inc/3rdparty/site_config/standard/community.service-now.com.txt
+++ b/inc/3rdparty/site_config/standard/community.service-now.com.txt
@@ -1,8 +1,8 @@
1body: //div[@id="center"]//div[@class="node"] 1body: //div[@id="center"]//div[@class="node"]
2title: //div[@id="center"]//h2 2title: //div[@id="center"]//h2
3author: substring-after(//div[@id="center"]//div[@class="node"]//span[@class="submitted"], "&mdash;") 3author: substring-after(//div[@id="center"]//div[@class="node"]//span[@class="submitted"], "&mdash;")
4date: substring-before(//div[@id="center"]//div[@class="node"]//span[@class="submitted"], "&mdash;") 4date: substring-before(//div[@id="center"]//div[@class="node"]//span[@class="submitted"], "&mdash;")
5strip: //div[@id="center"]//h2[1] 5strip: //div[@id="center"]//h2[1]
6strip: //span[@class="submitted"][1] 6strip: //span[@class="submitted"][1]
7move_into(//div[@class="node"])://div[@class="breadcrumb"] 7move_into(//div[@class="node"])://div[@class="breadcrumb"]
8test_url: http://community.service-now.com/blog/lawrenceeng/seasons-greetings-servicenow-team \ No newline at end of file 8test_url: http://community.service-now.com/blog/lawrenceeng/seasons-greetings-servicenow-team \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/computer.org.txt b/inc/3rdparty/site_config/standard/computer.org.txt
index 00e6fddf..8345cf50 100644..100755
--- a/inc/3rdparty/site_config/standard/computer.org.txt
+++ b/inc/3rdparty/site_config/standard/computer.org.txt
@@ -1,5 +1,5 @@
1strip_id_or_class:column-3 1strip_id_or_class:column-3
2strip_id_or_class:portlet-boundary 2strip_id_or_class:portlet-boundary
3strip_id_or_class:banner 3strip_id_or_class:banner
4 4
5test_url: http://www.computer.org/portal/web/buildyourcareer/careerwatch/jt19 \ No newline at end of file 5test_url: http://www.computer.org/portal/web/buildyourcareer/careerwatch/jt19 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/computerbase.de.txt b/inc/3rdparty/site_config/standard/computerbase.de.txt
index 29199242..5973c50b 100644..100755
--- a/inc/3rdparty/site_config/standard/computerbase.de.txt
+++ b/inc/3rdparty/site_config/standard/computerbase.de.txt
@@ -1,18 +1,18 @@
1title://h1 1title://h1
2 2
3author://div[@id="news-meta"]/a 3author://div[@id="news-meta"]/a
4 4
5body://*[@id="main"]/div[1] 5body://*[@id="main"]/div[1]
6 6
7strip://*[@id="main"]/div[2] 7strip://*[@id="main"]/div[2]
8strip://*[@id="main"]/div[3] 8strip://*[@id="main"]/div[3]
9strip://*[@id="page"]//footer 9strip://*[@id="page"]//footer
10 10
11#date: didn't manage to parse it 11#date: didn't manage to parse it
12 12
13#Images have to be stripped because the page does it with overlay 13#Images have to be stripped because the page does it with overlay
14strip://img 14strip://img
15 15
16#figures are not displayed in instapaper... 16#figures are not displayed in instapaper...
17strip://figure | //figcaption 17strip://figure | //figcaption
18test_url: http://www.computerbase.de/news/2012-06/verbraucherzentrale-mahnt-blizzard-fuer-diablo-3-ab/ \ No newline at end of file 18test_url: http://www.computerbase.de/news/2012-06/verbraucherzentrale-mahnt-blizzard-fuer-diablo-3-ab/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/computerworld.com.txt b/inc/3rdparty/site_config/standard/computerworld.com.txt
index 8e1f3e11..7f20a4da 100644..100755
--- a/inc/3rdparty/site_config/standard/computerworld.com.txt
+++ b/inc/3rdparty/site_config/standard/computerworld.com.txt
@@ -1,22 +1,22 @@
1title: //meta[@name='headline']/@content 1title: //meta[@name='headline']/@content
2date: //meta[@name='date']/@content 2date: //meta[@name='date']/@content
3author: //meta[@name='author']/@content 3author: //meta[@name='author']/@content
4body: //div[contains(@class, 'article')] 4body: //div[contains(@class, 'article')]
5body://div[@id="article_body"] 5body://div[@id="article_body"]
6 6
7strip_id_or_class: banner 7strip_id_or_class: banner
8strip: //noscript 8strip: //noscript
9strip: //div[@style='width:1px;height:130px;float:right;'] 9strip: //div[@style='width:1px;height:130px;float:right;']
10strip: //div[@class='storyby'] 10strip: //div[@class='storyby']
11strip_image_src: twitter_icon 11strip_image_src: twitter_icon
12strip_image_src: rss_bug 12strip_image_src: rss_bug
13 13
14tidy: no 14tidy: no
15prune: no 15prune: no
16 16
17next_page_link://div[@id="next_page"]/a 17next_page_link://div[@id="next_page"]/a
18 18
19single_page_link: concat('http://www.computerworld.com/s/article/print/', substring-after(//link[@rel='canonical']/@href, '/s/article/')) 19single_page_link: concat('http://www.computerworld.com/s/article/print/', substring-after(//link[@rel='canonical']/@href, '/s/article/'))
20 20
21test_url: http://www.computerworld.com/s/article/9224348/Apple_s_new_OS_X_tightens_screws_on_some_malware 21test_url: http://www.computerworld.com/s/article/9224348/Apple_s_new_OS_X_tightens_screws_on_some_malware
22test_url: http://www.computerworld.com/s/article/9227679/Windows_8_Release_Preview_Updated_but_still_uneasy \ No newline at end of file 22test_url: http://www.computerworld.com/s/article/9227679/Windows_8_Release_Preview_Updated_but_still_uneasy \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/computerworld.dk.txt b/inc/3rdparty/site_config/standard/computerworld.dk.txt
index a83f366f..d819109c 100644..100755
--- a/inc/3rdparty/site_config/standard/computerworld.dk.txt
+++ b/inc/3rdparty/site_config/standard/computerworld.dk.txt
@@ -1,5 +1,5 @@
1strip: //div[contains(@class, 'articleAdtechAd')] 1strip: //div[contains(@class, 'articleAdtechAd')]
2title: //div[@id='article']/h1 2title: //div[@id='article']/h1
3title: //div[contains(@class, 'article')]/h1 3title: //div[contains(@class, 'article')]/h1
4body: //div[@id='articleText'] 4body: //div[@id='articleText']
5test_url: http://www.computerworld.dk/art/56748/test-din-viden-med-computerworlds-store-sommerquiz?a=fp_1&i=0 \ No newline at end of file 5test_url: http://www.computerworld.dk/art/56748/test-din-viden-med-computerworlds-store-sommerquiz?a=fp_1&i=0 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/contemporist.com.txt b/inc/3rdparty/site_config/standard/contemporist.com.txt
index d2b289a3..c3120fe8 100644..100755
--- a/inc/3rdparty/site_config/standard/contemporist.com.txt
+++ b/inc/3rdparty/site_config/standard/contemporist.com.txt
@@ -1,9 +1,9 @@
1# get author from string like "Posted by <author> on <date>" 1# get author from string like "Posted by <author> on <date>"
2author: substring-before(substring-after(//div[@class='post']/p[@class='post-meta'], 'by'), 'on') 2author: substring-before(substring-after(//div[@class='post']/p[@class='post-meta'], 'by'), 'on')
3 3
4# get date from string like "Posted by <author> on <date>" 4# get date from string like "Posted by <author> on <date>"
5date: substring-after(//div[@class='post']/p[@class='post-meta'], 'on') 5date: substring-after(//div[@class='post']/p[@class='post-meta'], 'on')
6 6
7# this keeps thumbnail images 7# this keeps thumbnail images
8prune: no 8prune: no
9test_url: http://www.contemporist.com/2011/11/02/landing-200-lamp-by-kim-hyunjoo \ No newline at end of file 9test_url: http://www.contemporist.com/2011/11/02/landing-200-lamp-by-kim-hyunjoo \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/conversaciones.nokia.com.txt b/inc/3rdparty/site_config/standard/conversaciones.nokia.com.txt
index 9bad2c84..966cc861 100644..100755
--- a/inc/3rdparty/site_config/standard/conversaciones.nokia.com.txt
+++ b/inc/3rdparty/site_config/standard/conversaciones.nokia.com.txt
@@ -1,7 +1,7 @@
1title: //div[@class='article_header']/h1 1title: //div[@class='article_header']/h1
2body: //div[@class='article_header']/p | //div[@class='article_body'] 2body: //div[@class='article_header']/p | //div[@class='article_body']
3strip_id_or_class: share_this 3strip_id_or_class: share_this
4strip_id_or_class: sociable 4strip_id_or_class: sociable
5prune: no 5prune: no
6 6
7test_url: http://conversaciones.nokia.com/2011/10/07/cinco-atajos-en-el-nokia-n8/ \ No newline at end of file 7test_url: http://conversaciones.nokia.com/2011/10/07/cinco-atajos-en-el-nokia-n8/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cooper.com.txt b/inc/3rdparty/site_config/standard/cooper.com.txt
new file mode 100755
index 00000000..a4244097
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/cooper.com.txt
@@ -0,0 +1,4 @@
1body: //*[contains(@class,'body')]
2date: //abbr[@class='published']
3
4test_url: http://www.cooper.com/journal/2012/08/2-weeks-left-to-win-your-way-to-the-woodstock-of-ux-coopers-ux-boot-camp.html/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/core77.com.txt b/inc/3rdparty/site_config/standard/core77.com.txt
index a24374d8..cf1fa93c 100644..100755
--- a/inc/3rdparty/site_config/standard/core77.com.txt
+++ b/inc/3rdparty/site_config/standard/core77.com.txt
@@ -1,7 +1,7 @@
1body: //div[@id="permalink"]/div[@class="post"] 1body: //div[@id="permalink"]/div[@class="post"]
2 2
3strip: //div[@id='backArrow'] 3strip: //div[@id='backArrow']
4strip: //div[@id='fwdArrow'] 4strip: //div[@id='fwdArrow']
5strip: //div[@class="post-title"] 5strip: //div[@class="post-title"]
6strip: //div[@class="sharing"] 6strip: //div[@class="sharing"]
7test_url: http://www.core77.com/blog/columns/why_design_education_must_change_17993.asp \ No newline at end of file 7test_url: http://www.core77.com/blog/columns/why_design_education_must_change_17993.asp \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/counterpunch.org.txt b/inc/3rdparty/site_config/standard/counterpunch.org.txt
index c9e92287..b6bd8be5 100644..100755
--- a/inc/3rdparty/site_config/standard/counterpunch.org.txt
+++ b/inc/3rdparty/site_config/standard/counterpunch.org.txt
@@ -1,6 +1,6 @@
1title: //div[@class='main']//h1[contains(@class, 'article-title')] 1title: //div[@class='main']//h1[contains(@class, 'article-title')]
2author: //div[@class='mainauthorstyle'] 2author: //div[@class='mainauthorstyle']
3body: //div[@class='main']//div[@class='main-text'] 3body: //div[@class='main']//div[@class='main-text']
4strip: //td[@width='140'] 4strip: //td[@width='140']
5 5
6test_url: http://www.counterpunch.org/johnstone05172011.html \ No newline at end of file 6test_url: http://www.counterpunch.org/johnstone05172011.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/crazybutable.com.txt b/inc/3rdparty/site_config/standard/crazybutable.com.txt
index d25cd05d..037cd177 100644..100755
--- a/inc/3rdparty/site_config/standard/crazybutable.com.txt
+++ b/inc/3rdparty/site_config/standard/crazybutable.com.txt
@@ -1,3 +1,3 @@
1title://h2 1title://h2
2body://div[contains(@class, 'entrytext')] 2body://div[contains(@class, 'entrytext')]
3test_url: http://www.crazybutable.com/weblog/archives/2010/07/01/house-ideas-that-worked/ \ No newline at end of file 3test_url: http://www.crazybutable.com/weblog/archives/2010/07/01/house-ideas-that-worked/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/crimemagazine.com.txt b/inc/3rdparty/site_config/standard/crimemagazine.com.txt
index 9cf0bccc..9cf0bccc 100644..100755
--- a/inc/3rdparty/site_config/standard/crimemagazine.com.txt
+++ b/inc/3rdparty/site_config/standard/crimemagazine.com.txt
diff --git a/inc/3rdparty/site_config/standard/crimethinc.com.txt b/inc/3rdparty/site_config/standard/crimethinc.com.txt
index 74bc6db9..b5a8018a 100644..100755
--- a/inc/3rdparty/site_config/standard/crimethinc.com.txt
+++ b/inc/3rdparty/site_config/standard/crimethinc.com.txt
@@ -1,3 +1,3 @@
1body: //div[@class="readingtext"] 1body: //div[@class="readingtext"]
2title: substring-after(substring-after(//title, ':'), ':') 2title: substring-after(substring-after(//title, ':'), ':')
3test_url: http://www.crimethinc.com/texts/recentfeatures/nightmares.php \ No newline at end of file 3test_url: http://www.crimethinc.com/texts/recentfeatures/nightmares.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/crn.de.txt b/inc/3rdparty/site_config/standard/crn.de.txt
index 7fa950af..61d5d6a7 100644..100755
--- a/inc/3rdparty/site_config/standard/crn.de.txt
+++ b/inc/3rdparty/site_config/standard/crn.de.txt
@@ -1,3 +1,3 @@
1author: //p[contains(@class,'author')]/a 1author: //p[contains(@class,'author')]/a
2date: //div[contains(@class,'date')] 2date: //div[contains(@class,'date')]
3test_url: http://www.crn.de/netzwerke-tk/artikel-93103.html \ No newline at end of file 3test_url: http://www.crn.de/netzwerke-tk/artikel-93103.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/csmonitor.com.txt b/inc/3rdparty/site_config/standard/csmonitor.com.txt
index d4dbc5c8..b482e34e 100644..100755
--- a/inc/3rdparty/site_config/standard/csmonitor.com.txt
+++ b/inc/3rdparty/site_config/standard/csmonitor.com.txt
@@ -1,18 +1,18 @@
1title: //h1[contains(@class, 'head')] 1title: //h1[contains(@class, 'head')]
2 2
3# standard page 3# standard page
4body: //div[@id='mainColumn']//div[contains(@class, 'list-article-full')] 4body: //div[@id='mainColumn']//div[contains(@class, 'list-article-full')]
5# print page 5# print page
6body: //div[@id='mainColumn'] 6body: //div[@id='mainColumn']
7 7
8author: //a[contains(@class, 'ui-author')] 8author: //a[contains(@class, 'ui-author')]
9 9
10single_page_link: //div[@class='storyToolbar']//a[contains(@href, '/print/')] 10single_page_link: //div[@class='storyToolbar']//a[contains(@href, '/print/')]
11 11
12strip_id_or_class: storyToolbar 12strip_id_or_class: storyToolbar
13strip_id_or_class: promotion-tag 13strip_id_or_class: promotion-tag
14 14
15tidy: no 15tidy: no
16prune: no 16prune: no
17 17
18test_url: www.csmonitor.com/World/Middle-East/2011/1108/Imminent-Iran-nuclear-threat-A-timeline-of-warnings-since-1979/Earliest-warnings-1979-84 \ No newline at end of file 18test_url: www.csmonitor.com/World/Middle-East/2011/1108/Imminent-Iran-nuclear-threat-A-timeline-of-warnings-since-1979/Earliest-warnings-1979-84 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/csnbayarea.com.txt b/inc/3rdparty/site_config/standard/csnbayarea.com.txt
index 131a923b..1da60b4e 100644..100755
--- a/inc/3rdparty/site_config/standard/csnbayarea.com.txt
+++ b/inc/3rdparty/site_config/standard/csnbayarea.com.txt
@@ -1,7 +1,7 @@
1title: //div[@id='csn_blogST_headline']/h1 1title: //div[@id='csn_blogST_headline']/h1
2 2
3body: //div[@id='csn_blogST_main'] 3body: //div[@id='csn_blogST_main']
4strip_id_or_class: ipfootnotes 4strip_id_or_class: ipfootnotes
5strip: //div[@id='csn_blogST_main']/p[1]/img 5strip: //div[@id='csn_blogST_main']/p[1]/img
6strip: //div[@id='csn_blogST_sidebar'] 6strip: //div[@id='csn_blogST_sidebar']
7test_url: http://www.csnbayarea.com/blog/giants-talk/post/-?blog%2Fgiants-talk%2Fpost%2F-=&blockID=578902&feedID=5987 \ No newline at end of file 7test_url: http://www.csnbayarea.com/blog/giants-talk/post/-?blog%2Fgiants-talk%2Fpost%2F-=&blockID=578902&feedID=5987 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/csnphilly.com.txt b/inc/3rdparty/site_config/standard/csnphilly.com.txt
index 0df72c32..c14a934a 100644..100755
--- a/inc/3rdparty/site_config/standard/csnphilly.com.txt
+++ b/inc/3rdparty/site_config/standard/csnphilly.com.txt
@@ -1,22 +1,22 @@
1# author's name is not isolated as a tag.... ugh 1# author's name is not isolated as a tag.... ugh
2convert_double_br_tags: yes 2convert_double_br_tags: yes
3body: //csn_blogST_main 3body: //csn_blogST_main
4 4
5#junk above and around the article 5#junk above and around the article
6strip: /html/body/div[4]/div[3]/div/div/div/section/div/div/div/div/div/div 6strip: /html/body/div[4]/div[3]/div/div/div/section/div/div/div/div/div/div
7strip: /html/body/div[4]/header 7strip: /html/body/div[4]/header
8strip_id_or_class: article-right-sidebar 8strip_id_or_class: article-right-sidebar
9strip_id_or_class: rsn-gigya-sharebar-container 9strip_id_or_class: rsn-gigya-sharebar-container
10strip_id_or_class: article-bottom 10strip_id_or_class: article-bottom
11strip_id_or_class: hider 11strip_id_or_class: hider
12strip_id_or_class: footer 12strip_id_or_class: footer
13strip_id_or_class: masthead 13strip_id_or_class: masthead
14strip_id_or_class: block-menu-menu-rsn-login-or-register 14strip_id_or_class: block-menu-menu-rsn-login-or-register
15strip_id_or_class: block-menu-menu-header-links 15strip_id_or_class: block-menu-menu-header-links
16strip_id_or_class: block-rsn-follow-bar-follow-bar 16strip_id_or_class: block-rsn-follow-bar-follow-bar
17strip_id_or_class: block-rsn-weather-rsn-weather-scoreboard 17strip_id_or_class: block-rsn-weather-rsn-weather-scoreboard
18strip_id_or_class: logo 18strip_id_or_class: logo
19strip_id_or_class: element-invisible 19strip_id_or_class: element-invisible
20strip_id_or_class: site-name 20strip_id_or_class: site-name
21strip: //div[contains(@style, 'none')] 21strip: //div[contains(@style, 'none')]
22test_url: http://www.csnphilly.com/eagles/can-stoutland-save-danny-watkins-career \ No newline at end of file 22test_url: http://www.csnphilly.com/eagles/can-stoutland-save-danny-watkins-career \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/css-tricks.com.txt b/inc/3rdparty/site_config/standard/css-tricks.com.txt
new file mode 100755
index 00000000..3d8174aa
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/css-tricks.com.txt
@@ -0,0 +1,6 @@
1title://article[contains(@id, "post-")]/h1
2date://article[contains(@id, "post-")]/p[@class="time"]/time
3body://article[contains(@id, "post-")]
4strip://article[contains(@id, "post-")]/p[@class="time"]/time
5prune:yes
6test_url: http://css-tricks.com/off-canvas-menu-with-css-target/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cucharasonica.com.txt b/inc/3rdparty/site_config/standard/cucharasonica.com.txt
index e691fe83..e691fe83 100644..100755
--- a/inc/3rdparty/site_config/standard/cucharasonica.com.txt
+++ b/inc/3rdparty/site_config/standard/cucharasonica.com.txt
diff --git a/inc/3rdparty/site_config/standard/cw.com.tw.txt b/inc/3rdparty/site_config/standard/cw.com.tw.txt
new file mode 100755
index 00000000..6e3a91ee
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/cw.com.tw.txt
@@ -0,0 +1,14 @@
1author://span[contains(@class,'reporter')]
2
3date://span[contains(@class,'date')]
4
5body://div[contains(@class,'mainContaner')]
6
7strip://div[contains(@class,'mainHeaer')]
8strip://div[contains(@class,'keyW')]
9strip://div[contains(@class,'wonderful')]
10strip://div[contains(@class,'pages')]
11strip://div[contains(@class,'Topics TopicsW3')]
12
13next_page_link://li[@class='pageNext']/a[contains(.,'下一頁')]
14test_url: http://www.cw.com.tw/article/article.action?id=5032848 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/da.feedsportal.com.txt b/inc/3rdparty/site_config/standard/da.feedsportal.com.txt
index 4a00ef44..381446e5 100644..100755
--- a/inc/3rdparty/site_config/standard/da.feedsportal.com.txt
+++ b/inc/3rdparty/site_config/standard/da.feedsportal.com.txt
@@ -1,5 +1,5 @@
1single_page_link: //a 1single_page_link: //a
2tidy: no 2tidy: no
3prune: no 3prune: no
4 4
5test_url: da.feedsportal.com/c/585/f/413794/s/17037b5a/l/0L0Stelegraaf0Bnl0Cbinnenland0C10A2757860C0I0IKlacht0Itegen0Idr0B0IFrank0Iniet0I0Eontvankelijk0I0I0Bhtml0Dcid0Frss/ia1.htm \ No newline at end of file 5test_url: da.feedsportal.com/c/585/f/413794/s/17037b5a/l/0L0Stelegraaf0Bnl0Cbinnenland0C10A2757860C0I0IKlacht0Itegen0Idr0B0IFrank0Iniet0I0Eontvankelijk0I0I0Bhtml0Dcid0Frss/ia1.htm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dagogtid.no.txt b/inc/3rdparty/site_config/standard/dagogtid.no.txt
new file mode 100755
index 00000000..1531472c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/dagogtid.no.txt
@@ -0,0 +1,4 @@
1title: //span[@class = 'overskriftEkstrastor']
2author: //em/a
3
4test_url: http://dagogtid.no/nyhet.cfm?nyhetid=2414 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dailydot.com.txt b/inc/3rdparty/site_config/standard/dailydot.com.txt
index 61013993..978ed1ce 100644..100755
--- a/inc/3rdparty/site_config/standard/dailydot.com.txt
+++ b/inc/3rdparty/site_config/standard/dailydot.com.txt
@@ -1,4 +1,4 @@
1tidy: no 1tidy: no
2body: //article 2body: //article
3 3
4test_url: http://www.dailydot.com/entertainment/tumblr-christopher-price-topherchris/ \ No newline at end of file 4test_url: http://www.dailydot.com/entertainment/tumblr-christopher-price-topherchris/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dailykos.com.txt b/inc/3rdparty/site_config/standard/dailykos.com.txt
index 124675cb..6d4cb82a 100644..100755
--- a/inc/3rdparty/site_config/standard/dailykos.com.txt
+++ b/inc/3rdparty/site_config/standard/dailykos.com.txt
@@ -1,10 +1,10 @@
1body: //div[@id='article-1']//div[contains(@class, 'article-body')] 1body: //div[@id='article-1']//div[contains(@class, 'article-body')]
2title: //div[@class='meta']//a[@id='titleHref'] 2title: //div[@class='meta']//a[@id='titleHref']
3date: //div[@class='meta']//p[@class='date'] 3date: //div[@class='meta']//p[@class='date']
4 4
5strip_id_or_class: invisible 5strip_id_or_class: invisible
6strip_id_or_class: divider-doodle 6strip_id_or_class: divider-doodle
7 7
8prune: no 8prune: no
9 9
10test_url: http://www.dailykos.com/story/2012/01/26/1058790/-Newt-Gingrichs-campaign-admits-he-lied-during-debate-about-ABC-News-interview-with-hisex-wife \ No newline at end of file 10test_url: http://www.dailykos.com/story/2012/01/26/1058790/-Newt-Gingrich-s-campaign-admits-he-lied-during-debate-about-ABC-News-interview-with-his-ex-wife
diff --git a/inc/3rdparty/site_config/standard/dailymail.co.uk.txt b/inc/3rdparty/site_config/standard/dailymail.co.uk.txt
index c83dbdb0..cd29a4d4 100644..100755
--- a/inc/3rdparty/site_config/standard/dailymail.co.uk.txt
+++ b/inc/3rdparty/site_config/standard/dailymail.co.uk.txt
@@ -1,12 +1,12 @@
1body: //div[@id='js-article-text'] 1body: //div[@id='js-article-text']
2strip: //div[@class='explore-links'] 2strip: //div[@class='explore-links']
3strip: //div[@id='js-article-text']/br[position()=1] 3strip: //div[@id='js-article-text']/br[position()=1]
4strip_id_or_class: print-or-mail-links 4strip_id_or_class: print-or-mail-links
5strip_id_or_class: shareArticles 5strip_id_or_class: shareArticles
6strip_id_or_class: googleAds 6strip_id_or_class: googleAds
7strip_id_or_class: digg-button 7strip_id_or_class: digg-button
8strip_id_or_class: article-icon-links-container 8strip_id_or_class: article-icon-links-container
9strip_id_or_class: clickToEnlarge 9strip_id_or_class: clickToEnlarge
10tidy: no 10tidy: no
11 11
12test_url: http://www.dailymail.co.uk/news/article-1375423/Royal-wedding-Texan-billionaire-Joe-Albritton-invited-Prince-Charles.html \ No newline at end of file 12test_url: http://www.dailymail.co.uk/news/article-1375423/Royal-wedding-Texan-billionaire-Joe-Albritton-invited-Prince-Charles.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dailystar.com.lb.txt b/inc/3rdparty/site_config/standard/dailystar.com.lb.txt
new file mode 100755
index 00000000..3b153042
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/dailystar.com.lb.txt
@@ -0,0 +1,6 @@
1title: //div[@class='ec-blog-headline']
2body: //*[@id="divDetails"]
3date: //*[@id="ctl00_ContentPlaceHolder1_tdDate"]
4author: //*[@id="ctl00_ContentPlaceHolder1_anchorAuthor"]/a
5autodetect_next_page: no
6test_url: http://dailystar.com.lb/Opinion/Columnist/2012/Oct-10/190803-americas-new-modesty-in-the-mideast.ashx#axzz2928JP5xE \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/danleech.com.txt b/inc/3rdparty/site_config/standard/danleech.com.txt
new file mode 100755
index 00000000..1d4cec77
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/danleech.com.txt
@@ -0,0 +1,6 @@
1tidy: no
2prune: no
3date: //article//time[@pubdate]
4title: //article/h1//span[contains(@class, 'entry-title')]
5body: //article/div[contains(@class, 'entry-content')]
6test_url: http://danleech.com/post/36822126876/simple-icons \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dansdata.com.txt b/inc/3rdparty/site_config/standard/dansdata.com.txt
index 96a2bc41..60669480 100644..100755
--- a/inc/3rdparty/site_config/standard/dansdata.com.txt
+++ b/inc/3rdparty/site_config/standard/dansdata.com.txt
@@ -1,5 +1,5 @@
1autodetect_next_page: no 1autodetect_next_page: no
2tidy: no 2tidy: no
3prune: no 3prune: no
4body: //div[@class='NoOverflow'] 4body: //div[@class='NoOverflow']
5test_url: http://www.dansdata.com/gz129.htm \ No newline at end of file 5test_url: http://www.dansdata.com/gz129.htm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dantri.com.vn.txt b/inc/3rdparty/site_config/standard/dantri.com.vn.txt
new file mode 100755
index 00000000..f19fee7c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/dantri.com.vn.txt
@@ -0,0 +1,7 @@
1title: //h1[contains(@class, 'fon31 mt2')]
2body: //h2[contains(@class, 'fon33 mt1')] | //div[contains(@class, 'fon34 mt3')]
3
4prune: no
5
6test_url: http://dantri.com.vn/su-kien/chang-trai-mot-minh-dap-xe-vuot-450km-de-vieng-mo-dai-tuong-869763.htm
7test_url: http://dantri.com.vn/trangchu.rss \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/daringfireball.net.txt b/inc/3rdparty/site_config/standard/daringfireball.net.txt
index dca8ade7..251cc670 100644..100755
--- a/inc/3rdparty/site_config/standard/daringfireball.net.txt
+++ b/inc/3rdparty/site_config/standard/daringfireball.net.txt
@@ -1,7 +1,7 @@
1title: //div[@class="article"]/h1 1title: //div[@class="article"]/h1
2author: //div[@id="Sidebar"]/p/strong 2author: //div[@id="Sidebar"]/p/strong
3date: //h6[@class="dateline"] 3date: //h6[@class="dateline"]
4body: //div[@class="article"] 4body: //div[@class="article"]
5strip: //h6[@class="dateline"] 5strip: //h6[@class="dateline"]
6strip: //div[@class="article"]/h1 6strip: //div[@class="article"]/h1
7test_url: http://daringfireball.net/2011/10/apps_are_the_new_channels \ No newline at end of file 7test_url: http://daringfireball.net/2011/10/apps_are_the_new_channels \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/datanami.com.txt b/inc/3rdparty/site_config/standard/datanami.com.txt
index 3534002a..e9111a48 100644..100755
--- a/inc/3rdparty/site_config/standard/datanami.com.txt
+++ b/inc/3rdparty/site_config/standard/datanami.com.txt
@@ -1,4 +1,4 @@
1body: //div[@id="article"] 1body: //div[@id="article"]
2date: //p[@class="date"] 2date: //p[@class="date"]
3author: //p[@class="byline"] 3author: //p[@class="byline"]
4test_url: http://www.datanami.com/datanami/2011-12-07/new_path_for_sap:_in_memory_computing,_predictive_analysis_converge.html?featured=top \ No newline at end of file 4test_url: http://www.datanami.com/datanami/2011-12-07/new_path_for_sap:_in_memory_computing,_predictive_analysis_converge.html?featured=top \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dcurt.is.txt b/inc/3rdparty/site_config/standard/dcurt.is.txt
index 7d11c6e1..524c4bf1 100644..100755
--- a/inc/3rdparty/site_config/standard/dcurt.is.txt
+++ b/inc/3rdparty/site_config/standard/dcurt.is.txt
@@ -1,8 +1,8 @@
1title: (//article//h2)[1] 1title: (//article//h2)[1]
2body: //article[contains(@class, 'post')] 2body: //article[contains(@class, 'post')]
3date: //time[@id='top_time']/@datetime 3date: //time[@id='top_time']/@datetime
4 4
5prune: no 5prune: no
6tidy: no 6tidy: no
7 7
8test_url: http://dcurt.is/predictions-txt \ No newline at end of file 8test_url: http://dcurt.is/predictions-txt \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/defomicron.net.txt b/inc/3rdparty/site_config/standard/defomicron.net.txt
new file mode 100755
index 00000000..9f11258c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/defomicron.net.txt
@@ -0,0 +1,9 @@
1title: //article/h1
2author: //hgroup/h3/a
3date: //time
4body: //article
5strip: //aside
6footnotes: yes
7prune: no
8tidy: no
9test_url: https://defomicron.net/2012/09/ios-6/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/delong.typepad.com.txt b/inc/3rdparty/site_config/standard/delong.typepad.com.txt
index 84fd4f79..c4b922e4 100644..100755
--- a/inc/3rdparty/site_config/standard/delong.typepad.com.txt
+++ b/inc/3rdparty/site_config/standard/delong.typepad.com.txt
@@ -1,4 +1,4 @@
1strip_id_or_class: banner 1strip_id_or_class: banner
2strip_id_or_class: gamma 2strip_id_or_class: gamma
3strip_id_or_class: module-list 3strip_id_or_class: module-list
4test_url: http://delong.typepad.com/sdj/2011/02/in-which-suresh-naidu-visits-the-new-jerusalem.html \ No newline at end of file 4test_url: http://delong.typepad.com/sdj/2011/02/in-which-suresh-naidu-visits-the-new-jerusalem.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/democracynow.org.txt b/inc/3rdparty/site_config/standard/democracynow.org.txt
new file mode 100755
index 00000000..b0050b4f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/democracynow.org.txt
@@ -0,0 +1,5 @@
1body: //div[contains(@class, 'blog_body')]
2
3prune: no
4
5test_url: http://www.democracynow.org/blog/2014/1/9/the_fbi_the_nsa_and_a \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/derstandard.at.txt b/inc/3rdparty/site_config/standard/derstandard.at.txt
index 48722ebd..07db3521 100644..100755
--- a/inc/3rdparty/site_config/standard/derstandard.at.txt
+++ b/inc/3rdparty/site_config/standard/derstandard.at.txt
@@ -1,13 +1,13 @@
1title: //div[@id='artikelHeader']/h1 1title: //div[@id='artikelHeader']/h1
2author: //span[@class='author'] 2author: //span[@class='author']
3date: //span[@class='date'] 3date: //span[@class='date']
4body: //div[@class='copytext'] 4body: //div[@class='copytext']
5strip: //ul[@class='lookupLinksArtikel'] 5strip: //ul[@class='lookupLinksArtikel']
6 6
7strip: //div[@id='pageTop'] 7strip: //div[@id='pageTop']
8strip: //div[@id='toolbar'] 8strip: //div[@id='toolbar']
9strip: //div[@id='articleTools'] 9strip: //div[@id='articleTools']
10strip: //div[@id='weiterlesen'] 10strip: //div[@id='weiterlesen']
11strip: //div[@id='communityCanvas'] 11strip: //div[@id='communityCanvas']
12 12
13test_url: http://derstandard.at/1318726018343/Breitband-LTE-Was-bringt-die-neue-Mobilfunk-Generation \ No newline at end of file 13test_url: http://derstandard.at/1318726018343/Breitband-LTE-Was-bringt-die-neue-Mobilfunk-Generation \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/designtagebuch.de.txt b/inc/3rdparty/site_config/standard/designtagebuch.de.txt
index 6096db0b..9020847f 100644..100755
--- a/inc/3rdparty/site_config/standard/designtagebuch.de.txt
+++ b/inc/3rdparty/site_config/standard/designtagebuch.de.txt
@@ -1,11 +1,11 @@
1tidy: no 1tidy: no
2body: //div[@class='main'] 2body: //div[@class='main']
3 3
4author: substring-before(substring-after(//div[@class='meta-single'], 'erstellt von '), ' am') 4author: substring-before(substring-after(//div[@class='meta-single'], 'erstellt von '), ' am')
5date: substring-before(substring-after(//div[@class='meta-single'], ' am '), ' | ') 5date: substring-before(substring-after(//div[@class='meta-single'], ' am '), ' | ')
6 6
7strip_id_or_class: pagelink 7strip_id_or_class: pagelink
8strip_id_or_class: wp-polls 8strip_id_or_class: wp-polls
9 9
10next_page_link: //div[@class='post-page-next']/a 10next_page_link: //div[@class='post-page-next']/a
11test_url: http://www.designtagebuch.de/die-gefuehlte-lesbarkeit/ \ No newline at end of file 11test_url: http://www.designtagebuch.de/die-gefuehlte-lesbarkeit/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/desitvforum.net.txt b/inc/3rdparty/site_config/standard/desitvforum.net.txt
index a6dac5fd..efa85f76 100644..100755
--- a/inc/3rdparty/site_config/standard/desitvforum.net.txt
+++ b/inc/3rdparty/site_config/standard/desitvforum.net.txt
@@ -1,5 +1,5 @@
1body: (//blockquote[contains(@class, 'postcontent')])[1] 1body: (//blockquote[contains(@class, 'postcontent')])[1]
2body: (//div[starts-with(@id, 'post_message')])[1] 2body: (//div[starts-with(@id, 'post_message')])[1]
3 3
4prune: no 4prune: no
5tidy: no \ No newline at end of file 5tidy: no \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/details.com.txt b/inc/3rdparty/site_config/standard/details.com.txt
index 548cabad..d1d8a29a 100644..100755
--- a/inc/3rdparty/site_config/standard/details.com.txt
+++ b/inc/3rdparty/site_config/standard/details.com.txt
@@ -1,8 +1,8 @@
1title: //h1[@class="content-headline"] 1title: //h1[@class="content-headline"]
2body: //div[@class="headers-container"] | //div[@class="content-container"] 2body: //div[@class="headers-container"] | //div[@class="content-container"]
3prune: no 3prune: no
4tidy: no 4tidy: no
5 5
6single_page_link: //li[@class='utility-print']/a 6single_page_link: //li[@class='utility-print']/a
7 7
8test_url: http://www.details.com/culture-trends/critical-eye/201108/best-new-designers-innovations \ No newline at end of file 8test_url: http://www.details.com/culture-trends/critical-eye/201108/best-new-designers-innovations \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/developers.facebook.com.txt b/inc/3rdparty/site_config/standard/developers.facebook.com.txt
index 43a8f0a0..7609b72f 100644..100755
--- a/inc/3rdparty/site_config/standard/developers.facebook.com.txt
+++ b/inc/3rdparty/site_config/standard/developers.facebook.com.txt
@@ -1,3 +1,3 @@
1title: //div[@class="bodyText"]/h1 1title: //div[@class="bodyText"]/h1
2author: //div[@class="picture"]/a/img/@alt 2author: //div[@class="picture"]/a/img/@alt
3test_url: https://developers.facebook.com/blog/post/2012/03/22/developer-spotlight--foodspotting/ \ No newline at end of file 3test_url: https://developers.facebook.com/blog/post/2012/03/22/developer-spotlight--foodspotting/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/devlinsangle.blogspot.co.at.txt b/inc/3rdparty/site_config/standard/devlinsangle.blogspot.co.at.txt
index b960b37e..6f1d4e27 100644..100755
--- a/inc/3rdparty/site_config/standard/devlinsangle.blogspot.co.at.txt
+++ b/inc/3rdparty/site_config/standard/devlinsangle.blogspot.co.at.txt
@@ -1,6 +1,6 @@
1date: //h2[@class='date-header'] 1date: //h2[@class='date-header']
2body: //div[@class='post hentry'] 2body: //div[@class='post hentry']
3title: //h3 3title: //h3
4strip: //div[@class='post-footer'] 4strip: //div[@class='post-footer']
5 5
6test_url: http://devlinsangle.blogspot.co.at/2012/03/difference-between-teaching-and_01.html \ No newline at end of file 6test_url: http://devlinsangle.blogspot.co.at/2012/03/difference-between-teaching-and_01.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dictionary.reference.com.txt b/inc/3rdparty/site_config/standard/dictionary.reference.com.txt
index a1172024..f8b79c80 100644..100755
--- a/inc/3rdparty/site_config/standard/dictionary.reference.com.txt
+++ b/inc/3rdparty/site_config/standard/dictionary.reference.com.txt
@@ -1,8 +1,8 @@
1title: //h1[@id='query_h1'] 1title: //h1[@id='query_h1']
2body: //div[contains(@class, 'lunatext results_content')] 2body: //div[contains(@class, 'lunatext results_content')]
3strip_id_or_class: spl_unshd 3strip_id_or_class: spl_unshd
4#replace_string(<div class="dicTl">): <div class="dicTl">------------------<br /> 4#replace_string(<div class="dicTl">): <div class="dicTl">------------------<br />
5 5
6prune: no 6prune: no
7 7
8test_url: http://www.wired.com/cloudline/2011/10/meet-arms-cortex-a15-the-future-of-the-ipad-and-possibly-the-macbook-air/ \ No newline at end of file 8test_url: http://www.wired.com/cloudline/2011/10/meet-arms-cortex-a15-the-future-of-the-ipad-and-possibly-the-macbook-air/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/diepresse.com.txt b/inc/3rdparty/site_config/standard/diepresse.com.txt
index 7e825a91..ced189cc 100644..100755
--- a/inc/3rdparty/site_config/standard/diepresse.com.txt
+++ b/inc/3rdparty/site_config/standard/diepresse.com.txt
@@ -1,6 +1,6 @@
1title: //div[@class='article']/h1 1title: //div[@class='article']/h1
2date: substring-before(//p[@class='articletime'],'|') 2date: substring-before(//p[@class='articletime'],'|')
3body: //div[@id='articletext'] 3body: //div[@id='articletext']
4strip: //div[@class='inlineDiashow'] 4strip: //div[@class='inlineDiashow']
5 5
6test_url: http://diepresse.com/home/politik/aussenpolitik/701905/TibeterProteste_Nonne-verbrennt-sich-selbst?_vl_backlink=/home/politik/index.do \ No newline at end of file 6test_url: http://diepresse.com/home/politik/aussenpolitik/701905/TibeterProteste_Nonne-verbrennt-sich-selbst?_vl_backlink=/home/politik/index.do \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/digiphoto.techbang.com.txt b/inc/3rdparty/site_config/standard/digiphoto.techbang.com.txt
index 2d2ae2c2..80ce5ff3 100644..100755
--- a/inc/3rdparty/site_config/standard/digiphoto.techbang.com.txt
+++ b/inc/3rdparty/site_config/standard/digiphoto.techbang.com.txt
@@ -1,8 +1,8 @@
1# default parser works great 1# default parser works great
2# only add "author" and "next page link" reference 2# only add "author" and "next page link" reference
3# 2012-04-13 3# 2012-04-13
4 4
5next_page_link: //div[@class = 'pagination']/a[@class = 'next_page'] 5next_page_link: //div[@class = 'pagination']/a[@class = 'next_page']
6 6
7author: //*[@class = 'author metadata']/a 7author: //*[@class = 'author metadata']/a
8test_url: http://digiphoto.techbang.com/posts/2433--commercial-photography-communication-is-the-key-to-a-good-work \ No newline at end of file 8test_url: http://digiphoto.techbang.com/posts/2433--commercial-photography-communication-is-the-key-to-a-good-work \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/digital-photography-school.com.txt b/inc/3rdparty/site_config/standard/digital-photography-school.com.txt
index 37192ac0..18ce370e 100644..100755
--- a/inc/3rdparty/site_config/standard/digital-photography-school.com.txt
+++ b/inc/3rdparty/site_config/standard/digital-photography-school.com.txt
@@ -1,6 +1,6 @@
1title: //div[@class='post-title']/h1 1title: //div[@class='post-title']/h1
2author: //a[@href='#author'] 2author: //a[@href='#author']
3body: //div[@class='post-content'] 3body: //div[@class='post-content']
4strip: //div[@class='post-meta'] 4strip: //div[@class='post-meta']
5 5
6test_url: http://www.digital-photography-school.com/10-ways-to-develop-yourself-photographically \ No newline at end of file 6test_url: http://www.digital-photography-school.com/10-ways-to-develop-yourself-photographically \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/digitalspy.co.uk.txt b/inc/3rdparty/site_config/standard/digitalspy.co.uk.txt
index b21431d7..f48bdfdb 100644..100755
--- a/inc/3rdparty/site_config/standard/digitalspy.co.uk.txt
+++ b/inc/3rdparty/site_config/standard/digitalspy.co.uk.txt
@@ -1,5 +1,5 @@
1title: //div[@class="article_header"]/h1 1title: //div[@class="article_header"]/h1
2date: //div[@class="article_pub"]/span[@class="time"] 2date: //div[@class="article_pub"]/span[@class="time"]
3author: //div[@class="article_pub"]/span[@class="editors"]/a/text() 3author: //div[@class="article_pub"]/span[@class="editors"]/a/text()
4body: //div[@class="article_body clear_left"] 4body: //div[@class="article_body clear_left"]
5test_url: http://www.digitalspy.co.uk/movies/at-the-movies/a364066/top-5-super-bowl-movie-trailers-the-avengers-battleship-more.html \ No newline at end of file 5test_url: http://www.digitalspy.co.uk/movies/at-the-movies/a364066/top-5-super-bowl-movie-trailers-the-avengers-battleship-more.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dilbert.com.txt b/inc/3rdparty/site_config/standard/dilbert.com.txt
index 413e5506..85cc78e5 100644..100755
--- a/inc/3rdparty/site_config/standard/dilbert.com.txt
+++ b/inc/3rdparty/site_config/standard/dilbert.com.txt
@@ -1,8 +1,11 @@
1convert_double_br_tags: yes 1#title: substring(substring-after(//title, ':'), 1, string-length(substring-after(//title, ':')) - 10)
2 2title: //div[contains(@class, 'SB_Title')]//a
3title: substring(substring-after(//title, ':'), 1, string-length(substring-after(//title, ':')) - 10) 3body: //div[contains(@class, 'STR_Image')]
4body: //*[contains(@class, 'SB_Content')] 4body: //*[contains(@class, 'SB_Content')]
5author: string('Scott Adams') 5author: string('Scott Adams')
6date: //*[contains(@class, 'SB_Detail')]/text()[1] 6date: //*[contains(@class, 'SB_Detail')]/text()[1]
7 7
8test_url: http://dilbert.com/blog/entry/death_by_hypnosis_or_not/ \ No newline at end of file 8
9test_url: http://dilbert.com/blog/entry/death_by_hypnosis_or_not/
10test_url: http://dilbert.com/strips/comic/2013-10-22
11test_url: http://feed.dilbert.com/dilbert/daily_strip \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dinamalar.com.txt b/inc/3rdparty/site_config/standard/dinamalar.com.txt
index 9ef198c9..bc315cf1 100644..100755
--- a/inc/3rdparty/site_config/standard/dinamalar.com.txt
+++ b/inc/3rdparty/site_config/standard/dinamalar.com.txt
@@ -1,19 +1,19 @@
1title: //div[@class='newsdetbd'] 1title: //div[@class='newsdetbd']
2body: //div[@id='innerleft'] 2body: //div[@id='innerleft']
3#//p[@class = 'plnht'] 3#//p[@class = 'plnht']
4strip_image_src: /albums/ 4strip_image_src: /albums/
5strip: //div[@class='mrrt'] 5strip: //div[@class='mrrt']
6prune: yes 6prune: yes
7strip_id_or_class: 'fdpd' 7strip_id_or_class: 'fdpd'
8strip_id_or_class: 'epapt' 8strip_id_or_class: 'epapt'
9strip_id_or_class: 'newsrtwd' 9strip_id_or_class: 'newsrtwd'
10strip_id_or_class: 'padtp' 10strip_id_or_class: 'padtp'
11strip_id_or_class: 'newdt' 11strip_id_or_class: 'newdt'
12strip_id_or_class: 'newdlt' 12strip_id_or_class: 'newdlt'
13strip: //div[@id='selNotes'] 13strip: //div[@id='selNotes']
14strip_id_or_class: 'clsNotes' 14strip_id_or_class: 'clsNotes'
15strip_id_or_class: 'clear' 15strip_id_or_class: 'clear'
16strip_id_or_class: 'cmtwrap' 16strip_id_or_class: 'cmtwrap'
17strip_id_or_class: 'sess' 17strip_id_or_class: 'sess'
18strip_id_or_class: 'parents' 18strip_id_or_class: 'parents'
19test_url: http://www.dinamalar.com/News_Detail.asp?Id=295725 \ No newline at end of file 19test_url: http://www.dinamalar.com/News_Detail.asp?Id=295725 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dn.se.txt b/inc/3rdparty/site_config/standard/dn.se.txt
index 86bb3b8d..5283a0cd 100644..100755
--- a/inc/3rdparty/site_config/standard/dn.se.txt
+++ b/inc/3rdparty/site_config/standard/dn.se.txt
@@ -1,26 +1,28 @@
1# Since this element has class="clear", the Instapaper stylesheets (at least this text parser preview), will render it unreadable, with a 1px font size and line height. 1# Since this element has class="clear", the Instapaper stylesheets (at least this text parser preview), will render it unreadable, with a 1px font size and line height.
2 2
3body: //div[@id="article-content"] 3body: //div[@id="article-content"]
4 4
5 5
6# Ads 6# Ads
7strip_id_or_class: advert-space 7strip_id_or_class: advert-space
8 8
9# Read more, recommend, comments etc 9# Read more, recommend, comments etc
10strip_id_or_class: fbc-recommend 10strip_id_or_class: fbc-recommend
11strip_id_or_class: recommend 11strip_id_or_class: recommend
12strip_id_or_class: article-readers 12strip_id_or_class: article-readers
13strip_id_or_class: article-addons 13strip_id_or_class: article-addons
14strip_id_or_class: hook 14strip_id_or_class: hook
15strip_id_or_class: right 15strip_id_or_class: right
16strip_id_or_class: footer 16strip_id_or_class: footer
17 17
18# Other news 18# Other news
19strip: //div[@id="mirrors"] 19strip: //div[@id="mirrors"]
20 20
21# Author 21# Author
22author: //div[@id="byline"]/div/p/strong 22author: //div[@id="byline"]/div/p/strong
23 23
24# Date 24# Date
25date: substring(substring-after(//p[@class="published"], 'Publicerad '), 0, 11) 25date: substring(substring-after(//p[@class="published"], 'Publicerad '), 0, 11)
26test_url: http://www.dn.se/nyheter/varlden/landade-flygplan-mitt-i-villaomrade \ No newline at end of file 26
27test_url: http://www.dn.se/nyheter/varlden/landade-flygplan-mitt-i-villaomrade
28test_url: http://www.dn.se/m/rss/senaste-nytt \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dobreprogramy.pl.txt b/inc/3rdparty/site_config/standard/dobreprogramy.pl.txt
new file mode 100755
index 00000000..972293bc
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/dobreprogramy.pl.txt
@@ -0,0 +1,6 @@
1title: //*[@class="news"]//h1[@class="title"]
2author: //*[@class="news"]//*[@class="newsInfo"]/a
3date: substring-before(//*[@class="news"]//*[@class="newsInfo"]/text(), ',')
4body: //*[@class="news"]//*[@class="newsContent"]
5footnotes: no
6test_url: http://www.dobreprogramy.pl/Sony-konczy-z-Foldinghome-na-PS3,Aktualnosc,36899.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/doctac.com.txt b/inc/3rdparty/site_config/standard/doctac.com.txt
index 9f65ea9b..1c518a9b 100644..100755
--- a/inc/3rdparty/site_config/standard/doctac.com.txt
+++ b/inc/3rdparty/site_config/standard/doctac.com.txt
@@ -1,8 +1,8 @@
1strip: //*[(@id = "featured")] 1strip: //*[(@id = "featured")]
2 2
3author:substring-after( //div[@class='posttitle']/h2[@class='author'],'by ') 3author:substring-after( //div[@class='posttitle']/h2[@class='author'],'by ')
4 4
5date: concat(//div[@class='month'],' ',//div[@class='day']) 5date: concat(//div[@class='month'],' ',//div[@class='day'])
6 6
7#doctac doesn't provide a year, but month/day is better than nothing 7#doctac doesn't provide a year, but month/day is better than nothing
8test_url: http://www.doctac.com/mac/iphone/instapaper-update-app/ \ No newline at end of file 8test_url: http://www.doctac.com/mac/iphone/instapaper-update-app/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/domusweb.it.txt b/inc/3rdparty/site_config/standard/domusweb.it.txt
index 81683f02..20566ee3 100644..100755
--- a/inc/3rdparty/site_config/standard/domusweb.it.txt
+++ b/inc/3rdparty/site_config/standard/domusweb.it.txt
@@ -1,21 +1,21 @@
1# TODO: clean up the extra junk at the end of articles 1# TODO: clean up the extra junk at the end of articles
2 2
3# general text formatting 3# general text formatting
4prune: no 4prune: no
5convert_double_br_tags:yes 5convert_double_br_tags:yes
6 6
7# where to find the basic metadata 7# where to find the basic metadata
8author://a[@class='articleauthor'] 8author://a[@class='articleauthor']
9date://a[starts-with(@href,'/en/search/published/')] 9date://a[starts-with(@href,'/en/search/published/')]
10title:substring-before(//h2[@class='title'],'&mdash;') 10title:substring-before(//h2[@class='title'],'&mdash;')
11body://div[@id='maincontainer'] 11body://div[@id='maincontainer']
12 12
13dissolve://div[starts-with(@id,'commentableblock')] 13dissolve://div[starts-with(@id,'commentableblock')]
14 14
15# clean up the crap 15# clean up the crap
16strip://div[contains(@class,'domusnetwork')] 16strip://div[contains(@class,'domusnetwork')]
17strip://div[contains(@class,'relative_wrapper')] 17strip://div[contains(@class,'relative_wrapper')]
18 18
19strip://div[contains(@class,'captionsubimage')]/img[contains(@class,'arrow')] 19strip://div[contains(@class,'captionsubimage')]/img[contains(@class,'arrow')]
20wrap_in(em): //div[contains(@class,'captionsubimage')]/span 20wrap_in(em): //div[contains(@class,'captionsubimage')]/span
21test_url: http://www.domusweb.it/en/design/in-praise-of-lost-time/ \ No newline at end of file 21test_url: http://www.domusweb.it/en/design/in-praise-of-lost-time/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dou.ua.txt b/inc/3rdparty/site_config/standard/dou.ua.txt
index 22907c22..0f983112 100644..100755
--- a/inc/3rdparty/site_config/standard/dou.ua.txt
+++ b/inc/3rdparty/site_config/standard/dou.ua.txt
@@ -1,8 +1,8 @@
1title: //h1[@itemprop="name"] 1title: //h1[@itemprop="name"]
2 2
3author: //div[contains(@class, 'author')]//div[contains(@class, 'name')]/a 3author: //div[contains(@class, 'author')]//div[contains(@class, 'name')]/a
4 4
5date: //div[contains(@class, 'b-info')]//span[contains(@class, 'date')] 5date: //div[contains(@class, 'b-info')]//span[contains(@class, 'date')]
6 6
7body: //div[contains(@class, 'b-typo')] 7body: //div[contains(@class, 'b-typo')]
8test_url: http://dou.ua/lenta/interviews/andrej-havryuchenko/?from=sb_mostcomm \ No newline at end of file 8test_url: http://dou.ua/lenta/interviews/andrej-havryuchenko/?from=sb_mostcomm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/douban.com.txt b/inc/3rdparty/site_config/standard/douban.com.txt
index 99d7e5dc..d72a2223 100644..100755
--- a/inc/3rdparty/site_config/standard/douban.com.txt
+++ b/inc/3rdparty/site_config/standard/douban.com.txt
@@ -1,21 +1,21 @@
1# This filter is tested on: 1# This filter is tested on:
2# http://www.douban.com/note/215003067/ 2# http://www.douban.com/note/215003067/
3# http://www.douban.com/note/213540049/ 3# http://www.douban.com/note/213540049/
4# http://www.douban.com/group/topic/31140104/ 4# http://www.douban.com/group/topic/31140104/
5 5
6title: //div[@class='note-header']/h1 6title: //div[@class='note-header']/h1
7title: //div[@id='content']/h1 7title: //div[@id='content']/h1
8 8
9author: //div[@class='info']/ul/li/a 9author: //div[@class='info']/ul/li/a
10author: //h3/span/a 10author: //h3/span/a
11 11
12date://div[@class='note-header']/div/span 12date://div[@class='note-header']/div/span
13date://h3/span[contains(@class, 'color-green')] 13date://h3/span[contains(@class, 'color-green')]
14 14
15body://div[contains(@class, 'note')] 15body://div[contains(@class, 'note')]
16body://div[contains(@class, 'topic-content')] 16body://div[contains(@class, 'topic-content')]
17 17
18strip://h3 18strip://h3
19 19
20convert_double_br_tags: yes 20convert_double_br_tags: yes
21test_url: http://www.douban.com/group/topic/31140104/ \ No newline at end of file 21test_url: http://www.douban.com/group/topic/31140104/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dpreview.com.txt b/inc/3rdparty/site_config/standard/dpreview.com.txt
index 30179a3b..001c810f 100644..100755
--- a/inc/3rdparty/site_config/standard/dpreview.com.txt
+++ b/inc/3rdparty/site_config/standard/dpreview.com.txt
@@ -1,9 +1,9 @@
1# next_page_link for product review 1# next_page_link for product review
2# example: http://www.dpreview.com/reviews/lytro/ 2# example: http://www.dpreview.com/reviews/lytro/
3next_page_link: //img[@alt = 'Next page']/../@href 3next_page_link: //img[@alt = 'Next page']/../@href
4 4
5# next_page_link for other articles 5# next_page_link for other articles
6# example: http://www.dpreview.com/articles/6126592906/first-impressions-using-the-fujifilm-x-pro1 6# example: http://www.dpreview.com/articles/6126592906/first-impressions-using-the-fujifilm-x-pro1
7next_page_link: //*[@class = 'pages']/*/td[@class = 'next enabled']/a 7next_page_link: //*[@class = 'pages']/*/td[@class = 'next enabled']/a
8single_page_link: //a[contains(.,'Print view')] 8single_page_link: //a[contains(.,'Print view')]
9test_url: http://www.dpreview.com/articles/6126592906/first-impressions-using-the-fujifilm-x-pro1 \ No newline at end of file 9test_url: http://www.dpreview.com/articles/6126592906/first-impressions-using-the-fujifilm-x-pro1 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dr.dk.txt b/inc/3rdparty/site_config/standard/dr.dk.txt
index 7e46b0d6..d8ec1acf 100644..100755
--- a/inc/3rdparty/site_config/standard/dr.dk.txt
+++ b/inc/3rdparty/site_config/standard/dr.dk.txt
@@ -1,9 +1,9 @@
1title: //meta[@property='og:title']/@content 1title: //meta[@property='og:title']/@content
2author: //div[@class='articleFunctions']//a 2author: //div[@class='articleFunctions']//a
3date: //meta[@name='pubdate']/@content 3date: //meta[@name='pubdate']/@content
4 4
5# Can you strip elements from the body only? It is required here (`//div[@class='articleContent']/p` breaks for some reason) 5# Can you strip elements from the body only? It is required here (`//div[@class='articleContent']/p` breaks for some reason)
6body: //div[@class='articleContent'] 6body: //div[@class='articleContent']
7 7
8tidy: no 8tidy: no
9test_url: http://www.dr.dk/Nyheder/Udland/2011/10/24/150115.htm \ No newline at end of file 9test_url: http://www.dr.dk/Nyheder/Udland/2011/10/24/150115.htm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dramasonline.com.txt b/inc/3rdparty/site_config/standard/dramasonline.com.txt
index 659d0443..4898353b 100644..100755
--- a/inc/3rdparty/site_config/standard/dramasonline.com.txt
+++ b/inc/3rdparty/site_config/standard/dramasonline.com.txt
@@ -1,10 +1,10 @@
1body: //div[@class='postext'] 1body: //div[@class='postext']
2 2
3strip_id_or_class: ratingblock 3strip_id_or_class: ratingblock
4strip_id_or_class: hreview-aggregate 4strip_id_or_class: hreview-aggregate
5strip: //div[contains(@style, 'display: none;')] 5strip: //div[contains(@style, 'display: none;')]
6 6
7tidy: no 7tidy: no
8prune: no 8prune: no
9 9
10test_url: http://www.dramasonline.com/jago-pakistan-jago-7th-december-2012-ali-gul-pir/ \ No newline at end of file 10test_url: http://www.dramasonline.com/jago-pakistan-jago-7th-december-2012-ali-gul-pir/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/drdobbs.com.txt b/inc/3rdparty/site_config/standard/drdobbs.com.txt
index b1a9db6f..b1a9db6f 100644..100755
--- a/inc/3rdparty/site_config/standard/drdobbs.com.txt
+++ b/inc/3rdparty/site_config/standard/drdobbs.com.txt
diff --git a/inc/3rdparty/site_config/standard/drive2.ru.txt b/inc/3rdparty/site_config/standard/drive2.ru.txt
index 6125ce79..d500cb81 100644..100755
--- a/inc/3rdparty/site_config/standard/drive2.ru.txt
+++ b/inc/3rdparty/site_config/standard/drive2.ru.txt
@@ -1,12 +1,12 @@
1body: //div[@class = "description"] 1body: //div[@class = "description"]
2body: //div[@id = "post"] 2body: //div[@id = "post"]
3 3
4strip_id_or_class: vcard 4strip_id_or_class: vcard
5strip_id_or_class: journallist 5strip_id_or_class: journallist
6strip_id_or_class: infobox 6strip_id_or_class: infobox
7strip_id_or_class: terms 7strip_id_or_class: terms
8strip_id_or_class: replieslist 8strip_id_or_class: replieslist
9strip_id_or_class: communityside 9strip_id_or_class: communityside
10 10
11 11
12test_url: http://www.drive2.ru/cars/audi/a6/a6_c5/elysey/journal/288230376151836654/ \ No newline at end of file 12test_url: http://www.drive2.ru/cars/audi/a6/a6_c5/elysey/journal/288230376151836654/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dropbox.com.txt b/inc/3rdparty/site_config/standard/dropbox.com.txt
new file mode 100755
index 00000000..92ae31b2
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/dropbox.com.txt
@@ -0,0 +1 @@
single_page_link: //a[@id='download_button_link'] \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/drupal.org.txt b/inc/3rdparty/site_config/standard/drupal.org.txt
index ffb77e4d..2da3eb1c 100644..100755
--- a/inc/3rdparty/site_config/standard/drupal.org.txt
+++ b/inc/3rdparty/site_config/standard/drupal.org.txt
@@ -1,8 +1,8 @@
1title://h1 1title://h1
2author://div[@class="submitted"]/a 2author://div[@class="submitted"]/a
3date:substring-after(//div[@class="meta"],'modified: ') 3date:substring-after(//div[@class="meta"],'modified: ')
4date:substring-after(//div[@class="submitted"],'on ') 4date:substring-after(//div[@class="submitted"],'on ')
5body://div[@class="node-content"] 5body://div[@class="node-content"]
6strip://div[@class="meta"] 6strip://div[@class="meta"]
7strip_id_or_class:book-navigation 7strip_id_or_class:book-navigation
8test_url: http://drupal.org/node/1327354 \ No newline at end of file 8test_url: http://drupal.org/node/1327354 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dukebasketballreport.com.txt b/inc/3rdparty/site_config/standard/dukebasketballreport.com.txt
index 418c9f62..2978797e 100644..100755
--- a/inc/3rdparty/site_config/standard/dukebasketballreport.com.txt
+++ b/inc/3rdparty/site_config/standard/dukebasketballreport.com.txt
@@ -1,11 +1,11 @@
1title: //h2/a 1title: //h2/a
2author: substring-before(substring-after(//span[@class='byline'], 'by'), ',') 2author: substring-before(substring-after(//span[@class='byline'], 'by'), ',')
3date: substring-before(substring-after(//span[@class='byline'], ','), '|') 3date: substring-before(substring-after(//span[@class='byline'], ','), '|')
4body: //div[@class='entry'] 4body: //div[@class='entry']
5 5
6 6
7# strip out auction stuff at the end of posts 7# strip out auction stuff at the end of posts
8# tidy kills the center tag, so disable it 8# tidy kills the center tag, so disable it
9tidy: no 9tidy: no
10strip: //center//table 10strip: //center//table
11test_url: http://www.dukebasketballreport.com/articles/?p=42660 \ No newline at end of file 11test_url: http://www.dukebasketballreport.com/articles/?p=42660 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dushumashang.com.txt b/inc/3rdparty/site_config/standard/dushumashang.com.txt
new file mode 100755
index 00000000..6a50a77e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/dushumashang.com.txt
@@ -0,0 +1,17 @@
1# This filter is tested on:
2# http://www.dushumashang.com/2389
3# http://www.dushumashang.com/2415
4# http://www.dushumashang.com/2355
5
6body://div[@class='main_content']
7#body://section[@class='entry_content fl']
8title://h2
9author://span[@class='article_author']/a
10date://span[@class='pub_date']/time
11
12strip://span[@class='article_author']
13strip://span[@class='pub_date']
14strip://div[@class='page_turn']
15strip://span[@class='source_link']/em
16wrap_in(strong)://span[@class='source_link']/a
17test_url: http://www.dushumashang.com/2355 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dvice.com.txt b/inc/3rdparty/site_config/standard/dvice.com.txt
index c8163680..1a1990ee 100644..100755
--- a/inc/3rdparty/site_config/standard/dvice.com.txt
+++ b/inc/3rdparty/site_config/standard/dvice.com.txt
@@ -1,9 +1,9 @@
1strip://*[@id = 'blog_top_stories'] 1strip://*[@id = 'blog_top_stories']
2strip://*[@id = 'takeover_off'] 2strip://*[@id = 'takeover_off']
3strip://*[@id = 'right_gray_box'] 3strip://*[@id = 'right_gray_box']
4strip://*[@class = 'blog_topics'] 4strip://*[@class = 'blog_topics']
5strip://*[@class = 'section_titles'] 5strip://*[@class = 'section_titles']
6 6
7author://div[@class = 'post_author_info']/a 7author://div[@class = 'post_author_info']/a
8date://div[@class = 'post_date_info'] 8date://div[@class = 'post_date_info']
9test_url: http://dvice.com/archives/2012/05/is-nfc-and-smar.php \ No newline at end of file 9test_url: http://dvice.com/archives/2012/05/is-nfc-and-smar.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/eamesinerudition.com.txt b/inc/3rdparty/site_config/standard/eamesinerudition.com.txt
index 908a1b51..89a68bcd 100644..100755
--- a/inc/3rdparty/site_config/standard/eamesinerudition.com.txt
+++ b/inc/3rdparty/site_config/standard/eamesinerudition.com.txt
@@ -1,8 +1,8 @@
1title: //div [@class="post contain"]/h1 1title: //div [@class="post contain"]/h1
2strip: //div [@class="post contain"]/h1 2strip: //div [@class="post contain"]/h1
3body: //div [@class="post contain"] 3body: //div [@class="post contain"]
4author: substring-before(//title, ':') 4author: substring-before(//title, ':')
5author: substring-before(//title, ' ') 5author: substring-before(//title, ' ')
6 6
7 7
8test_url: http://eamesinerudition.com/2012/03/hospital-numbers-are-bad-for-you \ No newline at end of file 8test_url: http://eamesinerudition.com/2012/03/hospital-numbers-are-bad-for-you \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/eandt.theiet.org.txt b/inc/3rdparty/site_config/standard/eandt.theiet.org.txt
index c4c38f25..ba9d312d 100644..100755
--- a/inc/3rdparty/site_config/standard/eandt.theiet.org.txt
+++ b/inc/3rdparty/site_config/standard/eandt.theiet.org.txt
@@ -1,8 +1,8 @@
1title: //h1 1title: //h1
2date: //div[@class="et_dateUnderTitle"] 2date: //div[@class="et_dateUnderTitle"]
3author: substring-after(//div[@class="et_authorUnderTitle"], 'By ') 3author: substring-after(//div[@class="et_authorUnderTitle"], 'By ')
4body: //div[@id="et_leftCol640split"] 4body: //div[@id="et_leftCol640split"]
5 5
6strip: //div[@id="et_leftCol640splitRight"] 6strip: //div[@id="et_leftCol640splitRight"]
7strip: //div[@class="et_light_greybgboxlower"] 7strip: //div[@class="et_light_greybgboxlower"]
8test_url: http://eandt.theiet.org/magazine/2011/12/this-festive-waste.cfm \ No newline at end of file 8test_url: http://eandt.theiet.org/magazine/2011/12/this-festive-waste.cfm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/eastoftheweb.com.txt b/inc/3rdparty/site_config/standard/eastoftheweb.com.txt
index d762091c..36708da3 100644..100755
--- a/inc/3rdparty/site_config/standard/eastoftheweb.com.txt
+++ b/inc/3rdparty/site_config/standard/eastoftheweb.com.txt
@@ -1,18 +1,18 @@
1title: //div[@class='title_text'] 1title: //div[@class='title_text']
2 2
3author: //div[@class='author_text'] 3author: //div[@class='author_text']
4 4
5body: //div[@class='story_text']/.. 5body: //div[@class='story_text']/..
6 6
7strip: //b 7strip: //b
8 8
9strip_id_or_class: back_to_top 9strip_id_or_class: back_to_top
10strip_id_or_class: author_text 10strip_id_or_class: author_text
11strip_id_or_class: title_text 11strip_id_or_class: title_text
12 12
13wrap_in(center): //a 13wrap_in(center): //a
14 14
15dissolve: //a 15dissolve: //a
16 16
17footnotes: no 17footnotes: no
18test_url: http://www.eastoftheweb.com/short-stories/UBooks/Horl.shtml \ No newline at end of file 18test_url: http://www.eastoftheweb.com/short-stories/UBooks/Horl.shtml \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ebay.com.txt b/inc/3rdparty/site_config/standard/ebay.com.txt
index 5fa18ff3..f17e1f72 100644..100755
--- a/inc/3rdparty/site_config/standard/ebay.com.txt
+++ b/inc/3rdparty/site_config/standard/ebay.com.txt
@@ -1,5 +1,5 @@
1body: //h1[@class='it-ttl'] | //div[@id='mainImgHldr'] | //span[@id='prcIsum'] 1body: //h1[@class='it-ttl'] | //div[@id='mainImgHldr'] | //span[@id='prcIsum']
2 2
3strip_image_src: imgLoading_30x30.gif 3strip_image_src: imgLoading_30x30.gif
4 4
5test_url: http://www.ebay.com/itm/BRAND-NEW-FM-Transmitter-Ca-r-Charger-iPhone-4S-4-4G-3GS-3G-2G-iPod-Touch-/190657497204 \ No newline at end of file 5test_url: http://www.ebay.com/itm/BRAND-NEW-FM-Transmitter-Ca-r-Charger-iPhone-4S-4-4G-3GS-3G-2G-iPod-Touch-/190657497204 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ecetia.com.txt b/inc/3rdparty/site_config/standard/ecetia.com.txt
index d67e9103..d67e9103 100644..100755
--- a/inc/3rdparty/site_config/standard/ecetia.com.txt
+++ b/inc/3rdparty/site_config/standard/ecetia.com.txt
diff --git a/inc/3rdparty/site_config/standard/econlog.econlib.org.txt b/inc/3rdparty/site_config/standard/econlog.econlib.org.txt
index ebafc197..729affd4 100644..100755
--- a/inc/3rdparty/site_config/standard/econlog.econlib.org.txt
+++ b/inc/3rdparty/site_config/standard/econlog.econlib.org.txt
@@ -1,6 +1,6 @@
1title: //h1[@class="title"] 1title: //h1[@class="title"]
2author: //div[@class="hosted"]/a 2author: //div[@class="hosted"]/a
3date: substring-after(//div[@class="dateline"]/text(), '|') 3date: substring-after(//div[@class="dateline"]/text(), '|')
4 4
5strip: //a[@class="top" and @href="#"] 5strip: //a[@class="top" and @href="#"]
6test_url: http://econlog.econlib.org/archives/2012/04/blinder_on_heal.html \ No newline at end of file 6test_url: http://econlog.econlib.org/archives/2012/04/blinder_on_heal.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/economia.estadao.com.br.txt b/inc/3rdparty/site_config/standard/economia.estadao.com.br.txt
index b59f554e..936a191d 100644..100755
--- a/inc/3rdparty/site_config/standard/economia.estadao.com.br.txt
+++ b/inc/3rdparty/site_config/standard/economia.estadao.com.br.txt
@@ -1,7 +1,7 @@
1date: //div[@class="bb-md-noticia-fecha"] 1date: //div[@class="bb-md-noticia-fecha"]
2body: //div[@class="corpo"] 2body: //div[@class="corpo"]
3dissolve: //div[@class="bb-md-noticia-extras"] 3dissolve: //div[@class="bb-md-noticia-extras"]
4strip: //strong 4strip: //strong
5strip_id_or_class: bb-md-noticia-foto-autor 5strip_id_or_class: bb-md-noticia-foto-autor
6strip_id_or_class: bb-md-noticia-foto-bajada 6strip_id_or_class: bb-md-noticia-foto-bajada
7test_url: http://economia.estadao.com.br/noticias/economia,cmn-aprova-r-67-bi-em-credito-para-20-setores-da-economia,118501,0.htm \ No newline at end of file 7test_url: http://economia.estadao.com.br/noticias/economia,cmn-aprova-r-67-bi-em-credito-para-20-setores-da-economia,118501,0.htm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/economist.com.txt b/inc/3rdparty/site_config/standard/economist.com.txt
index 71dd62f5..16c9ed64 100644..100755
--- a/inc/3rdparty/site_config/standard/economist.com.txt
+++ b/inc/3rdparty/site_config/standard/economist.com.txt
@@ -1,10 +1,8 @@
1title: //div[@class='ec-blog-headline'] 1body: //div[@class='main-content']
2body: //div[@class='ec-blog-body'] 2date: //time[@class='date-created']
3body: //div[@class='ec-article-content clear'] 3strip: //aside
4strip: //div[@class='related-items'] 4prune: no
5date: substring-before(//p[@class='ec-article-info'], '|') 5
6prune: no 6autodetect_next_page: no
7 7
8autodetect_next_page: no
9
10test_url: http://www.economist.com/node/21528429 \ No newline at end of file 8test_url: http://www.economist.com/node/21528429 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/edge-online.com.txt b/inc/3rdparty/site_config/standard/edge-online.com.txt
index 461d909c..cf585815 100644..100755
--- a/inc/3rdparty/site_config/standard/edge-online.com.txt
+++ b/inc/3rdparty/site_config/standard/edge-online.com.txt
@@ -1,13 +1,13 @@
1title: //meta[@property="og:title"]/@content 1title: //meta[@property="og:title"]/@content
2body: //h2[@class='strapline'] | //article[contains(@class, 'node-article')] 2body: //h2[@class='strapline'] | //article[contains(@class, 'node-article')]
3date: //time[@pubdate]/@datetime 3date: //time[@pubdate]/@datetime
4author: //span[@class='author-name'] 4author: //span[@class='author-name']
5prune: no 5prune: no
6tidy: no 6tidy: no
7strip: //footer 7strip: //footer
8 8
9replace_string(<p>[ pagebreak ]</p>): <!-- pagebreak --> 9replace_string(<p>[ pagebreak ]</p>): <!-- pagebreak -->
10 10
11single_page_link: //a[contains(@href, '?page=show')] 11single_page_link: //a[contains(@href, '?page=show')]
12 12
13test_url: http://www.edge-online.com/features/telling-modern-warfares-story \ No newline at end of file 13test_url: http://www.edge-online.com/features/telling-modern-warfares-story \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/edge.org.txt b/inc/3rdparty/site_config/standard/edge.org.txt
index 9980000d..95805f6e 100644..100755
--- a/inc/3rdparty/site_config/standard/edge.org.txt
+++ b/inc/3rdparty/site_config/standard/edge.org.txt
@@ -1,5 +1,5 @@
1title: //div[@class='HomeLeftPannel IMGCTRL']/h2 1title: //div[@class='HomeLeftPannel IMGCTRL']/h2
2body: //div[@class='HomeLeftPannel IMGCTRL']//div[@class='Brownalink' or @id='shortdesc'] 2body: //div[@class='HomeLeftPannel IMGCTRL']//div[@class='Brownalink' or @id='shortdesc']
3tidy: no 3tidy: no
4 4
5test_url: http://edge.org/print/conversation.php?cid=the-argumentative-theory \ No newline at end of file 5test_url: http://edge.org/print/conversation.php?cid=the-argumentative-theory \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/edition.channel5belize.com.txt b/inc/3rdparty/site_config/standard/edition.channel5belize.com.txt
new file mode 100755
index 00000000..6d5f170a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/edition.channel5belize.com.txt
@@ -0,0 +1,9 @@
1title: //div[@id='singlePage']//h2
2body: //div[@id='singlePage']//div[contains(@class, 'post')]
3strip: //a[@title='Email This Story']
4strip_id_or_class: sociable
5
6prune: no
7
8test_url: http://edition.channel5belize.com/archives/86016
9test_url: http://edition.channel5belize.com/feed \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/edition.cnn.com.txt b/inc/3rdparty/site_config/standard/edition.cnn.com.txt
index dc8ebe14..6fc82d24 100644..100755
--- a/inc/3rdparty/site_config/standard/edition.cnn.com.txt
+++ b/inc/3rdparty/site_config/standard/edition.cnn.com.txt
@@ -1,9 +1,18 @@
1body: //div[@id='cnnContentContainer']//div[contains(@class, 'cnn_strycntntlft')] 1body: //div[@id='cnnContentContainer']//div[contains(@class, 'cnn_strycntntlft')]
2strip: //div[@id='cnnCVP2'] 2strip: //a[starts-with(@name, 'em')]
3strip_id_or_class: cnn_strylftcexpbx 3strip: //div[@id='cnnCVP2']
4strip_id_or_class: cnn_strylctcqrelt 4strip_id_or_class: cnn_strylftcexpbx
5strip_id_or_class: cnn_strybtntoolsbttm 5strip_id_or_class: cnn_strylctcqrelt
6strip_id_or_class: cnn_stryftsbttm 6strip_id_or_class: cnn_strybtntoolsbttm
7strip_id_or_class: cnn_strybtmcntnt 7strip_id_or_class: cnn_stryftsbttm
8strip_id_or_class: cnn_strybtmcntnt
9strip_id_or_class: cnn_stryshrwdgtbtm
10strip_id_or_class: cnnGalleryContainer
11strip_id_or_class: cnn_strycrcntr
12strip_id_or_class: cnn_html_slideshow
8prune: no 13prune: no
9test_url: http://edition.cnn.com/2011/US/04/29/severe.weather/index.html \ No newline at end of file 14
15test_url: http://edition.cnn.com/2011/US/04/29/severe.weather/index.html
16test_url: http://edition.cnn.com/2013/08/15/world/africa/nigeria-boko-haram-commander-killed/index.html?eref=edition
17test_url: http://rss.cnn.com/rss/edition.rss
18test_url: http://rss.cnn.com/rss/edition_technology.rss \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/eetimes.com.txt b/inc/3rdparty/site_config/standard/eetimes.com.txt
new file mode 100755
index 00000000..300db307
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/eetimes.com.txt
@@ -0,0 +1,8 @@
1body: //div[contains(@class, 'grayshowlinks')]
2
3next_page_link: //div[@id='sitecontentcol']//a[.='Next >']
4# Doesn't work (site doesn't always load full content in print view)
5#single_page_link: //div[@id='sitecontentcol']//a[contains(@href, 'print=yes')]
6
7test_url: http://www.eetimes.com/document.asp?doc_id=1319966&
8test_url: http://www.eetimes.com/rss_simple.asp \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ekultura.hu.txt b/inc/3rdparty/site_config/standard/ekultura.hu.txt
index 59f6a711..3756027c 100644..100755
--- a/inc/3rdparty/site_config/standard/ekultura.hu.txt
+++ b/inc/3rdparty/site_config/standard/ekultura.hu.txt
@@ -1,11 +1,11 @@
1title: //h1[@class='style6 nevek'] 1title: //h1[@class='style6 nevek']
2 2
3body: //div[@class='bal3'] 3body: //div[@class='bal3']
4 4
5 5
6prune: yes 6prune: yes
7 7
8tidy: yes 8tidy: yes
9convert_double_br_tags: yes 9convert_double_br_tags: yes
10 10
11test_url: http://ekultura.hu/olvasnivalo/egyeb/cikk/2010-12-15/interju-galvolgyi-judit-2010-december \ No newline at end of file 11test_url: http://ekultura.hu/olvasnivalo/egyeb/cikk/2010-12-15/interju-galvolgyi-judit-2010-december \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/elance.com.txt b/inc/3rdparty/site_config/standard/elance.com.txt
index 52ffe2d0..d4b0a9b8 100644..100755
--- a/inc/3rdparty/site_config/standard/elance.com.txt
+++ b/inc/3rdparty/site_config/standard/elance.com.txt
@@ -1,3 +1,3 @@
1body: //div[@id='jobDesc-bd']/p 1body: //div[@id='jobDesc-bd']/p
2 2
3test_url: http://www.elance.com/j/xml-technical-intergration/23687172/ \ No newline at end of file 3test_url: http://www.elance.com/j/xml-technical-intergration/23687172/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/elderscrollsonline.com.txt b/inc/3rdparty/site_config/standard/elderscrollsonline.com.txt
new file mode 100755
index 00000000..fa3892c6
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/elderscrollsonline.com.txt
@@ -0,0 +1,22 @@
1date: //time
2title: //h1[contains(@class, "alpha")]
3body: //article[contains(@class, "news-post")]
4
5# fix dates - dates as they are won't work as strtotime doesn't understand format (03.28.2013)
6replace_string(<time class="gamma">01.): <time class="gamma">January.
7replace_string(<time class="gamma">02.): <time class="gamma">February.
8replace_string(<time class="gamma">03.): <time class="gamma">March.
9replace_string(<time class="gamma">04.): <time class="gamma">April.
10replace_string(<time class="gamma">05.): <time class="gamma">May.
11replace_string(<time class="gamma">06.): <time class="gamma">June.
12replace_string(<time class="gamma">07.): <time class="gamma">July.
13replace_string(<time class="gamma">08.): <time class="gamma">August.
14replace_string(<time class="gamma">09.): <time class="gamma">September.
15replace_string(<time class="gamma">10.): <time class="gamma">October.
16replace_string(<time class="gamma">11.): <time class="gamma">November.
17replace_string(<time class="gamma">12.): <time class="gamma">December.
18
19prune: no
20
21test_url: http://elderscrollsonline.com/en/rss
22test_url: http://elderscrollsonline.com/en/news/post/2013/03/27/developer-question-of-the-week-17 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/elektroniknet.de.txt b/inc/3rdparty/site_config/standard/elektroniknet.de.txt
index 07664719..56fba5ff 100644..100755
--- a/inc/3rdparty/site_config/standard/elektroniknet.de.txt
+++ b/inc/3rdparty/site_config/standard/elektroniknet.de.txt
@@ -1,27 +1,27 @@
1title: //h1 1title: //h1
2date: //div[@class='datum'] 2date: //div[@class='datum']
3single_page_link: //a[contains(@href, '?type=99')] 3single_page_link: //a[contains(@href, '?type=99')]
4 4
5# this hack preserves the intro text, because it would be stripped otherwise if the title is set to //h1 5# this hack preserves the intro text, because it would be stripped otherwise if the title is set to //h1
6dissolve: //div[@class='artikelMeldung'] 6dissolve: //div[@class='artikelMeldung']
7 7
8 8
9strip_id_or_class: anzeige 9strip_id_or_class: anzeige
10strip_id_or_class: top_page_navigation 10strip_id_or_class: top_page_navigation
11strip_id_or_class: cr_image_container 11strip_id_or_class: cr_image_container
12strip_id_or_class: cr_image_reference 12strip_id_or_class: cr_image_reference
13strip_id_or_class: cr_image_icon 13strip_id_or_class: cr_image_icon
14strip_id_or_class: _close_txt 14strip_id_or_class: _close_txt
15strip_id_or_class: _close_ico 15strip_id_or_class: _close_ico
16strip_id_or_class: clearer 16strip_id_or_class: clearer
17 17
18strip://h1 18strip://h1
19strip://h6 19strip://h6
20strip://div[contains(@id, 'plista')] 20strip://div[contains(@id, 'plista')]
21strip://img[contains(@id,'tiny')] 21strip://img[contains(@id,'tiny')]
22strip://img[@class='cr_image'] 22strip://img[@class='cr_image']
23 23
24# strip url at the top 24# strip url at the top
25strip: //p[@style='font-size: 10px;'] 25strip: //p[@style='font-size: 10px;']
26 26
27test_url: http://www.elektroniknet.de/automotive/technik-know-how/sicherheitselektronik/article/87717/0/Besser_als_die_Wirklichkeit/ \ No newline at end of file 27test_url: http://www.elektroniknet.de/automotive/technik-know-how/sicherheitselektronik/article/87717/0/Besser_als_die_Wirklichkeit/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/elmalpensante.com.txt b/inc/3rdparty/site_config/standard/elmalpensante.com.txt
index 9fecd663..435c6c20 100644..100755
--- a/inc/3rdparty/site_config/standard/elmalpensante.com.txt
+++ b/inc/3rdparty/site_config/standard/elmalpensante.com.txt
@@ -1,4 +1,4 @@
1single_page_link: //a[contains(@href, 'print_contenido')] 1single_page_link: //a[contains(@href, 'print_contenido')]
2title: //h2 2title: //h2
3author: //div[@class="autor"] 3author: //div[@class="autor"]
4test_url: http://www.elmalpensante.com/index.php?doc=display_contenido&id=668 \ No newline at end of file 4test_url: http://www.elmalpensante.com/index.php?doc=display_contenido&id=668 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/elpais.com.txt b/inc/3rdparty/site_config/standard/elpais.com.txt
index 32f9fc3f..c6f9787b 100644..100755
--- a/inc/3rdparty/site_config/standard/elpais.com.txt
+++ b/inc/3rdparty/site_config/standard/elpais.com.txt
@@ -1,22 +1,22 @@
1title: //meta[@name='DC.title']/@content 1title: //meta[@name='DC.title']/@content
2title: //div[contains(@class, 'cabecera_noticia')]//h1 2title: //div[contains(@class, 'cabecera_noticia')]//h1
3date: //meta[@name='DC.date']/@content 3date: //meta[@name='DC.date']/@content
4date: //meta[@name='date']/@content 4date: //meta[@name='date']/@content
5body: //div[@class='columna_texto'] 5body: //div[@class='columna_texto']
6body: //div[@id='cuerpo_noticia'] 6body: //div[@id='cuerpo_noticia']
7body: //div[@class='estructura_2col_1zq']//div[@class='margen_n'] 7body: //div[@class='estructura_2col_1zq']//div[@class='margen_n']
8
9prune: no
10
11strip_id_or_class: disposicion_vertical
12strip_id_or_class: ampliar_foto
13strip_id_or_class: utilidades
14strip_id_or_class: info_relacionada
15strip_id_or_class: m-kiosko
16strip_id_or_class: info_complementa
17
18strip: //div[starts-with(@id, 'sumario') and contains(., 'más información')]
19strip: //div[@id='coment' or @id='foros_not']
20 8
21test_url: http://elpais.com/elpais/2012/02/06/gente/1328526783_491687.html 9prune: no
10
11strip_id_or_class: disposicion_vertical
12strip_id_or_class: ampliar_foto
13strip_id_or_class: utilidades
14strip_id_or_class: info_relacionada
15strip_id_or_class: m-kiosko
16strip_id_or_class: info_complementa
17
18strip: //div[starts-with(@id, 'sumario') and contains(., 'más información')]
19strip: //div[@id='coment' or @id='foros_not']
20
21test_url: http://elpais.com/elpais/2012/02/06/gente/1328526783_491687.html
22test_url: http://www.elpais.com/articulo/cultura/mano/retrato/materia/elpepicul/20120207elpepicul_2/Tes \ No newline at end of file 22test_url: http://www.elpais.com/articulo/cultura/mano/retrato/materia/elpepicul/20120207elpepicul_2/Tes \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/emaratalyoum.com.txt b/inc/3rdparty/site_config/standard/emaratalyoum.com.txt
new file mode 100755
index 00000000..3d1313e2
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/emaratalyoum.com.txt
@@ -0,0 +1,7 @@
1body: //div[@id='main-column']//div[@class='content']
2
3prune: no
4
5test_url: http://www.emaratalyoum.com/sports/arab-and-international/2013-08-29-1.601844
6test_url: http://www.emaratalyoum.com/sports/arab-and-international/2013-08-29-1.601842
7test_url: http://www.emaratalyoum.com/public-sports-1.533088?ot=ot.AjaxPageLayout \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/en.espnf1.com.txt b/inc/3rdparty/site_config/standard/en.espnf1.com.txt
index c1a91063..2ca0216b 100644..100755
--- a/inc/3rdparty/site_config/standard/en.espnf1.com.txt
+++ b/inc/3rdparty/site_config/standard/en.espnf1.com.txt
@@ -1,10 +1,10 @@
1body: //div[@id='content'] 1body: //div[@id='content']
2strip: //div[@class='rl'] 2strip: //div[@class='rl']
3strip: //p[@class='authdesc'] 3strip: //p[@class='authdesc']
4strip: //p[@class='strybtm'] 4strip: //p[@class='strybtm']
5strip: //div[@id='stryFtrLft'] 5strip: //div[@id='stryFtrLft']
6strip: //div[@id='f1Conversation'] 6strip: //div[@id='f1Conversation']
7strip: //div[@id='cmtSpncrRuler'] 7strip: //div[@id='cmtSpncrRuler']
8strip: //div[@id='stryComments'] 8strip: //div[@id='stryComments']
9strip: //div[@id='athrData'] 9strip: //div[@id='athrData']
10test_url: http://en.espnf1.com/monaco/motorsport/story/50529.html \ No newline at end of file 10test_url: http://en.espnf1.com/monaco/motorsport/story/50529.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/engadget.com.txt b/inc/3rdparty/site_config/standard/engadget.com.txt
index 6cc6b14e..52acddb0 100644..100755
--- a/inc/3rdparty/site_config/standard/engadget.com.txt
+++ b/inc/3rdparty/site_config/standard/engadget.com.txt
@@ -1,7 +1,7 @@
1title: //meta[@property="og:title"]/@content 1title: //meta[@property="og:title"]/@content
2body: //div[@class='post_body'] 2body: //div[@class='post_body']
3date: //*[@class='post_time'] 3date: //*[@class='post_time']
4 4
5prune: no 5prune: no
6 6
7test_url: http://www.engadget.com/2011/05/20/screen-grabs-the-mentalist-takes-the-ipad-to-new-heights/ \ No newline at end of file 7test_url: http://www.engadget.com/2011/05/20/screen-grabs-the-mentalist-takes-the-ipad-to-new-heights/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/engineering.tumblr.com.txt b/inc/3rdparty/site_config/standard/engineering.tumblr.com.txt
index 35ace467..48f301fe 100644..100755
--- a/inc/3rdparty/site_config/standard/engineering.tumblr.com.txt
+++ b/inc/3rdparty/site_config/standard/engineering.tumblr.com.txt
@@ -1,7 +1,7 @@
1title: //h2 1title: //h2
2body: //div[@class="post_content"] 2body: //div[@class="post_content"]
3author: //p[@class="author"]/a 3author: //p[@class="author"]/a
4date: //p[@class="date"] 4date: //p[@class="date"]
5strip: //h2 5strip: //h2
6strip: //header 6strip: //header
7test_url: http://engineering.tumblr.com/post/21276808338/tumblr-firehose \ No newline at end of file 7test_url: http://engineering.tumblr.com/post/21276808338/tumblr-firehose \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/english.aljazeera.net.txt b/inc/3rdparty/site_config/standard/english.aljazeera.net.txt
index aed3a5f9..97365994 100644..100755
--- a/inc/3rdparty/site_config/standard/english.aljazeera.net.txt
+++ b/inc/3rdparty/site_config/standard/english.aljazeera.net.txt
@@ -1,7 +1,7 @@
1title: //span[@id='DetailedTitle'] 1title: //span[@id='DetailedTitle']
2body: //div[@id='ctl00_cphBody_dvArticleInfoBlock'] | //td[@class='DetailedSummary'] 2body: //div[@id='ctl00_cphBody_dvArticleInfoBlock'] | //td[@class='DetailedSummary']
3strip_id_or_class: sidebar 3strip_id_or_class: sidebar
4strip_id_or_class: Skyscrapper_Body 4strip_id_or_class: Skyscrapper_Body
5strip: //td[@class='DetailedSummary']/table[position() != 1] 5strip: //td[@class='DetailedSummary']/table[position() != 1]
6prune: no 6prune: no
7test_url: http://english.aljazeera.net//news/middleeast/2011/04/20114681444376835.html \ No newline at end of file 7test_url: http://english.aljazeera.net//news/middleeast/2011/04/20114681444376835.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/enikos.gr.txt b/inc/3rdparty/site_config/standard/enikos.gr.txt
index e2b99bfc..ddd51c4b 100644..100755
--- a/inc/3rdparty/site_config/standard/enikos.gr.txt
+++ b/inc/3rdparty/site_config/standard/enikos.gr.txt
@@ -1,9 +1,9 @@
1body: //div[@id='article']//div[contains(@class, 'inside')] 1body: //div[@id='article']//div[contains(@class, 'inside')]
2 2
3strip_id_or_class: tags 3strip_id_or_class: tags
4strip_id_or_class: actions 4strip_id_or_class: actions
5strip_id_or_class: google-ads 5strip_id_or_class: google-ads
6 6
7prune: no 7prune: no
8 8
9test_url: http://www.enikos.gr/politics/98606,To_oxi_toy_Agorastoy_stoys_Germanoys.html \ No newline at end of file 9test_url: http://www.enikos.gr/politics/98606,To_oxi_toy_Agorastoy_stoys_Germanoys.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/entertainment.timesonline.co.uk.txt b/inc/3rdparty/site_config/standard/entertainment.timesonline.co.uk.txt
index 3e7fba09..a756c457 100644..100755
--- a/inc/3rdparty/site_config/standard/entertainment.timesonline.co.uk.txt
+++ b/inc/3rdparty/site_config/standard/entertainment.timesonline.co.uk.txt
@@ -1,10 +1,10 @@
1author://div[@class = 'article-author']/span[@class = 'byline'] 1author://div[@class = 'article-author']/span[@class = 'byline']
2title://h1[@class = 'heading'] 2title://h1[@class = 'heading']
3body://div[@id = 'related-article-links'] 3body://div[@id = 'related-article-links']
4strip://div[@id = 'comment-sort-order'] 4strip://div[@id = 'comment-sort-order']
5strip://div[@id = 'my-profile'] 5strip://div[@id = 'my-profile']
6strip://div[@class = 'article-author'] 6strip://div[@class = 'article-author']
7strip://div[@class = 'bg-f8f1d8 width-385 text-left'] 7strip://div[@class = 'bg-f8f1d8 width-385 text-left']
8strip://div[@id = 'login-status'] 8strip://div[@id = 'login-status']
9strip://div[@class = 'puff-padding'] 9strip://div[@class = 'puff-padding']
10test_url: http://entertainment.timesonline.co.uk/tol/arts_and_entertainment/the_tls/article7177738.ece \ No newline at end of file 10test_url: http://entertainment.timesonline.co.uk/tol/arts_and_entertainment/the_tls/article7177738.ece \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ericsuh.com.txt b/inc/3rdparty/site_config/standard/ericsuh.com.txt
new file mode 100755
index 00000000..d25140c5
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/ericsuh.com.txt
@@ -0,0 +1,4 @@
1date: //h6[@class='datetime']/child::text()
2author: string("Eric J. Suh")
3footnotes: yes
4test_url: http://www.ericsuh.com/blog/posts/2012/8/strange-numbers.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/es.hu.txt b/inc/3rdparty/site_config/standard/es.hu.txt
index 19a1e9dd..21691a56 100644..100755
--- a/inc/3rdparty/site_config/standard/es.hu.txt
+++ b/inc/3rdparty/site_config/standard/es.hu.txt
@@ -1,11 +1,11 @@
1title: concat(//div[@class='doc_author'], ' - ', upper-case(//div[@class='doc_title'])) 1title: concat(//div[@class='doc_author'], ' - ', upper-case(//div[@class='doc_title']))
2 2
3body: //div[@class='doc'] 3body: //div[@class='doc']
4 4
5prune: yes 5prune: yes
6 6
7tidy: yes 7tidy: yes
8convert_double_br_tags: yes 8convert_double_br_tags: yes
9 9
10strip: //a[contains(@href, 'www.facebook.com/pages/Elet-es-Irodalom/')] 10strip: //a[contains(@href, 'www.facebook.com/pages/Elet-es-Irodalom/')]
11test_url: http://www.es.hu/2010-12-08_vissza-a-partpenzt \ No newline at end of file 11test_url: http://www.es.hu/2010-12-08_vissza-a-partpenzt \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/escapistmagazine.com.txt b/inc/3rdparty/site_config/standard/escapistmagazine.com.txt
index 7e17a04d..fd453a19 100644..100755
--- a/inc/3rdparty/site_config/standard/escapistmagazine.com.txt
+++ b/inc/3rdparty/site_config/standard/escapistmagazine.com.txt
@@ -1,2 +1,8 @@
1title: //h1[@class='headline']/div[@class='name']
2
3strip_image_src: 'http://cdn.themis-media.com/media/global/images/library/deriv/115/115825.png'
4
5next_page_link: //a[@class='next_page']
6
1strip_comments: no 7strip_comments: no
2test_url: http://www.escapistmagazine.com/articles/view/columns/extraconsideration/8717-Extra-Consideration-The-Story \ No newline at end of file 8test_url: http://www.escapistmagazine.com/articles/view/columns/criticalintel/10302-I-Hate-Magic \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/espn.go.com.txt b/inc/3rdparty/site_config/standard/espn.go.com.txt
index 319d352b..06476296 100644..100755
--- a/inc/3rdparty/site_config/standard/espn.go.com.txt
+++ b/inc/3rdparty/site_config/standard/espn.go.com.txt
@@ -1,12 +1,12 @@
1title: //div[@class='headline'] | //div[@class='mod-header']/h3 1title: //div[@class='headline'] | //div[@class='mod-header']/h3
2body: //div[contains(@class, 'article')] 2body: //div[contains(@class, 'article')]
3strip: //div[contains(@class, 'mod-inline')] 3strip: //div[contains(@class, 'mod-inline')]
4strip: //*/span[@class='page-actions'] 4strip: //*/span[@class='page-actions']
5strip: //div[@class='page-actions']/* 5strip: //div[@class='page-actions']/*
6strip: //div[@class='headline'] | //div[@class='mod-header']/h3 6strip: //div[@class='headline'] | //div[@class='mod-header']/h3
7strip: //div[@class='mod-blog-navigation'] 7strip: //div[@class='mod-blog-navigation']
8strip: //div[@class='monthday'] 8strip: //div[@class='monthday']
9strip: //div[@class='time'] 9strip: //div[@class='time']
10strip: //div[@class='timeofday'] 10strip: //div[@class='timeofday']
11strip: //div[contains(@class, 'mod-conversations')] 11strip: //div[contains(@class, 'mod-conversations')]
12test_url: http://espn.go.com/boston/mlb/story/_/id/7092528/terry-francona-victim-latest-red-sox-smear-campaign \ No newline at end of file 12test_url: http://espn.go.com/boston/mlb/story/_/id/7092528/terry-francona-victim-latest-red-sox-smear-campaign \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/esquire.com.txt b/inc/3rdparty/site_config/standard/esquire.com.txt
index 7566e8cc..b9cb1e55 100644..100755
--- a/inc/3rdparty/site_config/standard/esquire.com.txt
+++ b/inc/3rdparty/site_config/standard/esquire.com.txt
@@ -1,10 +1,11 @@
1title: //h1 1title: //h1
2author: //div[@id='byline'] 2author: //div[@id='byline']
3 3
4body: //div[@id='printBody'] 4body: //div[@id='printBody']
5 5
6single_page_link: concat('http://www.esquire.com/print-this/', substring-after(//link[@rel='canonical']/@href, 'esquire.com/')) 6single_page_link: concat('http://www.esquire.com/print-this/', substring-after(//link[@rel='canonical']/@href, 'esquire.com/'))
7 7
8prune: no 8prune: no
9 9
10test_url: http://www.esquire.com/features/impossible/price-is-right-perfect-bid-0810 \ No newline at end of file 10test_url: http://www.esquire.com/features/impossible/price-is-right-perfect-bid-0810
11test_url: http://www.esquire.com/blogs/politics/police-getting-leftover-armoured-iraq-trucks-112513 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/essentialpublicradio.org.txt b/inc/3rdparty/site_config/standard/essentialpublicradio.org.txt
index 88c8c560..9a922392 100644..100755
--- a/inc/3rdparty/site_config/standard/essentialpublicradio.org.txt
+++ b/inc/3rdparty/site_config/standard/essentialpublicradio.org.txt
@@ -1,6 +1,6 @@
1title: //*[@itemprop='headline'] 1title: //*[@itemprop='headline']
2author: //*[@itemprop='author'] 2author: //*[@itemprop='author']
3date: //*[@itemprop='datePublished'] 3date: //*[@itemprop='datePublished']
4body: //*[@itemprop='articleBody'] 4body: //*[@itemprop='articleBody']
5strip: //*[contains(@class, 'instapaper_ignore')] 5strip: //*[contains(@class, 'instapaper_ignore')]
6test_url: http://www.essentialpublicradio.org/story/2011-11-14/volunteers-sought-federal-tax-assistance-program-pennsylvania-9421 \ No newline at end of file 6test_url: http://www.essentialpublicradio.org/story/2011-11-14/volunteers-sought-federal-tax-assistance-program-pennsylvania-9421 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/etc.se.txt b/inc/3rdparty/site_config/standard/etc.se.txt
index 58da5ef7..95f8cf78 100644..100755
--- a/inc/3rdparty/site_config/standard/etc.se.txt
+++ b/inc/3rdparty/site_config/standard/etc.se.txt
@@ -1,6 +1,6 @@
1strip_id_or_class: 'left' 1strip_id_or_class: 'left'
2strip_id_or_class: 'right' 2strip_id_or_class: 'right'
3strip_id_or_class: 'block-belowcontent' 3strip_id_or_class: 'block-belowcontent'
4author: //span[@class = 'name']/a 4author: //span[@class = 'name']/a
5date: //div[@class= 'datum'] 5date: //div[@class= 'datum']
6test_url: http://www.etc.se/intervju/lonsamt-att-radda-jorden \ No newline at end of file 6test_url: http://www.etc.se/intervju/lonsamt-att-radda-jorden \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/eternabuenosaires.com.txt b/inc/3rdparty/site_config/standard/eternabuenosaires.com.txt
index bfa2c5dc..bfa2c5dc 100644..100755
--- a/inc/3rdparty/site_config/standard/eternabuenosaires.com.txt
+++ b/inc/3rdparty/site_config/standard/eternabuenosaires.com.txt
diff --git a/inc/3rdparty/site_config/standard/eurogamer.net.txt b/inc/3rdparty/site_config/standard/eurogamer.net.txt
index 6ecdf6bd..8a351667 100644..100755
--- a/inc/3rdparty/site_config/standard/eurogamer.net.txt
+++ b/inc/3rdparty/site_config/standard/eurogamer.net.txt
@@ -1,8 +1,8 @@
1body: //div[ @class='content' ] | //div[ @class='blog-entry' ] 1body: //div[ @class='content' ] | //div[ @class='blog-entry' ]
2 2
3strip: //h2/abbr | //div[ @class='lowleader' ] | //*[ @class='discussion' ] | //img[ @class='play-button' ] | //div[ @class='boxout' ] | //h2/a | //h2 | //h2/div | //p[ @class='timestamp' ] | //a[ @class='eurogamer-author' ] | //p[ @class='aPager' ] | //h1 | //div[ @id='lowleader' ] | //a[ @class='next' ] | //div[contains(concat(' ', normalize-space(@class), ' '), ' pullquote ')] 3strip: //h2/abbr | //div[ @class='lowleader' ] | //*[ @class='discussion' ] | //img[ @class='play-button' ] | //div[ @class='boxout' ] | //h2/a | //h2 | //h2/div | //p[ @class='timestamp' ] | //a[ @class='eurogamer-author' ] | //p[ @class='aPager' ] | //h1 | //div[ @id='lowleader' ] | //a[ @class='next' ] | //div[contains(concat(' ', normalize-space(@class), ' '), ' pullquote ')]
4 4
5date://p[ @class='timestamp' ] 5date://p[ @class='timestamp' ]
6 6
7author://a[ @class='eurogamer-author' ] 7author://a[ @class='eurogamer-author' ]
8test_url: http://www.eurogamer.net/articles/digitalfoundry-vs-unreal-engine-4 \ No newline at end of file 8test_url: http://www.eurogamer.net/articles/digitalfoundry-vs-unreal-engine-4 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/evo.co.uk.txt b/inc/3rdparty/site_config/standard/evo.co.uk.txt
index 07162513..ccb4f879 100644..100755
--- a/inc/3rdparty/site_config/standard/evo.co.uk.txt
+++ b/inc/3rdparty/site_config/standard/evo.co.uk.txt
@@ -1,11 +1,11 @@
1author: substring-after(//div[@class='articleauthor'],'By ') 1author: substring-after(//div[@class='articleauthor'],'By ')
2 2
3# Blog posts 3# Blog posts
4date: //div[@class='articledate'] 4date: //div[@class='articledate']
5# News 5# News
6date: //div[@class='articledate_b'] 6date: //div[@class='articledate_b']
7 7
8body: //div[@class='articletext'] 8body: //div[@class='articletext']
9 9
10convert_double_br_tags: yes 10convert_double_br_tags: yes
11test_url: http://www.evo.co.uk/carreviews/evolongtermtests/280072/bmw_330d_sport_touring.html \ No newline at end of file 11test_url: http://www.evo.co.uk/carreviews/evolongtermtests/280072/bmw_330d_sport_touring.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/expressen.se.txt b/inc/3rdparty/site_config/standard/expressen.se.txt
index d0cb283e..d81d3251 100644..100755
--- a/inc/3rdparty/site_config/standard/expressen.se.txt
+++ b/inc/3rdparty/site_config/standard/expressen.se.txt
@@ -1,9 +1,10 @@
1title: //div[@id='article']/div[contains(@class, 'content')]/h1 1title: //h1[contains(@class, 'b-headline_article')]
2body: //div[@id='article']/div[contains(@class, 'content')] 2body: //div[contains(@class, 'b-article_print')]
3date: //div[contains(@class, 'article-slot')]/descendant::div[contains(@id, 'articledates')] 3
4 4single_page_link: //div[contains(@class, 'b-page__footer__actions')]//a[contains(@href, 'print=true')]
5strip: //img[contains(@src, 'img/px.gif')] 5
6prune: no 6prune: no
7# remove Facebook banner and obtrusive ad 7
8strip: //div[@id='article']/div[contains(@class, 'content')]/div[contains(@class, 'art-right')] 8test_url: http://www.expressen.se/kultur/1.2683904/medan-natet-dras-at
9test_url: http://www.expressen.se/kultur/1.2683904/medan-natet-dras-at \ No newline at end of file 9test_url: http://www.expressen.se/gt/polis-om-styckmordet-extremt-markligt-fall/
10test_url: http://www.expressen.se/Pages/OutboundFeedsPage.aspx?id=3642159&viewstyle=rss \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/extracine.com.txt b/inc/3rdparty/site_config/standard/extracine.com.txt
index 52b598da..52b598da 100644..100755
--- a/inc/3rdparty/site_config/standard/extracine.com.txt
+++ b/inc/3rdparty/site_config/standard/extracine.com.txt
diff --git a/inc/3rdparty/site_config/standard/f1actual.com.txt b/inc/3rdparty/site_config/standard/f1actual.com.txt
index 6ef2738a..6ef2738a 100644..100755
--- a/inc/3rdparty/site_config/standard/f1actual.com.txt
+++ b/inc/3rdparty/site_config/standard/f1actual.com.txt
diff --git a/inc/3rdparty/site_config/standard/facebook.com.txt b/inc/3rdparty/site_config/standard/facebook.com.txt
new file mode 100755
index 00000000..6a492767
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/facebook.com.txt
@@ -0,0 +1,5 @@
1body: //div[@id='imagestage']
2prune: no
3tidy: no
4
5test_url: https://www.facebook.com/feeds/page.php?id=338077742912613&format=rss20 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/facta.co.jp.txt b/inc/3rdparty/site_config/standard/facta.co.jp.txt
index c17e0b8c..4c96a1a4 100644..100755
--- a/inc/3rdparty/site_config/standard/facta.co.jp.txt
+++ b/inc/3rdparty/site_config/standard/facta.co.jp.txt
@@ -1,3 +1,3 @@
1bosdy: //div[@class='content'] 1bosdy: //div[@class='content']
2 2
3test_url: http://facta.co.jp/blog/archives/20111026001026.html \ No newline at end of file 3test_url: http://facta.co.jp/blog/archives/20111026001026.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/falter.at.txt b/inc/3rdparty/site_config/standard/falter.at.txt
index b941b740..2bfcc9b4 100644..100755
--- a/inc/3rdparty/site_config/standard/falter.at.txt
+++ b/inc/3rdparty/site_config/standard/falter.at.txt
@@ -1,18 +1,14 @@
1title: //h2[@class='related relatedTitle'] 1title: //h1
2author: //a[contains(@href, 'liste.php?author_id')] 2author: //a[contains(@href, '/kategorie/autoren')]
3 3date: //a[contains(@href, '/falter/ausgabe')]
4# can't think of a better way unfortunately, really bad markup on this site 4body: //article[@class='spanMain']
5date: substring-after(//td[@style='width:85%;'], 'vom') 5
6 6# cleanup
7# not sure why, but instapaper seems to suck up the teaser paragraph 7strip_id_or_class: 'respond'
8# not solved! 8strip: //img[@src='http://www.falter.at/web/_pics/falterlogo_dblau.gif']
9body: //div[contains(@class, 'teaser')] 9strip_id_or_class: 'meta'
10body: //div[@id='content'] 10strip_id_or_class: 'servicebox'
11 11strip_id_or_class: 'related'
12# cleanup 12strip_id_or_class: 'twitter-share-button'
13strip: //img[@src='http://www.falter.at/web/_pics/falterlogo_dblau.gif'] 13strip: //br
14strip: //div[@class='servicebox'] 14test_url: http://www.falter.at/falter/2013/03/26/der-dandy-auf-der-sinkenden-galeere/ \ No newline at end of file
15strip: //h1
16strip: //br
17strip: //td[@id='adcol']
18test_url: http://www.falter.at/web/print/detail.php?id=1634 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/fanfiction.net.txt b/inc/3rdparty/site_config/standard/fanfiction.net.txt
index 8d0c4daf..e7cab4d4 100644..100755
--- a/inc/3rdparty/site_config/standard/fanfiction.net.txt
+++ b/inc/3rdparty/site_config/standard/fanfiction.net.txt
@@ -1,6 +1,6 @@
1body: //*[@id = 'story text'] 1body: //*[@id = 'story text']
2author: //a[starts-with(@href, '/u/')] 2author: //a[starts-with(@href, '/u/')]
3next_page_link: substring-after(//input[contains(@value, 'Next')]/@onclick, "self.location='") 3next_page_link: substring-after(//input[contains(@value, 'Next')]/@onclick, "self.location='")
4autodetect_next_page:yes 4autodetect_next_page:yes
5strip_id_or_class: 'a2a_kit' 5strip_id_or_class: 'a2a_kit'
6test_url: http://www.fanfiction.net/s/6497403/1/Spartan_Love \ No newline at end of file 6test_url: http://www.fanfiction.net/s/6497403/1/Spartan_Love \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/fastcompany.com.txt b/inc/3rdparty/site_config/standard/fastcompany.com.txt
index 5547a76c..a6417237 100644..100755
--- a/inc/3rdparty/site_config/standard/fastcompany.com.txt
+++ b/inc/3rdparty/site_config/standard/fastcompany.com.txt
@@ -1,16 +1,16 @@
1title: //h1 1title: //h1
2author: //h5[@class='byline']//a 2author: //h5[@class='byline']//a
3date: //h5[@class='date'] 3date: //h5[@class='date']
4body: //figure[@class='node-poster'] | //div[contains(@class, "node-content")] 4body: //figure[@class='node-poster'] | //div[contains(@class, "node-content")]
5strip_id_or_class: article-top-wrapper 5strip_id_or_class: article-top-wrapper
6strip_id_or_class: footer-message 6strip_id_or_class: footer-message
7strip_id_or_class: print-logo 7strip_id_or_class: print-logo
8strip: //cite 8strip: //cite
9strip://*[@class='timestamp'] 9strip://*[@class='timestamp']
10strip://div[@id='page_right'] 10strip://div[@id='page_right']
11strip://section[@id='header_region'] 11strip://section[@id='header_region']
12strip://h1[@class='node-title'] 12strip://h1[@class='node-title']
13strip://div[@class='node-submitted'] 13strip://div[@class='node-submitted']
14strip_id_or_class: skipnav 14strip_id_or_class: skipnav
15test_url: http://www.fastcompany.com/3000226/link-between-quietness-and-productivity 15test_url: http://www.fastcompany.com/3000226/link-between-quietness-and-productivity
16test_url: http://www.fastcompany.com/3003586/6-simple-rituals-reach-your-potential-every-day \ No newline at end of file 16test_url: http://www.fastcompany.com/3003586/6-simple-rituals-reach-your-potential-every-day \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/faz.net.txt b/inc/3rdparty/site_config/standard/faz.net.txt
index 4fe5968b..d087d2aa 100644..100755
--- a/inc/3rdparty/site_config/standard/faz.net.txt
+++ b/inc/3rdparty/site_config/standard/faz.net.txt
@@ -1,30 +1,36 @@
1# Title 1# Title
2title: //p[@class='Content HeadlineShort'] 2title: //p[@class='Content HeadlineShort']
3 3
4# Authors 4# Authors
5# some are known and have a link, others don't 5# some are known and have a link, others don't
6author: substring-after(//span[@class='Autor'], 'Von') 6author: substring-after(//span[@class='Autor'], 'Von')
7 7
8# Date 8# Date
9date: //span[@class='Datum'] 9date: //span[@class='Datum']
10 10
11# Body 11# Body
12body: //div[@class='Artikel'] 12body: //div[@class='Artikel']
13 13
14# Removements before body text 14# Removements before body text
15strip: //div[@class='Breadcrumbs'] 15strip: //div[@class='Breadcrumbs']
16strip: //div[@class='QuickSearchBox'] 16strip: //div[@class='QuickSearchBox']
17strip: //div[@class='FAZArtikelEinleitung'] 17strip: //div[@class='FAZArtikelEinleitung']
18strip: //div[@class='FAZArtikelReiter'] 18strip: //div[@class='FAZArtikelReiter']
19strip: //div[@class='clear'] 19strip: //div[@class='clear']
20 20
21# General removements 21# General removements
22strip: //span[@class='Bildnachweis'] 22strip: //span[@class='Bildnachweis']
23 23strip: //img[@class='MediaIcon']
24# Removements after body text 24strip: //div[@class='ArtikelMediaLink']
25strip: //div[@class='ArtikelAbbinder'] 25dissolve: //a[img]
26strip: //div[@class='ArtikelKommentieren Artikelfuss GETS;tk;boxen.top-lesermeinungen;tp;content'] 26
27strip: //div[@class='FAZArtikelKommentare FAZArtikelContent'] 27# Removements after body text
28strip: //div[@class='FAZArtikelFunktionen'] 28strip: //div[@class='ArtikelAbbinder']
29strip: //div[@id='FAZContentRight'] 29strip: //div[@class='ArtikelKommentieren Artikelfuss GETS;tk;boxen.top-lesermeinungen;tp;content']
30test_url: http://www.faz.net/aktuell/gesellschaft/ehe-haltbarkeitsformel-verliebe-dich-oft-verlobe-dich-selten-heirate-vielleicht-11685306.html \ No newline at end of file 30strip: //div[@class='FAZArtikelKommentare FAZArtikelContent']
31strip: //div[@class='FAZArtikelFunktionen']
32strip: //div[@id='FAZContentRight']
33
34# Fix picture captions
35wrap_in(small): //span[@class='Bildunterschrift']/text()
36test_url: http://www.faz.net/aktuell/feuilleton/zum-tod-von-margaret-thatcher-die-reizfigur-12141919.html#Drucken \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/fertigung.de.txt b/inc/3rdparty/site_config/standard/fertigung.de.txt
new file mode 100755
index 00000000..90145e58
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/fertigung.de.txt
@@ -0,0 +1,23 @@
1title: //title
2
3body: //div[@id='content']
4
5strip: (//div[@id='content']/h2)[1]
6
7strip: //h2[contains(., 'mehr News')]/following::*
8strip: //h2[contains(., 'mehr News')]
9
10strip: //div[contains(@class, 'indizar')]/following::*
11strip: //div[contains(@class, 'indizar')]
12
13strip: //h1[contains(@class, 'single')]/preceding::*
14strip: //h1[contains(@class, 'single')]
15
16strip_id_or_class: plista_widget
17
18prune: no
19
20next_page_link: //a[contains(., 'Weiter')]
21
22test_url: http://www.fertigung.de/2013/04/igus-neuer-energiekettenkatalog/
23test_url: http://www.fertigung.de/2013/04/dynamisch-und-hochpraezise/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/fictionpress.com.txt b/inc/3rdparty/site_config/standard/fictionpress.com.txt
index 4a04e832..19ec16b0 100644..100755
--- a/inc/3rdparty/site_config/standard/fictionpress.com.txt
+++ b/inc/3rdparty/site_config/standard/fictionpress.com.txt
@@ -1,5 +1,5 @@
1body: id('storytext') 1body: id('storytext')
2author: //a[starts-with(@href, '/u/')] 2author: //a[starts-with(@href, '/u/')]
3#next_page_link: substring-after(//input[contains(@value, 'Next')]/@onclick, "self.location='") 3#next_page_link: substring-after(//input[contains(@value, 'Next')]/@onclick, "self.location='")
4strip_id_or_class: 'a2a_kit' 4strip_id_or_class: 'a2a_kit'
5test_url: http://www.fictionpress.com/s/2897964/1/All_We_Knew \ No newline at end of file 5test_url: http://www.fictionpress.com/s/2897964/1/All_We_Knew \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ficwad.com.txt b/inc/3rdparty/site_config/standard/ficwad.com.txt
index 3dbfe76f..081f0bb0 100644..100755
--- a/inc/3rdparty/site_config/standard/ficwad.com.txt
+++ b/inc/3rdparty/site_config/standard/ficwad.com.txt
@@ -1,12 +1,12 @@
1title: //h4 1title: //h4
2author: //span[@class="author"] 2author: //span[@class="author"]
3body: //div[@id="story"] 3body: //div[@id="story"]
4strip_id_or_class: summary 4strip_id_or_class: summary
5strip_id_or_class: meta 5strip_id_or_class: meta
6strip_id_or_class: storyfoot 6strip_id_or_class: storyfoot
7convert_double_br_tags: yes 7convert_double_br_tags: yes
8prune: no 8prune: no
9 9
10# Note: this site still has trouble because single <br> tags are stripped, but I don't see a way to fix that with this interface. 10# Note: this site still has trouble because single <br> tags are stripped, but I don't see a way to fix that with this interface.
11 11
12test_url: http://www.ficwad.com/story/158977 \ No newline at end of file 12test_url: http://www.ficwad.com/story/158977 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/finance.yahoo.com.txt b/inc/3rdparty/site_config/standard/finance.yahoo.com.txt
index 81c18fd3..248522cb 100644..100755
--- a/inc/3rdparty/site_config/standard/finance.yahoo.com.txt
+++ b/inc/3rdparty/site_config/standard/finance.yahoo.com.txt
@@ -1,12 +1,12 @@
1title: //meta[@property='og:title']/@content 1title: //meta[@property='og:title']/@content
2body: //div[@id='y-article-bd'] 2body: //div[@id='y-article-bd']
3body: //div[contains(@class, 'yom-art-content')] 3body: //div[contains(@class, 'yom-art-content')]
4strip: //div[contains(@class, 'related-companies')] 4strip: //div[contains(@class, 'related-companies')]
5strip: //div[@id='y-article-related'] 5strip: //div[@id='y-article-related']
6strip: //div[@id='ypf-article-related'] 6strip: //div[@id='ypf-article-related']
7prune: no 7prune: no
8 8
9single_page_link: //div[@class='ft']//a[contains(@href, 'page=all')] 9single_page_link: //div[@class='ft']//a[contains(@href, 'page=all')]
10 10
11test_url: http://sg.finance.yahoo.com/news/Motorola-takes-wraps-249-rsg-3508842732.html?x=0&.v=1 11test_url: http://sg.finance.yahoo.com/news/Motorola-takes-wraps-249-rsg-3508842732.html?x=0&.v=1
12test_url: http://finance.yahoo.com/news/super-young-retirement-savers.html \ No newline at end of file 12test_url: http://finance.yahoo.com/news/super-young-retirement-savers.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/findtheswagger.tumblr.com.txt b/inc/3rdparty/site_config/standard/findtheswagger.tumblr.com.txt
index 1a5cd2e1..43aef750 100644..100755
--- a/inc/3rdparty/site_config/standard/findtheswagger.tumblr.com.txt
+++ b/inc/3rdparty/site_config/standard/findtheswagger.tumblr.com.txt
@@ -1,10 +1,10 @@
1date: //div[@class='notes']/a 1date: //div[@class='notes']/a
2body: //div[@id='content'] 2body: //div[@id='content']
3 3
4strip_id_or_class: tags 4strip_id_or_class: tags
5strip_id_or_class: permalink 5strip_id_or_class: permalink
6strip_id_or_class: notes 6strip_id_or_class: notes
7strip_id_or_class: post_nav 7strip_id_or_class: post_nav
8strip: //div[@id='content']//h2 8strip: //div[@id='content']//h2
9strip_id_or_class: right_column 9strip_id_or_class: right_column
10test_url: http://findtheswagger.tumblr.com/post/11589145141/moe-resners-end-of-an-era-1957-giants-final \ No newline at end of file 10test_url: http://findtheswagger.tumblr.com/post/11589145141/moe-resners-end-of-an-era-1957-giants-final \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/firstthings.com.txt b/inc/3rdparty/site_config/standard/firstthings.com.txt
index dd56da22..ce972bac 100644..100755
--- a/inc/3rdparty/site_config/standard/firstthings.com.txt
+++ b/inc/3rdparty/site_config/standard/firstthings.com.txt
@@ -1,7 +1,7 @@
1title: //div[@class='articleTitle'] 1title: //div[@class='articleTitle']
2author: //div[@class='articleAuthor'] 2author: //div[@class='articleAuthor']
3body: //div[@class='articleContent'] 3body: //div[@class='articleContent']
4prune: no 4prune: no
5convert_double_br_tags: yes 5convert_double_br_tags: yes
6 6
7test_url: http://www.firstthings.com/article/2011/05/the-trouble-with-ayn-rand \ No newline at end of file 7test_url: http://www.firstthings.com/article/2011/05/the-trouble-with-ayn-rand \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/fivechapters.com.txt b/inc/3rdparty/site_config/standard/fivechapters.com.txt
index d9c5e42e..d9c5e42e 100644..100755
--- a/inc/3rdparty/site_config/standard/fivechapters.com.txt
+++ b/inc/3rdparty/site_config/standard/fivechapters.com.txt
diff --git a/inc/3rdparty/site_config/standard/fivefilters.org.txt b/inc/3rdparty/site_config/standard/fivefilters.org.txt
index dc1db432..dc1db432 100644..100755
--- a/inc/3rdparty/site_config/standard/fivefilters.org.txt
+++ b/inc/3rdparty/site_config/standard/fivefilters.org.txt
diff --git a/inc/3rdparty/site_config/standard/fivethirtyeight.com.txt b/inc/3rdparty/site_config/standard/fivethirtyeight.com.txt
index 3d7b45a8..d0a0a772 100644..100755
--- a/inc/3rdparty/site_config/standard/fivethirtyeight.com.txt
+++ b/inc/3rdparty/site_config/standard/fivethirtyeight.com.txt
@@ -1,7 +1,7 @@
1title: substring-after(//title, 'Right:') 1title: substring-after(//title, 'Right:')
2body: //div[@class = 'post-body'] 2body: //div[@class = 'post-body']
3author: substring-after(//*[@class='post-author'], 'by') 3author: substring-after(//*[@class='post-author'], 'by')
4date: concat(//*[@class='date-header'], ' ', //*[@class='post-timestamp']/a) 4date: concat(//*[@class='date-header'], ' ', //*[@class='post-timestamp']/a)
5convert_double_br_tags: yes 5convert_double_br_tags: yes
6 6
7test_url: http://www.fivethirtyeight.com/2010/07/does-rnc-have-structural-problems.html \ No newline at end of file 7test_url: http://www.fivethirtyeight.com/2010/07/does-rnc-have-structural-problems.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/flyingmachinestudios.com.txt b/inc/3rdparty/site_config/standard/flyingmachinestudios.com.txt
new file mode 100755
index 00000000..2053f801
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/flyingmachinestudios.com.txt
@@ -0,0 +1,2 @@
1strip_id_or_class: linenos
2test_url: http://www.flyingmachinestudios.com/programming/whoops-dci-refactoring/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/fm4.orf.at.txt b/inc/3rdparty/site_config/standard/fm4.orf.at.txt
index 32d44c87..5db3e58c 100644..100755
--- a/inc/3rdparty/site_config/standard/fm4.orf.at.txt
+++ b/inc/3rdparty/site_config/standard/fm4.orf.at.txt
@@ -1,7 +1,7 @@
1author: //div[@class='authorDescription']/h2 1author: //div[@class='authorDescription']/h2
2body: //div[@id='story'] 2body: //div[@id='story']
3date: substring-before(substring-after(//p[@class='date'],'Erstellt am:'), '-') 3date: substring-before(substring-after(//p[@class='date'],'Erstellt am:'), '-')
4title: //h1[@class='detail'] 4title: //h1[@class='detail']
5strip: //div[@class='fact'] 5strip: //div[@class='fact']
6 6
7test_url: http://fm4.orf.at/stories/1689156/ \ No newline at end of file 7test_url: http://fm4.orf.at/stories/1689156/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/fnal.gov.txt b/inc/3rdparty/site_config/standard/fnal.gov.txt
index 7faa6bfc..e404ccb8 100644..100755
--- a/inc/3rdparty/site_config/standard/fnal.gov.txt
+++ b/inc/3rdparty/site_config/standard/fnal.gov.txt
@@ -1,15 +1,15 @@
1title: normalize(//h1) 1title: normalize(//h1)
2 2
3author: //td/p[position()=last()]/em 3author: //td/p[position()=last()]/em
4 4
5# I swear, this is really the best way to do this 5# I swear, this is really the best way to do this
6date: normalize(//td[contains(@style, "color: #ffffff")]) 6date: normalize(//td[contains(@style, "color: #ffffff")])
7 7
8# my god, it's full of tables 8# my god, it's full of tables
9body: /table/tbody/tr[5]//table/tbody//table/tbody/tr/td 9body: /table/tbody/tr[5]//table/tbody//table/tbody/tr/td
10strip: //h1 10strip: //h1
11 11
12# the following two lines strip the byline at the end of the article (the byline is a <p> that consists of an em dash and then some text in an <em>). I have no idea why I can't just strip //p[position()=last()], but trying to do so includes a bunch of other crap in the output. 12# the following two lines strip the byline at the end of the article (the byline is a <p> that consists of an em dash and then some text in an <em>). I have no idea why I can't just strip //p[position()=last()], but trying to do so includes a bunch of other crap in the output.
13strip: //p[position()=last()]/em 13strip: //p[position()=last()]/em
14strip: //p[position()=last()]/child::text() 14strip: //p[position()=last()]/child::text()
15test_url: http://www.fnal.gov/pub/today/archive_2011/today11-11-09_MuonDepartmentReadMore.html \ No newline at end of file 15test_url: http://www.fnal.gov/pub/today/archive_2011/today11-11-09_MuonDepartmentReadMore.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/focus.de.txt b/inc/3rdparty/site_config/standard/focus.de.txt
index 3ad5cabf..6da3687e 100644..100755
--- a/inc/3rdparty/site_config/standard/focus.de.txt
+++ b/inc/3rdparty/site_config/standard/focus.de.txt
@@ -1,19 +1,19 @@
1title: //h1 1title: //h1
2 2
3author: //div[@class='articleContent small']/div[@class='textBlock']//span[@class='created'] 3author: //div[@class='articleContent small']/div[@class='textBlock']//span[@class='created']
4 4
5date: //div[@class='articleHead']/span[@class='created'] 5date: //div[@class='articleHead']/span[@class='created']
6 6
7body: //div[@id='article'] 7body: //div[@id='article']
8 8
9strip: //span[@class='markerText'] 9strip: //span[@class='markerText']
10strip: //div[@class='articleContent small']/div[@class='textBlock']//span[@class='created'] 10strip: //div[@class='articleContent small']/div[@class='textBlock']//span[@class='created']
11strip: //div[@class='sidebar'] 11strip: //div[@class='sidebar']
12strip: //div[@class='starbar'] 12strip: //div[@class='starbar']
13strip: //div[@class='actions clearfix'] 13strip: //div[@class='actions clearfix']
14strip: //div[@id='commentForm'] 14strip: //div[@id='commentForm']
15strip: //div[@id='commentSent'] 15strip: //div[@id='commentSent']
16strip: //div[@id='comments'] 16strip: //div[@id='comments']
17strip: //div[@class='similarityBlock'] 17strip: //div[@class='similarityBlock']
18 18
19test_url: http://www.focus.de/politik/ausland/ein-jahr-nach-bombenanschlag-u-bahn-attentaeter-von-minsk-hingerichtet_aid_724958.html \ No newline at end of file 19test_url: http://www.focus.de/politik/ausland/ein-jahr-nach-bombenanschlag-u-bahn-attentaeter-von-minsk-hingerichtet_aid_724958.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/folklore.org.txt b/inc/3rdparty/site_config/standard/folklore.org.txt
new file mode 100755
index 00000000..ed23a0b6
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/folklore.org.txt
@@ -0,0 +1,4 @@
1author: /html/body/table[3]/tbody/tr/td[1]/table[2]/tbody/tr[1]/td[2]
2date: /html/body/table[3]/tbody/tr/td[1]/table[2]/tbody/tr[2]/td[2]
3body: //div[@class='main']
4test_url: http://www.folklore.org/StoryView.py?story=Calculator_Construction_Set.txt \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/food.com.txt b/inc/3rdparty/site_config/standard/food.com.txt
new file mode 100755
index 00000000..a70da766
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/food.com.txt
@@ -0,0 +1,11 @@
1body: //div[@id='print-area']
2title: //h1[contains(@class, 'section-title')]
3single_page_link: //a[@id='prntrec']
4strip_image_src: food-logo-small
5strip_id_or_class: timer
6strip_id_or_class: photo-sm
7strip_id_or_class: page-header
8
9prune: no
10
11test_url: http://www.food.com/recipe/couldnt-be-easier-bbq-pork-tenderloin-crock-pot-317152 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/fool.com.txt b/inc/3rdparty/site_config/standard/fool.com.txt
index 69867ccb..89cb8b9a 100644..100755
--- a/inc/3rdparty/site_config/standard/fool.com.txt
+++ b/inc/3rdparty/site_config/standard/fool.com.txt
@@ -1,11 +1,11 @@
1body: //div[@class='entry-content'] 1body: //div[@class='entry-content']
2date: //meta[@name="date"]/@content 2date: //meta[@name="date"]/@content
3author: //meta[@name="author"]/@content 3author: //meta[@name="author"]/@content
4 4
5strip_id_or_class: ecapShell 5strip_id_or_class: ecapShell
6strip_id_or_class: noindent 6strip_id_or_class: noindent
7strip_id_or_class: targetedPromotion 7strip_id_or_class: targetedPromotion
8 8
9prune: no 9prune: no
10 10
11test_url: http://www.fool.com/investing/general/2012/01/27/dfc-global-beats-up-on-analysts-yet-again.aspx \ No newline at end of file 11test_url: http://www.fool.com/investing/general/2012/01/27/dfc-global-beats-up-on-analysts-yet-again.aspx \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/forbes.com.txt b/inc/3rdparty/site_config/standard/forbes.com.txt
index 2381b56a..9e1d04c1 100644..100755
--- a/inc/3rdparty/site_config/standard/forbes.com.txt
+++ b/inc/3rdparty/site_config/standard/forbes.com.txt
@@ -1,16 +1,27 @@
1title: //hgroup//h1 1title: //hgroup//h1
2title: //span[@class='mainarttitle'] 2title: //span[@class='mainarttitle']
3 3
4body: //div[@id='leftRail']//div[contains(@class, 'body')] 4body: //div[@id='leftRail']//div[contains(@class, 'body')]
5 5
6author: //meta[@name="author"]/@content 6author: //meta[@name="author"]/@content
7author: //span[@class='mainartauthor'] 7author: //span[@class='mainartauthor']
8 8
9date: substring-before(//hgroup//h6, '@') 9date: substring-before(//hgroup//h6, '@')
10date: //span[@class='mainartdate'] 10date: //span[@class='mainartdate']
11 11
12prune: no 12prune: no
13 13strip: //aside
14single_page_link: //a[contains(@href, '/print/')] 14strip_id_or_class: sticky_sharing
15 15strip_id_or_class: pagination
16test_url: http://www.forbes.com/forbes/2011/0509/technology-frog-design-jan-chipchase-ethnographer-birth-cool_print.html \ No newline at end of file 16strip_id_or_class: controlsbox
17strip_id_or_class: storyboxes
18strip_id_or_class: sponsoredlinks
19strip_id_or_class: nextpage
20strip_id_or_class: contextuallinks
21strip_id_or_class: article_actions
22strip_id_or_class: engagement_block
23
24single_page_link: //a[contains(@href, '/print/')]
25
26test_url: http://www.forbes.com/forbes/2011/0509/technology-frog-design-jan-chipchase-ethnographer-birth-cool_print.html
27test_url: http://www.forbes.com/sites/bruceupbin/2012/09/11/the-iphone-5-winners-and-losers/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/foreignaffairs.com.txt b/inc/3rdparty/site_config/standard/foreignaffairs.com.txt
new file mode 100755
index 00000000..cf8b742f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/foreignaffairs.com.txt
@@ -0,0 +1,34 @@
1# TIDY
2#tidy: no
3# PRUNE
4#prune: no
5
6# SINGLE PAGE
7single_page_link: //div[@class='showlinks']/a
8
9# TITLE
10title: //h1[@class="title"]
11
12# AUTHOR
13author: //div[contains(@class,"field-field-article-display-authors")]/div/div/a/text()
14
15# DATE
16date: //div[contains(@class,"field-field-article-issue")]/div/div/a/text() | //span[@class="date-display-single"]
17
18# BODY
19body: //div[contains(@class,"content-resize")]
20
21# Remove clutter
22strip: //div[@class="article-sidebar"]
23strip: //div[@class="showlinks"]
24strip: //div[contains(@class,"premium-box")]
25strip: //div[contains(@class,"premium-box")]
26strip: //table[contains(@border,"2")]
27
28# Fix picture captions
29wrap_in(small): //p/img/following-sibling::em
30wrap_in(small): //p[img]/text()
31
32# Fix sub-headlines
33wrap_in(h3): //div[contains(@class,"field-field-article-subtitle")]/div/div/text()
34test_url: http://www.foreignaffairs.com/articles/138810/pierre-n-leval/the-long-arm-of-international-law \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/foreignpolicy.com.txt b/inc/3rdparty/site_config/standard/foreignpolicy.com.txt
index 6ab7a091..4e84b989 100644..100755
--- a/inc/3rdparty/site_config/standard/foreignpolicy.com.txt
+++ b/inc/3rdparty/site_config/standard/foreignpolicy.com.txt
@@ -1,11 +1,15 @@
1title: //div[@id='art-mast']//h1 1title: //div[@class='translateHead']//h1 | //div[@id='art-mast']//h1
2author: substring-after(//span[@id='by-line'], 'BY ') 2author: substring-after(//span[@id='by-line'], 'BY ')
3date: //span[@id='pub-date'] 3date: //span[@id='pub-date']
4body: //div[@id='art-mast']//h2 | //div[@id='art-mast']/h3 | //div[@id='art-body']//div[@class='translateBody'] 4body: //div[@id='art-mast']/h2 | //div[@class='translateBody'] | //div[@id='art-body']
5strip: //div[@id='share-box'] 5#Strip inside article content
6prune: no 6strip: //div[@id='share-box']
7 7strip: //div[@id='special-box']
8single_page_link: //span[@id='controls']/a[contains(@href, 'print=yes')] 8
9 9prune: no
10test_url: http://www.foreignpolicy.com/articles/2011/08/01/a_murderers_manifesto_and_me 10
11test_url: test_url: http://www.foreignpolicy.com/articles/2012/02/29/five_years_in_damascus \ No newline at end of file 11single_page_link: //span[@id='controls']/a[contains(@href, 'print=yes')]
12single_page_link: //a[text()='SINGLE PAGE']
13
14test_url: http://www.foreignpolicy.com/articles/2011/08/01/a_murderers_manifesto_and_me
15test_url: http://www.foreignpolicy.com/articles/2012/02/29/five_years_in_damascus \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/forsvaret.no.txt b/inc/3rdparty/site_config/standard/forsvaret.no.txt
index 3085c8f2..c1bd2bac 100644..100755
--- a/inc/3rdparty/site_config/standard/forsvaret.no.txt
+++ b/inc/3rdparty/site_config/standard/forsvaret.no.txt
@@ -1,9 +1,9 @@
1title: //div[@class="articleHeader"]/h1 1title: //div[@class="articleHeader"]/h1
2author: //p[@class="byline"] 2author: //p[@class="byline"]
3date: //p[contains(@class,"publishedDate")]/span 3date: //p[contains(@class,"publishedDate")]/span
4# remove the right menu 4# remove the right menu
5strip: //div[contains(@class,"aside")] 5strip: //div[contains(@class,"aside")]
6# remove some SharePoint webpart label junk 6# remove some SharePoint webpart label junk
7strip: //div[@id="ctl00_PlaceHolderMain_ArticleLeadField_label"] 7strip: //div[@id="ctl00_PlaceHolderMain_ArticleLeadField_label"]
8strip: //div[@id="ctl00_PlaceHolderMain_PublishingPageContentField_label"] 8strip: //div[@id="ctl00_PlaceHolderMain_PublishingPageContentField_label"]
9test_url: http://forsvaret.no/aktuelt/publisert/nyheter/Sider/F5-fly-til-Skedsmo.aspx \ No newline at end of file 9test_url: http://forsvaret.no/aktuelt/publisert/nyheter/Sider/F5-fly-til-Skedsmo.aspx \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/foxnews.com.txt b/inc/3rdparty/site_config/standard/foxnews.com.txt
index f1ee4851..e19c77db 100644..100755
--- a/inc/3rdparty/site_config/standard/foxnews.com.txt
+++ b/inc/3rdparty/site_config/standard/foxnews.com.txt
@@ -1,9 +1,9 @@
1prune: no 1prune: no
2 2
3author: //meta[@name="dc.publisher"]/@content 3author: //meta[@name="dc.publisher"]/@content
4date: //meta[@name="dc.date"]/@content 4date: //meta[@name="dc.date"]/@content
5strip: //p[contains(@class, 'contributor vcard')] 5strip: //p[contains(@class, 'contributor vcard')]
6replace_string(<ul><li><div class="photo">): <div class="photo"> 6replace_string(<ul><li><div class="photo">): <div class="photo">
7strip: //p[a[contains(., 'Click here to read more on this story ')]] 7strip: //p[a[contains(., 'Click here to read more on this story ')]]
8 8
9test_url: http://www.foxnews.com/entertainment/2011/05/04/dwayne-johnson-guys-grow-pair-driving-hybrid/ \ No newline at end of file 9test_url: http://www.foxnews.com/entertainment/2011/05/04/dwayne-johnson-guys-grow-pair-driving-hybrid/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/freelancer.com.txt b/inc/3rdparty/site_config/standard/freelancer.com.txt
index f3d5425c..78d37729 100644..100755
--- a/inc/3rdparty/site_config/standard/freelancer.com.txt
+++ b/inc/3rdparty/site_config/standard/freelancer.com.txt
@@ -1,3 +1,3 @@
1body: //div[@id="projectDetailsContent"]//td 1body: //div[@id="projectDetailsContent"]//td
2 2
3test_url: http://www.freelancer.com/projects/PHP-Website-Design/debug-Forum-website-code.html \ No newline at end of file 3test_url: http://www.freelancer.com/projects/PHP-Website-Design/debug-Forum-website-code.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/freytag-film.com.txt b/inc/3rdparty/site_config/standard/freytag-film.com.txt
index 8dc0dabc..c83f8303 100644..100755
--- a/inc/3rdparty/site_config/standard/freytag-film.com.txt
+++ b/inc/3rdparty/site_config/standard/freytag-film.com.txt
@@ -1,5 +1,5 @@
1body: //div[@class = 'instapaperbody'] 1body: //div[@class = 'instapaperbody']
2convert_double_br_tags: no 2convert_double_br_tags: no
3date: //div[@class='instadate'] 3date: //div[@class='instadate']
4title: //h2[@class = 'instatitle'] 4title: //h2[@class = 'instatitle']
5test_url: http://freytag-film.com/blog/artikel/shooting_a_feature_film_in_10_days \ No newline at end of file 5test_url: http://freytag-film.com/blog/artikel/shooting_a_feature_film_in_10_days \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/fria.nu.txt b/inc/3rdparty/site_config/standard/fria.nu.txt
new file mode 100755
index 00000000..9d8eff97
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/fria.nu.txt
@@ -0,0 +1,8 @@
1body: //div[contains(@class, 'layout__inner')]//div[contains(@class, 'file-image') or contains(@class, 'node__content')]
2author: //article//div[contains(@class, 'field-byline')]
3strip_id_or_class: rekommenderade
4strip_id_or_class: disqus
5strip_id_or_class: annonser
6
7test_url: http://www.fria.nu/artikel/112079
8test_url: http://www.fria.nu/taxonomy/term/1928/all/feed \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/friatidningen.se.txt b/inc/3rdparty/site_config/standard/friatidningen.se.txt
new file mode 100755
index 00000000..1e4abc5a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/friatidningen.se.txt
@@ -0,0 +1,7 @@
1body: //div[contains(@class, 'layout__inner')]//div[contains(@class, 'file-image') or contains(@class, 'node__content')]
2author: //article//div[contains(@class, 'field-byline')]
3strip_id_or_class: rekommenderade
4strip_id_or_class: disqus
5strip_id_or_class: annonser
6
7test_url: http://www.friatidningen.se/artikel/112074 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/friendskorner.com.txt b/inc/3rdparty/site_config/standard/friendskorner.com.txt
index 39a9973f..b067d88a 100644..100755
--- a/inc/3rdparty/site_config/standard/friendskorner.com.txt
+++ b/inc/3rdparty/site_config/standard/friendskorner.com.txt
@@ -1,11 +1,11 @@
1#body: (//div[@class='ftr-yt-vid'])[1] 1#body: (//div[@class='ftr-yt-vid'])[1]
2body: (//blockquote[contains(@class, 'postcontent')])[1] 2body: (//blockquote[contains(@class, 'postcontent')])[1]
3body: (//div[starts-with(@id, 'post_message')])[1] 3body: (//div[starts-with(@id, 'post_message')])[1]
4 4
5prune: no 5prune: no
6tidy: no 6tidy: no
7 7
8#replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player" 8#replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player"
9#replace_string(</iframe>): </iframe>&nbsp;</div> 9#replace_string(</iframe>): </iframe>&nbsp;</div>
10 10
11test_url: http://www.friendskorner.com/forum/f137/debate-personal-lives-leaders-west-vs-pakistan-must-read-297989/ \ No newline at end of file 11test_url: http://www.friendskorner.com/forum/f137/debate-personal-lives-leaders-west-vs-pakistan-must-read-297989/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ft.com.txt b/inc/3rdparty/site_config/standard/ft.com.txt
index 38d9d326..e66b9603 100644..100755
--- a/inc/3rdparty/site_config/standard/ft.com.txt
+++ b/inc/3rdparty/site_config/standard/ft.com.txt
@@ -1,5 +1,5 @@
1body: //div[contains(@class, 'ft-story-body')] 1body: //div[contains(@class, 'ft-story-body')]
2 2
3author: substring-after(//div[contains(@class, 'ft-story-header')]/p[1], 'By ') 3author: substring-after(//div[contains(@class, 'ft-story-header')]/p[1], 'By ')
4date: substring-before(substring-after(//div[contains(@class, 'ft-story-header')]/p[2], 'Published:'), '|') 4date: substring-before(substring-after(//div[contains(@class, 'ft-story-header')]/p[2], 'Published:'), '|')
5test_url: http://www.ft.com/cms/s/2/e1be4b5a-620c-11e0-8ee4-00144feab49a.html \ No newline at end of file 5test_url: http://www.ft.com/cms/s/2/e1be4b5a-620c-11e0-8ee4-00144feab49a.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ftchinese.com.txt b/inc/3rdparty/site_config/standard/ftchinese.com.txt
new file mode 100755
index 00000000..5c94d9b0
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/ftchinese.com.txt
@@ -0,0 +1,18 @@
1# Modified to define the single_page_link
2# This filter is tested on:
3# http://www.ftchinese.com/story/001047373
4# http://www.ftchinese.com/story/001047631
5# http://www.ftchinese.com/story/001047622/?print=y
6# http://www.ftchinese.com/story/001049052
7# http://www.ftchinese.com/story/001049088
8
9title:substring-before(//title, '-')
10author: //div[@class='byline']/a
11date: //a[@class='storytime']
12#Set date in print view
13#date: //div[@class='byline']/a/following-sibling::a
14body: //div[@id="bodytext"]
15strip://div[@class='pagination']
16single_page_link://div[@class='pagination']/a[.='全文']
17#next_page_link: //div[@class='pagination']//a[.='下一页']
18test_url: http://www.ftchinese.com/story/001049088 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ftd.de.txt b/inc/3rdparty/site_config/standard/ftd.de.txt
index a58765b0..7d76af00 100644..100755
--- a/inc/3rdparty/site_config/standard/ftd.de.txt
+++ b/inc/3rdparty/site_config/standard/ftd.de.txt
@@ -1,5 +1,5 @@
1body: //div[@class='boxIntroHead']/span[@class='h3'] | //div[@class='section']/div[@class='paragraph' or @class='embObjLeft'] 1body: //div[@class='boxIntroHead']/span[@class='h3'] | //div[@class='section']/div[@class='paragraph' or @class='embObjLeft']
2single_page_link: //a[@class='icon print'] 2single_page_link: //a[@class='icon print']
3 3
4test_url: http://www.ftd.de/it-medien/it-telekommunikation/:mobilfunk-vivendi-und-vodafone-trennen-sich-in-frankreich/60034691.html 4test_url: http://www.ftd.de/it-medien/it-telekommunikation/:mobilfunk-vivendi-und-vodafone-trennen-sich-in-frankreich/60034691.html
5test_url: http://www.ftd.de/it-medien/medien-internet/:verkauf-von-warner-music-musikbranche-auf-dem-sprung/60048185.html \ No newline at end of file 5test_url: http://www.ftd.de/it-medien/medien-internet/:verkauf-von-warner-music-musikbranche-auf-dem-sprung/60048185.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/fubiz.net.txt b/inc/3rdparty/site_config/standard/fubiz.net.txt
index 8e6356bf..0dc30475 100644..100755
--- a/inc/3rdparty/site_config/standard/fubiz.net.txt
+++ b/inc/3rdparty/site_config/standard/fubiz.net.txt
@@ -1,3 +1,3 @@
1body: //div[@class = 'entry'] 1body: //div[@class = 'entry']
2 2
3test_url: http://www.fubiz.net/2011/05/31/world-press-photo-2011/ \ No newline at end of file 3test_url: http://www.fubiz.net/2011/05/31/world-press-photo-2011/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/futurezone.at.txt b/inc/3rdparty/site_config/standard/futurezone.at.txt
index 50fc144a..808c1f1b 100644..100755
--- a/inc/3rdparty/site_config/standard/futurezone.at.txt
+++ b/inc/3rdparty/site_config/standard/futurezone.at.txt
@@ -1,11 +1,11 @@
1date: //span[@class='date'] 1date: //span[@class='date']
2strip: //div[@class='postsidebar'] 2strip: //div[@class='postsidebar']
3body: //div[@class='singlepost'] 3body: //div[@class='singlepost']
4title: //div[@class='singlepost']/h1 4title: //div[@class='singlepost']/h1
5move_into(//div[@class='singlepost']): //div[@class='info'] 5move_into(//div[@class='singlepost']): //div[@class='info']
6strip: //div[@class='gallery'] 6strip: //div[@class='gallery']
7strip: //div[@class='biggallery'] 7strip: //div[@class='biggallery']
8strip: //ul[@class='social'] 8strip: //ul[@class='social']
9strip: //ul[@class='social_mail'] 9strip: //ul[@class='social_mail']
10 10
11test_url: http://futurezone.at/future/5502-erste-galileo-satelliten-starten-ins-all.php \ No newline at end of file 11test_url: http://futurezone.at/future/5502-erste-galileo-satelliten-starten-ins-all.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gamasutra.com.txt b/inc/3rdparty/site_config/standard/gamasutra.com.txt
index 35a8762a..7c808cfd 100644..100755
--- a/inc/3rdparty/site_config/standard/gamasutra.com.txt
+++ b/inc/3rdparty/site_config/standard/gamasutra.com.txt
@@ -1,20 +1,20 @@
1# default view title 1# default view title
2title: //span[@class='newsTitle'] 2title: //span[@class='newsTitle']
3# print view title 3# print view title
4title: //h3[@class='title'] 4title: //h3[@class='title']
5 5
6# default view author 6# default view author
7author: //span[@class='newsAuth']/a 7author: //span[@class='newsAuth']/a
8author: substring-after(//span[@class='newsAuth'], 'by ') 8author: substring-after(//span[@class='newsAuth'], 'by ')
9 9
10# default view date 10# default view date
11date: //td[@class='newsDate'] 11date: //td[@class='newsDate']
12 12
13# default view body 13# default view body
14body: //td[@class='featureText'] 14body: //td[@class='featureText']
15body: //td[@class='newsText'] 15body: //td[@class='newsText']
16 16
17strip: //h3[@class='title'] 17strip: //h3[@class='title']
18 18
19single_page_link: //a[contains(@href, '?print=1')] 19single_page_link: //a[contains(@href, '?print=1')]
20test_url: http://www.gamasutra.com/view/feature/132559/staying_power_rethinking_feedback_.php \ No newline at end of file 20test_url: http://www.gamasutra.com/view/feature/132559/staying_power_rethinking_feedback_.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gameblog.fr.txt b/inc/3rdparty/site_config/standard/gameblog.fr.txt
index 2cc4b378..73f8342f 100644..100755
--- a/inc/3rdparty/site_config/standard/gameblog.fr.txt
+++ b/inc/3rdparty/site_config/standard/gameblog.fr.txt
@@ -1,10 +1,10 @@
1title: //meta[@property="og:title"]/@content 1title: //meta[@property="og:title"]/@content
2body: //div[@id='GBTVPlayer'] | //div[contains(@class, 'col490')] 2body: //div[@id='GBTVPlayer'] | //div[contains(@class, 'col490')]
3 3
4prune: no 4prune: no
5 5
6strip_id_or_class: noprint 6strip_id_or_class: noprint
7strip: //div[@id='gbNewsTextContent']/following-sibling::* 7strip: //div[@id='gbNewsTextContent']/following-sibling::*
8 8
9test_url: http://www.gameblog.fr/news/26330-les-sims-3-showtime-s-annonce-en-video 9test_url: http://www.gameblog.fr/news/26330-les-sims-3-showtime-s-annonce-en-video
10test_url: http://www.gameblog.fr/news/26306-mise-a-jour-du-dashboard-de-la-xbox-360-disponible \ No newline at end of file 10test_url: http://www.gameblog.fr/news/26306-mise-a-jour-du-dashboard-de-la-xbox-360-disponible \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gamechurch.com.txt b/inc/3rdparty/site_config/standard/gamechurch.com.txt
new file mode 100755
index 00000000..c9eea5f8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/gamechurch.com.txt
@@ -0,0 +1,10 @@
1title: //h1[@class='title']
2
3date: substring-before(substring-after(//div[@class='comment-bubble']/.., 'Posted'), 'by')
4
5body: //div[@class='the-content']
6
7strip: //div[@class='article-image responsive']
8
9strip_id_or_class: 'pullquote'
10test_url: http://gamechurch.com/virtual-gun-control-the-best-amendment/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gamer.no.txt b/inc/3rdparty/site_config/standard/gamer.no.txt
new file mode 100755
index 00000000..e76a59d9
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/gamer.no.txt
@@ -0,0 +1,11 @@
1body: //div[@class='pageContent description']
2date: //div[@class='authorsAndDateTime']/span[@title]
3single_page_link: //div[@class='pages']/a[last()-1]
4
5# fix images and captions
6wrap_in(figure): //div[contains(concat(' ', @class, ' '), ' image')]
7wrap_in(figcaption): //div[contains(concat(' ', @class, ' '), ' image')]/div[@class='text']/text()
8
9# get rid of videos
10strip_id_or_class: 'video full'
11test_url: http://www.gamer.no/artikler/142455/slik-blei-ambisiose-dragons-dogma-skapt/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gamereactor.no.txt b/inc/3rdparty/site_config/standard/gamereactor.no.txt
new file mode 100755
index 00000000..6f7c1b9b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/gamereactor.no.txt
@@ -0,0 +1,11 @@
1title: //div[@id='content']/div/h1
2
3author: //a[@itemprop='reviewer']
4
5date: //time[@itemprop='dtreviewed']/@datetime
6
7body: //div[@id='breadtext']
8
9# fix for NOT magically removing anchors with text identical to title
10dissolve: //a[text()=//div[@id='content']/div/h1/text()]
11test_url: http://www.gamereactor.no/previews/177481/The+Evil+Within/?sid=38b5bd30f56f1b7214de4ff5bed4b76f \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/garythink.com.txt b/inc/3rdparty/site_config/standard/garythink.com.txt
index 1791e816..327ac55b 100644..100755
--- a/inc/3rdparty/site_config/standard/garythink.com.txt
+++ b/inc/3rdparty/site_config/standard/garythink.com.txt
@@ -1,3 +1,3 @@
1tidy: no 1tidy: no
2 2
3test_url: http://www.garythink.com/eft/testing.html \ No newline at end of file 3test_url: http://www.garythink.com/eft/testing.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gasteroprod.com.txt b/inc/3rdparty/site_config/standard/gasteroprod.com.txt
index ef68082a..8eda0c36 100644..100755
--- a/inc/3rdparty/site_config/standard/gasteroprod.com.txt
+++ b/inc/3rdparty/site_config/standard/gasteroprod.com.txt
@@ -1,4 +1,4 @@
1# These should work, but don't. They were given by Firefox XPather extension 1# These should work, but don't. They were given by Firefox XPather extension
2title: //article//header//a//h1 2title: //article//header//a//h1
3body: //article//section 3body: //article//section
4test_url: http://gasteroprod.com/blog/faut-il-continuer-a-supporter-internet-explorer-6.html \ No newline at end of file 4test_url: http://gasteroprod.com/blog/faut-il-continuer-a-supporter-internet-explorer-6.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gatopardo.com.txt b/inc/3rdparty/site_config/standard/gatopardo.com.txt
index 74346328..2ab144f5 100644..100755
--- a/inc/3rdparty/site_config/standard/gatopardo.com.txt
+++ b/inc/3rdparty/site_config/standard/gatopardo.com.txt
@@ -1,8 +1,8 @@
1body: //div[@class='panel'] 1body: //div[@class='panel']
2strip: //div[@style='float:right'] 2strip: //div[@style='float:right']
3strip: //span[@class='titulosHomePublicidad'] 3strip: //span[@class='titulosHomePublicidad']
4strip: //div[@id='TitTop5Der'] 4strip: //div[@id='TitTop5Der']
5strip: //img[@src='/ImagesGatoPardo/LogoGatopardo.png'] 5strip: //img[@src='/ImagesGatoPardo/LogoGatopardo.png']
6 6
7prune: yes 7prune: yes
8test_url: http://www.gatopardo.com/ReportajesGP.php?R=95 \ No newline at end of file 8test_url: http://www.gatopardo.com/ReportajesGP.php?R=95 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gawker.com.txt b/inc/3rdparty/site_config/standard/gawker.com.txt
index 6531d81a..9bc5613a 100644..100755
--- a/inc/3rdparty/site_config/standard/gawker.com.txt
+++ b/inc/3rdparty/site_config/standard/gawker.com.txt
@@ -1,6 +1,6 @@
1body: //div[@class="post-body"] 1body: //div[@class="post-body"]
2 2
3# Remove 'content is restricted' 3# Remove 'content is restricted'
4strip: //div[@id='agegate_IDHERE'] 4strip: //div[@id='agegate_IDHERE']
5 5
6test_url: http://gawker.com/#!5782070/russian-bomb-squad-successfully-defuses-sex-toy \ No newline at end of file 6test_url: http://gawker.com/#!5782070/russian-bomb-squad-successfully-defuses-sex-toy \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/geeksofdoom.com.txt b/inc/3rdparty/site_config/standard/geeksofdoom.com.txt
index 55586e1c..89eb402f 100644..100755
--- a/inc/3rdparty/site_config/standard/geeksofdoom.com.txt
+++ b/inc/3rdparty/site_config/standard/geeksofdoom.com.txt
@@ -1,3 +1,3 @@
1author: substring-after(//span[@class='storyauthor'],'Posted by') 1author: substring-after(//span[@class='storyauthor'],'Posted by')
2date: //span[@class='storydate'] 2date: //span[@class='storydate']
3test_url: http://www.geeksofdoom.com/2012/03/14/robert-rodriguez-says-machete-kills-and-sin-city-2-will-film-this-year/ \ No newline at end of file 3test_url: http://www.geeksofdoom.com/2012/03/14/robert-rodriguez-says-machete-kills-and-sin-city-2-will-film-this-year/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/geenstijl.nl.txt b/inc/3rdparty/site_config/standard/geenstijl.nl.txt
index f6dccf48..a664b4d9 100644..100755
--- a/inc/3rdparty/site_config/standard/geenstijl.nl.txt
+++ b/inc/3rdparty/site_config/standard/geenstijl.nl.txt
@@ -1,3 +1,3 @@
1body: //div[@id = 'article'] 1body: //div[@id = 'article']
2strip: //div[@id = 'klasbox'] 2strip: //div[@id = 'klasbox']
3test_url: http://www.geenstijl.nl/mt/archieven/2010/10/vrouw_lange_frans_wou_baas_b_d.html \ No newline at end of file 3test_url: http://www.geenstijl.nl/mt/archieven/2010/10/vrouw_lange_frans_wou_baas_b_d.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/getnews.jp.txt b/inc/3rdparty/site_config/standard/getnews.jp.txt
index 537b4c2e..e28d4b8b 100644..100755
--- a/inc/3rdparty/site_config/standard/getnews.jp.txt
+++ b/inc/3rdparty/site_config/standard/getnews.jp.txt
@@ -1,3 +1,3 @@
1body: //div[@class='post'] 1body: //div[@class='post']
2strip: //ul[@id='bookmark_single'] 2strip: //ul[@id='bookmark_single']
3test_url: http://getnews.jp/archives/117312 \ No newline at end of file 3test_url: http://getnews.jp/archives/117312 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/giantbomb.com.txt b/inc/3rdparty/site_config/standard/giantbomb.com.txt
index 8a54bc07..61de51b2 100644..100755
--- a/inc/3rdparty/site_config/standard/giantbomb.com.txt
+++ b/inc/3rdparty/site_config/standard/giantbomb.com.txt
@@ -1,11 +1,11 @@
1# 2011-11-19 - carlo@... - Initial setup. 1# 2011-11-19 - carlo@... - Initial setup.
2 2
3strip_id_or_class: user-review-detail 3strip_id_or_class: user-review-detail
4strip: //h1 4strip: //h1
5 5
6body: //div[@class="wiki-content"] | //div[@class="section-bd"] | //div[@class="news-story"] 6body: //div[@class="wiki-content"] | //div[@class="section-bd"] | //div[@class="news-story"]
7 7
8author: //span[@class="reviewer"] | //p[@class="byline"]/a/text() 8author: //span[@class="reviewer"] | //p[@class="byline"]/a/text()
9date: //span[@class="dtreviewed"] 9date: //span[@class="dtreviewed"]
10 10
11test_url: http://www.giantbomb.com/the-elder-scrolls-v-skyrim/61-33394/ \ No newline at end of file 11test_url: http://www.giantbomb.com/the-elder-scrolls-v-skyrim/61-33394/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/giga.de.txt b/inc/3rdparty/site_config/standard/giga.de.txt
index f60199ad..e2689eae 100644..100755
--- a/inc/3rdparty/site_config/standard/giga.de.txt
+++ b/inc/3rdparty/site_config/standard/giga.de.txt
@@ -1,20 +1,20 @@
1tidy:no 1tidy:no
2title://h2[@class="title"] 2title://h2[@class="title"]
3# author:"Ben Miller" 3# author:"Ben Miller"
4date://div[@id="stats"]/span 4date://div[@id="stats"]/span
5strip_id_or_class:stats 5strip_id_or_class:stats
6strip_id_or_class:breadcrumbs 6strip_id_or_class:breadcrumbs
7strip_id_or_class:gn-why-content 7strip_id_or_class:gn-why-content
8strip_id_or_class:single-social 8strip_id_or_class:single-social
9strip_id_or_class:sidebar-ads 9strip_id_or_class:sidebar-ads
10strip_id_or_class:sidebar-top 10strip_id_or_class:sidebar-top
11strip_id_or_class:footer 11strip_id_or_class:footer
12strip_id_or_class:post_meta 12strip_id_or_class:post_meta
13# strip_id_or_class: 13# strip_id_or_class:
14# strip_id_or_class: 14# strip_id_or_class:
15# strip_id_or_class: 15# strip_id_or_class:
16# strip_id_or_class: 16# strip_id_or_class:
17# strip_id_or_class: 17# strip_id_or_class:
18# strip_id_or_class: 18# strip_id_or_class:
19 19
20test_url: http://www.giga.de/benm/2011/10/17/probleme-mit-ios-5-wenn-die-daten-weg-sind/#more-58033 \ No newline at end of file 20test_url: http://www.giga.de/benm/2011/10/17/probleme-mit-ios-5-wenn-die-daten-weg-sind/#more-58033 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gigaom.com.txt b/inc/3rdparty/site_config/standard/gigaom.com.txt
index 348bdf23..cc8fdfa0 100644..100755
--- a/inc/3rdparty/site_config/standard/gigaom.com.txt
+++ b/inc/3rdparty/site_config/standard/gigaom.com.txt
@@ -1,17 +1,12 @@
1date: //meta[@name='DC.date.issued']/@content 1date: //meta[@name='dcterms.created']/@content
2date: //span[@class='post-meta the-date'] 2title: //meta[@property='og:title']/@content
3 3author: //section[@class="post-meta"]//a[@rel="author"]
4title: //meta[@property='og:title']/@content 4
5 5body: //div[starts-with(@id, 'post-content-')]
6author: //meta[@name='DC.creator']/@content 6
7 7strip_id_or_class: sharedaddy
8body: //div[contains(@class, 'post-sub-head') or starts-with(@id, 'post-content-')] 8
9 9prune: no
10find_string: id="content" 10
11replace_string: id="content-ignore" 11test_url: http://gigaom.com/2011/10/24/groupon-google-lawsuit/
12 12test_url: http://gigaom.com/2012/12/26/snapchat-rises-why-pokes-decline-shows-facebooks-inability-to-invent/ \ No newline at end of file
13strip_id_or_class: sharedaddy
14
15prune: no
16
17test_url: http://gigaom.com/2011/10/24/groupon-google-lawsuit/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gihyo.jp.txt b/inc/3rdparty/site_config/standard/gihyo.jp.txt
index 478b23a3..d3534b29 100644..100755
--- a/inc/3rdparty/site_config/standard/gihyo.jp.txt
+++ b/inc/3rdparty/site_config/standard/gihyo.jp.txt
@@ -1,3 +1,3 @@
1single_page_link: //p[@id='skip']//a[contains(@href, 'skip')] 1single_page_link: //p[@id='skip']//a[contains(@href, 'skip')]
2 2
3test_url: http://gihyo.jp/dev/serial/01/machine-learning/0010 \ No newline at end of file 3test_url: http://gihyo.jp/dev/serial/01/machine-learning/0010 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gist.github.com.txt b/inc/3rdparty/site_config/standard/gist.github.com.txt
index 53095b34..90207862 100644..100755
--- a/inc/3rdparty/site_config/standard/gist.github.com.txt
+++ b/inc/3rdparty/site_config/standard/gist.github.com.txt
@@ -1,6 +1,6 @@
1body: //div[@class="highlight"]/pre 1body: //div[@class="highlight"]/pre
2 2
3prune: no 3prune: no
4tidy: no 4tidy: no
5 5
6test_url: https://gist.github.com/1258908 \ No newline at end of file 6test_url: https://gist.github.com/1258908 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/givemesomethingtoread.com.txt b/inc/3rdparty/site_config/standard/givemesomethingtoread.com.txt
index 144ce045..0de0750b 100644..100755
--- a/inc/3rdparty/site_config/standard/givemesomethingtoread.com.txt
+++ b/inc/3rdparty/site_config/standard/givemesomethingtoread.com.txt
@@ -1,3 +1,3 @@
1single_page_link: //div[@id="content"]//h2/a 1single_page_link: //div[@id="content"]//h2/a
2 2
3test_url: http://givemesomethingtoread.com/post/6285838917/the-baddest-lawyer-in-the-history-of-jersey \ No newline at end of file 3test_url: http://givemesomethingtoread.com/post/6285838917/the-baddest-lawyer-in-the-history-of-jersey \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gizmodo.co.uk.txt b/inc/3rdparty/site_config/standard/gizmodo.co.uk.txt
index 285e76c0..2eb82a6d 100644..100755
--- a/inc/3rdparty/site_config/standard/gizmodo.co.uk.txt
+++ b/inc/3rdparty/site_config/standard/gizmodo.co.uk.txt
@@ -1,7 +1,7 @@
1body: //div[@id="leadimage" or @class="postcontent"] 1body: //div[@id="leadimage" or @class="postcontent"]
2author: //div[@class="contentauthor"] 2author: //div[@class="contentauthor"]
3date: //div[@class="timestamp"] 3date: //div[@class="timestamp"]
4 4
5prune: no 5prune: no
6 6
7test_url: http://www.gizmodo.co.uk/2013/02/bbc-forcing-poor-old-sir-david-attenborough-to-go-on-twitter/ \ No newline at end of file 7test_url: http://www.gizmodo.co.uk/2013/02/bbc-forcing-poor-old-sir-david-attenborough-to-go-on-twitter/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gizmodo.com.txt b/inc/3rdparty/site_config/standard/gizmodo.com.txt
index c9536255..e73ec9d2 100644..100755
--- a/inc/3rdparty/site_config/standard/gizmodo.com.txt
+++ b/inc/3rdparty/site_config/standard/gizmodo.com.txt
@@ -1,7 +1,11 @@
1body: //div[@class="post-body" or contains(@class, 'illustration top')] 1#body: //div[@class="post-body" or contains(@class, 'illustration top')]
2author: (//cite//span[@class="plus-icon"])[1] 2body: //div[contains(@class, 'image-annotation-box') or contains(@class, 'post-content')]
3date: //span[@class="date"] 3#author: (//cite//span[@class="plus-icon"])[1]
4 4author: //span[contains(@class, 'display-name')]
5prune: no 5date: //span[@class="date"]
6 6
7test_url: http://gizmodo.com/5880147/kuhn-rikon-improves-their-spice-grinder-with-grade-school-science \ No newline at end of file 7prune: no
8
9test_url: http://gizmodo.com/5880147/kuhn-rikon-improves-their-spice-grinder-with-grade-school-science
10test_url: http://gizmodo.com/what-van-goghs-paintings-would-look-like-if-they-came-874035680
11test_url: http://gizmodo.com/vip.xml \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gizmodo.uol.com.br.txt b/inc/3rdparty/site_config/standard/gizmodo.uol.com.br.txt
new file mode 100755
index 00000000..d963d684
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/gizmodo.uol.com.br.txt
@@ -0,0 +1,6 @@
1title: //h1
2
3body: //div[@id='destaques']//div[contains(@class, 'img')] | //div[@id='maincontent']//p
4
5test_url: http://gizmodo.uol.com.br/nvidia-gtx-titan-z/
6test_url: http://gizmodo.uol.com.br/perfil-mark-zuckerberg-hackeado/
diff --git a/inc/3rdparty/site_config/standard/gizmologia.com.txt b/inc/3rdparty/site_config/standard/gizmologia.com.txt
index d2c7c9f9..d2c7c9f9 100644..100755
--- a/inc/3rdparty/site_config/standard/gizmologia.com.txt
+++ b/inc/3rdparty/site_config/standard/gizmologia.com.txt
diff --git a/inc/3rdparty/site_config/standard/gizmovil.com.txt b/inc/3rdparty/site_config/standard/gizmovil.com.txt
index 5fc204b8..5fc204b8 100644..100755
--- a/inc/3rdparty/site_config/standard/gizmovil.com.txt
+++ b/inc/3rdparty/site_config/standard/gizmovil.com.txt
diff --git a/inc/3rdparty/site_config/standard/global.txt b/inc/3rdparty/site_config/standard/global.txt
index 135ed500..71fbc934 100644..100755
--- a/inc/3rdparty/site_config/standard/global.txt
+++ b/inc/3rdparty/site_config/standard/global.txt
@@ -1,4 +1,18 @@
1# Look for Open Graph data - http://ogp.me 1# Look for Open Graph data - http://ogp.me
2title: //meta[@property="og:title"]/@content 2title: //meta[@property="og:title"]/@content
3date: //meta[@property="article:published_time"]/@content 3date: //meta[@property="article:published_time"]/@content
4# article:author is sometimes a URL, e.g. on guardian.co.uk \ No newline at end of file 4# article:author is sometimes a URL, e.g. on guardian.co.uk
5
6# Remove Google Publisher Tags: https://support.google.com/dfp_sb/answer/1649768?hl=en
7#strip_id_or_class: div-gpt-ad
8
9# Strip doubleclick image ads
10strip_image_src: doubleclick.net
11
12# If you get chunks of Javascript code appearing in the extracted output, try uncommenting the lines below.
13# This tries to convert script tags to hidden div elements (which Full-Text RSS removes).
14# If you notice issues with this approach, please let us know.
15#find_string: <script
16#replace_string: <div style="display:none"
17#find_string: </script>
18#replace_string: </div> \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/globalissues.org.txt b/inc/3rdparty/site_config/standard/globalissues.org.txt
index 95d4becf..ee50f68f 100644..100755
--- a/inc/3rdparty/site_config/standard/globalissues.org.txt
+++ b/inc/3rdparty/site_config/standard/globalissues.org.txt
@@ -1,15 +1,15 @@
1body: //div[@id='content'] 1body: //div[@id='content']
2 2
3strip: //p[@class='top'] 3strip: //p[@class='top']
4strip: //h2[.='Where next?'] 4strip: //h2[.='Where next?']
5strip_id_or_class: where-next 5strip_id_or_class: where-next
6strip_id_or_class: social-bookmarks 6strip_id_or_class: social-bookmarks
7strip_id_or_class: link-to-here 7strip_id_or_class: link-to-here
8strip_id_or_class: options-heading 8strip_id_or_class: options-heading
9strip_id_or_class: page-options-content 9strip_id_or_class: page-options-content
10strip_id_or_class: page-info-bottom 10strip_id_or_class: page-info-bottom
11 11
12tidy: no 12tidy: no
13prune: no 13prune: no
14 14
15test_url: http://www.globalissues.org/article/39/a-primer-on-neoliberalism \ No newline at end of file 15test_url: http://www.globalissues.org/article/39/a-primer-on-neoliberalism \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/globoesporte.globo.com.txt b/inc/3rdparty/site_config/standard/globoesporte.globo.com.txt
new file mode 100755
index 00000000..fd8e70ff
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/globoesporte.globo.com.txt
@@ -0,0 +1,25 @@
1title: //h1[@class="entry-title"]
2
3body: //div[@class='materia-titulo']/h2 | //*[@id="materia-letra"]
4
5date: //abbr[@class="published"]
6date: //abbr[@class="updated"]
7
8author: //*[@class="author"]/strong
9
10strip: //div[contains(@class,'foto')]/strong
11strip: //div[contains(@class,'frase-materia')]/div[@class='autor']
12strip: //div[contains(@class,'saibamais')]
13strip: //*[contains(text(),'Clique aqui e veja mais')]/ancestor::p
14strip: //ul[@class="toolbar"]
15
16# quotes
17wrap_in(blockquote): //div[@id='materia-letra']//div[contains(@class,'frase-materia')]/div[@class='frase']
18
19prune: no
20
21replace_string([Clique aqui e veja mais vídeos do Fluminense]): []
22
23test_url: http://globoesporte.globo.com/atletismo/noticia/2013/08/michael-johnson-diz-que-bolt-e-melhor-da-historia-nao-ha-duvidas.html
24test_url: http://globoesporte.globo.com/futebol/futebol-internacional/futebol-espanhol/noticia/2013/08/barca-atropela-levante-e-neymar-passa-em-branco-em-estreia-oficial.html
25test_url: http://globoesporte.globo.com/futebol/times/fluminense/noticia/2013/08/poupado-no-sabado-felipe-se-diz-pronto-para-ser-titular-contra-o-goias.html
diff --git a/inc/3rdparty/site_config/standard/gloswielkopolski.pl.txt b/inc/3rdparty/site_config/standard/gloswielkopolski.pl.txt
new file mode 100755
index 00000000..16487955
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/gloswielkopolski.pl.txt
@@ -0,0 +1,8 @@
1title: //article[@id='material']/header/h1
2author: //article[@id='material']/header/div[2]/p
3date: //article[@id='material']/header/p/time[1]
4body: //section[@id='tresc']
5next_page_link: .//section[@id='tresc']/div[@class='stronicowanie']/a[@rel='next']
6strip://div[@class='podobneSonda']
7
8test_url: http://www.gloswielkopolski.pl/artykul/803547,abc-telemarketingu-praca-ktora-zwalnia-z-myslenia,id,t.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/goal.com.txt b/inc/3rdparty/site_config/standard/goal.com.txt
index 075c4d2b..e25e9a00 100644..100755
--- a/inc/3rdparty/site_config/standard/goal.com.txt
+++ b/inc/3rdparty/site_config/standard/goal.com.txt
@@ -1,16 +1,16 @@
1title: //div[@id='article_headline']//h1 1title: //div[@id='article_headline']//h1
2date: //div[contains(@class, 'articleDate')]//h4 2date: //div[contains(@class, 'articleDate')]//h4
3body: //div[@id='article_headline']/h2 | //div[@id='large_article_image' or @id='article_content'] 3body: //div[@id='article_headline']/h2 | //div[@id='large_article_image' or @id='article_content']
4 4
5strip_id_or_class: relatedLinksBox 5strip_id_or_class: relatedLinksBox
6strip_id_or_class: betting-widget 6strip_id_or_class: betting-widget
7strip_image_src: install_flash.gif 7strip_image_src: install_flash.gif
8 8
9strip: //table[contains(@style, 'float: right; width: 285px;')] 9strip: //table[contains(@style, 'float: right; width: 285px;')]
10strip: //div[@class='caption'] 10strip: //div[@class='caption']
11 11
12tidy: no 12tidy: no
13prune: no 13prune: no
14 14
15test_url: http://www.goal.com/en-gb/news/3284/euro-2012/2012/05/31/3139032/video-profile-back-to-his-very-best-for-bayern-frances-flair-and- 15test_url: http://www.goal.com/en-gb/news/3284/euro-2012/2012/05/31/3139032/video-profile-back-to-his-very-best-for-bayern-frances-flair-and-
16test_url: http://www.goal.com/en-gb/news/3284/euro-2012/2012/05/31/3139869/lampard-injury-a-bitter-blow-for-england-and-sorry-way-to# \ No newline at end of file 16test_url: http://www.goal.com/en-gb/news/3284/euro-2012/2012/05/31/3139869/lampard-injury-a-bitter-blow-for-england-and-sorry-way-to# \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/golem.de.txt b/inc/3rdparty/site_config/standard/golem.de.txt
index 6c5d1c4f..6afdebe8 100644..100755
--- a/inc/3rdparty/site_config/standard/golem.de.txt
+++ b/inc/3rdparty/site_config/standard/golem.de.txt
@@ -1,25 +1,25 @@
1# Jens Kohl, jens.kohl@... 1# Jens Kohl, jens.kohl@...
2# - Added publication date 2# - Added publication date
3# - Stripped pagination block 3# - Stripped pagination block
4# - Added single page link 4# - Added single page link
5# - Added XPath queries for the printer-friendly version 5# - Added XPath queries for the printer-friendly version
6 6
7title: //h1 7title: //h1
8body: //div[@class='formatted'] 8body: //div[@class='formatted']
9prune: no 9prune: no
10 10
11date: substring-after(//li[2][@class="text1"], 'Datum:') 11date: substring-after(//li[2][@class="text1"], 'Datum:')
12strip: //ol[@class="list-chapters"] 12strip: //ol[@class="list-chapters"]
13strip_comments: yes 13strip_comments: yes
14 14
15# next: commands for printer friendly pages 15# next: commands for printer friendly pages
16single_page_link: //a[contains(@href, 'print.php?a=')]/@href 16single_page_link: //a[contains(@href, 'print.php?a=')]/@href
17title: //body/h3 17title: //body/h3
18strip_image_src: staticrl/images/logo.jpg 18strip_image_src: staticrl/images/logo.jpg
19strip_image_src: http://cpx.golem.de/cpx.php?class=7 19strip_image_src: http://cpx.golem.de/cpx.php?class=7
20strip: //body/h3 20strip: //body/h3
21strip: //body/b[1] 21strip: //body/b[1]
22strip: //body/b[2] 22strip: //body/b[2]
23strip: //body/b[3] 23strip: //body/b[3]
24strip: //div[1] 24strip: //div[1]
25test_url: http://www.golem.de/1112/88696.html \ No newline at end of file 25test_url: http://www.golem.de/1112/88696.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/good.is.txt b/inc/3rdparty/site_config/standard/good.is.txt
index 5cf67011..94159fbf 100644..100755
--- a/inc/3rdparty/site_config/standard/good.is.txt
+++ b/inc/3rdparty/site_config/standard/good.is.txt
@@ -1,4 +1,4 @@
1title: //div[@class="title"]/div/h1 1title: //div[@class="title"]/div/h1
2body: //div[@class="body"] 2body: //div[@class="body"]
3date: //li[@class="date-time"] 3date: //li[@class="date-time"]
4test_url: http://www.good.is/post/why-amazon-is-the-next-top-tech-company/ \ No newline at end of file 4test_url: http://www.good.is/post/why-amazon-is-the-next-top-tech-company/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/goodfil.ms.txt b/inc/3rdparty/site_config/standard/goodfil.ms.txt
new file mode 100755
index 00000000..f8bbbc6a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/goodfil.ms.txt
@@ -0,0 +1,2 @@
1strip_id_or_class: gutter
2test_url: http://goodfil.ms/blog/posts/2012/08/13/angularjs-and-the-goodfilms-mobile-site-part-1/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gossip-tv.gr.txt b/inc/3rdparty/site_config/standard/gossip-tv.gr.txt
index c2fe4e40..e2d2d0b2 100644..100755
--- a/inc/3rdparty/site_config/standard/gossip-tv.gr.txt
+++ b/inc/3rdparty/site_config/standard/gossip-tv.gr.txt
@@ -1,14 +1,14 @@
1date: //meta[@name='og:article:published_time']/@value 1date: //meta[@name='og:article:published_time']/@value
2 2
3body: //div[@class='itemIntroText' or @class='itemImageBlock' or @class='itemFullText'] 3body: //div[@class='itemIntroText' or @class='itemImageBlock' or @class='itemFullText']
4 4
5strip_id_or_class: itemImageGallery 5strip_id_or_class: itemImageGallery
6 6
7# remove extras at end of post content 7# remove extras at end of post content
8find_string: <div style="margin:5px 0 10px;"> 8find_string: <div style="margin:5px 0 10px;">
9replace_string: </div></body></html><!-- 9replace_string: </div></body></html><!--
10 10
11prune: no 11prune: no
12 12
13test_url: http://www.gossip-tv.gr/story/158902/aggelike-daliane-semera-duskoleuontai-oloi-sta-epaggelmatika-tous 13test_url: http://www.gossip-tv.gr/story/158902/aggelike-daliane-semera-duskoleuontai-oloi-sta-epaggelmatika-tous
14test_url: http://www.gossip-tv.gr/lifestyle/Taste/story/230266/lahtaristo-kai-ygieino-tost-sokolatas \ No newline at end of file 14test_url: http://www.gossip-tv.gr/lifestyle/Taste/story/230266/lahtaristo-kai-ygieino-tost-sokolatas \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/goteborgsfria.se.txt b/inc/3rdparty/site_config/standard/goteborgsfria.se.txt
new file mode 100755
index 00000000..c90aed0b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/goteborgsfria.se.txt
@@ -0,0 +1,7 @@
1body: //div[contains(@class, 'layout__inner')]//div[contains(@class, 'file-image') or contains(@class, 'node__content')]
2author: //article//div[contains(@class, 'field-byline')]
3strip_id_or_class: rekommenderade
4strip_id_or_class: disqus
5strip_id_or_class: annonser
6
7test_url: http://www.goteborgsfria.se/artikel/112079 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gothamist.com.txt b/inc/3rdparty/site_config/standard/gothamist.com.txt
index 5179fc12..36453878 100644..100755
--- a/inc/3rdparty/site_config/standard/gothamist.com.txt
+++ b/inc/3rdparty/site_config/standard/gothamist.com.txt
@@ -1,7 +1,7 @@
1title: //div[@class='entry-header'] 1title: //div[@class='entry-header']
2author: //span[@class='vcard author'] 2author: //span[@class='vcard author']
3date: //abbr[@class='published'] 3date: //abbr[@class='published']
4#move_into(//div[@class='entry-body']): //img[@id='photo_1'] 4#move_into(//div[@class='entry-body']): //img[@id='photo_1']
5body: //div[@class='entry-body'] 5body: //div[@class='entry-body']
6strip: //div[@class='galleryEaseThumbs'] 6strip: //div[@class='galleryEaseThumbs']
7test_url: http://gothamist.com/2012/03/15/fancy_cocktail_lounge_the_randolph.php \ No newline at end of file 7test_url: http://gothamist.com/2012/03/15/fancy_cocktail_lounge_the_randolph.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gotomanager.com.txt b/inc/3rdparty/site_config/standard/gotomanager.com.txt
index 7fb0ee03..f8af7324 100644..100755
--- a/inc/3rdparty/site_config/standard/gotomanager.com.txt
+++ b/inc/3rdparty/site_config/standard/gotomanager.com.txt
@@ -1,21 +1,21 @@
1title: //span[@id="showTitle"] 1title: //span[@id="showTitle"]
2author: //span[@id="showAuthor"] 2author: //span[@id="showAuthor"]
3date: //span[@id="showRefDate"] 3date: //span[@id="showRefDate"]
4 4
5strip: //span[@class="black_bold"] 5strip: //span[@class="black_bold"]
6strip: //div[@id="sectionName"] 6strip: //div[@id="sectionName"]
7strip: //div[@id="storyHeader"] 7strip: //div[@id="storyHeader"]
8 8
9body: //div[@id="newsBodyText"] 9body: //div[@id="newsBodyText"]
10 10
11strip_image_src: "http://www.gotomanager.com/img/mgrm/space.gif" 11strip_image_src: "http://www.gotomanager.com/img/mgrm/space.gif"
12strip_image_src: "http://www.gotomanager.com/images/separator.gif" 12strip_image_src: "http://www.gotomanager.com/images/separator.gif"
13strip_image_src: "http://www.gotomanager.com/images/spaces.gif" 13strip_image_src: "http://www.gotomanager.com/images/spaces.gif"
14 14
15convert_double_br_tags: yes 15convert_double_br_tags: yes
16tidy: yes 16tidy: yes
17 17
18strip: //div[@id="smallLeadImage"] 18strip: //div[@id="smallLeadImage"]
19strip: //div[@id="truehitsSurvey"] 19strip: //div[@id="truehitsSurvey"]
20strip: //table[@id="relatedInfoTable"] 20strip: //table[@id="relatedInfoTable"]
21test_url: http://www.gotomanager.com/news/details.aspx?id=86759 \ No newline at end of file 21test_url: http://www.gotomanager.com/news/details.aspx?id=86759 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gov.ky.txt b/inc/3rdparty/site_config/standard/gov.ky.txt
new file mode 100755
index 00000000..294ece3a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/gov.ky.txt
@@ -0,0 +1,4 @@
1strip: //body//title
2
3test_url: http://www.gov.ky/pls/portal/PORTAL.wwv_media.show?p_id=7593947&p_settingssetid=1&p_settingssiteid=0&p_siteid=2425&p_type=basetext&p_textid=7593948
4test_url: http://www.rcips.ky/pls/portal/wlacomp.wlafeed.show_cignewsfeed_agency?p_sitecode=POL&p_agency=Police \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gp.se.txt b/inc/3rdparty/site_config/standard/gp.se.txt
new file mode 100755
index 00000000..158ae4ed
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/gp.se.txt
@@ -0,0 +1,11 @@
1body: //div[@id='articleContainer']
2author: //div[@id='articleContent']//div[contains(@class, 'byline')]//span[contains(@class, 'name fn')]
3strip_id_or_class: toolbar
4strip_id_or_class: ADad
5strip_id_or_class: articleSerieWrapper
6strip_id_or_class: articleFloatContainer
7strip: //div[contains(@class, 'byline')]//img
8prune: no
9
10test_url: http://www.gp.se/nyheter/bohuslan/1.2045564-styckade-mannen-hade-mordat-hustrun
11test_url: http://www.gp.se/1.16560 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gq.com.txt b/inc/3rdparty/site_config/standard/gq.com.txt
index 233c4a7f..8ad8a14e 100644..100755
--- a/inc/3rdparty/site_config/standard/gq.com.txt
+++ b/inc/3rdparty/site_config/standard/gq.com.txt
@@ -1,9 +1,9 @@
1next_page_link: //div[@class='pagination']//span[@class='paginationNext']/a 1next_page_link: //div[@class='pagination']//span[@class='paginationNext']/a
2strip_id_or_class: utility 2strip_id_or_class: utility
3strip_id_or_class: keywords 3strip_id_or_class: keywords
4strip_id_or_class: pagination 4strip_id_or_class: pagination
5strip_id_or_class: position2_content 5strip_id_or_class: position2_content
6body: //div[@class='article'] 6body: //div[@class='article']
7title: //h1[@class='content-headline'] 7title: //h1[@class='content-headline']
8author: //span[@class='contributor']//a 8author: //span[@class='contributor']//a
9test_url: http://www.gq.com/news-politics/newsmakers/201203/terry-thompson-ohio-zoo-massacre-chris-heath-gq-february-2012 \ No newline at end of file 9test_url: http://www.gq.com/news-politics/newsmakers/201203/terry-thompson-ohio-zoo-massacre-chris-heath-gq-february-2012 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/grantland.com.txt b/inc/3rdparty/site_config/standard/grantland.com.txt
index 3269e086..b8d419f4 100644..100755
--- a/inc/3rdparty/site_config/standard/grantland.com.txt
+++ b/inc/3rdparty/site_config/standard/grantland.com.txt
@@ -1,20 +1,20 @@
1# this is fragile with footnotes -- leave it for now 1# this is fragile with footnotes -- leave it for now
2 2
3#tidy: no 3#tidy: no
4#prune: no 4#prune: no
5#move_into(//article): //aside[@id='footnotes'] 5#move_into(//article): //aside[@id='footnotes']
6author: //cite/a 6author: //cite/a
7date: //time 7date: //time
8 8
9strip: //a[text()='Grantland'] 9strip: //a[text()='Grantland']
10strip_id_or_class: ad-wrapper 10strip_id_or_class: ad-wrapper
11strip_id_or_class: fb-connect-link 11strip_id_or_class: fb-connect-link
12strip_id_or_class: fb-status 12strip_id_or_class: fb-status
13strip: //li[@class='print'] 13strip: //li[@class='print']
14strip: //cite 14strip: //cite
15strip: //a[contains(text(), '[+]')] 15strip: //a[contains(text(), '[+]')]
16strip: //a[@id='jump-nav-link'] 16strip: //a[@id='jump-nav-link']
17strip: //h1[text()='Share This'] 17strip: //h1[text()='Share This']
18strip: //h1[text()='Top Stories'] 18strip: //h1[text()='Top Stories']
19strip: //div[@id="update-text-size"] 19strip: //div[@id="update-text-size"]
20test_url: http://www.grantland.com/story/_/id/8421241/examining-new-albums-rock-veterans-no-doubt-green-day \ No newline at end of file 20test_url: http://www.grantland.com/story/_/id/8421241/examining-new-albums-rock-veterans-no-doubt-green-day \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/greatergreaterwashington.org.txt b/inc/3rdparty/site_config/standard/greatergreaterwashington.org.txt
index a5258030..31a41075 100644..100755
--- a/inc/3rdparty/site_config/standard/greatergreaterwashington.org.txt
+++ b/inc/3rdparty/site_config/standard/greatergreaterwashington.org.txt
@@ -1,11 +1,11 @@
1title: //div[@class="blogpost"]/h2 1title: //div[@class="blogpost"]/h2
2author: //div[@class="blogpost"]/p[@class="byline"]/a 2author: //div[@class="blogpost"]/p[@class="byline"]/a
3date: //div[@class="blogpost"]/p[@class="byline"]/span[@class="time_posted"] 3date: //div[@class="blogpost"]/p[@class="byline"]/span[@class="time_posted"]
4body: //div[@class="blogpost"] 4body: //div[@class="blogpost"]
5strip_id_or_class: flag 5strip_id_or_class: flag
6strip_id_or_class: byline 6strip_id_or_class: byline
7strip_id_or_class: post_footer 7strip_id_or_class: post_footer
8strip_id_or_class: related_posts 8strip_id_or_class: related_posts
9strip_id_or_class: post_author_bios 9strip_id_or_class: post_author_bios
10strip: //h2 10strip: //h2
11test_url: http://greatergreaterwashington.org/post/12457/ask-ggw-what-will-happen-to-the-1000-series-railcars/ \ No newline at end of file 11test_url: http://greatergreaterwashington.org/post/12457/ask-ggw-what-will-happen-to-the-1000-series-railcars/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/groups.drupal.org.txt b/inc/3rdparty/site_config/standard/groups.drupal.org.txt
index 7e15a5c1..0fe30ef5 100644..100755
--- a/inc/3rdparty/site_config/standard/groups.drupal.org.txt
+++ b/inc/3rdparty/site_config/standard/groups.drupal.org.txt
@@ -1,5 +1,5 @@
1title://h1 1title://h1
2author://span[@class="submitted"]/a 2author://span[@class="submitted"]/a
3date:substring-after(//span[@class="submitted"],'on ') 3date:substring-after(//span[@class="submitted"],'on ')
4body://div[@class="content"] 4body://div[@class="content"]
5test_url: http://groups.drupal.org/node/36816 \ No newline at end of file 5test_url: http://groups.drupal.org/node/36816 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gulfnews.com.txt b/inc/3rdparty/site_config/standard/gulfnews.com.txt
index e69044b3..97b620de 100644..100755
--- a/inc/3rdparty/site_config/standard/gulfnews.com.txt
+++ b/inc/3rdparty/site_config/standard/gulfnews.com.txt
@@ -1,5 +1,5 @@
1body: //div[@class='wrapper_half']//ul[@class='details'] | //div[@class='wrapper_half']//p[@class='synopsis'] | //div[@class='wrapper_half']//div[@class='image'] | //div[@class='wrapper_half']//div[@class='article'] 1body: //div[@class='wrapper_half']//ul[@class='details'] | //div[@class='wrapper_half']//p[@class='synopsis'] | //div[@class='wrapper_half']//div[@class='image'] | //div[@class='wrapper_half']//div[@class='article']
2strip: //div[@class='wrapper_half']//ul[@class='details']/li[position()>1] 2strip: //div[@class='wrapper_half']//ul[@class='details']/li[position()>1]
3prune: no 3prune: no
4tidy: no 4tidy: no
5test_url: http://gulfnews.com/news/gulf/uae/government/abu-dhabi-centre-offers-useful-information-1.811084 \ No newline at end of file 5test_url: http://gulfnews.com/news/gulf/uae/government/abu-dhabi-centre-offers-useful-information-1.811084 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/guokr.com.txt b/inc/3rdparty/site_config/standard/guokr.com.txt
index 00255eb8..f8327bea 100644..100755
--- a/inc/3rdparty/site_config/standard/guokr.com.txt
+++ b/inc/3rdparty/site_config/standard/guokr.com.txt
@@ -1,22 +1,22 @@
1# To administrator: 1# To administrator:
2# Please change the hostname to "www.guokr.com/article/*" 2# Please change the hostname to "www.guokr.com/article/*"
3# Not working for "www.guokr.com/post/" pages configured by carlosliu913@gmail.com 3# Not working for "www.guokr.com/post/" pages configured by carlosliu913@gmail.com
4 4
5# This filter is tested on: 5# This filter is tested on:
6# http://www.guokr.com/article/274325/ 6# http://www.guokr.com/article/274325/
7# http://www.guokr.com/article/275013/ 7# http://www.guokr.com/article/275013/
8 8
9title://h1 9title://h1
10author://div[contains(@class, 'content-th-info')]/a 10author://div[contains(@class, 'content-th-info')]/a
11date://div[contains(@class, 'content-th-info')]/span 11date://div[contains(@class, 'content-th-info')]/span
12body://div[contains(@class, 'Content')] 12body://div[contains(@class, 'Content')]
13 13
14strip://div[contains(@class, 'bottom-i')] 14strip://div[contains(@class, 'bottom-i')]
15strip://div[contains(@class, 'copyright')] 15strip://div[contains(@class, 'copyright')]
16strip://div[contains(@class, 'fr')] 16strip://div[contains(@class, 'fr')]
17strip://div[contains(@class, 'content-th-info')] 17strip://div[contains(@class, 'content-th-info')]
18strip://h1[contains(@id, 'articleTitle')] 18strip://h1[contains(@id, 'articleTitle')]
19strip://div[contains(@class, 'side')] 19strip://div[contains(@class, 'side')]
20strip://div[contains(@class, 'top-wp')] 20strip://div[contains(@class, 'top-wp')]
21test_url: http://www.guokr.com/article/275013/ 21test_url: http://www.guokr.com/article/275013/
22test_url: http://www.guokr.com/article/338387/ \ No newline at end of file 22test_url: http://www.guokr.com/article/338387/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/haberler.com.txt b/inc/3rdparty/site_config/standard/haberler.com.txt
index bc1ce689..1bb2bc7d 100644..100755
--- a/inc/3rdparty/site_config/standard/haberler.com.txt
+++ b/inc/3rdparty/site_config/standard/haberler.com.txt
@@ -1,5 +1,5 @@
1title: //div[@id="habermetni"]/h1[@id="haber_baslik"] 1title: //div[@id="habermetni"]/h1[@id="haber_baslik"]
2body: //div[@id="habermetni"]/p 2body: //div[@id="habermetni"]/p
3strip: //img[@class='newsDetailLeft'] 3strip: //img[@class='newsDetailLeft']
4strip_image_src: /haber-resimleri/ 4strip_image_src: /haber-resimleri/
5test_url: http://www.haberler.com/emniyete-atacakti-elinde-patladi-3198733-haberi/ \ No newline at end of file 5test_url: http://www.haberler.com/emniyete-atacakti-elinde-patladi-3198733-haberi/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/habrahabr.ru.txt b/inc/3rdparty/site_config/standard/habrahabr.ru.txt
new file mode 100755
index 00000000..67538359
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/habrahabr.ru.txt
@@ -0,0 +1,21 @@
1title: //span[@class="post_title"]
2author: //div[@class="author"]
3date: //div[@class="published"]
4
5body: //div[@class='content html_format'] | //div[@id='comments']
6
7strip: //a[@class="link_to_comment"]
8strip: //div[@class="show_tree"]
9strip: //a[@class="to_parent"]
10
11
12replace_string(class="reply_comments"): style="padding-left: 20px"
13replace_string(class="voting "): style="float: right"
14replace_string(src="//habrastorage.org/getpro/habr/avatars/): style="width:24px; height:24px;" class="123" src="//habrastorage.org/getpro/habr/avatars/
15replace_string(class="info "): style="padding-top:5px;font-size:0.85em;line-height:24px;"
16
17
18prune: no
19tidy: no
20
21test_url: http://habrahabr.ru/post/229883/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hackmake.org.txt b/inc/3rdparty/site_config/standard/hackmake.org.txt
new file mode 100755
index 00000000..98140117
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/hackmake.org.txt
@@ -0,0 +1,7 @@
1date: //article//time[@pubdate]
2body: //article/div[@id="post-wide"]
3title: //article/header/h2
4strip: //div[@id="comment"]
5strip: //footer
6author: substring-after(//footer/p[@class='byline'] , 'By')
7test_url: http://hackmake.org/2012/12/21/mindfulness-of-concentration \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/halo.bungie.org.txt b/inc/3rdparty/site_config/standard/halo.bungie.org.txt
index 7989d09f..1802efea 100644..100755
--- a/inc/3rdparty/site_config/standard/halo.bungie.org.txt
+++ b/inc/3rdparty/site_config/standard/halo.bungie.org.txt
@@ -1,5 +1,5 @@
1title:substring-before(id("maincontent")/table, 'Posted') 1title:substring-before(id("maincontent")/table, 'Posted')
2body:id("maincontent")/p 2body:id("maincontent")/p
3# eventually convert linebreaks better 3# eventually convert linebreaks better
4 4
5test_url: http://halo.bungie.org/fanfic/?story=Delahunt0312112316071.html \ No newline at end of file 5test_url: http://halo.bungie.org/fanfic/?story=Delahunt0312112316071.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hammers.theoffside.com.txt b/inc/3rdparty/site_config/standard/hammers.theoffside.com.txt
index 747f90a1..33f7e726 100644..100755
--- a/inc/3rdparty/site_config/standard/hammers.theoffside.com.txt
+++ b/inc/3rdparty/site_config/standard/hammers.theoffside.com.txt
@@ -1,7 +1,7 @@
1# Remove right column 1# Remove right column
2strip: //*[(@class = 'right_col')] 2strip: //*[(@class = 'right_col')]
3 3
4# Remove comments etc. 4# Remove comments etc.
5strip: //*[(@class = 'category')] 5strip: //*[(@class = 'category')]
6strip: /html/body/div[1][@class='absolute_content_high']/div[1][@class='wrapper']/div[1][@class='main_col']/div[@class='main_content']/h3 6strip: /html/body/div[1][@class='absolute_content_high']/div[1][@class='wrapper']/div[1][@class='main_col']/div[@class='main_content']/h3
7test_url: http://hammers.theoffside.com/carling-cup/a-funny-thing-happened-on-the-way-to-4-nil.html \ No newline at end of file 7test_url: http://hammers.theoffside.com/carling-cup/a-funny-thing-happened-on-the-way-to-4-nil.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/handelsblatt.com.txt b/inc/3rdparty/site_config/standard/handelsblatt.com.txt
new file mode 100755
index 00000000..7d067aa6
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/handelsblatt.com.txt
@@ -0,0 +1,31 @@
1#Single Page
2single_page_link: //li[contains(@class,"hcf-print")]/a
3
4# Title hcf-headline
5title: //span[@class='hcf-headline']
6
7# Authors
8author: //div[@class="hcf-author"]/a/text()
9author: substring-after(//div[@class='hcf-author'], 'von ')
10
11# Date
12date: //div[@class='hcf-article-date']
13
14# Body
15body: //div[@class='article']
16
17# General removals
18strip: //div[contains(@class,"hcf-smartbox")]
19strip: //div[contains(@class,"hcf-stopper")]
20strip: //div[contains(@class,"hcf-img-controls")]
21strip: //span[@class='hcf-location-mark']
22strip: //span[@class='hcf-copyright']
23strip: //div[@class='hcf-copyright']
24strip: //div[@class='hcf-origin']
25
26
27
28
29# Fix picture captions
30wrap_in(small): //div[@class="hcf-caption"]
31test_url: http://www.handelsblatt.com/meinung/gastbeitraege/gastkommentar-zum-emissionshandel-kurskorrekturen-fuehren-zum-kentern/8044326.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hanselman.com.txt b/inc/3rdparty/site_config/standard/hanselman.com.txt
index d3ffeab1..1dca632f 100644..100755
--- a/inc/3rdparty/site_config/standard/hanselman.com.txt
+++ b/inc/3rdparty/site_config/standard/hanselman.com.txt
@@ -1,4 +1,4 @@
1date: //span[@class="item-date"] 1date: //span[@class="item-date"]
2body: //div[@class="item-content"] 2body: //div[@class="item-content"]
3strip_comments: no 3strip_comments: no
4test_url: http://www.hanselman.com/blog/BrainBytesBackBunsTheProgrammersPriorities.aspx \ No newline at end of file 4test_url: http://www.hanselman.com/blog/BrainBytesBackBunsTheProgrammersPriorities.aspx \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hardware.fr.txt b/inc/3rdparty/site_config/standard/hardware.fr.txt
index 318885c8..e4f1f6bc 100644..100755
--- a/inc/3rdparty/site_config/standard/hardware.fr.txt
+++ b/inc/3rdparty/site_config/standard/hardware.fr.txt
@@ -1,6 +1,6 @@
1title: //h1 1title: //h1
2author: //a[@class='a_aut'] 2author: //a[@class='a_aut']
3body: //div[@class='content_dossier'] 3body: //div[@class='content_dossier']
4strip: //div[@id='pagination'] 4strip: //div[@id='pagination']
5next_page_link: //div[@class='sommaire_colonne']//span[@class='page_actuelle']/following::span[@class='autres_page']//a/@href 5next_page_link: //div[@class='sommaire_colonne']//span[@class='page_actuelle']/following::span[@class='autres_page']//a/@href
6test_url: http://www.hardware.fr/articles/850-1/pci-express-3-0-impact-performances.html \ No newline at end of file 6test_url: http://www.hardware.fr/articles/850-1/pci-express-3-0-impact-performances.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hardware.no.txt b/inc/3rdparty/site_config/standard/hardware.no.txt
new file mode 100755
index 00000000..cbbcf84e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/hardware.no.txt
@@ -0,0 +1,16 @@
1title: //h1[@class='headline']
2title: //h2[@itemprop='alternativeHeadline']
3title: //h1[@itemprop='headline']
4author: //span[@itemprop='name']
5date: //time[@itemprop='datePublished']
6body: //div[@itemprop='reviewBody']
7
8wrap_in(blockquote): //div[@class='factBox']
9
10next_page_link: //a[@rel='next']
11
12strip_id_or_class: 'product-box'
13strip: //a[@rel='next']
14strip: //a[text()='Del på Facebook']
15strip: //a[text()='Del på Twitter']
16test_url: http://www.hardware.no/artikler/asus-vg248qe/132792 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hbr.org.txt b/inc/3rdparty/site_config/standard/hbr.org.txt
index fd6145e7..c2f292e1 100644..100755
--- a/inc/3rdparty/site_config/standard/hbr.org.txt
+++ b/inc/3rdparty/site_config/standard/hbr.org.txt
@@ -1,6 +1,7 @@
1title: //div[@id='article-title'] 1title: //div[@id='article-title']
2author: //div[@id='articleAuthors'] 2author: //div[@id='articleAuthors']
3body: //div[@id='article'] 3body: //div[@id='article']
4strip: //div[@class='module wide'] 4strip: //div[@class='module wide']
5next_page_link: //a[@title='Next Page'] 5#single_page_link: //a[@class='social-print']
6test_url: http://hbr.org/2012/04/the-real-leadership-lessons-of-steve-jobs/ar/ \ No newline at end of file 6test_url: http://hbr.org/2012/04/the-real-leadership-lessons-of-steve-jobs/ar/
7test_url: http://hbr.org/2013/03/big-bang-disruption/ar/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/headrush.typepad.com.txt b/inc/3rdparty/site_config/standard/headrush.typepad.com.txt
new file mode 100755
index 00000000..a3146771
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/headrush.typepad.com.txt
@@ -0,0 +1,14 @@
1title://div[@class='content']/h3[1]
2body://div[@class='content']
3
4# Article nav
5strip://div[@class='content']/p[1]
6
7# Comments and trackbacks
8strip://h2/following-sibling::p
9strip://h2
10
11# Posted on
12strip://b/p
13strip://div[@class='content']/p[@class='posted']
14test_url: http://headrush.typepad.com/creating_passionate_users/2005/05/the_case_for_ea.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/heise-online.mobi.txt b/inc/3rdparty/site_config/standard/heise-online.mobi.txt
index 1da82ac7..daff6143 100644..100755
--- a/inc/3rdparty/site_config/standard/heise-online.mobi.txt
+++ b/inc/3rdparty/site_config/standard/heise-online.mobi.txt
@@ -1,3 +1,3 @@
1body: //div[@id='content']/div 1body: //div[@id='content']/div
2date: //p[@class='author_date']/span[@class='date'] 2date: //p[@class='author_date']/span[@class='date']
3test_url: http://heise-online.mobi/newsticker/meldung/Amazons-Appstore-in-der-Kritik-Ein-Desaster-fuer-Kunden-und-Entwickler-1273936.html \ No newline at end of file 3test_url: http://heise-online.mobi/newsticker/meldung/Amazons-Appstore-in-der-Kritik-Ein-Desaster-fuer-Kunden-und-Entwickler-1273936.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/heise.de.txt b/inc/3rdparty/site_config/standard/heise.de.txt
index 5f19d3f8..c51af561 100644..100755
--- a/inc/3rdparty/site_config/standard/heise.de.txt
+++ b/inc/3rdparty/site_config/standard/heise.de.txt
@@ -1,7 +1,7 @@
1single_page_link: //p[@class='news_option']/a 1single_page_link: //p[@class='news_option']/a
2 2
3date: //p[@class='news_datum'] 3date: //p[@class='news_datum']
4title: //h1 4title: //h1
5body: //div[@class='meldung_wrapper'] 5body: //div[@class='meldung_wrapper']
6 6
7test_url: http://www.heise.de/newsticker/meldung/Europa-soll-Grundrechteschutz-im-Netz-staerken-1392664.html \ No newline at end of file 7test_url: http://www.heise.de/newsticker/meldung/Europa-soll-Grundrechteschutz-im-Netz-staerken-1392664.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hemmings.com.txt b/inc/3rdparty/site_config/standard/hemmings.com.txt
new file mode 100755
index 00000000..a02b4a62
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/hemmings.com.txt
@@ -0,0 +1,9 @@
1title: //h2
2body: //div[@id='leftdetail']
3single_page_link: //a[contains(@href, 'printable=1')]
4strip: //a[contains(., 'Full Version')]
5
6prune: no
7
8test_url: http://www.hemmings.com/classifieds/dealer/ferrari/330gtc/1601235.html
9test_url: http://www.hemmings.com/rss/keyword.xml?adtype=carsforsale&make=ferrari \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/heroturko.me.txt b/inc/3rdparty/site_config/standard/heroturko.me.txt
new file mode 100755
index 00000000..07b6adf1
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/heroturko.me.txt
@@ -0,0 +1,6 @@
1title: //div[contains(@class, 'title')]//h1
2body: //div[contains(@class, 'story')]
3
4prune: no
5
6test_url: http://www.heroturko.me/5223034-ds-catia-p3-v5-6r2014-gasp0-x86x64-multilanguage-english-docs.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hespress.com.txt b/inc/3rdparty/site_config/standard/hespress.com.txt
index d866f629..4ed0b8b5 100644..100755
--- a/inc/3rdparty/site_config/standard/hespress.com.txt
+++ b/inc/3rdparty/site_config/standard/hespress.com.txt
@@ -1,7 +1,7 @@
1body: //div[@id='article_holder']//div[@class='image'] | //div[@id='article_body'] 1body: //div[@id='article_holder']//div[@class='image'] | //div[@id='article_body']
2 2
3prune: no 3prune: no
4tidy: no 4tidy: no
5 5
6test_url: http://hespress.com/videos/73684.html 6test_url: http://hespress.com/videos/73684.html
7test_url: http://hespress.com/permalink/73678.html \ No newline at end of file 7test_url: http://hespress.com/permalink/73678.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hiamag.com.txt b/inc/3rdparty/site_config/standard/hiamag.com.txt
new file mode 100755
index 00000000..3c7ba5ac
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/hiamag.com.txt
@@ -0,0 +1,3 @@
1body: (//div[contains(@class, 'gallery-slides')]//img)[1] | //div[contains(@class, 'node_body_inner')]
2
3test_url: http://www.hiamag.com/rss.xml \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/highscalability.com.txt b/inc/3rdparty/site_config/standard/highscalability.com.txt
index fd50b6ad..5a808fa4 100644..100755
--- a/inc/3rdparty/site_config/standard/highscalability.com.txt
+++ b/inc/3rdparty/site_config/standard/highscalability.com.txt
@@ -1,3 +1,3 @@
1body: //div[@class='journal-entry-text'] 1body: //div[@class='journal-entry-text']
2 2
3test_url: http://highscalability.com/blog/2011/3/14/6-lessons-from-dropbox-one-million-files-saved-every-15-minu.html \ No newline at end of file 3test_url: http://highscalability.com/blog/2011/3/14/6-lessons-from-dropbox-one-million-files-saved-every-15-minu.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hiperpop.com.txt b/inc/3rdparty/site_config/standard/hiperpop.com.txt
index b5eb062e..b5eb062e 100644..100755
--- a/inc/3rdparty/site_config/standard/hiperpop.com.txt
+++ b/inc/3rdparty/site_config/standard/hiperpop.com.txt
diff --git a/inc/3rdparty/site_config/standard/hiphopleeft.nl.txt b/inc/3rdparty/site_config/standard/hiphopleeft.nl.txt
index c57c1aa9..d869a866 100644..100755
--- a/inc/3rdparty/site_config/standard/hiphopleeft.nl.txt
+++ b/inc/3rdparty/site_config/standard/hiphopleeft.nl.txt
@@ -1,4 +1,4 @@
1body: //div[@class = 'pd'] 1body: //div[@class = 'pd']
2strip: //div[@id = 'overzicht-albumrecensies'] 2strip: //div[@id = 'overzicht-albumrecensies']
3strip: //div[@id = 'jc'] 3strip: //div[@id = 'jc']
4test_url: http://hiphopleeft.nl/index.php?option=com_content&view=article&id=2767:mark-ronson-record-collection&catid=66:m&Itemid=142 \ No newline at end of file 4test_url: http://hiphopleeft.nl/index.php?option=com_content&view=article&id=2767:mark-ronson-record-collection&catid=66:m&Itemid=142 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/historytoday.com.txt b/inc/3rdparty/site_config/standard/historytoday.com.txt
index dc687f3f..78fb60a6 100644..100755
--- a/inc/3rdparty/site_config/standard/historytoday.com.txt
+++ b/inc/3rdparty/site_config/standard/historytoday.com.txt
@@ -1,10 +1,10 @@
1body://div[@id = 'content'] 1body://div[@id = 'content']
2author://span[@class = 'authors'] 2author://span[@class = 'authors']
3author://span[@class = 'ht-vtag'][1] 3author://span[@class = 'ht-vtag'][1]
4date:substring-before(//meta[@name = 'dc.date']/@content,'T') 4date:substring-before(//meta[@name = 'dc.date']/@content,'T')
5strip://div[contains(@class, 'region-ubercontent')] 5strip://div[contains(@class, 'region-ubercontent')]
6strip://h1 6strip://h1
7strip://div[@id = 'ht-author'] 7strip://div[@id = 'ht-author']
8strip://ul[@class = 'links inline'] 8strip://ul[@class = 'links inline']
9strip://div[@id = 'ht-tools'] 9strip://div[@id = 'ht-tools']
10test_url: http://www.historytoday.com/carol-dyhouse/skin-deep-fall-fur \ No newline at end of file 10test_url: http://www.historytoday.com/carol-dyhouse/skin-deep-fall-fur \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hmercer.com.txt b/inc/3rdparty/site_config/standard/hmercer.com.txt
index eeee1594..2da13a8e 100644..100755
--- a/inc/3rdparty/site_config/standard/hmercer.com.txt
+++ b/inc/3rdparty/site_config/standard/hmercer.com.txt
@@ -1,5 +1,5 @@
1title: //*[@class='ptitle'] 1title: //*[@class='ptitle']
2date: //span[@class='date'] 2date: //span[@class='date']
3body: //div[@class='body'] 3body: //div[@class='body']
4prune: no 4prune: no
5test_url: http://hmercer.com/2011/07/why-i-switched-to-jekyll/ \ No newline at end of file 5test_url: http://hmercer.com/2011/07/why-i-switched-to-jekyll/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hollywoodlife.com.txt b/inc/3rdparty/site_config/standard/hollywoodlife.com.txt
new file mode 100755
index 00000000..975ffa26
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/hollywoodlife.com.txt
@@ -0,0 +1,22 @@
1date: //meta[@name='sailthru.date']/@content
2body: //article[contains(@class, 'entry-content')]
3
4strip_image_src: subscribe.png
5
6strip_id_or_class: wpcom-iframe-form
7strip_id_or_class: gallery-thumbs
8strip_id_or_class: twitter
9strip_id_or_class: fb-link
10strip_id_or_class: pinterest
11
12strip: //div[@class='data']
13strip: //iframe[contains(@name, 'wpcom')]
14
15find_string: <a href="http://www.youtube.com/subscription_center?add_user_id=2rJLq19N0dGrxfib80M
16replace_string: </p></div></body></html><!--
17
18find_string: <h3>More
19replace_string: </div></body></html><!--
20
21test_url: http://hollywoodlife.com/2013/10/04/miriam-carey-dead-capitol-hill-car-chase-shooting-postpartum-depression/
22test_url: http://hollywoodlife.com/feed/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hometheaterreview.com.txt b/inc/3rdparty/site_config/standard/hometheaterreview.com.txt
index d43e6448..8ed26ff5 100644..100755
--- a/inc/3rdparty/site_config/standard/hometheaterreview.com.txt
+++ b/inc/3rdparty/site_config/standard/hometheaterreview.com.txt
@@ -1,4 +1,4 @@
1body: //div[@id='entry-body'] 1body: //div[@id='entry-body']
2strip_id_or_class: paginate 2strip_id_or_class: paginate
3strip: //p[contains(., 'Additional Resources')] 3strip: //p[contains(., 'Additional Resources')]
4test_url: http://hometheaterreview.com/dreamvision-starlight-3-three-chip-d-ila-projector-reviewed/ \ No newline at end of file 4test_url: http://hometheaterreview.com/dreamvision-starlight-3-three-chip-d-ila-projector-reviewed/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hosted.ap.org.txt b/inc/3rdparty/site_config/standard/hosted.ap.org.txt
index e19dd526..dfd81937 100644..100755
--- a/inc/3rdparty/site_config/standard/hosted.ap.org.txt
+++ b/inc/3rdparty/site_config/standard/hosted.ap.org.txt
@@ -1,5 +1,5 @@
1body: //table[@class='ap-smallphoto-table'] | //div[@class='body']//*[@class='entry-content'] 1body: //table[@class='ap-smallphoto-table'] | //div[@class='body']//*[@class='entry-content']
2tidy: no 2tidy: no
3strip_image_src: analytics.apnewsregistry 3strip_image_src: analytics.apnewsregistry
4 4
5test_url: http://hosted.ap.org/dynamic/stories/U/US_SPENDING_SHOWDOWN?SITE=FLPET&SECTION=HOME&TEMPLATE=DEFAULT&CTIME=2011-04-06-07-46-50 \ No newline at end of file 5test_url: http://hosted.ap.org/dynamic/stories/U/US_SPENDING_SHOWDOWN?SITE=FLPET&SECTION=HOME&TEMPLATE=DEFAULT&CTIME=2011-04-06-07-46-50 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/howtogeek.com.txt b/inc/3rdparty/site_config/standard/howtogeek.com.txt
new file mode 100755
index 00000000..baa2ed4a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/howtogeek.com.txt
@@ -0,0 +1,11 @@
1body: //div[contains(@class, 'thecontent')]
2
3strip_image_src: loading.gif
4find_string:src="http://cdn.howtogeek.com/public/images/blank.gif"
5replace_string:-
6find_string:data-href=
7replace_string:src=
8
9strip_id_or_class: relatedside
10
11test_url: http://www.howtogeek.com/school/microsoft-excel-formulas-and-functions/lesson1/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hs.fi.txt b/inc/3rdparty/site_config/standard/hs.fi.txt
index 67125fb5..360dc725 100644..100755
--- a/inc/3rdparty/site_config/standard/hs.fi.txt
+++ b/inc/3rdparty/site_config/standard/hs.fi.txt
@@ -1,3 +1,3 @@
1prune: yes 1prune: yes
2tidy: yes 2tidy: yes
3test_url: http://www.hs.fi/kotimaa/Teollisuushallin%20palo%20levitt%C3%A4%C3%A4%20vaarallista%20savua%20Tuusulassa/a1305571582405 \ No newline at end of file 3test_url: http://www.hs.fi/kotimaa/Teollisuushallin%20palo%20levitt%C3%A4%C3%A4%20vaarallista%20savua%20Tuusulassa/a1305571582405 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ht.ly.txt b/inc/3rdparty/site_config/standard/ht.ly.txt
index a8412d2a..46535088 100644..100755
--- a/inc/3rdparty/site_config/standard/ht.ly.txt
+++ b/inc/3rdparty/site_config/standard/ht.ly.txt
@@ -1,3 +1,3 @@
1single_page_link: //iframe[@id='hootFrame']/@src 1single_page_link: //iframe[@id='hootFrame']/@src
2 2
3test_url: http://ht.ly/bOiZV \ No newline at end of file 3test_url: http://ht.ly/bOiZV \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/huffingtonpost.com.txt b/inc/3rdparty/site_config/standard/huffingtonpost.com.txt
index d40513b2..d4618c14 100644..100755
--- a/inc/3rdparty/site_config/standard/huffingtonpost.com.txt
+++ b/inc/3rdparty/site_config/standard/huffingtonpost.com.txt
@@ -1,16 +1,21 @@
1title: //meta[@property="og:title"]/@content 1title: //meta[@property="og:title"]/@content
2body: //div[img[starts-with(@id, 'img_caption')]] | //div[@class="big_photo"] | //div[contains(@class, 'entry_body_text')] 2body: //div[img[starts-with(@id, 'img_caption')]] | //div[@class="big_photo"] | //div[contains(@class, 'entry_body_text')]
3date: //meta[@name="publish_date"]/@content 3date: //meta[@name="publish_date"]/@content
4author: //a[@rel="author"] 4author: //a[@rel="author"]
5author: //meta[@name="author"]/@content 5author: //meta[@name="author"]/@content
6prune: no 6
7tidy: no 7prune: no
8strip: //footer 8tidy: no
9strip_id_or_class: ps-slideshow 9
10strip_id_or_class: fs-slideshow 10strip: //footer
11strip: //p[contains(., 'Related on HuffPost:')] 11strip_id_or_class: ps-slideshow
12# end early 12strip_id_or_class: fs-slideshow
13replace_string(<div class="sbm-main): </body></html><div class="not-interested 13strip: //p[contains(., 'Related on HuffPost:')]
14 14strip_id_or_class: contribute-story
15test_url: http://www.huffingtonpost.com/mitch-moxley/tracking-beijings-boom-th_b_1209828.html 15strip_id_or_class: promo_holder
16test_url: http://www.huffingtonpost.com/2012/09/11/president-obama-iphone-throwdown_n_1873826.html \ No newline at end of file 16
17# end early
18replace_string(<div class="sbm-main): </body></html><div class="not-interested
19
20test_url: http://www.huffingtonpost.com/mitch-moxley/tracking-beijings-boom-th_b_1209828.html
21test_url: http://www.huffingtonpost.com/2012/09/11/president-obama-iphone-throwdown_n_1873826.html
diff --git a/inc/3rdparty/site_config/standard/humantransit.org.txt b/inc/3rdparty/site_config/standard/humantransit.org.txt
index ec7d3c06..92d3c678 100644..100755
--- a/inc/3rdparty/site_config/standard/humantransit.org.txt
+++ b/inc/3rdparty/site_config/standard/humantransit.org.txt
@@ -1,5 +1,5 @@
1title: //h3[@class="entry-header"] 1title: //h3[@class="entry-header"]
2date: //h2[@class="date-header"] 2date: //h2[@class="date-header"]
3body: //div[contains(@class, 'entry')] 3body: //div[contains(@class, 'entry')]
4 4
5test_url: http://www.humantransit.org/2012/06/can-network-primers-reduce-grief-about-network-design.html \ No newline at end of file 5test_url: http://www.humantransit.org/2012/06/can-network-primers-reduce-grief-about-network-design.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hurriyet.com.tr.txt b/inc/3rdparty/site_config/standard/hurriyet.com.tr.txt
index ccf09dcc..68fd220a 100644..100755
--- a/inc/3rdparty/site_config/standard/hurriyet.com.tr.txt
+++ b/inc/3rdparty/site_config/standard/hurriyet.com.tr.txt
@@ -1,7 +1,7 @@
1title: //div[@class='HaberDetayTitleHold Title']/h1 1title: //div[@class='HaberDetayTitleHold Title']/h1
2body: //div[@id='YazarDetayText'] 2body: //div[@id='YazarDetayText']
3author: //div[@class='HaberDetayTitleHold Title']/h1 3author: //div[@class='HaberDetayTitleHold Title']/h1
4prune: no 4prune: no
5 5
6test_url: http://www.hurriyet.com.tr/ekonomi/19490260.asp 6test_url: http://www.hurriyet.com.tr/ekonomi/19490260.asp
7test_url: http://www.hurriyet.com.tr/yazarlar/22078439.asp \ No newline at end of file 7test_url: http://www.hurriyet.com.tr/yazarlar/22078439.asp \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hvg.hu.txt b/inc/3rdparty/site_config/standard/hvg.hu.txt
index 06fa98d8..05e7b5f1 100644..100755
--- a/inc/3rdparty/site_config/standard/hvg.hu.txt
+++ b/inc/3rdparty/site_config/standard/hvg.hu.txt
@@ -1,9 +1,9 @@
1title: //div[@id='pg-content']//h1 1title: //div[@id='pg-content']//h1
2body: //div[@id='articleBody0'] 2body: //div[@id='articleBody0']
3replace_string(</table>): </table><br /><br /> 3replace_string(</table>): </table><br /><br />
4 4
5single_page_link: //div[@class="up-header"]/a 5single_page_link: //div[@class="up-header"]/a
6 6
7prune: no 7prune: no
8 8
9test_url: http://hvg.hu/w/20111125_sparta \ No newline at end of file 9test_url: http://hvg.hu/w/20111125_sparta \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hypebeast.com.txt b/inc/3rdparty/site_config/standard/hypebeast.com.txt
index 49b46da5..23e47545 100644..100755
--- a/inc/3rdparty/site_config/standard/hypebeast.com.txt
+++ b/inc/3rdparty/site_config/standard/hypebeast.com.txt
@@ -1,10 +1,10 @@
1body: //div[@id='content']//div[contains(@class, 'wp-image-') or contains(@class, 'entry')][1] 1body: //div[@id='content']//div[contains(@class, 'wp-image-') or contains(@class, 'entry')][1]
2author: //span[@class='author']/a 2author: //span[@class='author']/a
3 3
4strip_id_or_class: disqus 4strip_id_or_class: disqus
5strip_id_or_class: paginator 5strip_id_or_class: paginator
6strip_id_or_class: photo-number 6strip_id_or_class: photo-number
7 7
8prune: no 8prune: no
9 9
10test_url: http://hypebeast.com/2012/11/stussy-2012-fall-winter-november-releases/ \ No newline at end of file 10test_url: http://hypebeast.com/2012/11/stussy-2012-fall-winter-november-releases/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/icannabis.tumblr.com.txt b/inc/3rdparty/site_config/standard/icannabis.tumblr.com.txt
new file mode 100755
index 00000000..3bda753c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/icannabis.tumblr.com.txt
@@ -0,0 +1,9 @@
1tidy:no
2prune:no
3
4body://div[contains(@id,'content')]
5
6strip_id_or_class:meta
7strip_id_or_class:notes
8strip_id_or_class:pagination
9test_url: http://icannabis.tumblr.com/post/28660592471/reviewmswireless3000 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/idealog.co.nz.txt b/inc/3rdparty/site_config/standard/idealog.co.nz.txt
new file mode 100755
index 00000000..ca88f606
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/idealog.co.nz.txt
@@ -0,0 +1,12 @@
1body: //div[@class='content']
2
3strip: //p[@class='dateline']
4strip: //hr
5strip_id_or_class: share
6strip_id_or_class: comments
7strip_id_or_class: tags
8
9title: substring-before(//title,' ::')
10author: substring-before(//p[@class='dateline'],',')
11date: //p[@class='dateline']/time
12test_url: http://www.idealog.co.nz/blog/2012/12/geeks-plane-help-kiwis-take-san-francisco \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/idlewords.com.txt b/inc/3rdparty/site_config/standard/idlewords.com.txt
index e1badef7..f3b33796 100644..100755
--- a/inc/3rdparty/site_config/standard/idlewords.com.txt
+++ b/inc/3rdparty/site_config/standard/idlewords.com.txt
@@ -1,7 +1,7 @@
1title: //a[@class='post_title'] 1title: //a[@class='post_title']
2body: //div[@class='entrybox'] 2body: //div[@class='entrybox']
3strip_id_or_class: post_title 3strip_id_or_class: post_title
4date: //div[@class='entrybox']/b[1] 4date: //div[@class='entrybox']/b[1]
5strip: //div[@class='entrybox']/b[1] 5strip: //div[@class='entrybox']/b[1]
6author: string('Maciej Cegłowski') 6author: string('Maciej Cegłowski')
7test_url: http://idlewords.com/2011/08/why_arabic_is_terrific.htm \ No newline at end of file 7test_url: http://idlewords.com/2011/08/why_arabic_is_terrific.htm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/igeneration.fr.txt b/inc/3rdparty/site_config/standard/igeneration.fr.txt
index d7ec2da1..45dd5f25 100644..100755
--- a/inc/3rdparty/site_config/standard/igeneration.fr.txt
+++ b/inc/3rdparty/site_config/standard/igeneration.fr.txt
@@ -1,5 +1,5 @@
1author: substring-after(substring-after(//span[@class='submitted'],'- '),'- ') 1author: substring-after(substring-after(//span[@class='submitted'],'- '),'- ')
2date: substring-before(//span[@class='submitted'], concat('- ',substring-after(substring-after(//span[@class='submitted'],'- '),'- '))) 2date: substring-before(//span[@class='submitted'], concat('- ',substring-after(substring-after(//span[@class='submitted'],'- '),'- ')))
3body: //div[@class='content clear-block zoneApple'] 3body: //div[@class='content clear-block zoneApple']
4 4
5test_url: http://www.igeneration.fr/iphone/l-iphone-et-l-ipad-chouchous-des-tpe-et-pme-55112 \ No newline at end of file 5test_url: http://www.igeneration.fr/iphone/l-iphone-et-l-ipad-chouchous-des-tpe-et-pme-55112 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ignoredbydinosaurs.com.txt b/inc/3rdparty/site_config/standard/ignoredbydinosaurs.com.txt
index f74178a9..60635301 100644..100755
--- a/inc/3rdparty/site_config/standard/ignoredbydinosaurs.com.txt
+++ b/inc/3rdparty/site_config/standard/ignoredbydinosaurs.com.txt
@@ -1,7 +1,7 @@
1title://h1[@class='page-title'] 1title://h1[@class='page-title']
2body://*[@id='content']//div[contains(@class,'node-content')] 2body://*[@id='content']//div[contains(@class,'node-content')]
3 3
4author://*[@id='content']//div[contains(@class,'node-submitted')]/a 4author://*[@id='content']//div[contains(@class,'node-submitted')]/a
5 5
6date:substring-after(//div[contains(@class,'node-submitted')],' on ') 6date:substring-after(//div[contains(@class,'node-submitted')],' on ')
7test_url: http://ignoredbydinosaurs.com/2011/09/great-lie-lorem-ipsum \ No newline at end of file 7test_url: http://ignoredbydinosaurs.com/2011/09/great-lie-lorem-ipsum \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ilounge.com.txt b/inc/3rdparty/site_config/standard/ilounge.com.txt
index ca1e54a8..9880b51f 100644..100755
--- a/inc/3rdparty/site_config/standard/ilounge.com.txt
+++ b/inc/3rdparty/site_config/standard/ilounge.com.txt
@@ -1,13 +1,13 @@
1# Get proper Title, Author and Date info 1# Get proper Title, Author and Date info
2title: substring-before(//title, '|') 2title: substring-before(//title, '|')
3author: substring-after(//h4/a[@href='http://www.ilounge.com/index.php/ilounge/aboutus/'], 'By') 3author: substring-after(//h4/a[@href='http://www.ilounge.com/index.php/ilounge/aboutus/'], 'By')
4date: //span[@class='instapaper_date'] 4date: //span[@class='instapaper_date']
5 5
6# For Reviews & First Looks, get the intro paragraph and put it in front of the main body. 6# For Reviews & First Looks, get the intro paragraph and put it in front of the main body.
7move_into(//div[@id='instapaper_para1']): //div[@id='instapaper_body'] 7move_into(//div[@id='instapaper_para1']): //div[@id='instapaper_body']
8body: //div[@id='instapaper_para1'] 8body: //div[@id='instapaper_para1']
9strip: //div[@class='reviewinfo'] 9strip: //div[@class='reviewinfo']
10 10
11# We don't use footnotes, so why bother checking for them? 11# We don't use footnotes, so why bother checking for them?
12footnotes: no 12footnotes: no
13test_url: http://www.ilounge.com/index.php/reviews/entry/luxa2-alum-x-for-iphone-4-4s/?utm_source=twitterfeed&utm_medium=twitter \ No newline at end of file 13test_url: http://www.ilounge.com/index.php/reviews/entry/luxa2-alum-x-for-iphone-4-4s/?utm_source=twitterfeed&utm_medium=twitter \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ilyabirman.ru.txt b/inc/3rdparty/site_config/standard/ilyabirman.ru.txt
index da6a60f6..51a7eb9c 100644..100755
--- a/inc/3rdparty/site_config/standard/ilyabirman.ru.txt
+++ b/inc/3rdparty/site_config/standard/ilyabirman.ru.txt
@@ -1,5 +1,5 @@
1title: //div[@class='published visible e2-smart-title']//span 1title: //div[@class='published visible e2-smart-title']//span
2author: //span[@id='e2-blog-title'] 2author: //span[@id='e2-blog-title']
3date: //p[@class='super-h'] 3date: //p[@class='super-h']
4body: //div[@class='text published visible'] 4body: //div[@class='text published visible']
5test_url: http://ilyabirman.ru/meanwhile/2011/11/15/2/ \ No newline at end of file 5test_url: http://ilyabirman.ru/meanwhile/2011/11/15/2/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/inc.com.txt b/inc/3rdparty/site_config/standard/inc.com.txt
index 0589aaae..5410e64e 100644..100755
--- a/inc/3rdparty/site_config/standard/inc.com.txt
+++ b/inc/3rdparty/site_config/standard/inc.com.txt
@@ -1,21 +1,21 @@
1author: substring-after(substring-before(//div[@id='byline'],'|'),'By') 1author: substring-after(substring-before(//div[@id='byline'],'|'),'By')
2author: //div[@class='byline']/a 2author: //div[@class='byline']/a
3date: //span[@class='pubdate'] 3date: //span[@class='pubdate']
4# print friendly page 4# print friendly page
5body: //div[@id='text'] 5body: //div[@id='text']
6# regular page 6# regular page
7body: //div[@id= 'articlecontent'] 7body: //div[@id= 'articlecontent']
8 8
9strip: //div[@id= 'articlecontent']/h1 9strip: //div[@id= 'articlecontent']/h1
10strip: //div[@id='articlecontent']/p[@class='deck'] 10strip: //div[@id='articlecontent']/p[@class='deck']
11strip: //div[@id='articlecontent']/div[@class='byline'] 11strip: //div[@id='articlecontent']/div[@class='byline']
12strip: //div[@id='articlespacer'] 12strip: //div[@id='articlespacer']
13strip: //div[@id='incsharebox'] 13strip: //div[@id='incsharebox']
14strip: //div[@id='articlesidebar'] 14strip: //div[@id='articlesidebar']
15 15
16prune: no 16prune: no
17 17
18single_page_link: //a[contains(@href, 'Printer_Friendly.html')] 18single_page_link: //a[contains(@href, 'Printer_Friendly.html')]
19strip: //a[contains(., 'Dig Deeper')] 19strip: //a[contains(., 'Dig Deeper')]
20test_url: http://www.inc.com/guides/2010/11/seven-tips-for-lobbying-politicians.html 20test_url: http://www.inc.com/guides/2010/11/seven-tips-for-lobbying-politicians.html
21test_url: http://www.inc.com/eric-schurenberg/startups-are-we-geting-irrationally-exuberant.html \ No newline at end of file 21test_url: http://www.inc.com/eric-schurenberg/startups-are-we-geting-irrationally-exuberant.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/independent.co.uk.txt b/inc/3rdparty/site_config/standard/independent.co.uk.txt
index 47baf36b..af742209 100644..100755
--- a/inc/3rdparty/site_config/standard/independent.co.uk.txt
+++ b/inc/3rdparty/site_config/standard/independent.co.uk.txt
@@ -1,9 +1,9 @@
1title: //meta[@property='og:title']/@content 1title: //meta[@property='og:title']/@content
2body: //div[contains(@class, 'articleContent')] 2body: //div[contains(@class, 'articleContent')]
3date: //meta[@property='article:published_time']/@content 3date: //meta[@property='article:published_time']/@content
4author: //div[@id='main']//div[@class='byline']//span[@class='authorName'] 4author: //div[@id='main']//div[@class='byline']//span[@class='authorName']
5 5
6strip_id_or_class: RelatedArtTag 6strip_id_or_class: RelatedArtTag
7 7
8tidy: no 8tidy: no
9test_url: http://www.independent.co.uk/news/world/middle-east/syria-could-face-human-rights-probe-2274326.html \ No newline at end of file 9test_url: http://www.independent.co.uk/news/world/middle-east/syria-could-face-human-rights-probe-2274326.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/indiatimes.com.txt b/inc/3rdparty/site_config/standard/indiatimes.com.txt
index e7a35e84..8112105f 100644..100755
--- a/inc/3rdparty/site_config/standard/indiatimes.com.txt
+++ b/inc/3rdparty/site_config/standard/indiatimes.com.txt
@@ -1,6 +1,6 @@
1body: //figure[@class='mainVideo'] 1body: //figure[@class='mainVideo']
2strip: //figcaption 2strip: //figcaption
3 3
4prune: no 4prune: no
5 5
6test_url: http://www.indiatimes.com/bollywood/kareena-insecure-about-saif-working-with-bipasha-23386.html \ No newline at end of file 6test_url: http://www.indiatimes.com/bollywood/kareena-insecure-about-saif-working-with-bipasha-23386.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/inessential.com.txt b/inc/3rdparty/site_config/standard/inessential.com.txt
index 312cec4b..52252455 100644..100755
--- a/inc/3rdparty/site_config/standard/inessential.com.txt
+++ b/inc/3rdparty/site_config/standard/inessential.com.txt
@@ -1,5 +1,5 @@
1title: //div[@class='weblogPost']/h3[1] 1title: //div[@class='weblogPost']/h3[1]
2author: ("Brent Simmons") 2author: ("Brent Simmons")
3date: //span[@class="weblogPostDisplayDate"] 3date: //span[@class="weblogPostDisplayDate"]
4body: //div[@class='weblogPostBody'] 4body: //div[@class='weblogPostBody']
5test_url: http://inessential.com/2011/10/25/why_just_store_the_app_data_on_dropbo \ No newline at end of file 5test_url: http://inessential.com/2011/10/25/why_just_store_the_app_data_on_dropbo \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/info.abril.com.br.txt b/inc/3rdparty/site_config/standard/info.abril.com.br.txt
index 64cf3c8e..dee69f80 100644..100755
--- a/inc/3rdparty/site_config/standard/info.abril.com.br.txt
+++ b/inc/3rdparty/site_config/standard/info.abril.com.br.txt
@@ -1,4 +1,4 @@
1title://h1 1title://h1
2body://div[@id='texto_link'] 2body://div[@id='texto_link']
3 3
4test_url: http://info.abril.com.br/noticias/internet/filme-do-youtube-vai-estrear-nos-cinemas-22042011-6.shl \ No newline at end of file 4test_url: http://info.abril.com.br/noticias/internet/filme-do-youtube-vai-estrear-nos-cinemas-22042011-6.shl \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/infoq.com.txt b/inc/3rdparty/site_config/standard/infoq.com.txt
index 3a4e402d..f4a328a6 100644..100755
--- a/inc/3rdparty/site_config/standard/infoq.com.txt
+++ b/inc/3rdparty/site_config/standard/infoq.com.txt
@@ -1,14 +1,14 @@
1body: //div[@id="intTranscript"] 1body: //div[@id="intTranscript"]
2body: //div[@class="box-content"] 2body: //div[@class="box-content"]
3title: //div[@class="box-content"]//h1[1] 3title: //div[@class="box-content"]//h1[1]
4author: //p[@class="info"]/strong 4author: //p[@class="info"]/strong
5date: substring-before(substring-after(//p[@class="info"], "on"), "Length") 5date: substring-before(substring-after(//p[@class="info"], "on"), "Length")
6strip: //div[@class="box-content"]//h1[1] 6strip: //div[@class="box-content"]//h1[1]
7strip: //div[@class="box-content"]//p[@class="info"] 7strip: //div[@class="box-content"]//p[@class="info"]
8strip_id_or_class: vendor-content-box 8strip_id_or_class: vendor-content-box
9strip_id_or_class: tags2 9strip_id_or_class: tags2
10strip_id_or_class: instructions 10strip_id_or_class: instructions
11strip_id_or_class: comments 11strip_id_or_class: comments
12strip_id_or_class: forum-list-tree 12strip_id_or_class: forum-list-tree
13strip: //div[@class="addthis_toolbox addthis_default_style"] 13strip: //div[@class="addthis_toolbox addthis_default_style"]
14test_url: http://www.infoq.com/interviews/oleg-zhurakousky-javaone2011-interview \ No newline at end of file 14test_url: http://www.infoq.com/interviews/oleg-zhurakousky-javaone2011-interview \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/informador.com.mx.txt b/inc/3rdparty/site_config/standard/informador.com.mx.txt
index eedec24f..77987493 100644..100755
--- a/inc/3rdparty/site_config/standard/informador.com.mx.txt
+++ b/inc/3rdparty/site_config/standard/informador.com.mx.txt
@@ -1,9 +1,9 @@
1title: //div[@class='tituloInt'] 1title: //div[@class='tituloInt']
2body: //div[@class='notaPortada'] 2body: //div[@class='notaPortada']
3strip: //img[@id='imgHorizontalInt imgDetalleImg imagenNota'] 3strip: //img[@id='imgHorizontalInt imgDetalleImg imagenNota']
4date: //span[@class='publi'] 4date: //span[@class='publi']
5author: //span[@class='autor'] 5author: //span[@class='autor']
6tidy: no 6tidy: no
7prune: no 7prune: no
8 8
9test_url: http://www.informador.com.mx/tecnologia/2011/337606/6/iran-desarrolla-antivirus-tras-afectaciones-por-duqu.htm \ No newline at end of file 9test_url: http://www.informador.com.mx/tecnologia/2011/337606/6/iran-desarrolla-antivirus-tras-afectaciones-por-duqu.htm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/information.dk.txt b/inc/3rdparty/site_config/standard/information.dk.txt
index 6e3c3b1a..3ade754d 100644..100755
--- a/inc/3rdparty/site_config/standard/information.dk.txt
+++ b/inc/3rdparty/site_config/standard/information.dk.txt
@@ -1,7 +1,7 @@
1title: //meta[@property='og:title']/@content 1title: //meta[@property='og:title']/@content
2author: //*[@property='dc:creator'] 2author: //*[@property='dc:creator']
3date: //*[@property='dc:date']/@content 3date: //*[@property='dc:date']/@content
4body: //div[@id='page-content']//div[contains(@class, 'article-body')] 4body: //div[@id='page-content']//div[contains(@class, 'article-body')]
5 5
6tidy: no 6tidy: no
7test_url: http://www.information.dk/282307 \ No newline at end of file 7test_url: http://www.information.dk/282307 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/informationarchitects.net.txt b/inc/3rdparty/site_config/standard/informationarchitects.net.txt
index 134306cd..1330a040 100644..100755
--- a/inc/3rdparty/site_config/standard/informationarchitects.net.txt
+++ b/inc/3rdparty/site_config/standard/informationarchitects.net.txt
@@ -1,10 +1,10 @@
1title://h1[@class="post_title"] 1title://h1[@class="post_title"]
2body://article[@class="post"] 2body://article[@class="post"]
3date://h1[@class="section_separator"] 3date://h1[@class="section_separator"]
4author://span[@class="post_author"] 4author://span[@class="post_author"]
5strip://nav[@class="arrow_nav"] 5strip://nav[@class="arrow_nav"]
6strip://section[@id="contact"] 6strip://section[@id="contact"]
7strip_id_or_class:post_title 7strip_id_or_class:post_title
8strip_id_or_class:post_author 8strip_id_or_class:post_author
9strip_id_or_class:section_separator 9strip_id_or_class:section_separator
10test_url: http://informationarchitects.net/blog/nzz-relaunch-a-quick-review/ \ No newline at end of file 10test_url: http://informationarchitects.net/blog/nzz-relaunch-a-quick-review/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/informationclearinghouse.info.txt b/inc/3rdparty/site_config/standard/informationclearinghouse.info.txt
index 0879e9e6..60b798e6 100644..100755
--- a/inc/3rdparty/site_config/standard/informationclearinghouse.info.txt
+++ b/inc/3rdparty/site_config/standard/informationclearinghouse.info.txt
@@ -1,6 +1,6 @@
1title: //head/title 1title: //head/title
2body: //table[@id='table3']//div[@class='postContent'] 2body: //table[@id='table3']//div[@class='postContent']
3prune: no 3prune: no
4tidy: no 4tidy: no
5 5
6test_url: http://www.informationclearinghouse.info/article28238.htm \ No newline at end of file 6test_url: http://www.informationclearinghouse.info/article28238.htm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/informit.com.txt b/inc/3rdparty/site_config/standard/informit.com.txt
index 84c1fdcf..24bf6242 100644..100755
--- a/inc/3rdparty/site_config/standard/informit.com.txt
+++ b/inc/3rdparty/site_config/standard/informit.com.txt
@@ -1,7 +1,7 @@
1title: //div[@id='content']/h1 1title: //div[@id='content']/h1
2body: //div[@id="content"] 2body: //div[@id="content"]
3strip: //img[contains(@src, 'informit_printer.png')] 3strip: //img[contains(@src, 'informit_printer.png')]
4single_page_link: //div[contains(@class, 'articleTools')]//a[contains(@href, '/printerfriendly.')] 4single_page_link: //div[contains(@class, 'articleTools')]//a[contains(@href, '/printerfriendly.')]
5prune: no 5prune: no
6 6
7test_url: http://www.informit.com/articles/article.aspx?p=1729268 \ No newline at end of file 7test_url: http://www.informit.com/articles/article.aspx?p=1729268 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/infoworld.com.txt b/inc/3rdparty/site_config/standard/infoworld.com.txt
index dd588ed8..d335bc4a 100644..100755
--- a/inc/3rdparty/site_config/standard/infoworld.com.txt
+++ b/inc/3rdparty/site_config/standard/infoworld.com.txt
@@ -1,12 +1,12 @@
1body: //div[@id='main_text'] 1body: //div[@id='main_text']
2title: //div[@id='main_text']/h1 2title: //div[@id='main_text']/h1
3strip: //div[@id='main_text']/h1 3strip: //div[@id='main_text']/h1
4strip: //div[@id='main_text']/h2 4strip: //div[@id='main_text']/h2
5strip_id_or_class: tools 5strip_id_or_class: tools
6strip_id_or_class: articleTools 6strip_id_or_class: articleTools
7strip_id_or_class: pagination 7strip_id_or_class: pagination
8strip_id_or_class: byline 8strip_id_or_class: byline
9strip_id_or_class: tweet 9strip_id_or_class: tweet
10date: //div[@class='date'] 10date: //div[@class='date']
11strip: //div[@class='date'] 11strip: //div[@class='date']
12test_url: http://www.infoworld.com/d/the-industry-standard/it-jobs-the-rise-both-offshore-and-in-us-187689 \ No newline at end of file 12test_url: http://www.infoworld.com/d/the-industry-standard/it-jobs-the-rise-both-offshore-and-in-us-187689 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/infzm.com.txt b/inc/3rdparty/site_config/standard/infzm.com.txt
index 012c873f..489d5aff 100644..100755
--- a/inc/3rdparty/site_config/standard/infzm.com.txt
+++ b/inc/3rdparty/site_config/standard/infzm.com.txt
@@ -1,9 +1,9 @@
1# This filter is tested on: 1# This filter is tested on:
2# http://www.infzm.com/content/71068 2# http://www.infzm.com/content/71068
3# http://www.infzm.com/content/41577 3# http://www.infzm.com/content/41577
4 4
5author://em[contains(@class, 'toAuthor')] 5author://em[contains(@class, 'toAuthor')]
6date:substring(//em[contains(@class, 'pubTime')],1) 6date:substring(//em[contains(@class, 'pubTime')],1)
7body://section[contains(@id, 'articleContent')] 7body://section[contains(@id, 'articleContent')]
8title://h1[contains(@class ,'articleHeadline clearfix')] 8title://h1[contains(@class ,'articleHeadline clearfix')]
9test_url: http://www.infzm.com/content/41577 \ No newline at end of file 9test_url: http://www.infzm.com/content/41577 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/inhabitat.com.txt b/inc/3rdparty/site_config/standard/inhabitat.com.txt
index 6629dafe..c63f53a6 100644..100755
--- a/inc/3rdparty/site_config/standard/inhabitat.com.txt
+++ b/inc/3rdparty/site_config/standard/inhabitat.com.txt
@@ -1,8 +1,8 @@
1# set body 1# set body
2body: //div[@class='post-listing'] 2body: //div[@class='post-listing']
3 3
4# remove clutter 4# remove clutter
5strip: //a/big 5strip: //a/big
6strip: //a/em 6strip: //a/em
7strip: //p/em 7strip: //p/em
8test_url: http://inhabitat.com/2010/11/18/sliding-walls-transform-this-tokyo-house-into-an-office/ \ No newline at end of file 8test_url: http://inhabitat.com/2010/11/18/sliding-walls-transform-this-tokyo-house-into-an-office/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/instagr.am.txt b/inc/3rdparty/site_config/standard/instagr.am.txt
index ad9e8214..522caebc 100644..100755
--- a/inc/3rdparty/site_config/standard/instagr.am.txt
+++ b/inc/3rdparty/site_config/standard/instagr.am.txt
@@ -1,6 +1,6 @@
1title: //div[@class='caption'] 1title: //div[@class='caption']
2author: //p[@class='username'] 2author: //p[@class='username']
3 3
4strip: //div[@class='contents']/h3 4strip: //div[@class='contents']/h3
5strip: //div[@class='location'] 5strip: //div[@class='location']
6test_url: http://instagr.am/p/G-s_aciyDJ/ \ No newline at end of file 6test_url: http://instagr.am/p/G-s_aciyDJ/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/interest.co.nz.txt b/inc/3rdparty/site_config/standard/interest.co.nz.txt
index 28c3310a..28c3310a 100644..100755
--- a/inc/3rdparty/site_config/standard/interest.co.nz.txt
+++ b/inc/3rdparty/site_config/standard/interest.co.nz.txt
diff --git a/inc/3rdparty/site_config/standard/iolanguage.com.txt b/inc/3rdparty/site_config/standard/iolanguage.com.txt
index 231875ad..231875ad 100644..100755
--- a/inc/3rdparty/site_config/standard/iolanguage.com.txt
+++ b/inc/3rdparty/site_config/standard/iolanguage.com.txt
diff --git a/inc/3rdparty/site_config/standard/ipadclub.nl.txt b/inc/3rdparty/site_config/standard/ipadclub.nl.txt
index d196059e..afe058df 100644..100755
--- a/inc/3rdparty/site_config/standard/ipadclub.nl.txt
+++ b/inc/3rdparty/site_config/standard/ipadclub.nl.txt
@@ -1,7 +1,7 @@
1body: //div[@id = 'post'] 1body: //div[@id = 'post']
2strip: //div[@class = 'postinfo'] 2strip: //div[@class = 'postinfo']
3strip: //div[@id = 'postmetanew'] 3strip: //div[@id = 'postmetanew']
4strip: //div[@class = 'paginator'] 4strip: //div[@class = 'paginator']
5strip: //div[@class = 'col-2'] 5strip: //div[@class = 'col-2']
6strip: //div[@id = 'adfactor-label'] 6strip: //div[@id = 'adfactor-label']
7test_url: http://www.ipadclub.nl/15808/text-writer-ipad-tekstverwerker-met-functieknoppen/ \ No newline at end of file 7test_url: http://www.ipadclub.nl/15808/text-writer-ipad-tekstverwerker-met-functieknoppen/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ipadplanet.nl.txt b/inc/3rdparty/site_config/standard/ipadplanet.nl.txt
index a2e49005..dedb5572 100644..100755
--- a/inc/3rdparty/site_config/standard/ipadplanet.nl.txt
+++ b/inc/3rdparty/site_config/standard/ipadplanet.nl.txt
@@ -1,7 +1,7 @@
1body: //div[@id = 'post'] 1body: //div[@id = 'post']
2strip: //div[@class = 'postinfo'] 2strip: //div[@class = 'postinfo']
3strip: //div[@id = 'postmetanew'] 3strip: //div[@id = 'postmetanew']
4strip: //div[@class = 'paginator'] 4strip: //div[@class = 'paginator']
5strip: //div[@class = 'col-2'] 5strip: //div[@class = 'col-2']
6strip: //div[@id = 'adfactor-label'] 6strip: //div[@id = 'adfactor-label']
7test_url: http://www.ipadplanet.nl/11723/steve-jobs-bevestigt-verdwijnen-fysieke-rotatieschakelaar-in-ios-4-2/ \ No newline at end of file 7test_url: http://www.ipadplanet.nl/11723/steve-jobs-bevestigt-verdwijnen-fysieke-rotatieschakelaar-in-ios-4-2/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/iphoneclub.nl.txt b/inc/3rdparty/site_config/standard/iphoneclub.nl.txt
index f8d4f6a6..850a24e9 100644..100755
--- a/inc/3rdparty/site_config/standard/iphoneclub.nl.txt
+++ b/inc/3rdparty/site_config/standard/iphoneclub.nl.txt
@@ -1,7 +1,7 @@
1body: //div[@id = 'post'] 1body: //div[@id = 'post']
2strip: //div[@class = 'postinfo'] 2strip: //div[@class = 'postinfo']
3strip: //div[@id = 'postmetanew'] 3strip: //div[@id = 'postmetanew']
4strip: //div[@class = 'paginator'] 4strip: //div[@class = 'paginator']
5strip: //div[@class = 'col-2'] 5strip: //div[@class = 'col-2']
6strip: //div[@id = 'adfactor-label'] 6strip: //div[@id = 'adfactor-label']
7test_url: http://www.iphoneclub.nl/105808/t-mobile-mobiel-internet-wordt-duurder-maar-blijft-onbeperkt/ \ No newline at end of file 7test_url: http://www.iphoneclub.nl/105808/t-mobile-mobiel-internet-wordt-duurder-maar-blijft-onbeperkt/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/iphonehacks.com.txt b/inc/3rdparty/site_config/standard/iphonehacks.com.txt
index c97ff43c..e8ccea06 100644..100755
--- a/inc/3rdparty/site_config/standard/iphonehacks.com.txt
+++ b/inc/3rdparty/site_config/standard/iphonehacks.com.txt
@@ -1,9 +1,9 @@
1title: //meta[@name='og:title']/@content 1title: //meta[@name='og:title']/@content
2body: //small[@class='postmetadata'] | //div[contains(@class, 'entry-content')] 2body: //small[@class='postmetadata'] | //div[contains(@class, 'entry-content')]
3 3
4strip: //span[@vanilla-identifier] 4strip: //span[@vanilla-identifier]
5 5
6prune: no 6prune: no
7tidy: no 7tidy: no
8 8
9test_url: http://www.iphonehacks.com/2012/07/app-review-process-behind-the-scenes.html \ No newline at end of file 9test_url: http://www.iphonehacks.com/2012/07/app-review-process-behind-the-scenes.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/iplaysoft.com.txt b/inc/3rdparty/site_config/standard/iplaysoft.com.txt
index 4a944768..4a944768 100644..100755
--- a/inc/3rdparty/site_config/standard/iplaysoft.com.txt
+++ b/inc/3rdparty/site_config/standard/iplaysoft.com.txt
diff --git a/inc/3rdparty/site_config/standard/isource.com.txt b/inc/3rdparty/site_config/standard/isource.com.txt
index a1c16a16..215fdf87 100644..100755
--- a/inc/3rdparty/site_config/standard/isource.com.txt
+++ b/inc/3rdparty/site_config/standard/isource.com.txt
@@ -1,6 +1,6 @@
1# Remove social buttons 1# Remove social buttons
2strip: //div[@id='temp_Content_Right'] 2strip: //div[@id='temp_Content_Right']
3 3
4# Remove duplicate article title 4# Remove duplicate article title
5strip: //*[(@class='storytitle')] 5strip: //*[(@class='storytitle')]
6test_url: http://isource.com/2010/10/24/swearch-a-cool-iphone-web-app/ \ No newline at end of file 6test_url: http://isource.com/2010/10/24/swearch-a-cool-iphone-web-app/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/itavisen.no.txt b/inc/3rdparty/site_config/standard/itavisen.no.txt
index 8da78cb0..3ba484a7 100644..100755
--- a/inc/3rdparty/site_config/standard/itavisen.no.txt
+++ b/inc/3rdparty/site_config/standard/itavisen.no.txt
@@ -1,6 +1,6 @@
1author: //p[@class = 'writer'] 1author: //p[@class = 'writer']
2 2
3date: //p[@class = 'published-time'] 3date: //p[@class = 'published-time']
4 4
5body: //div[@class = 'text main'] 5body: //div[@class = 'text main']
6test_url: http://www.itavisen.no/899786/old-republic-blir-gratis \ No newline at end of file 6test_url: http://www.itavisen.no/899786/old-republic-blir-gratis \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/itmedia.co.jp.txt b/inc/3rdparty/site_config/standard/itmedia.co.jp.txt
new file mode 100755
index 00000000..97f00ce8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/itmedia.co.jp.txt
@@ -0,0 +1,8 @@
1body: //div[@id='cmsBody']
2
3next_page_link: //span[@id='next']/a
4
5strip_id_or_class: cmsCopyright
6strip_id_or_class: masterSocialbuttonBtm
7
8test_url: http://www.itmedia.co.jp/enterprise/articles/0912/05/news002.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/itstactical.com.txt b/inc/3rdparty/site_config/standard/itstactical.com.txt
index 550875ec..b8cb461c 100644..100755
--- a/inc/3rdparty/site_config/standard/itstactical.com.txt
+++ b/inc/3rdparty/site_config/standard/itstactical.com.txt
@@ -1,12 +1,12 @@
1title: //h1[@class="entry-title"] 1title: //h1[@class="entry-title"]
2body: //div[@class='format_text entry-content'] 2body: //div[@class='format_text entry-content']
3author: //span[@class="author vcard"]/a 3author: //span[@class="author vcard"]/a
4date: //abbr[@class="published"] 4date: //abbr[@class="published"]
5 5
6strip_id_or_class: related-posts 6strip_id_or_class: related-posts
7strip_id_or_class: membershipbox 7strip_id_or_class: membershipbox
8strip_id_or_class: share_this_compact_bt 8strip_id_or_class: share_this_compact_bt
9 9
10 10
11footnotes: no 11footnotes: no
12test_url: http://www.itstactical.com/warcom/knives/exclusive-triple-aught-design-production-dauntless-knife-video-walkthrough/ \ No newline at end of file 12test_url: http://www.itstactical.com/warcom/knives/exclusive-triple-aught-design-production-dauntless-knife-video-walkthrough/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/itwire.com.txt b/inc/3rdparty/site_config/standard/itwire.com.txt
new file mode 100755
index 00000000..72b41065
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/itwire.com.txt
@@ -0,0 +1,5 @@
1author: //a[@rel="author"]
2date: //li[@class="itemDateCreated"]
3strip: //div[contains(@class, 'legend-rounded')]
4
5test_url: http://www.itwire.com/it-industry-news/market/59661-ibm-looks-to-high-value-solutions-to-meet-changing-demands
diff --git a/inc/3rdparty/site_config/standard/itworld.com.txt b/inc/3rdparty/site_config/standard/itworld.com.txt
index d4fa604e..1ee0ee58 100644..100755
--- a/inc/3rdparty/site_config/standard/itworld.com.txt
+++ b/inc/3rdparty/site_config/standard/itworld.com.txt
@@ -1,5 +1,5 @@
1title: //*[@id="article-title"] 1title: //*[@id="article-title"]
2author: //*[@id="article-info"]/strong 2author: //*[@id="article-info"]/strong
3date: //*[@class="article-dateline"]/strong 3date: //*[@class="article-dateline"]/strong
4body: //*[@id="article-content"] 4body: //*[@id="article-content"]
5test_url: http://www.itworld.com/open-source/140916/android-sued-microsoft-not-linux \ No newline at end of file 5test_url: http://www.itworld.com/open-source/140916/android-sued-microsoft-not-linux \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/izismile.com.txt b/inc/3rdparty/site_config/standard/izismile.com.txt
index af3f299a..b0114d35 100644..100755
--- a/inc/3rdparty/site_config/standard/izismile.com.txt
+++ b/inc/3rdparty/site_config/standard/izismile.com.txt
@@ -1,4 +1,4 @@
1body: //div[starts-with(@id, 'news-id-')] 1body: //div[starts-with(@id, 'news-id-')]
2prune: no 2prune: no
3 3
4test_url: http://izismile.com/2011/06/13/uncanny_factoid_fashion_or_creepy_2_pics.html \ No newline at end of file 4test_url: http://izismile.com/2011/06/13/uncanny_factoid_fashion_or_creepy_2_pics.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/jalopnik.com.txt b/inc/3rdparty/site_config/standard/jalopnik.com.txt
index fc2eef8e..fc2eef8e 100644..100755
--- a/inc/3rdparty/site_config/standard/jalopnik.com.txt
+++ b/inc/3rdparty/site_config/standard/jalopnik.com.txt
diff --git a/inc/3rdparty/site_config/standard/jandan.net.txt b/inc/3rdparty/site_config/standard/jandan.net.txt
index f1dd3d17..343fd6fb 100644..100755
--- a/inc/3rdparty/site_config/standard/jandan.net.txt
+++ b/inc/3rdparty/site_config/standard/jandan.net.txt
@@ -1,6 +1,6 @@
1body: //div[@id='content']//div[@class = 'post f'] 1body: //div[@id='content']//div[@class = 'post f']
2strip_id_or_class: comment-big 2strip_id_or_class: comment-big
3strip_id_or_class: avatar 3strip_id_or_class: avatar
4strip: //div[@class='time_s'] 4strip: //div[@class='time_s']
5 5
6test_url: http://jandan.net/2011/04/03/iphone-5-sony.html \ No newline at end of file 6test_url: http://jandan.net/2011/04/03/iphone-5-sony.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/jetzt.sueddeutsche.de.txt b/inc/3rdparty/site_config/standard/jetzt.sueddeutsche.de.txt
index 6e8af934..00e4cf63 100644..100755
--- a/inc/3rdparty/site_config/standard/jetzt.sueddeutsche.de.txt
+++ b/inc/3rdparty/site_config/standard/jetzt.sueddeutsche.de.txt
@@ -1,22 +1,22 @@
1title: //h1 1title: //h1
2author: //p[contains(@class, 'author')]/a 2author: //p[contains(@class, 'author')]/a
3date: //p[contains(@class, 'time')] 3date: //p[contains(@class, 'time')]
4body: //div[@class='content']/div[contains(@class, 'text')] 4body: //div[@class='content']/div[contains(@class, 'text')]
5 5
6# prevent "no text" errors on multi-page articles 6# prevent "no text" errors on multi-page articles
7tidy: no 7tidy: no
8 8
9# we use a custom next-link detector instead of the print view because 9# we use a custom next-link detector instead of the print view because
10# it's pretty hard to strip out the unwanted parts in the print view 10# it's pretty hard to strip out the unwanted parts in the print view
11autodetect_next_page: no 11autodetect_next_page: no
12next_page_link: //div[contains(@class, 'text')]/div/div[contains(@class, 'paging')]/a[@class='more '] 12next_page_link: //div[contains(@class, 'text')]/div/div[contains(@class, 'paging')]/a[@class='more ']
13 13
14strip: //h1 14strip: //h1
15 15
16strip_id_or_class: meta 16strip_id_or_class: meta
17strip_id_or_class: author 17strip_id_or_class: author
18strip_id_or_class: paging 18strip_id_or_class: paging
19 19
20# prevent "Report an Error" from being recognized as footnote 20# prevent "Report an Error" from being recognized as footnote
21footnotes: no 21footnotes: no
22test_url: http://jetzt.sueddeutsche.de/texte/anzeigen/544308/Alles-flicken \ No newline at end of file 22test_url: http://jetzt.sueddeutsche.de/texte/anzeigen/544308/Alles-flicken \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/jjahnke.net.txt b/inc/3rdparty/site_config/standard/jjahnke.net.txt
index 95c45ee7..d45c8899 100644..100755
--- a/inc/3rdparty/site_config/standard/jjahnke.net.txt
+++ b/inc/3rdparty/site_config/standard/jjahnke.net.txt
@@ -1,4 +1,4 @@
1body: //div[@class='entry'] 1body: //div[@class='entry']
2prune: no 2prune: no
3 3
4test_url: http://www.jjahnke.net/rundbr87.html#2514 \ No newline at end of file 4test_url: http://www.jjahnke.net/rundbr87.html#2514 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/jobbank.gc.ca.txt b/inc/3rdparty/site_config/standard/jobbank.gc.ca.txt
index af8d7d17..1dbe2072 100644..100755
--- a/inc/3rdparty/site_config/standard/jobbank.gc.ca.txt
+++ b/inc/3rdparty/site_config/standard/jobbank.gc.ca.txt
@@ -1,5 +1,5 @@
1body: //div[@id='formatCont_en'] 1body: //div[@id='formatCont_en']
2 2
3prune: no 3prune: no
4 4
5test_url: http://www.jobbank.gc.ca/detail-eng.aspx?Source=JobPosting&OrderNum=6397922 \ No newline at end of file 5test_url: http://www.jobbank.gc.ca/detail-eng.aspx?Source=JobPosting&OrderNum=6397922 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/joelonsoftware.com.txt b/inc/3rdparty/site_config/standard/joelonsoftware.com.txt
index 75fbee5a..241a361f 100644..100755
--- a/inc/3rdparty/site_config/standard/joelonsoftware.com.txt
+++ b/inc/3rdparty/site_config/standard/joelonsoftware.com.txt
@@ -1,21 +1,21 @@
1# Works with old posts too, such as http://www.joelonsoftware.com/articles/fog0000000332.html 1# Works with old posts too, such as http://www.joelonsoftware.com/articles/fog0000000332.html
2 2
3author: substring-after(//div[@class="author"], 'by ') 3author: substring-after(//div[@class="author"], 'by ')
4date: //div[@class="date"] 4date: //div[@class="date"]
5 5
6## Clean stuff at top ## 6## Clean stuff at top ##
7 7
8strip: //h1[1] 8strip: //h1[1]
9strip: //h2[1] 9strip: //h2[1]
10strip: //div[@class="date"] 10strip: //div[@class="date"]
11strip: //div[@class="author"] 11strip: //div[@class="author"]
12 12
13## Clean stuff at bottom ## 13## Clean stuff at bottom ##
14 14
15strip: //blockquote[@class="textmessage"] 15strip: //blockquote[@class="textmessage"]
16strip: //div[@style="width:500px"]/p[last()] 16strip: //div[@style="width:500px"]/p[last()]
17strip: //div[@style="width:500px"]/p[last()-1] 17strip: //div[@style="width:500px"]/p[last()-1]
18strip: //div[@style="width:500px"]/h4[last()] 18strip: //div[@style="width:500px"]/h4[last()]
19strip: //div[@style="width:500px"]/h4[last()-1] 19strip: //div[@style="width:500px"]/h4[last()-1]
20strip: //div[@style="width:500px"]/div[last()] 20strip: //div[@style="width:500px"]/div[last()]
21test_url: http://www.joelonsoftware.com/items/2011/09/15.html \ No newline at end of file 21test_url: http://www.joelonsoftware.com/items/2011/09/15.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/jouire.com.txt b/inc/3rdparty/site_config/standard/jouire.com.txt
index 535a501e..3cf60672 100644..100755
--- a/inc/3rdparty/site_config/standard/jouire.com.txt
+++ b/inc/3rdparty/site_config/standard/jouire.com.txt
@@ -1,3 +1,3 @@
1author: //h1 1author: //h1
2date: //p[contains(@class,'date')] 2date: //p[contains(@class,'date')]
3test_url: http://jouire.com/2011/01/exquisite-whispers/ \ No newline at end of file 3test_url: http://jouire.com/2011/01/exquisite-whispers/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/joystiq.com.txt b/inc/3rdparty/site_config/standard/joystiq.com.txt
index 7fbd467d..7a8e56f8 100644..100755
--- a/inc/3rdparty/site_config/standard/joystiq.com.txt
+++ b/inc/3rdparty/site_config/standard/joystiq.com.txt
@@ -1,8 +1,8 @@
1author: //a[@class="byline-author"] 1author: //a[@class="byline-author"]
2title: //h1[@class="headline"] 2title: //h1[@class="headline"]
3strip: //div[@id="info-card"] 3strip: //div[@id="info-card"]
4strip: //div[@id="breaking-news"] 4strip: //div[@id="breaking-news"]
5strip: //div[@class="rmod list-post-mod"] 5strip: //div[@class="rmod list-post-mod"]
6strip: //div[@id="footer"] 6strip: //div[@id="footer"]
7strip: //div[@id="GH_strip"] 7strip: //div[@id="GH_strip"]
8test_url: http://www.joystiq.com/2012/06/20/magic-the-gathering-duels-of-the-planeswalkers-2013-review/ \ No newline at end of file 8test_url: http://www.joystiq.com/2012/06/20/magic-the-gathering-duels-of-the-planeswalkers-2013-review/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/juedische-allgemeine.de.txt b/inc/3rdparty/site_config/standard/juedische-allgemeine.de.txt
index be844e57..ff5a0244 100644..100755
--- a/inc/3rdparty/site_config/standard/juedische-allgemeine.de.txt
+++ b/inc/3rdparty/site_config/standard/juedische-allgemeine.de.txt
@@ -1,19 +1,19 @@
1body: //div[@id='article_container'] 1body: //div[@id='article_container']
2author: //h4//a[@class='author'] 2author: //h4//a[@class='author']
3title: //h1 3title: //h1
4 4
5replace_string(lang="en"): lang="de" 5replace_string(lang="en"): lang="de"
6replace_string(/>1</a>):/></a> 6replace_string(/>1</a>):/></a>
7 7
8strip_id_or_class: share_toolbox 8strip_id_or_class: share_toolbox
9strip_id_or_class: article_header 9strip_id_or_class: article_header
10strip_id_or_class: phototext 10strip_id_or_class: phototext
11 11
12strip_image_src: icon_author.gif 12strip_image_src: icon_author.gif
13 13
14strip: //img[@src=''] 14strip: //img[@src='']
15strip: //h4[@id='author'] 15strip: //h4[@id='author']
16 16
17prune: no 17prune: no
18 18
19test_url: http://www.juedische-allgemeine.de/article/view/id/13366 \ No newline at end of file 19test_url: http://www.juedische-allgemeine.de/article/view/id/13366 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/juppy.org.txt b/inc/3rdparty/site_config/standard/juppy.org.txt
index e2d07f24..fdf7cdc9 100644..100755
--- a/inc/3rdparty/site_config/standard/juppy.org.txt
+++ b/inc/3rdparty/site_config/standard/juppy.org.txt
@@ -1,8 +1,8 @@
1convert_double_br_tags: yes 1convert_double_br_tags: yes
2 2
3title: //div[@id="storycredits"]/p/span[@class="title"] 3title: //div[@id="storycredits"]/p/span[@class="title"]
4author: //div[@id="storycredits"]/p/br[1]/following-sibling::text() 4author: //div[@id="storycredits"]/p/br[1]/following-sibling::text()
5 5
6strip: //div[@id="storycredits"] 6strip: //div[@id="storycredits"]
7 7
8test_url: http://www.juppy.org/santa/stories.php?ForAuthorID=35&Year=2005 \ No newline at end of file 8test_url: http://www.juppy.org/santa/stories.php?ForAuthorID=35&Year=2005 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/kachestvo.ru.txt b/inc/3rdparty/site_config/standard/kachestvo.ru.txt
index 34404e96..535693c4 100644..100755
--- a/inc/3rdparty/site_config/standard/kachestvo.ru.txt
+++ b/inc/3rdparty/site_config/standard/kachestvo.ru.txt
@@ -1,3 +1,3 @@
1body: //div[contains(@class, 'inner_content')] 1body: //div[contains(@class, 'inner_content')]
2 2
3test_url: http://kachestvo.ru/promtovar/odezhda/denim.html \ No newline at end of file 3test_url: http://kachestvo.ru/promtovar/odezhda/denim.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/kachiblog.com.txt b/inc/3rdparty/site_config/standard/kachiblog.com.txt
new file mode 100755
index 00000000..35baf8df
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/kachiblog.com.txt
@@ -0,0 +1,7 @@
1title: //h3[contains(@class, 'entry-title')]
2date: //abbr[@itemprop='datePublished']/@title
3body: //div[@itemprop='articleBody']
4tidy: no
5
6test_url: http://www.kachiblog.com/2013/05/samsung-galaxy-s4-vs-samsung-galaxy.html
7test_url: http://www.kachiblog.com/feeds/posts/default \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/kathimerini.gr.txt b/inc/3rdparty/site_config/standard/kathimerini.gr.txt
new file mode 100755
index 00000000..2c7c518c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/kathimerini.gr.txt
@@ -0,0 +1,4 @@
1title: //td[contains(@class, 'articleTitlos')]
2body: //td[contains(@class, 'eelantext')]
3
4test_url: http://www.kathimerini.gr/4dcgi/_w_articles_kathremote_1_03/12/2013_530490 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/kenrockwell.com.txt b/inc/3rdparty/site_config/standard/kenrockwell.com.txt
index e6d100ea..90c64cbf 100644..100755
--- a/inc/3rdparty/site_config/standard/kenrockwell.com.txt
+++ b/inc/3rdparty/site_config/standard/kenrockwell.com.txt
@@ -1,7 +1,7 @@
1# Ads 1# Ads
2strip: //table[@align="right"][@width="120"] 2strip: //table[@align="right"][@width="120"]
3 3
4# Affiliate link paragraphs 4# Affiliate link paragraphs
5strip: //a[.="Adorama"]/parent::p[contains(., "goodies")] 5strip: //a[.="Adorama"]/parent::p[contains(., "goodies")]
6strip: //a[.="Adorama"]/parent::p[contains(., "This free website's biggest source of")] 6strip: //a[.="Adorama"]/parent::p[contains(., "This free website's biggest source of")]
7test_url: http://www.kenrockwell.com/tech/composition.htm \ No newline at end of file 7test_url: http://www.kenrockwell.com/tech/composition.htm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/kicker.de.txt b/inc/3rdparty/site_config/standard/kicker.de.txt
index 7d5daa4b..db4f63c4 100644..100755
--- a/inc/3rdparty/site_config/standard/kicker.de.txt
+++ b/inc/3rdparty/site_config/standard/kicker.de.txt
@@ -1,21 +1,21 @@
1# set body 1# set body
2body: //div[@id='ovArtikel'] 2body: //div[@id='ovArtikel']
3 3
4# set title 4# set title
5title: //div[@id='ovArtikel']/h1 5title: //div[@id='ovArtikel']/h1
6# strip main title and leave sub title 6# strip main title and leave sub title
7strip: //div[@id='ovArtikel']/h1 7strip: //div[@id='ovArtikel']/h1
8 8
9date: //div[@class='publicdate'] 9date: //div[@class='publicdate']
10 10
11#remove captions 11#remove captions
12strip: //*/div[@class='bu'] 12strip: //*/div[@class='bu']
13strip: //*/div[@class='credit'] 13strip: //*/div[@class='credit']
14 14
15#remove ads 15#remove ads
16strip: //*/div[@class='ad-head'] 16strip: //*/div[@class='ad-head']
17strip: //*/div[@class='linksebay'] 17strip: //*/div[@class='linksebay']
18 18
19# remove video content 19# remove video content
20strip: //*/div[@class='ovVideo'] 20strip: //*/div[@class='ovVideo']
21test_url: http://www.kicker.de/news/fussball/frauen/wmfr/frauen-weltmeisterschaft/2011/3/1123662/spielbericht_frankreich-frauen_deutschland-frauen.html \ No newline at end of file 21test_url: http://www.kicker.de/news/fussball/frauen/wmfr/frauen-weltmeisterschaft/2011/3/1123662/spielbericht_frankreich-frauen_deutschland-frauen.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/kickstarter.com.txt b/inc/3rdparty/site_config/standard/kickstarter.com.txt
index c055659f..7b3daa58 100644..100755
--- a/inc/3rdparty/site_config/standard/kickstarter.com.txt
+++ b/inc/3rdparty/site_config/standard/kickstarter.com.txt
@@ -1,7 +1,7 @@
1title: //h1[@id='name'] 1title: //h1[@id='name']
2body: //*[@id='leftcol'] 2body: //*[@id='leftcol']
3 3
4strip_id_or_class: 'share-box' 4strip_id_or_class: 'share-box'
5strip_id_or_class: 'project-faqs' 5strip_id_or_class: 'project-faqs'
6strip_id_or_class: 'report-issue-wrap' 6strip_id_or_class: 'report-issue-wrap'
7test_url: http://www.kickstarter.com/projects/hop/elevation-dock-the-best-dock-for-iphone \ No newline at end of file 7test_url: http://www.kickstarter.com/projects/hop/elevation-dock-the-best-dock-for-iphone \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/kingarthurflour.com.txt b/inc/3rdparty/site_config/standard/kingarthurflour.com.txt
index 2f6783a3..b27539f5 100644..100755
--- a/inc/3rdparty/site_config/standard/kingarthurflour.com.txt
+++ b/inc/3rdparty/site_config/standard/kingarthurflour.com.txt
@@ -1,4 +1,4 @@
1title: //div[@class='post']/h2 1title: //div[@class='post']/h2
2body: //div[@class='entry'] 2body: //div[@class='entry']
3strip: //p[contains(.,'Tags:')] 3strip: //p[contains(.,'Tags:')]
4test_url: http://www.kingarthurflour.com/blog/2011/01/28/a-big-sandwich-for-the-big-game/ \ No newline at end of file 4test_url: http://www.kingarthurflour.com/blog/2011/01/28/a-big-sandwich-for-the-big-game/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/kotaku.com.txt b/inc/3rdparty/site_config/standard/kotaku.com.txt
index be439d75..be439d75 100644..100755
--- a/inc/3rdparty/site_config/standard/kotaku.com.txt
+++ b/inc/3rdparty/site_config/standard/kotaku.com.txt
diff --git a/inc/3rdparty/site_config/standard/kottke.org.txt b/inc/3rdparty/site_config/standard/kottke.org.txt
index f93a61e7..582f251c 100644..100755
--- a/inc/3rdparty/site_config/standard/kottke.org.txt
+++ b/inc/3rdparty/site_config/standard/kottke.org.txt
@@ -1,6 +1,6 @@
1title: //h2 1title: //h2
2author: //*[@id='main']/div/a[1] 2author: //*[@id='main']/div/a[1]
3date: substring-before(substring-after(//div[@class='meta'],'&bull;'),'&bull;') 3date: substring-before(substring-after(//div[@class='meta'],'&bull;'),'&bull;')
4body: //div[@id='main'] 4body: //div[@id='main']
5strip: //div[@class='meta'] 5strip: //div[@class='meta']
6test_url: http://kottke.org/08/02/king-of-kong-a-fistful-of-quarters \ No newline at end of file 6test_url: http://kottke.org/08/02/king-of-kong-a-fistful-of-quarters \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/kumailplus.com.txt b/inc/3rdparty/site_config/standard/kumailplus.com.txt
index 9e15cc34..2f604de0 100644..100755
--- a/inc/3rdparty/site_config/standard/kumailplus.com.txt
+++ b/inc/3rdparty/site_config/standard/kumailplus.com.txt
@@ -1,3 +1,3 @@
1body: //div[@class = "entry-full"] 1body: //div[@class = "entry-full"]
2 2
3test_url: http://www.kumailplus.com/2011/12/02/24308 \ No newline at end of file 3test_url: http://www.kumailplus.com/2011/12/02/24308 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/kumb.com.txt b/inc/3rdparty/site_config/standard/kumb.com.txt
index 3f0d2369..fe350622 100644..100755
--- a/inc/3rdparty/site_config/standard/kumb.com.txt
+++ b/inc/3rdparty/site_config/standard/kumb.com.txt
@@ -1,10 +1,10 @@
1title: //div[@id='centrediv']/h1 1title: //div[@id='centrediv']/h1
2 2
3author: substring-after(//div[@id='centrediv']/h3,'By: ') 3author: substring-after(//div[@id='centrediv']/h3,'By: ')
4 4
5date: substring-after(substring-before(//div[@id='centrediv']/h3,'By: '),'Filed: ') 5date: substring-after(substring-before(//div[@id='centrediv']/h3,'By: '),'Filed: ')
6 6
7body: //div[@class='KonaBody'] 7body: //div[@class='KonaBody']
8 8
9convert_double_br_tags: yes 9convert_double_br_tags: yes
10test_url: http://www.kumb.com/story.php?id=126084 \ No newline at end of file 10test_url: http://www.kumb.com/story.php?id=126084 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/kwerfeldein.de.txt b/inc/3rdparty/site_config/standard/kwerfeldein.de.txt
index 879b4d6c..cf4d3b8c 100644..100755
--- a/inc/3rdparty/site_config/standard/kwerfeldein.de.txt
+++ b/inc/3rdparty/site_config/standard/kwerfeldein.de.txt
@@ -1,9 +1,9 @@
1date: //span[@class='datum'] 1date: //span[@class='datum']
2title: //div[@class='artikel']/h2 2title: //div[@class='artikel']/h2
3body: //div[@class='entry'] 3body: //div[@class='entry']
4strip: //p[@class='tags'] 4strip: //p[@class='tags']
5author: substring-after(//div[@class='authorinfo']/em,'Dies ist ein Artikel von ') 5author: substring-after(//div[@class='authorinfo']/em,'Dies ist ein Artikel von ')
6strip: //div[@class='authorinfo'] 6strip: //div[@class='authorinfo']
7strip: //div[@class='authorpic'] 7strip: //div[@class='authorpic']
8 8
9test_url: http://kwerfeldein.de/index.php/2011/10/17/doppelbelichtungen-mit-konzept/ \ No newline at end of file 9test_url: http://kwerfeldein.de/index.php/2011/10/17/doppelbelichtungen-mit-konzept/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/landetsfria.se.txt b/inc/3rdparty/site_config/standard/landetsfria.se.txt
new file mode 100755
index 00000000..e5317a5a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/landetsfria.se.txt
@@ -0,0 +1,7 @@
1body: //div[contains(@class, 'layout__inner')]//div[contains(@class, 'file-image') or contains(@class, 'node__content')]
2author: //article//div[contains(@class, 'field-byline')]
3strip_id_or_class: rekommenderade
4strip_id_or_class: disqus
5strip_id_or_class: annonser
6
7test_url: http://www.landetsfria.se/artikel/112070 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/laphamsquarterly.org.txt b/inc/3rdparty/site_config/standard/laphamsquarterly.org.txt
index a34e39dd..d25999d0 100644..100755
--- a/inc/3rdparty/site_config/standard/laphamsquarterly.org.txt
+++ b/inc/3rdparty/site_config/standard/laphamsquarterly.org.txt
@@ -1,13 +1,13 @@
1title: //h1[@class='headline'] 1title: //h1[@class='headline']
2body: //div[@class='article'] 2body: //div[@class='article']
3strip: //div[@class='article']//h3[contains(@class, 'section')] 3strip: //div[@class='article']//h3[contains(@class, 'section')]
4strip: //div[@class='article']//ul[contains(@class, 'article-actions')] 4strip: //div[@class='article']//ul[contains(@class, 'article-actions')]
5strip: //div[@id='syndication-upper'] 5strip: //div[@id='syndication-upper']
6strip: //a[@id='syndication'] 6strip: //a[@id='syndication']
7strip: //dl[@id='article-tags'] 7strip: //dl[@id='article-tags']
8strip: //div[@id='article-like'] 8strip: //div[@id='article-like']
9prune: no 9prune: no
10 10
11single_page_link: //li[@class='single-page']/a 11single_page_link: //li[@class='single-page']/a
12 12
13test_url: http://www.laphamsquarterly.org/essays/balanced-diets.php \ No newline at end of file 13test_url: http://www.laphamsquarterly.org/essays/balanced-diets.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/laprensagrafica.com.txt b/inc/3rdparty/site_config/standard/laprensagrafica.com.txt
index e771f81f..82374c0b 100644..100755
--- a/inc/3rdparty/site_config/standard/laprensagrafica.com.txt
+++ b/inc/3rdparty/site_config/standard/laprensagrafica.com.txt
@@ -1,3 +1,3 @@
1tidy: no 1tidy: no
2 2
3test_url: http://www.laprensagrafica.com/opinion/editorial/229252-reflexiones-sobre-la-educacion-que-necesitamos.html \ No newline at end of file 3test_url: http://www.laprensagrafica.com/opinion/editorial/229252-reflexiones-sobre-la-educacion-que-necesitamos.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/laquadrature.net.txt b/inc/3rdparty/site_config/standard/laquadrature.net.txt
index 5bad8e65..746bfca7 100644..100755
--- a/inc/3rdparty/site_config/standard/laquadrature.net.txt
+++ b/inc/3rdparty/site_config/standard/laquadrature.net.txt
@@ -1,10 +1,10 @@
1body: //div[@id='content-content']//div[@class='content'] 1body: //div[@id='content-content']//div[@class='content']
2title: //h1[@class='title'] 2title: //h1[@class='title']
3date: substring-after(//*[@class='submitted'],'Submitted on') 3date: substring-after(//*[@class='submitted'],'Submitted on')
4tidy: no 4tidy: no
5strip: //div[@class='terms terms-inline'] 5strip: //div[@class='terms terms-inline']
6strip: //div[@class='more'] 6strip: //div[@class='more']
7strip: //div[@class='share-links'] 7strip: //div[@class='share-links']
8strip: //table[@id='attachments'] 8strip: //table[@id='attachments']
9 9
10test_url: http://www.laquadrature.net/en/finalization-of-eu-parliaments-weak-net-neutrality-resolution \ No newline at end of file 10test_url: http://www.laquadrature.net/en/finalization-of-eu-parliaments-weak-net-neutrality-resolution \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/lareviewofbooks.org.txt b/inc/3rdparty/site_config/standard/lareviewofbooks.org.txt
index 504dbea1..25e36543 100644..100755
--- a/inc/3rdparty/site_config/standard/lareviewofbooks.org.txt
+++ b/inc/3rdparty/site_config/standard/lareviewofbooks.org.txt
@@ -1,12 +1,12 @@
1#meta data 1#metadata
2title:substring-after(title,'|') 2title: substring-before(//title,' |')
3 3author: //a[contains(@class,'person') and starts-with(@href, '/contributor')]
4author:substring-before( substring-after(//meta[@name = 'description']/@content, normalize-space(substring-after(//title,'|'))),' respond ') 4
5date://h5[@class = 'postDate'] 5#text
6 6body: //div[contains(@class, 'article_body')]
7#text 7
8body://div[@class = 'articleBody'] 8#clean up
9 9strip_id_or_class: recommended_section
10#clean up 10
11strip://center 11test_url: http://lareviewofbooks.org/review/american-politics-redeembale-robert-gates-hillary-clinton-two-memoirs-washington-dc
12test_url: http://lareviewofbooks.org/post/14066007115/literary-transactions-and-their-vicissitudes \ No newline at end of file 12test_url: http://lareviewofbooks.org/interview/souvenirs-future
diff --git a/inc/3rdparty/site_config/standard/latimes.com.txt b/inc/3rdparty/site_config/standard/latimes.com.txt
index 0d6ac851..b2db37bf 100644..100755
--- a/inc/3rdparty/site_config/standard/latimes.com.txt
+++ b/inc/3rdparty/site_config/standard/latimes.com.txt
@@ -1,11 +1,11 @@
1strip: //div[@id="tugs_story_display"] 1strip: //div[@id="tugs_story_display"]
2strip: //div[@id="search_overlay"] 2strip: //div[@id="search_overlay"]
3strip: //div[@id="adv_search"] 3strip: //div[@id="adv_search"]
4body: //div[@class='story'] 4body: //div[@class='story']
5tidy: no 5tidy: no
6convert_double_br_tags: yes 6convert_double_br_tags: yes
7single_page_link: //a[contains(@href, ',print.')] 7single_page_link: //a[contains(@href, ',print.')]
8strip: //p[starts-with(., 'latimes.com')] 8strip: //p[starts-with(., 'latimes.com')]
9strip: //h1[starts-with(., 'latimes.com')] 9strip: //h1[starts-with(., 'latimes.com')]
10strip_id_or_class: cubead 10strip_id_or_class: cubead
11test_url: http://www.latimes.com/news/opinion/commentary/la-oe-gartonash-wilders-20110512,0,2876761.story \ No newline at end of file 11test_url: http://www.latimes.com/news/opinion/commentary/la-oe-gartonash-wilders-20110512,0,2876761.story \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/laughingsquid.com.txt b/inc/3rdparty/site_config/standard/laughingsquid.com.txt
index 1814988a..ab2f834f 100644..100755
--- a/inc/3rdparty/site_config/standard/laughingsquid.com.txt
+++ b/inc/3rdparty/site_config/standard/laughingsquid.com.txt
@@ -1,3 +1,3 @@
1title: //h1[@class='entry-title'] 1title: //h1[@class='entry-title']
2body: //div[@class='entry-content'] 2body: //div[@class='entry-content']
3test_url: http://laughingsquid.com/mysterious-tiny-doors-appearing-around-san-francisco/ \ No newline at end of file 3test_url: http://laughingsquid.com/mysterious-tiny-doors-appearing-around-san-francisco/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/leancrew.com.txt b/inc/3rdparty/site_config/standard/leancrew.com.txt
index 0a4c84ba..e78cf7e6 100644..100755
--- a/inc/3rdparty/site_config/standard/leancrew.com.txt
+++ b/inc/3rdparty/site_config/standard/leancrew.com.txt
@@ -1,9 +1,9 @@
1title: //div[@id="content"]/h1[1] 1title: //div[@id="content"]/h1[1]
2date: substring-before(//p[@class="postdate"], ' at ') 2date: substring-before(//p[@class="postdate"], ' at ')
3author: ("Dr. Drang") 3author: ("Dr. Drang")
4 4
5strip: //div[@id="content"]/h1[1] 5strip: //div[@id="content"]/h1[1]
6strip: //p[@class="postdate"] 6strip: //p[@class="postdate"]
7strip: //h2[@id="respond"] 7strip: //h2[@id="respond"]
8strip: //blockquote[@class="bbpTweet"]/p/span/a/img 8strip: //blockquote[@class="bbpTweet"]/p/span/a/img
9test_url: http://www.leancrew.com/all-this/2011/12/more-shell-less-egg/ \ No newline at end of file 9test_url: http://www.leancrew.com/all-this/2011/12/more-shell-less-egg/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/lefigaro.fr.txt b/inc/3rdparty/site_config/standard/lefigaro.fr.txt
index f5494b96..e720e377 100644..100755
--- a/inc/3rdparty/site_config/standard/lefigaro.fr.txt
+++ b/inc/3rdparty/site_config/standard/lefigaro.fr.txt
@@ -1,8 +1,8 @@
1title: //meta[@name='title']/@content 1title: //meta[@name='title']/@content
2author: //span[@class='sign']//a[@class='journaliste'] 2author: //span[@class='sign']//a[@class='journaliste']
3author: //meta[@name='author']/@content 3author: //meta[@name='author']/@content
4body: //*[@id='article']/div[@class='photo'] | //*[@id='article']/h2 | //*[@id='article']/div[@class='texte'] 4body: //*[@id='article']/div[@class='photo'] | //*[@id='article']/h2 | //*[@id='article']/div[@class='texte']
5date: //time[@pubdate]/@datetime 5date: //time[@pubdate]/@datetime
6prune: no 6prune: no
7test_url: http://www.lefigaro.fr/environnement/2011/11/10/01029-20111110ARTFIG00801-la-chine-confrontee-a-un-immense-defi-ecologique.php 7test_url: http://www.lefigaro.fr/environnement/2011/11/10/01029-20111110ARTFIG00801-la-chine-confrontee-a-un-immense-defi-ecologique.php
8test_url: http://www.lefigaro.fr/conjoncture/2012/11/20/20002-20121120ARTFIG00609-l-usager-devrait-payer-plus-pour-financer-les-transports.php \ No newline at end of file 8test_url: http://www.lefigaro.fr/conjoncture/2012/11/20/20002-20121120ARTFIG00609-l-usager-devrait-payer-plus-pour-financer-les-transports.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/lemonde.fr.txt b/inc/3rdparty/site_config/standard/lemonde.fr.txt
index eb205275..097999b6 100644..100755
--- a/inc/3rdparty/site_config/standard/lemonde.fr.txt
+++ b/inc/3rdparty/site_config/standard/lemonde.fr.txt
@@ -1,13 +1,18 @@
1title: //h1 1title: //h1
2 2
3# they have a single component containing both author and date 3# We can have multiple authors
4#author: //p[@class='source'] 4author: //a[@class='auteur']
5#date: //p[@class='source'] 5
6 6# Last edition date (if any)
7body: //div[@class='contenu_article'] 7date: //time[@itemprop='dateModified']/@datetime
8#Shoot the insane "conjugaison.lemonde.fr" links : 8# Publication date
9strip: //a[contains(@class, 'listLink')] 9date: //time[@itemprop='datePublished']/@datetime
10 10
11prune: no 11
12 12body: //div[@id='articleBody']
13test_url: http://www.lemonde.fr/economie/article/2011/07/05/moody-s-abaisse-la-note-du-portugal-de-quatre-crans_1545237_3234.html \ No newline at end of file 13#Shoot the insane "conjugaison.lemonde.fr" links :
14#strip: //a[contains(@class, 'conjug')]
15
16prune: no
17
18test_url: http://www.lemonde.fr/economie/article/2011/07/05/moody-s-abaisse-la-note-du-portugal-de-quatre-crans_1545237_3234.html
diff --git a/inc/3rdparty/site_config/standard/lesnumeriques.com.txt b/inc/3rdparty/site_config/standard/lesnumeriques.com.txt
index 9b57f726..51e025ae 100644..100755
--- a/inc/3rdparty/site_config/standard/lesnumeriques.com.txt
+++ b/inc/3rdparty/site_config/standard/lesnumeriques.com.txt
@@ -1,9 +1,9 @@
1title: //h1/following::span[@class='fn'] 1title: //h1/following::span[@class='fn']
2# Author: parsing should stop once the <br> is reached, but I don't know how to do this. 2# Author: parsing should stop once the <br> is reached, but I don't know how to do this.
3author: //following::div[@class='PDate2'] 3author: //following::div[@class='PDate2']
4date: //following::div[@class='PDate2']/strong 4date: //following::div[@class='PDate2']/strong
5 5
6body: //div[@class='ArTexte'] 6body: //div[@class='ArTexte']
7body: //div[@id='prod_txt_b'] 7body: //div[@id='prod_txt_b']
8body: //div[@class='ArPhotoP'] 8body: //div[@class='ArPhotoP']
9test_url: http://www.lesnumeriques.com/disque-dur-multimedia/popcorn-hour-300-p12231/test.html \ No newline at end of file 9test_url: http://www.lesnumeriques.com/disque-dur-multimedia/popcorn-hour-300-p12231/test.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/letemps.ch.txt b/inc/3rdparty/site_config/standard/letemps.ch.txt
index c4bee7ec..49b019f9 100644..100755
--- a/inc/3rdparty/site_config/standard/letemps.ch.txt
+++ b/inc/3rdparty/site_config/standard/letemps.ch.txt
@@ -1,3 +1,3 @@
1title: //h2 1title: //h2
2strip_image_src: logo.gif 2strip_image_src: logo.gif
3test_url: http://www.letemps.ch/Facet/print/Uuid/7c9f912c-07c9-11e0-9b50-4d96c9eca37f \ No newline at end of file 3test_url: http://www.letemps.ch/Facet/print/Uuid/7c9f912c-07c9-11e0-9b50-4d96c9eca37f \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/libcom.org.txt b/inc/3rdparty/site_config/standard/libcom.org.txt
new file mode 100755
index 00000000..d1404d10
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/libcom.org.txt
@@ -0,0 +1,7 @@
1date: //span[contains(@class, 'page-date')]
2body: //div[@id='node-page']
3strip_id_or_class: book-navigation
4prune: no
5
6test_url: http://libcom.org/library/what-was-the-ussr-aufheben-1
7test_url: http://libcom.org/library-latest/feed \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/lifeandculture.fr.txt b/inc/3rdparty/site_config/standard/lifeandculture.fr.txt
index c3888aa8..0e1dceb1 100644..100755
--- a/inc/3rdparty/site_config/standard/lifeandculture.fr.txt
+++ b/inc/3rdparty/site_config/standard/lifeandculture.fr.txt
@@ -1,3 +1,3 @@
1title: //h2[@class="entry-title"] 1title: //h2[@class="entry-title"]
2body: //div[@class="entry-content"] 2body: //div[@class="entry-content"]
3test_url: http://www.lifeandculture.fr/digital/facebook-and-the-epiphanator-an-end-to-endings/ \ No newline at end of file 3test_url: http://www.lifeandculture.fr/digital/facebook-and-the-epiphanator-an-end-to-endings/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/lifehacker.com.txt b/inc/3rdparty/site_config/standard/lifehacker.com.txt
index 32ade14a..ec97f06c 100644..100755
--- a/inc/3rdparty/site_config/standard/lifehacker.com.txt
+++ b/inc/3rdparty/site_config/standard/lifehacker.com.txt
@@ -1,42 +1,47 @@
1# Adds author text: Gawker sites commonly show as "Author: View Profile" 1# Adds author text: Gawker sites commonly show as "Author: View Profile"
2author://a[@class="plus-icon modfont"] 2author://a[@class="plus-icon modfont"]
3 3
4# Add date and time 4# Add date and time
5date: //span[@class="date"] 5date: //span[@class="date"]
6 6
7# Remove date and time from article text 7body: //div[contains(@class, 'marquee-asset-wrapper') or contains(@class, 'post-content')]
8strip: //span[@class="date"] 8
9 9# Remove date and time from article text
10# Remove login/comment text 10strip: //span[@class="date"]
11strip: //*[(@class="presence_control_external smalltype")] 11
12 12# Remove login/comment text
13strip: //div[@class="nodebyline modfont"] 13strip: //*[(@class="presence_control_external smalltype")]
14 14
15# Remove right sidebar 15strip: //div[@class="nodebyline modfont"]
16strip: //div[@id="rightwrapper"] 16
17 17# Remove right sidebar
18# Remove print header 18strip: //div[@id="rightwrapper"]
19strip: //div[@id='printhead']/h1 19
20 20# Remove print header
21# Remove 'content is restricted' 21strip: //div[@id='printhead']/h1
22strip: //div[@id='agegate_IDHERE'] 22
23 23# Remove 'content is restricted'
24# Remove follow text 24strip: //div[@id='agegate_IDHERE']
25strip: //*[(@class="permalink_ads")] 25
26 26# Remove follow text
27# Remove view/comment count 27strip: //*[(@class="permalink_ads")]
28strip: //div[@id='wrapper']/div[2][@class='postmeta_permalink_wrapper']/div[1][@class='postmeta_permalink']/div[2][@class='pm_line'] 28
29 29strip_id_or_class: inset_groups
30# Remove contact text 30
31strip: //div[@id='wrapper']/div[1][@class='content permalink']/p[6][@class='contactinfo'] 31# Remove view/comment count
32 32strip: //div[@id='wrapper']/div[2][@class='postmeta_permalink_wrapper']/div[1][@class='postmeta_permalink']/div[2][@class='pm_line']
33# Remove medium duplicates of the article image 33
34strip_image_src: medium.jpg 34# Remove contact text
35 35strip: //div[@id='wrapper']/div[1][@class='content permalink']/p[6][@class='contactinfo']
36# Remove "arrow" class at bottom of page 36
37strip: //p[@class="arrow"] 37# Remove medium duplicates of the article image
38 38strip_image_src: medium.jpg
39# Remove "track" image from article body 39
40strip: //img[@alt="track"] 40# Remove "arrow" class at bottom of page
41test_url: http://lifehacker.com/5925801/how-can-i-turn-vague-goals-into-actionable-to+dos 41strip: //p[@class="arrow"]
42test_url: http://lifehacker.com/5941600/hack-an-old-computer-mouse-into-a-wireless-bluetooth-mouse \ No newline at end of file 42
43# Remove "track" image from article body
44strip: //img[@alt="track"]
45test_url: http://lifehacker.com/5925801/how-can-i-turn-vague-goals-into-actionable-to+dos
46test_url: http://lifehacker.com/5941600/hack-an-old-computer-mouse-into-a-wireless-bluetooth-mouse
47test_url: http://lifehacker.com/what-happens-to-the-brain-when-you-meditate-and-how-it-1202533314 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/lifestyle.inquirer.net.txt b/inc/3rdparty/site_config/standard/lifestyle.inquirer.net.txt
new file mode 100755
index 00000000..25d544ae
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/lifestyle.inquirer.net.txt
@@ -0,0 +1,7 @@
1title: //h1[@class='singlePageTitle']
2
3strip: //p[contains(text(), 'Follow Us')]
4strip: //p/strong[contains(text(), 'Recent Stories:')]
5strip: //div[@id="sharefeature"]
6
7test_url: http://lifestyle.inquirer.net/100223/dusting-your-ceiling-fan
diff --git a/inc/3rdparty/site_config/standard/lifeweek.com.cn.txt b/inc/3rdparty/site_config/standard/lifeweek.com.cn.txt
new file mode 100755
index 00000000..e09f6692
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/lifeweek.com.cn.txt
@@ -0,0 +1,23 @@
1# This filter is tested on:
2# http://www.lifeweek.com.cn/2012/1211/39439.shtml
3# http://www.lifeweek.com.cn/2013/0308/40213.shtml
4
5title:substring-before(//h1, '(')
6title://h1
7date://ul[@class='authorbox']/li
8author: substring-after(//ul[@class='authorbox']/li/following-sibling::li, '作者:')
9
10next_page_link: //div[@class='pageturn_list']/a[@class='pagedown']
11body: //div[@class='original ']
12
13strip://h1
14strip://ul[@class='authorbox']
15strip://span[@class='app_p']
16strip://div[@style='text-align:right;']
17strip://div[@class='pageturn_list']
18strip://div[@class='lifespeaks']
19strip://div[@class='vright fr']
20strip://div[@class='copyrt mg20']
21strip://div[@class='keyabout mg20']
22strip://ul[@class='readabout mg20']
23test_url: http://www.lifeweek.com.cn/2013/0308/40213.shtml \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/linkedin.com.txt b/inc/3rdparty/site_config/standard/linkedin.com.txt
index 37e83cf6..37e83cf6 100644..100755
--- a/inc/3rdparty/site_config/standard/linkedin.com.txt
+++ b/inc/3rdparty/site_config/standard/linkedin.com.txt
diff --git a/inc/3rdparty/site_config/standard/livescience.com.txt b/inc/3rdparty/site_config/standard/livescience.com.txt
new file mode 100755
index 00000000..5275d34a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/livescience.com.txt
@@ -0,0 +1,20 @@
1title: //div[@class="album_title"]//h1
2author: substring-before(//div[@class='by_line'], ',')
3date: substring-after(substring-before(//div[@class="album_time"], ' Time'), 'Date: ')
4body: //div[@class="about_text"]
5
6strip: //div[@class='large_popper']
7strip: //span[contains(@id, 'mag_glass')]
8strip: //span[contains(@class, 'img_overlay')]
9strip: //td//span
10strip: //div[@class="center_adsense"]
11strip: //div[@class="article_info"]//div[@class='asset_section']
12strip: //div[@class="article_additional"]
13strip: //div[contains(@style, 'overflow:hidden')]
14strip: //div[@class="aa_text"]
15strip: //div[@id='nointelliTXT']
16
17prune: no
18autodetect_on_failure: no
19
20test_url: http://www.livescience.com/34569-why-flowers-close-at-night-nyctinasty.html
diff --git a/inc/3rdparty/site_config/standard/longform.org.txt b/inc/3rdparty/site_config/standard/longform.org.txt
index 48d5e1a7..1310ec0d 100644..100755
--- a/inc/3rdparty/site_config/standard/longform.org.txt
+++ b/inc/3rdparty/site_config/standard/longform.org.txt
@@ -1,3 +1,3 @@
1single_page_link: //div[@class="post"]/div[@class="title"]/a 1single_page_link: //div[@class="post"]/div[@class="title"]/a
2 2
3test_url: http://longform.org/2011/05/06/disconcerting-new-answers-in-models-suicide/ \ No newline at end of file 3test_url: http://longform.org/2011/05/06/disconcerting-new-answers-in-models-suicide/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/loopinsight.com.txt b/inc/3rdparty/site_config/standard/loopinsight.com.txt
index 08ad90c3..730af947 100644..100755
--- a/inc/3rdparty/site_config/standard/loopinsight.com.txt
+++ b/inc/3rdparty/site_config/standard/loopinsight.com.txt
@@ -1,9 +1,9 @@
1body: //div[@class='container_16']//div[@class='grid_11'] 1body: //div[@class='container_16']//div[@class='grid_11']
2strip: //h2[@class='mast'] 2strip: //h2[@class='mast']
3strip: //div[@class='container_16']//div[@class='grid_11']/h1 3strip: //div[@class='container_16']//div[@class='grid_11']/h1
4strip: //div[@class='container_16']//div[@class='grid_11']/p[1] 4strip: //div[@class='container_16']//div[@class='grid_11']/p[1]
5strip: //div[@class='container_16']//div[@class='grid_11']/div 5strip: //div[@class='container_16']//div[@class='grid_11']/div
6author: //a[starts-with(@title, 'Posts by')] 6author: //a[starts-with(@title, 'Posts by')]
7date: substring-before(substring-after(//time, 'Posted on '), ' at') 7date: substring-before(substring-after(//time, 'Posted on '), ' at')
8test_url: http://www.loopinsight.com/2012/09/13/forget-iphone-5-naysayers-this-thing-is-big/ 8test_url: http://www.loopinsight.com/2012/09/13/forget-iphone-5-naysayers-this-thing-is-big/
9test_url: http://www.loopinsight.com/2011/05/20/playbook-returns-high-misses-sales-targets-by-90/ \ No newline at end of file 9test_url: http://www.loopinsight.com/2011/05/20/playbook-returns-high-misses-sales-targets-by-90/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/lostgarden.com.txt b/inc/3rdparty/site_config/standard/lostgarden.com.txt
index a823e649..d7eb0fa0 100644..100755
--- a/inc/3rdparty/site_config/standard/lostgarden.com.txt
+++ b/inc/3rdparty/site_config/standard/lostgarden.com.txt
@@ -1,3 +1,3 @@
1prune: no 1prune: no
2convert_double_br_tags: yes 2convert_double_br_tags: yes
3test_url: http://www.lostgarden.com/2012/04/loops-and-arcs.html \ No newline at end of file 3test_url: http://www.lostgarden.com/2012/04/loops-and-arcs.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/lovefm.com.txt b/inc/3rdparty/site_config/standard/lovefm.com.txt
new file mode 100755
index 00000000..20d26c56
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/lovefm.com.txt
@@ -0,0 +1,6 @@
1title: //*[@id='title']
2date: //*[@id='date']
3body: //*[@id='desc']
4tidy: no
5
6test_url: http://www.lovefm.com/local_news.php?item=2176 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/lovetv.com.bz.txt b/inc/3rdparty/site_config/standard/lovetv.com.bz.txt
new file mode 100755
index 00000000..a71fccdd
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/lovetv.com.bz.txt
@@ -0,0 +1,9 @@
1title: //div[contains(@class, 'post')]//h1
2body: //div[contains(@class, 'post')]
3strip: //hr
4strip_id_or_class: post-meta
5
6prune: no
7
8test_url: http://www.lovetv.com.bz/2013/06/28/recently-discovered-ancient-maya-wooden-canoe-paddle-to-be-handed-over-to-archaeology/
9test_url: http://www.lovetv.com.bz/feed/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/lrb.co.uk.txt b/inc/3rdparty/site_config/standard/lrb.co.uk.txt
index ce5053d4..f1aacb7d 100644..100755
--- a/inc/3rdparty/site_config/standard/lrb.co.uk.txt
+++ b/inc/3rdparty/site_config/standard/lrb.co.uk.txt
@@ -1,8 +1,12 @@
1title: substring-before(//title, ' LRB') 1title: //div[contains(@class, "article-body")]/hgroup/h1
2 2body: //div[contains(@class, "article-body")]
3body: //div[@class="article-body indent"] 3
4 4date: substring-after(//p[@class="meta-info"]/a, '· ')
5date: substring-after(//p[@class="meta-info"]/a, ' ') 5
6 6author: //div[contains(@class, "article-body")]/hgroup/h2
7prune: no 7
8test_url: http://www.lrb.co.uk/v33/n18/james-meek/its-already-happened \ No newline at end of file 8strip_id_or_class: print-hide
9strip_id_or_class: books
10
11test_url: http://www.lrb.co.uk/v33/n18/james-meek/its-already-happened
12test_url: http://www.lrb.co.uk/v36/n13/benjamin-kunkel/paupers-and-richlings
diff --git a/inc/3rdparty/site_config/standard/luminous-landscape.com.txt b/inc/3rdparty/site_config/standard/luminous-landscape.com.txt
index 92ccf3ba..b445f5eb 100644..100755
--- a/inc/3rdparty/site_config/standard/luminous-landscape.com.txt
+++ b/inc/3rdparty/site_config/standard/luminous-landscape.com.txt
@@ -1,6 +1,6 @@
1title: //h2 1title: //h2
2 2
3body: // div[@id='content'] 3body: // div[@id='content']
4 4
5strip: //div[@class='sidebar_wrapper'] 5strip: //div[@class='sidebar_wrapper']
6test_url: http://www.luminous-landscape.com/tutorials/optimizing_exposure.shtml \ No newline at end of file 6test_url: http://www.luminous-landscape.com/tutorials/optimizing_exposure.shtml \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/luxuo.com.txt b/inc/3rdparty/site_config/standard/luxuo.com.txt
new file mode 100755
index 00000000..a3d5cb17
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/luxuo.com.txt
@@ -0,0 +1,4 @@
1body: //div[@class='post-content']
2prune: no
3
4test_url: http://www.luxuo.com/watches/feed \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/m.bbc.co.uk.txt b/inc/3rdparty/site_config/standard/m.bbc.co.uk.txt
index a8af5438..d1ff0b43 100644..100755
--- a/inc/3rdparty/site_config/standard/m.bbc.co.uk.txt
+++ b/inc/3rdparty/site_config/standard/m.bbc.co.uk.txt
@@ -1,8 +1,8 @@
1title: //div[@class="story-body"]/div[@class="story-inner"]/h1 1title: //div[@class="story-body"]/div[@class="story-inner"]/h1
2body: //div[@class="story-body"] 2body: //div[@class="story-body"]
3date: //p[@class='date']/strong 3date: //p[@class='date']/strong
4author: substring-after(//div[@class="story-inner"]/div[@class="byline"]//span[@class='name'], 'By') 4author: substring-after(//div[@class="story-inner"]/div[@class="byline"]//span[@class='name'], 'By')
5 5
6strip: //div[@class="story-inner"]/div[@class="byline"] 6strip: //div[@class="story-inner"]/div[@class="byline"]
7 7
8test_url: http://m.bbc.co.uk/news/science-environment-19144464 \ No newline at end of file 8test_url: http://m.bbc.co.uk/news/science-environment-19144464 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/m.douban.com.txt b/inc/3rdparty/site_config/standard/m.douban.com.txt
new file mode 100755
index 00000000..ce9a3167
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/m.douban.com.txt
@@ -0,0 +1,13 @@
1# This filter is tested on:
2# http://m.douban.com/note/240776310/?session=6ac86d1e
3# http://m.douban.com/note/208270705/?session=e00ec732_3433229
4
5title: //h2
6author: //a[@class='founder']
7date: substring-after(//span[@class='info'],' | ')
8body: //div[contains(@class,'entry item')]
9
10strip://span[contains(@class,'info')]
11
12convert_double_br_tags: yes
13test_url: http://m.douban.com/note/240776310/?session=6ac86d1e \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/m.vanityfair.com.txt b/inc/3rdparty/site_config/standard/m.vanityfair.com.txt
new file mode 100755
index 00000000..e47ce2ce
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/m.vanityfair.com.txt
@@ -0,0 +1,11 @@
1# Article Metadata
2title: //h1
3author: //span[@class="name"]/a
4date: //time
5
6# Content Pruning
7strip: //h5
8strip: //time
9strip: //div[@class="byline"]
10strip: //h2[@class="headline "]
11test_url: http://m.vanityfair.com/politics/2012/10/michael-lewis-profile-barack-obama \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mac4ever.com.txt b/inc/3rdparty/site_config/standard/mac4ever.com.txt
index 892b47f5..9999758b 100644..100755
--- a/inc/3rdparty/site_config/standard/mac4ever.com.txt
+++ b/inc/3rdparty/site_config/standard/mac4ever.com.txt
@@ -1,5 +1,5 @@
1author: substring-after(//div[@class='author'],'Par ') 1author: substring-after(//div[@class='author'],'Par ')
2date: //div[@class='date'] 2date: //div[@class='date']
3body: //div[@class='content'] 3body: //div[@class='content']
4 4
5test_url: http://www.mac4ever.com/news/64182/icloud_les_prix_en_euros_et_en_chf/ \ No newline at end of file 5test_url: http://www.mac4ever.com/news/64182/icloud_les_prix_en_euros_et_en_chf/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/macdrifter.com.txt b/inc/3rdparty/site_config/standard/macdrifter.com.txt
index fd1ede7d..e57bd640 100644..100755
--- a/inc/3rdparty/site_config/standard/macdrifter.com.txt
+++ b/inc/3rdparty/site_config/standard/macdrifter.com.txt
@@ -1,2 +1,2 @@
1title: substring-before(//title,' Macdrifter') 1title: substring-before(//title,' Macdrifter')
2test_url: http://www.macdrifter.com/2012/03/instacast-on-my-mac/ \ No newline at end of file 2test_url: http://www.macdrifter.com/2012/03/instacast-on-my-mac/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/macformat.techradar.com.txt b/inc/3rdparty/site_config/standard/macformat.techradar.com.txt
index 109eae45..522efb49 100644..100755
--- a/inc/3rdparty/site_config/standard/macformat.techradar.com.txt
+++ b/inc/3rdparty/site_config/standard/macformat.techradar.com.txt
@@ -1,9 +1,9 @@
1# Remove news feed 1# Remove news feed
2strip: //div[@id='news_feed_front'] 2strip: //div[@id='news_feed_front']
3 3
4# Remove pull quote 4# Remove pull quote
5strip: //div[@class='field field-type-text field-field-pull-quote'] 5strip: //div[@class='field field-type-text field-field-pull-quote']
6 6
7# Remove login 7# Remove login
8strip: //div[@class='right_bar_login'] 8strip: //div[@class='right_bar_login']
9test_url: http://macformat.techradar.com/blog/solid-state-storage-bringing-parity-back-mac-29-10-10&article=89189666 \ No newline at end of file 9test_url: http://macformat.techradar.com/blog/solid-state-storage-bringing-parity-back-mac-29-10-10&article=89189666 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/macgeneration.com.txt b/inc/3rdparty/site_config/standard/macgeneration.com.txt
index e6bbe28e..739eff4e 100644..100755
--- a/inc/3rdparty/site_config/standard/macgeneration.com.txt
+++ b/inc/3rdparty/site_config/standard/macgeneration.com.txt
@@ -1,5 +1,5 @@
1author: substring-before(substring-after(//div[@class='dateNews'],'par '),' le') 1author: substring-before(substring-after(//div[@class='dateNews'],'par '),' le')
2date: substring-after(//div[@class='dateNews'],' le ') 2date: substring-after(//div[@class='dateNews'],' le ')
3body: //div[@class='singleNews zoneApple'] 3body: //div[@class='singleNews zoneApple']
4 4
5test_url: http://www.macgeneration.com/news/voir/211162/dropbox-encore-un-mac-et-deux-comptes-dropbox \ No newline at end of file 5test_url: http://www.macgeneration.com/news/voir/211162/dropbox-encore-un-mac-et-deux-comptes-dropbox \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/macmagazine.com.br.txt b/inc/3rdparty/site_config/standard/macmagazine.com.br.txt
index 47ebfd79..da7df695 100644..100755
--- a/inc/3rdparty/site_config/standard/macmagazine.com.br.txt
+++ b/inc/3rdparty/site_config/standard/macmagazine.com.br.txt
@@ -1,21 +1,21 @@
1# Remove sliders 1# Remove sliders
2strip: //*[(@class="slides_container")] 2strip: //*[(@class="slides_container")]
3strip: //div[(@id="slides_two")] 3strip: //div[(@id="slides_two")]
4 4
5# Remove tag cloud 5# Remove tag cloud
6strip: //span[(@class="secao")] 6strip: //span[(@class="secao")]
7 7
8# Fix date article 8# Fix date article
9# TODO 9# TODO
10 10
11# Remove other stuff 11# Remove other stuff
12strip: //div[(@id="idc-container")] 12strip: //div[(@id="idc-container")]
13strip: //div[(@id="idc-noscript")] 13strip: //div[(@id="idc-noscript")]
14strip: //div[(@class="linkwithin_div")] 14strip: //div[(@class="linkwithin_div")]
15strip: //div[(@class="navPosts")] 15strip: //div[(@class="navPosts")]
16strip: //div[(@id="lateral")] 16strip: //div[(@id="lateral")]
17strip: //div[(@id="autor")] 17strip: //div[(@id="autor")]
18strip: //div[(@id="rodape")] 18strip: //div[(@id="rodape")]
19strip: //div[(@id="post")]/h1 19strip: //div[(@id="post")]/h1
20strip: //div[(@id="post")]/div[(@id="boxInformacoes")] 20strip: //div[(@id="post")]/div[(@id="boxInformacoes")]
21test_url: http://macmagazine.com.br/2011/08/01/skype-para-ipad-esta-finalmente-chegando-a-app-store/ \ No newline at end of file 21test_url: http://macmagazine.com.br/2011/08/01/skype-para-ipad-esta-finalmente-chegando-a-app-store/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/macrumors.com.txt b/inc/3rdparty/site_config/standard/macrumors.com.txt
index 76f999d3..83cfb4a6 100644..100755
--- a/inc/3rdparty/site_config/standard/macrumors.com.txt
+++ b/inc/3rdparty/site_config/standard/macrumors.com.txt
@@ -1,10 +1,12 @@
1author: substring-after(//div[@class='byline'], " by ") 1author: substring-after(//div[@class='byline'], " by ")
2date: substring-before(//div[@class='byline'], " by ") 2date: substring-before(//div[@class='byline'], " by ")
3 3
4# set body 4# set body
5body: //div[@class='content'] 5body: //div[@class='content']
6 6strip_id_or_class: commentsContainer
7# set title 7strip_id_or_class: linkback
8title: //h3 8
9# set title
10title: //h3
9#strip: //div[@class='content']/h3 11#strip: //div[@class='content']/h3
10test_url: http://www.macrumors.com/2010/11/10/apple-debuts-new-apple-tv-and-itunes-movie-content-in-japan/ \ No newline at end of file 12test_url: http://www.macrumors.com/2010/11/10/apple-debuts-new-apple-tv-and-itunes-movie-content-in-japan/
diff --git a/inc/3rdparty/site_config/standard/macstories.net.txt b/inc/3rdparty/site_config/standard/macstories.net.txt
index 6e651ca0..639fdd19 100644..100755
--- a/inc/3rdparty/site_config/standard/macstories.net.txt
+++ b/inc/3rdparty/site_config/standard/macstories.net.txt
@@ -1,8 +1,8 @@
1strip: //*[(@id = "featured")] 1strip: //*[(@id = "featured")]
2 2
3author:substring-after( //div[@class='posttitle']/h2[@class='author'],'by ') 3author:substring-after( //div[@class='posttitle']/h2[@class='author'],'by ')
4 4
5date: concat(//div[@class='month'],' ',//div[@class='day']) 5date: concat(//div[@class='month'],' ',//div[@class='day'])
6 6
7#macstories doesn't provide a year, but month/day is better than nothing 7#macstories doesn't provide a year, but month/day is better than nothing
8test_url: http://www.macstories.net/news/instapaper-4-0-available-completely-redesigned-ipad-ui-new-features-search-subscription/ \ No newline at end of file 8test_url: http://www.macstories.net/news/instapaper-4-0-available-completely-redesigned-ipad-ui-new-features-search-subscription/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mactalk.com.au.txt b/inc/3rdparty/site_config/standard/mactalk.com.au.txt
index e8d60522..9be865af 100644..100755
--- a/inc/3rdparty/site_config/standard/mactalk.com.au.txt
+++ b/inc/3rdparty/site_config/standard/mactalk.com.au.txt
@@ -1,4 +1,4 @@
1author://div[@class="article_username_container_full"] 1author://div[@class="article_username_container_full"]
2date://div[@class="article_username_container"] 2date://div[@class="article_username_container"]
3body://div[@class="article cms_clear restore postcontainer"] 3body://div[@class="article cms_clear restore postcontainer"]
4test_url: http://www.mactalk.com.au/content/chat-basil-shkara-developer-taptax-2452/ \ No newline at end of file 4test_url: http://www.mactalk.com.au/content/chat-basil-shkara-developer-taptax-2452/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mactechnews.de.txt b/inc/3rdparty/site_config/standard/mactechnews.de.txt
index c3fc0e44..5c03518a 100644..100755
--- a/inc/3rdparty/site_config/standard/mactechnews.de.txt
+++ b/inc/3rdparty/site_config/standard/mactechnews.de.txt
@@ -1,3 +1,3 @@
1title: substring-after(substring-after(//title, '>'), '>') 1title: substring-after(substring-after(//title, '>'), '>')
2body: //div[@class='NewsArticleContent'] 2body: //div[@class='NewsArticleContent']
3test_url: http://www.mactechnews.de/news/index/Apple-Pressekonferenz-zum-iPhone-4-147316.html \ No newline at end of file 3test_url: http://www.mactechnews.de/news/index/Apple-Pressekonferenz-zum-iPhone-4-147316.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/macworld.com.txt b/inc/3rdparty/site_config/standard/macworld.com.txt
index 96175872..e7d97202 100644..100755
--- a/inc/3rdparty/site_config/standard/macworld.com.txt
+++ b/inc/3rdparty/site_config/standard/macworld.com.txt
@@ -1,24 +1,24 @@
1title: //article//h1 1title: //article//h1
2date: //meta[@name="date"]/@content 2date: //meta[@name="date"]/@content
3author: //div[@class="author-name" or @class="article-byline"]/a[1] 3author: //div[@class="author-name" or @class="article-byline"]/a[1]
4 4
5body: //section[@class="page"] 5body: //section[@class="page"]
6 6
7# remove 'From the Lab' and 'Recent posts' text 7# remove 'From the Lab' and 'Recent posts' text
8strip: //div[@class='blogLabel'] 8strip: //div[@class='blogLabel']
9 9
10# remove byline and meta info 10# remove byline and meta info
11strip: //div[@class="article-meta"] 11strip: //div[@class="article-meta"]
12strip: //div[@class="author-info"] 12strip: //div[@class="author-info"]
13 13
14#strip tags and categories 14#strip tags and categories
15strip: //div[@class="department"] 15strip: //div[@class="department"]
16 16
17#strip product cap links 17#strip product cap links
18strip: //div[@class="cap-main"] 18strip: //div[@class="cap-main"]
19strip: //div[@id="compare-lede"] 19strip: //div[@id="compare-lede"]
20 20
21prune: no 21prune: no
22 22
23# copes less well with Review pages, seems fine for News 23# copes less well with Review pages, seems fine for News
24test_url: http://www.macworld.com/article/163184/2011/10/the_ipod_as_an_iconic_cultural_force.html \ No newline at end of file 24test_url: http://www.macworld.com/article/163184/2011/10/the_ipod_as_an_iconic_cultural_force.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mainichi.jp.txt b/inc/3rdparty/site_config/standard/mainichi.jp.txt
index e701207f..414a2f53 100644..100755
--- a/inc/3rdparty/site_config/standard/mainichi.jp.txt
+++ b/inc/3rdparty/site_config/standard/mainichi.jp.txt
@@ -1,3 +1,3 @@
1body: //div[@class='NewsArticle'] 1body: //div[@class='NewsArticle']
2 2
3test_url: http://mainichi.jp/select/weathernews/20110311/news/20110520k0000e040062000c.html \ No newline at end of file 3test_url: http://mainichi.jp/select/weathernews/20110311/news/20110520k0000e040062000c.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mainpost.de.txt b/inc/3rdparty/site_config/standard/mainpost.de.txt
index a2d25d56..2136de3f 100644..100755
--- a/inc/3rdparty/site_config/standard/mainpost.de.txt
+++ b/inc/3rdparty/site_config/standard/mainpost.de.txt
@@ -1,28 +1,28 @@
1title: substring-before(//title, '|') 1title: substring-before(//title, '|')
2body: //*[@id='content-left'] 2body: //*[@id='content-left']
3 3
4# Why is this not working here? 4# Why is this not working here?
5# body: //*[@id='content-left']/div[@class='content-container'][2]/div[@class='content-body']/div[@class='inner-container']/div[@class='detail'] 5# body: //*[@id='content-left']/div[@class='content-container'][2]/div[@class='content-body']/div[@class='inner-container']/div[@class='detail']
6 6
7 7
8#Header 8#Header
9strip_id_or_class: 'subHead' 9strip_id_or_class: 'subHead'
10strip_id_or_class: 'fl_right' 10strip_id_or_class: 'fl_right'
11strip_id_or_class: 'infolink' 11strip_id_or_class: 'infolink'
12strip_id_or_class: 'content-head' 12strip_id_or_class: 'content-head'
13strip_id_or_class: 'tab' 13strip_id_or_class: 'tab'
14strip_id_or_class: 'tab-active' 14strip_id_or_class: 'tab-active'
15strip: //*[contains(@class,'trenner')] 15strip: //*[contains(@class,'trenner')]
16 16
17# Headline 17# Headline
18strip: //h1/* 18strip: //h1/*
19strip_id_or_class: 'font16' 19strip_id_or_class: 'font16'
20 20
21#Images 21#Images
22strip_id_or_class: 'leftimage' 22strip_id_or_class: 'leftimage'
23strip_id_or_class: 'rightimage' 23strip_id_or_class: 'rightimage'
24 24
25#Comments 25#Comments
26strip: //table 26strip: //table
27strip: //p/following-sibling::*[0] 27strip: //p/following-sibling::*[0]
28test_url: http://www.mainpost.de/ueberregional/meinung/Dioxin-Skandal-bringt-Agrarministerin-in-Bedraengnis;art9517,5920211 \ No newline at end of file 28test_url: http://www.mainpost.de/ueberregional/meinung/Dioxin-Skandal-bringt-Agrarministerin-in-Bedraengnis;art9517,5920211 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/makeuseof.com.txt b/inc/3rdparty/site_config/standard/makeuseof.com.txt
index 6809afed..078e8d08 100644..100755
--- a/inc/3rdparty/site_config/standard/makeuseof.com.txt
+++ b/inc/3rdparty/site_config/standard/makeuseof.com.txt
@@ -1,3 +1,9 @@
1tidy: no 1title: //h1[@class='entry-title']
2 2
3test_url: http://www.makeuseof.com/dir/kindle-it-web-pages-kindle-friendly/ \ No newline at end of file 3body: //article//header//img | //article//section[@class='post']
4
5strip: //article//section[@class='post']/aside
6strip: //article//section[@class='post']/footer
7
8test_url: http://www.makeuseof.com/tag/cool-websites-and-tools-advanced-photo-editor-keep-your-kids-stuff-online-identify-60-languages/
9test_url: http://www.makeuseof.com/tag/what-do-you-think-of-our-new-look-makeuseof-poll/
diff --git a/inc/3rdparty/site_config/standard/manager.co.th.txt b/inc/3rdparty/site_config/standard/manager.co.th.txt
new file mode 100755
index 00000000..cd6c5c01
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/manager.co.th.txt
@@ -0,0 +1,26 @@
1title: //td[@class="headline"]
2author: //font[@color="#003366"]
3date: //td[@class="date"]
4
5strip: //td[@class="headline"]
6strip: //font[@color="#003366"]
7strip: //td[@class="date"]
8
9strip: //img[@src="images/2009/logo_en.gif"]
10
11body: //tbody[@class="body"]
12convert_double_br_tags:yes
13
14strip: //img[@src="/images/TabOver.gif"]
15strip: //td[@width="160"]
16strip: //img[@src="/images/TabUnder.gif"]
17
18strip: //td[@class="small"]
19strip: //td[@height="47"]
20
21strip: //td[@valign="middle"]
22strip: //td[@background="/images/menu_bottombg.gif"]
23strip: //img[@src="/images/sc_footer_l.gif"]
24strip: //img[@src="/images/sc_footer_m.gif"]
25strip: //img[@src="/images/sc_footer_r.gif"]
26test_url: http://www.manager.co.th/Entertainment/ViewNews.aspx?NewsID=9550000101979 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/marco.org.txt b/inc/3rdparty/site_config/standard/marco.org.txt
index ef2e03d3..4bb24a62 100644..100755
--- a/inc/3rdparty/site_config/standard/marco.org.txt
+++ b/inc/3rdparty/site_config/standard/marco.org.txt
@@ -1,8 +1,8 @@
1tidy: no 1tidy: no
2prune: no 2prune: no
3date: //article//time[@pubdate] 3date: //article//time[@pubdate]
4title: //article/header/h2 4title: //article/header/h2
5body: //article 5body: //article
6strip: //header 6strip: //header
7test_url: http://www.marco.org/2012/09/08/businessweek-gruber 7test_url: http://www.marco.org/2012/09/08/businessweek-gruber
8test_url: http://www.marco.org/2012/04/24/might-upgrade-someday \ No newline at end of file 8test_url: http://www.marco.org/2012/04/24/might-upgrade-someday \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/marksdailyapple.com.txt b/inc/3rdparty/site_config/standard/marksdailyapple.com.txt
index 0077f560..0077f560 100644..100755
--- a/inc/3rdparty/site_config/standard/marksdailyapple.com.txt
+++ b/inc/3rdparty/site_config/standard/marksdailyapple.com.txt
diff --git a/inc/3rdparty/site_config/standard/martinfowler.com.txt b/inc/3rdparty/site_config/standard/martinfowler.com.txt
index 8e0e349f..4ff4a9c2 100644..100755
--- a/inc/3rdparty/site_config/standard/martinfowler.com.txt
+++ b/inc/3rdparty/site_config/standard/martinfowler.com.txt
@@ -1,8 +1,8 @@
1date: //div[@id="main"]/p[@class="date"] 1date: //div[@id="main"]/p[@class="date"]
2author: string("Martin Fowler") 2author: string("Martin Fowler")
3body: //div[@id="main"] 3body: //div[@id="main"]
4strip_id_or_class: date 4strip_id_or_class: date
5strip_id_or_class: tags 5strip_id_or_class: tags
6strip_id_or_class: tagLabel 6strip_id_or_class: tagLabel
7strip: //div[@id="main"]/h1[1] 7strip: //div[@id="main"]/h1[1]
8test_url: http://martinfowler.com/bliki/DatabaseThaw.html \ No newline at end of file 8test_url: http://martinfowler.com/bliki/DatabaseThaw.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mashable.com.txt b/inc/3rdparty/site_config/standard/mashable.com.txt
index 2c5a14a6..b6efb6c5 100644..100755
--- a/inc/3rdparty/site_config/standard/mashable.com.txt
+++ b/inc/3rdparty/site_config/standard/mashable.com.txt
@@ -1,4 +1,11 @@
1title: //header[@class='entry-title']/h1 1title: //h1[@class='title']
2body: //div[@class='description'] 2author: substring-after(//span[@class='author_name'], 'By ')
3date: //time
4
5body: //article
3strip: //div[@class='ytm-gallery-box'] 6strip: //div[@class='ytm-gallery-box']
4test_url: http://mashable.com/2011/12/05/india-wants-google-and-facebook-to-censor-user-content/ \ No newline at end of file 7strip: //div[contains(@class, 'adsense')]
8strip: //aside[contains(@class, 'social')]
9strip_id_or_class: article-topics
10
11test_url: http://mashable.com/2013/05/24/myspace-architects-rebuilding-a-brand/
diff --git a/inc/3rdparty/site_config/standard/matt.might.net.txt b/inc/3rdparty/site_config/standard/matt.might.net.txt
new file mode 100755
index 00000000..30d585cf
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/matt.might.net.txt
@@ -0,0 +1,5 @@
1title: //h1
2author: string("Matt Might")
3strip: //h1/following-sibling::div
4
5test_url: http://matt.might.net/articles/oo-cesk/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mattcutts.com.txt b/inc/3rdparty/site_config/standard/mattcutts.com.txt
index 76b1eac6..76b1eac6 100644..100755
--- a/inc/3rdparty/site_config/standard/mattcutts.com.txt
+++ b/inc/3rdparty/site_config/standard/mattcutts.com.txt
diff --git a/inc/3rdparty/site_config/standard/mbl.is.txt b/inc/3rdparty/site_config/standard/mbl.is.txt
index fd26f091..fd26f091 100644..100755
--- a/inc/3rdparty/site_config/standard/mbl.is.txt
+++ b/inc/3rdparty/site_config/standard/mbl.is.txt
diff --git a/inc/3rdparty/site_config/standard/medialens.org.txt b/inc/3rdparty/site_config/standard/medialens.org.txt
index 94f27b71..4c333aa1 100644..100755
--- a/inc/3rdparty/site_config/standard/medialens.org.txt
+++ b/inc/3rdparty/site_config/standard/medialens.org.txt
@@ -1,2 +1,4 @@
1strip: //div[contains(@class, 'article-tools')] 1strip_id_or_class: article-tools
2strip_id_or_class: pagenav
3prune: no
2test_url: http://www.medialens.org/index.php/alerts/alert-archive/2012/713-the-illusion-of-democracy.html \ No newline at end of file 4test_url: http://www.medialens.org/index.php/alerts/alert-archive/2012/713-the-illusion-of-democracy.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/medium.com.txt b/inc/3rdparty/site_config/standard/medium.com.txt
new file mode 100755
index 00000000..acf7cc90
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/medium.com.txt
@@ -0,0 +1,7 @@
1body: //div[contains(@class, 'post-content-inner')]
2strip_id_or_class: follow-ups
3strip_id_or_class: footer
4
5prune: no
6
7test_url: https://medium.com/p/6844c0d7893b \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/megamp3.eu.txt b/inc/3rdparty/site_config/standard/megamp3.eu.txt
new file mode 100755
index 00000000..1b6a1279
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/megamp3.eu.txt
@@ -0,0 +1,8 @@
1title: //h3[@class='episode_title']
2body: //ul[contains(@class, 'episode_imgdesc')]/li/descendant::*
3prune: no
4strip://*[contains(@class, 'plugin')]
5strip://*[contains(@class, 'episode_keywords')]
6
7test_url: http://www.megamp3.eu/?p=episode&name=2013-04-19_la_filiere_progressive_431.mp3
8test_url: http://www.megamp3.eu/feed.xml
diff --git a/inc/3rdparty/site_config/standard/menshealth.com.txt b/inc/3rdparty/site_config/standard/menshealth.com.txt
index e7e1e269..a1a46f63 100644..100755
--- a/inc/3rdparty/site_config/standard/menshealth.com.txt
+++ b/inc/3rdparty/site_config/standard/menshealth.com.txt
@@ -1,16 +1,16 @@
1# need to find a way to eliminate <span> content for "related content" without eliminating important content 1# need to find a way to eliminate <span> content for "related content" without eliminating important content
2 2
3convert_double_br_tags: [yes] 3convert_double_br_tags: [yes]
4#body: //div[@id='leftside'] 4#body: //div[@id='leftside']
5title: //h1 5title: //h1
6title: //h2 6title: //h2
7Author: substring-after(//h4, 'By ') 7Author: substring-after(//h4, 'By ')
8Author: substring-after(//h4, 'By: ') 8Author: substring-after(//h4, 'By: ')
9#Strip: //span 9#Strip: //span
10strip_id_or_class: morefromcat 10strip_id_or_class: morefromcat
11strip_id_or_class: mostpopular 11strip_id_or_class: mostpopular
12strip_id_or_class: articlepagination 12strip_id_or_class: articlepagination
13strip_id_or_class: toolbar 13strip_id_or_class: toolbar
14body: //div[@id='zmodcontent'] 14body: //div[@id='zmodcontent']
15single_page_link: //li[@class='onepage'] //a[contains (@href, 'printer.php')] 15single_page_link: //li[@class='onepage'] //a[contains (@href, 'printer.php')]
16test_url: http://www.menshealth.com/mhlists/pursuit_of_happiness/index.php \ No newline at end of file 16test_url: http://www.menshealth.com/mhlists/pursuit_of_happiness/index.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/metafilter.com.txt b/inc/3rdparty/site_config/standard/metafilter.com.txt
new file mode 100755
index 00000000..a2f3ada9
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/metafilter.com.txt
@@ -0,0 +1,8 @@
1body: //div[contains(@class, 'copy') or contains(@class, 'comments')]
2strip_id_or_class: related
3strip: //a[. = 'Subscribe']
4strip: //h1/span[@class = 'smallcopy']
5strip: //a[@class = 'skip']
6strip: //div[@id = 'logo']
7strip: //div[contains(@class, 'comments') and contains(., 'You are not currently logged in')]
8test_url: http://www.metafilter.com/128101/Probably-more-secure-than-the-Drafts-folder-on-a-shared-Gmail-account \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mforum.cari.com.my.txt b/inc/3rdparty/site_config/standard/mforum.cari.com.my.txt
new file mode 100755
index 00000000..c295d734
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/mforum.cari.com.my.txt
@@ -0,0 +1,6 @@
1body: (//td[starts-with(@id, 'postmessage_')])[1]
2
3prune: no
4
5test_url: http://mforum.cari.com.my/forum.php?mod=viewthread&tid=788033
6test_url: http://mforum.cari.com.my/forum.php?mod=rss&fid=265&auth=0 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mikeash.com.txt b/inc/3rdparty/site_config/standard/mikeash.com.txt
index af8a7d30..abaa6a81 100644..100755
--- a/inc/3rdparty/site_config/standard/mikeash.com.txt
+++ b/inc/3rdparty/site_config/standard/mikeash.com.txt
@@ -1,5 +1,5 @@
1title: //div[@class="blogtitle"] 1title: //div[@class="blogtitle"]
2strip: //div[@class="blogtitle"] 2strip: //div[@class="blogtitle"]
3 3
4author: substring-after(//span[@class="blogheader"], 'Author: ') 4author: substring-after(//span[@class="blogheader"], 'Author: ')
5test_url: http://www.mikeash.com/pyblog/friday-qa-2012-01-13-the-mac-toolbox.html \ No newline at end of file 5test_url: http://www.mikeash.com/pyblog/friday-qa-2012-01-13-the-mac-toolbox.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mikeindustries.com.txt b/inc/3rdparty/site_config/standard/mikeindustries.com.txt
index 3d488e13..fb4636cc 100644..100755
--- a/inc/3rdparty/site_config/standard/mikeindustries.com.txt
+++ b/inc/3rdparty/site_config/standard/mikeindustries.com.txt
@@ -1,9 +1,9 @@
1title: //div[@class='post_content']/h2 1title: //div[@class='post_content']/h2
2date: //div[@class='dateline'] 2date: //div[@class='dateline']
3body: //div[@class='entry'] 3body: //div[@class='entry']
4 4
5strip: //div[@class='closer'] 5strip: //div[@class='closer']
6strip: //div[@class='navigation'] 6strip: //div[@class='navigation']
7strip: //div[@class='aux_pane'] 7strip: //div[@class='aux_pane']
8strip: //div[@class='aux_aux_pane'] 8strip: //div[@class='aux_aux_pane']
9test_url: http://www.mikeindustries.com/blog/archive/2011/10/never-be-another \ No newline at end of file 9test_url: http://www.mikeindustries.com/blog/archive/2011/10/never-be-another \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/minnesota.publicradio.org.txt b/inc/3rdparty/site_config/standard/minnesota.publicradio.org.txt
index 7e43d63c..773a627c 100644..100755
--- a/inc/3rdparty/site_config/standard/minnesota.publicradio.org.txt
+++ b/inc/3rdparty/site_config/standard/minnesota.publicradio.org.txt
@@ -1,10 +1,10 @@
1title: //*[@class="article"]/h1 1title: //*[@class="article"]/h1
2date: //*[@class="article"]/div[@class="date"] 2date: //*[@class="article"]/div[@class="date"]
3 3
4# strip the title and date from the article text 4# strip the title and date from the article text
5strip: //*[@class="article"]/h1 5strip: //*[@class="article"]/h1
6strip: //*[@class="article"]/div[@class="date"] 6strip: //*[@class="article"]/div[@class="date"]
7 7
8# strip annoying <br> between metadata and article 8# strip annoying <br> between metadata and article
9strip: //*[@class="article"]/div[@class="date"]/following-sibling::br 9strip: //*[@class="article"]/div[@class="date"]/following-sibling::br
10test_url: http://minnesota.publicradio.org/display/web/2012/06/19/health/senators-want-health-care-ruling-on-tv/ \ No newline at end of file 10test_url: http://minnesota.publicradio.org/display/web/2012/06/19/health/senators-want-health-care-ruling-on-tv/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/minnpost.com.txt b/inc/3rdparty/site_config/standard/minnpost.com.txt
index 51a0630b..dc926a6f 100644..100755
--- a/inc/3rdparty/site_config/standard/minnpost.com.txt
+++ b/inc/3rdparty/site_config/standard/minnpost.com.txt
@@ -1,5 +1,5 @@
1title: //*[@id="content-header"]/h1 1title: //*[@id="content-header"]/h1
2author: //*[contains(@class, 'byline')]/a/text() 2author: //*[contains(@class, 'byline')]/a/text()
3date: substring-after(//*[contains(@class, 'byline')]/text()[2], '|') 3date: substring-after(//*[contains(@class, 'byline')]/text()[2], '|')
4body: //*[contains(@class, 'node-body')] 4body: //*[contains(@class, 'node-body')]
5test_url: http://www.minnpost.com/eric-black-ink/2012/06/overturning-obamacare-would-be-game-changer-supreme-court \ No newline at end of file 5test_url: http://www.minnpost.com/eric-black-ink/2012/06/overturning-obamacare-would-be-game-changer-supreme-court \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mirrorfootball.co.uk.txt b/inc/3rdparty/site_config/standard/mirrorfootball.co.uk.txt
index 4215a051..2033cf33 100644..100755
--- a/inc/3rdparty/site_config/standard/mirrorfootball.co.uk.txt
+++ b/inc/3rdparty/site_config/standard/mirrorfootball.co.uk.txt
@@ -1,3 +1,3 @@
1# Remove extra links 1# Remove extra links
2strip: //*[@class='appended_html'] 2strip: //*[@class='appended_html']
3test_url: http://www.mirrorfootball.co.uk/news/West-Ham-crisis-Carlton-Cole-slams-diabolical-performance-and-rips-into-Avram-Grant-lack-of-tactical-nous-following-Liverpool-mauling-article636151.html \ No newline at end of file 3test_url: http://www.mirrorfootball.co.uk/news/West-Ham-crisis-Carlton-Cole-slams-diabolical-performance-and-rips-into-Avram-Grant-lack-of-tactical-nous-following-Liverpool-mauling-article636151.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mises.org.txt b/inc/3rdparty/site_config/standard/mises.org.txt
index ae542aa6..73c485e6 100644..100755
--- a/inc/3rdparty/site_config/standard/mises.org.txt
+++ b/inc/3rdparty/site_config/standard/mises.org.txt
@@ -1,5 +1,5 @@
1strip_id_or_class: 'book-ad' 1strip_id_or_class: 'book-ad'
2strip_id_or_class: 'bigger pullquote' 2strip_id_or_class: 'bigger pullquote'
3strip_id_or_class: 'subscribe' 3strip_id_or_class: 'subscribe'
4strip_id_or_class: 'blog-link' 4strip_id_or_class: 'blog-link'
5test_url: http://mises.org/daily/4804 \ No newline at end of file 5test_url: http://mises.org/daily/4804 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mlb.mlb.com.txt b/inc/3rdparty/site_config/standard/mlb.mlb.com.txt
index 30e8aff2..765fab3f 100644..100755
--- a/inc/3rdparty/site_config/standard/mlb.mlb.com.txt
+++ b/inc/3rdparty/site_config/standard/mlb.mlb.com.txt
@@ -1,14 +1,14 @@
1title: //h1[@class='article-headline'] 1title: //h1[@class='article-headline']
2date: //span[@class='timeStamp'] 2date: //span[@class='timeStamp']
3author: substring-before(//p[@class='article-byline'], '/') 3author: substring-before(//p[@class='article-byline'], '/')
4body: //div[@id='article'] 4body: //div[@id='article']
5#strip: //div[@class='inner'] 5#strip: //div[@class='inner']
6strip: //div[@id='article_head'] 6strip: //div[@id='article_head']
7strip: //p[@class='tagLine'] 7strip: //p[@class='tagLine']
8strip: //div[@id='article_related_links'] 8strip: //div[@id='article_related_links']
9strip: //div[@id='article_related_mlb'] 9strip: //div[@id='article_related_mlb']
10strip: //span[@class='more'] 10strip: //span[@class='more']
11strip: //div[@class='article_component'] 11strip: //div[@class='article_component']
12strip: //span[@class='screen_reader'] 12strip: //span[@class='screen_reader']
13strip: //ul[@class='columnists_blurb'] 13strip: //ul[@class='columnists_blurb']
14test_url: http://mlb.mlb.com/news/article.jsp?ymd=20120403&content_id=27880830 \ No newline at end of file 14test_url: http://mlb.mlb.com/news/article.jsp?ymd=20120403&content_id=27880830 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mlb.sbnation.com.txt b/inc/3rdparty/site_config/standard/mlb.sbnation.com.txt
index c4e3389e..8480e302 100644..100755
--- a/inc/3rdparty/site_config/standard/mlb.sbnation.com.txt
+++ b/inc/3rdparty/site_config/standard/mlb.sbnation.com.txt
@@ -1,14 +1,14 @@
1title: //h1[@id = 'stream_title'] 1title: //h1[@id = 'stream_title']
2author: //p[@class = 'byline']/a 2author: //p[@class = 'byline']/a
3date: //span[@class = 'datetime'] 3date: //span[@class = 'datetime']
4 4
5body: //div[@id = 'stream_container'] 5body: //div[@id = 'stream_container']
6strip: //p[@class = 'byline'] 6strip: //p[@class = 'byline']
7strip_id_or_class: stream_summary 7strip_id_or_class: stream_summary
8strip_id_or_class: social-spoken 8strip_id_or_class: social-spoken
9strip_id_or_class: datetime 9strip_id_or_class: datetime
10strip_id_or_class: author-mini-profile 10strip_id_or_class: author-mini-profile
11strip_id_or_class: social-tools 11strip_id_or_class: social-tools
12strip_id_or_class: entry-tags 12strip_id_or_class: entry-tags
13strip_id_or_class: fb-like-box 13strip_id_or_class: fb-like-box
14test_url: http://mlb.sbnation.com/2011/10/17/2495845/2011-world-series-st-louis-cardinals-texas-rangers-home-field-advantage \ No newline at end of file 14test_url: http://mlb.sbnation.com/2011/10/17/2495845/2011-world-series-st-louis-cardinals-texas-rangers-home-field-advantage \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mlssoccer.com.txt b/inc/3rdparty/site_config/standard/mlssoccer.com.txt
index 41e15136..5d706f88 100644..100755
--- a/inc/3rdparty/site_config/standard/mlssoccer.com.txt
+++ b/inc/3rdparty/site_config/standard/mlssoccer.com.txt
@@ -1,6 +1,6 @@
1title: //*[@class="header_title"]/h1 1title: //*[@class="header_title"]/h1
2date: //*[@class="field-date"] 2date: //*[@class="field-date"]
3author: //*[@class="field-author"] 3author: //*[@class="field-author"]
4body: //div[contains(@class, 'content')] 4body: //div[contains(@class, 'content')]
5 5
6test_url: http://www.mlssoccer.com/news/article/2012/06/19/lack-depth-front-forces-arena-alter-las-formation \ No newline at end of file 6test_url: http://www.mlssoccer.com/news/article/2012/06/19/lack-depth-front-forces-arena-alter-las-formation \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mmo-champion.com.txt b/inc/3rdparty/site_config/standard/mmo-champion.com.txt
index 918fae36..50d8a24f 100644..100755
--- a/inc/3rdparty/site_config/standard/mmo-champion.com.txt
+++ b/inc/3rdparty/site_config/standard/mmo-champion.com.txt
@@ -1,5 +1,5 @@
1title: //h1 1title: //h1
2body: //div[@id = 'article_content']/div[contains(@class,'article')] 2body: //div[@id = 'article_content']/div[contains(@class,'article')]
3author: //sub[@class = 'article_promoted_text']/a[starts-with(@href, 'member')] 3author: //sub[@class = 'article_promoted_text']/a[starts-with(@href, 'member')]
4date: //div[@class = 'article_username_container'] 4date: //div[@class = 'article_username_container']
5test_url: http://www.mmo-champion.com/content/2688-Other-Press-Tour-Interviews-A-Night-in-Mists-of-Pandaria-Blue-Posts-MoP-Screenshot \ No newline at end of file 5test_url: http://www.mmo-champion.com/content/2688-Other-Press-Tour-Interviews-A-Night-in-Mists-of-Pandaria-Blue-Posts-MoP-Screenshot \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mnn.com.txt b/inc/3rdparty/site_config/standard/mnn.com.txt
index ddfe6fa2..d3576df2 100644..100755
--- a/inc/3rdparty/site_config/standard/mnn.com.txt
+++ b/inc/3rdparty/site_config/standard/mnn.com.txt
@@ -1,11 +1,11 @@
1tidy: no 1tidy: no
2author: //div[@id="above-content"]//img/@alt | //div[@class="comment-auth"]/span[1]/a/text() 2author: //div[@id="above-content"]//img/@alt | //div[@class="comment-auth"]/span[1]/a/text()
3date: //div[@class="comment-auth"]/div | //div[@class="comment-auth"]/span[2] 3date: //div[@class="comment-auth"]/div | //div[@class="comment-auth"]/span[2]
4body: //div[@class="node"] 4body: //div[@class="node"]
5 5
6strip_id_or_class: vertical-social-bar 6strip_id_or_class: vertical-social-bar
7strip_id_or_class: blogs_paginator 7strip_id_or_class: blogs_paginator
8strip_id_or_class: horizontal-social-links 8strip_id_or_class: horizontal-social-links
9strip_id_or_class: servicelinksdiv 9strip_id_or_class: servicelinksdiv
10 10
11test_url: http://www.mnn.com/green-tech/research-innovations/blogs/5-breakthroughs-that-will-make-solar-power-cheaper-than-coal \ No newline at end of file 11test_url: http://www.mnn.com/green-tech/research-innovations/blogs/5-breakthroughs-that-will-make-solar-power-cheaper-than-coal \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mno.hu.txt b/inc/3rdparty/site_config/standard/mno.hu.txt
index ba158953..8a3f9391 100644..100755
--- a/inc/3rdparty/site_config/standard/mno.hu.txt
+++ b/inc/3rdparty/site_config/standard/mno.hu.txt
@@ -1,14 +1,14 @@
1title: //title 1title: //title
2 2
3author: //div[@class="author"] 3author: //div[@class="author"]
4 4
5strip_id_or_class: 'header' 5strip_id_or_class: 'header'
6strip_id_or_class: 'cikk_ajanlo' 6strip_id_or_class: 'cikk_ajanlo'
7strip_id_or_class: 'buttons' 7strip_id_or_class: 'buttons'
8strip_id_or_class: 'related' 8strip_id_or_class: 'related'
9strip_id_or_class: 'adbox ad_cikk_kozepre' 9strip_id_or_class: 'adbox ad_cikk_kozepre'
10strip_id_or_class: 'cikk-cimkek' 10strip_id_or_class: 'cikk-cimkek'
11strip_id_or_class: 'cikk_ertekeles' 11strip_id_or_class: 'cikk_ertekeles'
12 12
13strip_comments: yes 13strip_comments: yes
14test_url: http://mno.hu/grund/a-gumibottal-hadonaszo-rendort-joval-konnyebb-utalni-1055351 \ No newline at end of file 14test_url: http://mno.hu/grund/a-gumibottal-hadonaszo-rendort-joval-konnyebb-utalni-1055351 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mobile.nytimes.com.txt b/inc/3rdparty/site_config/standard/mobile.nytimes.com.txt
new file mode 100755
index 00000000..c60252ef
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/mobile.nytimes.com.txt
@@ -0,0 +1,4 @@
1title: //h1[contains(@class, 'headline')]
2body: //article[contains(@class, 'full-art')]
3strip_id_or_class: image-credit
4test_url: http://mobile.nytimes.com/2014/06/19/opinion/gail-collins-romney-and-the-2016-contenders-huddle.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mobile.slate.com.txt b/inc/3rdparty/site_config/standard/mobile.slate.com.txt
index d5d81034..6ffcd18f 100644..100755
--- a/inc/3rdparty/site_config/standard/mobile.slate.com.txt
+++ b/inc/3rdparty/site_config/standard/mobile.slate.com.txt
@@ -1,5 +1,5 @@
1title: //h2[@class="article_title"] 1title: //h2[@class="article_title"]
2strip: //a[@class="houseAdLink"] 2strip: //a[@class="houseAdLink"]
3strip: //h1 3strip: //h1
4strip: //div[@class="more_articles"] 4strip: //div[@class="more_articles"]
5test_url: http://mobile.slate.com/rss.jsp?rssid=411&item=http%3a%2f%2fwww.slate.com%2fdefault.aspx%3fdisplaymode%3d201%26id%3d2293749%26device%3drss \ No newline at end of file 5test_url: http://mobile.slate.com/rss.jsp?rssid=411&item=http%3a%2f%2fwww.slate.com%2fdefault.aspx%3fdisplaymode%3d201%26id%3d2293749%26device%3drss \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mobileopportunity.blogspot.com.txt b/inc/3rdparty/site_config/standard/mobileopportunity.blogspot.com.txt
index a1cc5317..82da4aec 100644..100755
--- a/inc/3rdparty/site_config/standard/mobileopportunity.blogspot.com.txt
+++ b/inc/3rdparty/site_config/standard/mobileopportunity.blogspot.com.txt
@@ -1,11 +1,11 @@
1body: //div[@class='post uncustomized-post-template'] 1body: //div[@class='post uncustomized-post-template']
2 2
3# remove duplicate of post title, which is a link 3# remove duplicate of post title, which is a link
4strip: //h3[@class='post-title'] 4strip: //h3[@class='post-title']
5 5
6# remove permalink and timestamp, which isn't useful as it's a time with no date 6# remove permalink and timestamp, which isn't useful as it's a time with no date
7strip: //span[@class='post-timestamp'] 7strip: //span[@class='post-timestamp']
8 8
9# remove labels (tags) 9# remove labels (tags)
10strip: //span[@class='post-labels'] 10strip: //span[@class='post-labels']
11test_url: http://mobileopportunity.blogspot.com/2010/12/rims-q3-financials-tale-of-two.html \ No newline at end of file 11test_url: http://mobileopportunity.blogspot.com/2010/12/rims-q3-financials-tale-of-two.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/modernghana.com.txt b/inc/3rdparty/site_config/standard/modernghana.com.txt
index 4c93d0cf..306ef8d9 100644..100755
--- a/inc/3rdparty/site_config/standard/modernghana.com.txt
+++ b/inc/3rdparty/site_config/standard/modernghana.com.txt
@@ -1,8 +1,8 @@
1title: //meta[@property="og:title"]/@content 1title: //meta[@property="og:title"]/@content
2author: //meta[@name="author"]/@content 2author: //meta[@name="author"]/@content
3date: //span[@class='date1'] 3date: //span[@class='date1']
4body: //div[@id='newsimage'] | //div[@id='bodytext'] 4body: //div[@id='newsimage'] | //div[@id='bodytext']
5tidy: no 5tidy: no
6prune: no 6prune: no
7 7
8test_url: http://www.modernghana.com/news/323765/1/039ghost039-teachers-removed-salaries-allowances-p.html \ No newline at end of file 8test_url: http://www.modernghana.com/news/323765/1/039ghost039-teachers-removed-salaries-allowances-p.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/money.cnn.com.txt b/inc/3rdparty/site_config/standard/money.cnn.com.txt
index a0d1628a..d5e03d20 100644..100755
--- a/inc/3rdparty/site_config/standard/money.cnn.com.txt
+++ b/inc/3rdparty/site_config/standard/money.cnn.com.txt
@@ -1,24 +1,24 @@
1title: //meta[@property="og:title"]/@content 1title: //meta[@property="og:title"]/@content
2title: //h1[@class='storyheadline'] 2title: //h1[@class='storyheadline']
3author: //meta[@name="AUTHOR"]/@content 3author: //meta[@name="AUTHOR"]/@content
4date: //span[@class='cnnDateStamp'] 4date: //span[@class='cnnDateStamp']
5date: //meta[@name="DATE"]/@content 5date: //meta[@name="DATE"]/@content
6body: //div[@id='storytext' or @class='storytext'] 6body: //div[@id='storytext' or @class='storytext']
7 7
8strip_id_or_class: ie_column 8strip_id_or_class: ie_column
9strip_id_or_class: sharewidgets 9strip_id_or_class: sharewidgets
10strip_image_src: bug.gif 10strip_image_src: bug.gif
11 11
12strip: //div[@class="hed_side"] 12strip: //div[@class="hed_side"]
13strip: //span[@class="byline"] 13strip: //span[@class="byline"]
14strip: //a[@class="soc-twtname"] 14strip: //a[@class="soc-twtname"]
15strip: //span[@class="cnnDateStamp"] 15strip: //span[@class="cnnDateStamp"]
16strip: //div[@class="storytimestamp"] 16strip: //div[@class="storytimestamp"]
17strip: //div[@class="cnnCol_side"] 17strip: //div[@class="cnnCol_side"]
18 18
19prune: no 19prune: no
20tidy: no 20tidy: no
21 21
22test_url: http://money.cnn.com/2011/03/15/news/companies/steve_jobs_thought_process.fortune/index.htm?section=money_topstories&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fmoney_topstories+%28Top+Stories%29 22test_url: http://money.cnn.com/2011/03/15/news/companies/steve_jobs_thought_process.fortune/index.htm?section=money_topstories&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fmoney_topstories+%28Top+Stories%29
23test_url: http://money.cnn.com/2012/01/27/markets/markets_newyork/index.htm 23test_url: http://money.cnn.com/2012/01/27/markets/markets_newyork/index.htm
24test_url: http://money.cnn.com/2012/05/13/technology/yahoo-ceo-out-rumor/index.htm \ No newline at end of file 24test_url: http://money.cnn.com/2012/05/13/technology/yahoo-ceo-out-rumor/index.htm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/monkeyzen.com.txt b/inc/3rdparty/site_config/standard/monkeyzen.com.txt
index f779c38e..f779c38e 100644..100755
--- a/inc/3rdparty/site_config/standard/monkeyzen.com.txt
+++ b/inc/3rdparty/site_config/standard/monkeyzen.com.txt
diff --git a/inc/3rdparty/site_config/standard/moonsault.de.txt b/inc/3rdparty/site_config/standard/moonsault.de.txt
index 061a8d5c..55026eeb 100644..100755
--- a/inc/3rdparty/site_config/standard/moonsault.de.txt
+++ b/inc/3rdparty/site_config/standard/moonsault.de.txt
@@ -1,13 +1,13 @@
1strip_image_src: menu 1strip_image_src: menu
2strip_image_src: templates 2strip_image_src: templates
3strip: //div/a 3strip: //div/a
4strip: //div/b 4strip: //div/b
5strip: //div/strong 5strip: //div/strong
6strip: //td[@width='30%'] 6strip: //td[@width='30%']
7strip: //br[1] 7strip: //br[1]
8strip: //br[2] 8strip: //br[2]
9strip: //br[3] 9strip: //br[3]
10strip: //br[4] 10strip: //br[4]
11strip: //a[@href='http://www.moonsault.de/newzboard/index.php?act=home'] 11strip: //a[@href='http://www.moonsault.de/newzboard/index.php?act=home']
12strip_id_or_class: cse-branding-right 12strip_id_or_class: cse-branding-right
13test_url: http://www.moonsault.de/newzboard/index.php?news=22321&act=previous \ No newline at end of file 13test_url: http://www.moonsault.de/newzboard/index.php?news=22321&act=previous \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/moreintelligentlife.com.txt b/inc/3rdparty/site_config/standard/moreintelligentlife.com.txt
index a7e59c30..780cca4f 100644..100755
--- a/inc/3rdparty/site_config/standard/moreintelligentlife.com.txt
+++ b/inc/3rdparty/site_config/standard/moreintelligentlife.com.txt
@@ -1,7 +1,7 @@
1title: //h1[@class='print-title'] 1title: //h1[@class='print-title']
2body: //div[@class='print-submitted' or @class='print-created' or @class='print-content'] 2body: //div[@class='print-submitted' or @class='print-created' or @class='print-content']
3prune: no 3prune: no
4 4
5single_page_link: //li[@class='print']/a 5single_page_link: //li[@class='print']/a
6 6
7test_url: http://moreintelligentlife.com/content/places/paul-markillie/they-trash-cars-dont-they \ No newline at end of file 7test_url: http://moreintelligentlife.com/content/places/paul-markillie/they-trash-cars-dont-they \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/motherboard.vice.com.txt b/inc/3rdparty/site_config/standard/motherboard.vice.com.txt
index 6faf1c9a..c6312c0e 100644..100755
--- a/inc/3rdparty/site_config/standard/motherboard.vice.com.txt
+++ b/inc/3rdparty/site_config/standard/motherboard.vice.com.txt
@@ -1,5 +1,5 @@
1author: //span[@class="author"]/a 1author: //span[@class="author"]/a
2date: //span[@class="date"] 2date: //span[@class="date"]
3body: //div[@class="story-content"] 3body: //div[@class="story-content"]
4strip: //aside 4strip: //aside
5test_url: http://motherboard.vice.com/blog/you-can-carry-a-copy-of-the-pirate-bay-in-your-pocket \ No newline at end of file 5test_url: http://motherboard.vice.com/blog/you-can-carry-a-copy-of-the-pirate-bay-in-your-pocket \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mothering.com.txt b/inc/3rdparty/site_config/standard/mothering.com.txt
index a9d9195f..a34adff7 100644..100755
--- a/inc/3rdparty/site_config/standard/mothering.com.txt
+++ b/inc/3rdparty/site_config/standard/mothering.com.txt
@@ -1,7 +1,7 @@
1title: //h2[contains(@class,'post_headline')] 1title: //h2[contains(@class,'post_headline')]
2body: //div[@class='entry'] 2body: //div[@class='entry']
3convert_double_br_tags: yes 3convert_double_br_tags: yes
4strip_image_src: _selected.gif 4strip_image_src: _selected.gif
5strip_id_or_class: addthis_ 5strip_id_or_class: addthis_
6strip: //a[contains(@href,'feedburner.com')] 6strip: //a[contains(@href,'feedburner.com')]
7test_url: http://mothering.com/all-things-mothering/inspiration/motherhood-brings-me-down \ No newline at end of file 7test_url: http://mothering.com/all-things-mothering/inspiration/motherhood-brings-me-down \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/motherjones.com.txt b/inc/3rdparty/site_config/standard/motherjones.com.txt
index d58c7d2c..851feb7e 100644..100755
--- a/inc/3rdparty/site_config/standard/motherjones.com.txt
+++ b/inc/3rdparty/site_config/standard/motherjones.com.txt
@@ -1,15 +1,15 @@
1title: //h1 1title: //h1
2body: //div[@id = 'content-area'] 2body: //div[@id = 'content-area']
3next_page_link: //div[@class='node-pager']/a[contains(@class, 'next')] 3next_page_link: //div[@class='node-pager']/a[contains(@class, 'next')]
4tidy: no 4tidy: no
5author: //p[contains(@class, 'byline')]/a 5author: //p[contains(@class, 'byline')]/a
6 6
7strip_id_or_class: node-header 7strip_id_or_class: node-header
8strip_id_or_class: hdr-tools 8strip_id_or_class: hdr-tools
9strip_id_or_class: node-body-break 9strip_id_or_class: node-body-break
10strip_id_or_class: pullquote 10strip_id_or_class: pullquote
11strip_id_or_class: node-pager 11strip_id_or_class: node-pager
12strip_id_or_class: author-bio 12strip_id_or_class: author-bio
13strip_id_or_class: node-footer 13strip_id_or_class: node-footer
14 14
15test_url: http://motherjones.com/politics/2012/02/mac-mcclelland-free-online-shipping-warehouses-labor \ No newline at end of file 15test_url: http://motherjones.com/politics/2012/02/mac-mcclelland-free-online-shipping-warehouses-labor \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/motorfull.com.txt b/inc/3rdparty/site_config/standard/motorfull.com.txt
index c6bec7e9..c6bec7e9 100644..100755
--- a/inc/3rdparty/site_config/standard/motorfull.com.txt
+++ b/inc/3rdparty/site_config/standard/motorfull.com.txt
diff --git a/inc/3rdparty/site_config/standard/movie.douban.com.txt b/inc/3rdparty/site_config/standard/movie.douban.com.txt
new file mode 100755
index 00000000..eae211ed
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/movie.douban.com.txt
@@ -0,0 +1,12 @@
1# This filter is tested on:
2# http://movie.douban.com/review/1062013/
3
4title: //span[contains(@property, 'v:summary')]
5author: //span[contains(@property, 'v:reviewer')]
6date://span[contains(@property, 'v:dtreviewed')]
7body://div[contains(@class, 'main-bd')]
8
9strip://img[contains(@class,'rating')]|//img[contains(@class,'review-stat')]
10convert_double_br_tags: yes
11test_url: http://movie.douban.com/review/1062013/
12test_url: http://movie.douban.com/review/1021870/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/msdn.microsoft.com.txt b/inc/3rdparty/site_config/standard/msdn.microsoft.com.txt
index f4f20450..7a284275 100644..100755
--- a/inc/3rdparty/site_config/standard/msdn.microsoft.com.txt
+++ b/inc/3rdparty/site_config/standard/msdn.microsoft.com.txt
@@ -1,3 +1,3 @@
1body: //div[class="mainBody"] 1body: //div[class="mainBody"]
2footnotes: no 2footnotes: no
3test_url: http://msdn.microsoft.com/en-us/library/hh542796(VS.103).aspx \ No newline at end of file 3test_url: http://msdn.microsoft.com/en-us/library/hh542796(VS.103).aspx \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/msnbc.msn.com.txt b/inc/3rdparty/site_config/standard/msnbc.msn.com.txt
index ad89cda8..f008d2d1 100644..100755
--- a/inc/3rdparty/site_config/standard/msnbc.msn.com.txt
+++ b/inc/3rdparty/site_config/standard/msnbc.msn.com.txt
@@ -1,21 +1,21 @@
1title: //title 1title: //title
2author: //div[@id='byline'] 2author: //div[@id='byline']
3 3
4date: //div[contains(@class,'timestamp')]/abbr/text() 4date: //div[contains(@class,'timestamp')]/abbr/text()
5 5
6body: //div[@id='intellitTXT'] 6body: //div[@id='intellitTXT']
7 7
8strip: //div[@id='byline'] 8strip: //div[@id='byline']
9strip: //div[contains(@class,'timestamp')] 9strip: //div[contains(@class,'timestamp')]
10strip: //div[contains(@class, 'ad-label')] 10strip: //div[contains(@class, 'ad-label')]
11strip: //div[contains(@class, 'ad-break')] 11strip: //div[contains(@class, 'ad-break')]
12strip: //span[contains(@class, 'x-video')] 12strip: //span[contains(@class, 'x-video')]
13strip: //span[contains(@class, 'inline')] 13strip: //span[contains(@class, 'inline')]
14strip: //div[contains(@class, 'video')] 14strip: //div[contains(@class, 'video')]
15strip: //div[contains(@class, 'discuss')] 15strip: //div[contains(@class, 'discuss')]
16strip: //div[@id='most-popular'] 16strip: //div[@id='most-popular']
17strip: //div[contains(@class,'drawer')] 17strip: //div[contains(@class,'drawer')]
18strip: //*[contains(@class, 'hide')] 18strip: //*[contains(@class, 'hide')]
19 19
20footnotes: no 20footnotes: no
21test_url: http://www.msnbc.msn.com/id/44748412/ns/business-world_business/#.TolUv-vfDbE \ No newline at end of file 21test_url: http://www.msnbc.msn.com/id/44748412/ns/business-world_business/#.TolUv-vfDbE \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/myfoxatlanta.com.txt b/inc/3rdparty/site_config/standard/myfoxatlanta.com.txt
new file mode 100755
index 00000000..8a7590ab
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/myfoxatlanta.com.txt
@@ -0,0 +1,5 @@
1body: //div[@id='WNStoryBody']
2author: //div[@id='WNStoryByline']
3prune: no
4
5test_url: http://www.myfoxatlanta.com/category/233685/local-news?clienttype=rss \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/myfoxboston.com.txt b/inc/3rdparty/site_config/standard/myfoxboston.com.txt
index 1a35b4fc..9ad8ce05 100644..100755
--- a/inc/3rdparty/site_config/standard/myfoxboston.com.txt
+++ b/inc/3rdparty/site_config/standard/myfoxboston.com.txt
@@ -1,4 +1,4 @@
1body: //div[@class="col1"]//div[@class="photo"] | //div[@class="detail"]/p[@class="fontStyle21"] | //div[@class="story last"] 1body: //div[@class="col1"]//div[@class="photo"] | //div[@class="detail"]/p[@class="fontStyle21"] | //div[@class="story last"]
2tidy: no 2tidy: no
3 3
4test_url: http://www.myfoxboston.com/dpp/news/local/transit-police-say-woman-spat-on-mbta-bus-driver-2010611 \ No newline at end of file 4test_url: http://www.myfoxboston.com/dpp/news/local/transit-police-say-woman-spat-on-mbta-bus-driver-2010611 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/myrecipes.com.txt b/inc/3rdparty/site_config/standard/myrecipes.com.txt
index 8b99d22d..956be1e6 100644..100755
--- a/inc/3rdparty/site_config/standard/myrecipes.com.txt
+++ b/inc/3rdparty/site_config/standard/myrecipes.com.txt
@@ -1,12 +1,12 @@
1title: //h2[contains(@class, 'name')] 1title: //h2[contains(@class, 'name')]
2body: //div[@class='printFullPageContentContainer']//div[contains(@class, 'recipe')] 2body: //div[@class='printFullPageContentContainer']//div[contains(@class, 'recipe')]
3 3
4strip_id_or_class: photoBy 4strip_id_or_class: photoBy
5strip_id_or_class: link 5strip_id_or_class: link
6 6
7single_page_link: //li[@class='print']/a[contains(@href, '/print/')] 7single_page_link: //li[@class='print']/a[contains(@href, '/print/')]
8 8
9prune: no 9prune: no
10tidy: no 10tidy: no
11 11
12test_url: http://www.myrecipes.com/recipe/hummingbird-cake-10000000387218/ \ No newline at end of file 12test_url: http://www.myrecipes.com/recipe/hummingbird-cake-10000000387218/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/narenji.ir.txt b/inc/3rdparty/site_config/standard/narenji.ir.txt
index 6c3d0c24..6c3d0c24 100644..100755
--- a/inc/3rdparty/site_config/standard/narenji.ir.txt
+++ b/inc/3rdparty/site_config/standard/narenji.ir.txt
diff --git a/inc/3rdparty/site_config/standard/nasa.gov.txt b/inc/3rdparty/site_config/standard/nasa.gov.txt
index d95530f3..7df1112b 100644..100755
--- a/inc/3rdparty/site_config/standard/nasa.gov.txt
+++ b/inc/3rdparty/site_config/standard/nasa.gov.txt
@@ -1,8 +1,8 @@
1title: //div[@class='address']/span 1title: //div[@class='address']/span
2author: substring-before(//span[@class='credits'],',') 2author: substring-before(//span[@class='credits'],',')
3date: //div[@class='promodatepress']/span 3date: //div[@class='promodatepress']/span
4body: //div[@class='default_style_wrap'] 4body: //div[@class='default_style_wrap']
5strip: //div[@class='text_adjust'] 5strip: //div[@class='text_adjust']
6strip: //div[@class='skiplink'] 6strip: //div[@class='skiplink']
7strip: //h2 7strip: //h2
8test_url: http://www.nasa.gov/mission_pages/kepler/news/kepler-21b.html \ No newline at end of file 8test_url: http://www.nasa.gov/mission_pages/kepler/news/kepler-21b.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/nbweekly.com.txt b/inc/3rdparty/site_config/standard/nbweekly.com.txt
index 0b722d33..2645d406 100644..100755
--- a/inc/3rdparty/site_config/standard/nbweekly.com.txt
+++ b/inc/3rdparty/site_config/standard/nbweekly.com.txt
@@ -1,10 +1,10 @@
1date://span[contains(@class,'date')] 1date://span[contains(@class,'date')]
2 2
3body://div[contains(@class,'contWarp')] 3body://div[contains(@class,'contWarp')]
4 4
5strip://div[contains(@class,'keyWord')] 5strip://div[contains(@class,'keyWord')]
6strip://div[contains(@class,'submitComt')] 6strip://div[contains(@class,'submitComt')]
7strip://div[contains(@class,'cmts')] 7strip://div[contains(@class,'cmts')]
8strip://div[contains(@class,'notice')] 8strip://div[contains(@class,'notice')]
9strip://div[contains(@class,'part pt-second')] 9strip://div[contains(@class,'part pt-second')]
10test_url: http://www.nbweekly.com/news/china/201203/29316.aspx \ No newline at end of file 10test_url: http://www.nbweekly.com/news/china/201203/29316.aspx \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/neh.gov.txt b/inc/3rdparty/site_config/standard/neh.gov.txt
index 45136a2b..e7cc4313 100644..100755
--- a/inc/3rdparty/site_config/standard/neh.gov.txt
+++ b/inc/3rdparty/site_config/standard/neh.gov.txt
@@ -1,17 +1,17 @@
1#host configuration should be http://www.neh.gov/news/humanities/ 1#host configuration should be http://www.neh.gov/news/humanities/
2 2
3 3
4#meta data 4#meta data
5title:substring-after(substring-after(//title,':'),':') 5title:substring-after(substring-after(//title,':'),':')
6author:substring-after(//h2[@class = 'subHead'],'By') 6author:substring-after(//h2[@class = 'subHead'],'By')
7date:substring-before(substring-after(//title,':'),':') 7date:substring-before(substring-after(//title,':'),':')
8 8
9#img and caption handling 9#img and caption handling
10wrap_in(small)://div[@id = 'mainContent']/table/descendant::p/descendant::text() 10wrap_in(small)://div[@id = 'mainContent']/table/descendant::p/descendant::text()
11wrap_in(fieldset)://div[@id = 'mainContent']/table 11wrap_in(fieldset)://div[@id = 'mainContent']/table
12 12
13# clean up 13# clean up
14strip: //table[@class = 'marginpaddingTop'] 14strip: //table[@class = 'marginpaddingTop']
15strip: //h2[@class = 'subHead'] 15strip: //h2[@class = 'subHead']
16 16
17test_url: http://www.neh.gov/news/humanities/2011-11/IslamicScholar.html \ No newline at end of file 17test_url: http://www.neh.gov/news/humanities/2011-11/IslamicScholar.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/neomoney.co.txt b/inc/3rdparty/site_config/standard/neomoney.co.txt
index 564d5492..2089fc39 100644..100755
--- a/inc/3rdparty/site_config/standard/neomoney.co.txt
+++ b/inc/3rdparty/site_config/standard/neomoney.co.txt
@@ -1,3 +1,3 @@
1title: //*[@class="header_title"]/h1 1title: //*[@class="header_title"]/h1
2body: //div[contains(@class, 'content')] 2body: //div[contains(@class, 'content')]
3test_url: http://neomoney.co/personal/expatriate-and-migrant-loans/expatriate-loans/ \ No newline at end of file 3test_url: http://neomoney.co/personal/expatriate-and-migrant-loans/expatriate-loans/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/net-security.org.txt b/inc/3rdparty/site_config/standard/net-security.org.txt
index 4e6d66d4..b7fedbf3 100644..100755
--- a/inc/3rdparty/site_config/standard/net-security.org.txt
+++ b/inc/3rdparty/site_config/standard/net-security.org.txt
@@ -1,7 +1,7 @@
1title: //div[@class='content-title'] 1title: //div[@class='content-title']
2#date: substring-after(//div[@class='dernek-text-under'],'Posted on') 2#date: substring-after(//div[@class='dernek-text-under'],'Posted on')
3body: //div[@class='content-item'] 3body: //div[@class='content-item']
4next_page_link: //li[@class='next']/a 4next_page_link: //li[@class='next']/a
5convert_double_br_tags: yes 5convert_double_br_tags: yes
6 6
7test_url: http://www.net-security.org/article.php?id=1732 \ No newline at end of file 7test_url: http://www.net-security.org/article.php?id=1732 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/netmagazine.com.txt b/inc/3rdparty/site_config/standard/netmagazine.com.txt
index 86885445..dcea047c 100644..100755
--- a/inc/3rdparty/site_config/standard/netmagazine.com.txt
+++ b/inc/3rdparty/site_config/standard/netmagazine.com.txt
@@ -1,16 +1,16 @@
1title: //h1 1title: //h1
2author: //div[@class="submitted"]/span 2author: //div[@class="submitted"]/span
3 3
4# seems like this should work, but nothing is returned. Issue with xpath parser? 4# seems like this should work, but nothing is returned. Issue with xpath parser?
5date: //div[@class="submitted"]/time 5date: //div[@class="submitted"]/time
6 6
7body: //div[@id="main-content"] 7body: //div[@id="main-content"]
8 8
9strip_comments: no 9strip_comments: no
10 10
11strip: //h1 11strip: //h1
12strip: //div[@class="submitted"] 12strip: //div[@class="submitted"]
13strip: //dd[@class="profile-avatar"] 13strip: //dd[@class="profile-avatar"]
14strip: //div[@class="author-profile"]/dl/dt[1] 14strip: //div[@class="author-profile"]/dl/dt[1]
15strip: //div[@id="right-col"] 15strip: //div[@id="right-col"]
16test_url: http://www.netmagazine.com/opinions/nielsen-wrong-mobile \ No newline at end of file 16test_url: http://www.netmagazine.com/opinions/nielsen-wrong-mobile \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/netzpolitik.org.txt b/inc/3rdparty/site_config/standard/netzpolitik.org.txt
index 87dc3cdf..7fa43fd7 100644..100755
--- a/inc/3rdparty/site_config/standard/netzpolitik.org.txt
+++ b/inc/3rdparty/site_config/standard/netzpolitik.org.txt
@@ -1,6 +1,6 @@
1title: //h1[@class='entry-title'] 1title: //h1[@class='entry-title']
2author: //a[@ref='author'] 2author: //a[@ref='author']
3date: //span[@class='entry-date'] 3date: //span[@class='entry-date']
4body: //div[@class='entry-content'] 4body: //div[@class='entry-content']
5 5
6test_url: http://netzpolitik.org/2011/buch-generation-facebook/ \ No newline at end of file 6test_url: http://netzpolitik.org/2011/buch-generation-facebook/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/newleftproject.org.txt b/inc/3rdparty/site_config/standard/newleftproject.org.txt
new file mode 100755
index 00000000..d9af99d8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/newleftproject.org.txt
@@ -0,0 +1,3 @@
1title: //div[contains(@class, 'article_header')]//h3
2
3test_url: http://www.newleftproject.org/index.php/site/article_comments/do_we_need_a_facebook_of_the_left \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/newmatilda.com.txt b/inc/3rdparty/site_config/standard/newmatilda.com.txt
index ab766847..f17ecdc6 100644..100755
--- a/inc/3rdparty/site_config/standard/newmatilda.com.txt
+++ b/inc/3rdparty/site_config/standard/newmatilda.com.txt
@@ -1,9 +1,9 @@
1title: //div[@id="maincontent"]/h1 1title: //div[@id="maincontent"]/h1
2body: //div[@id="maincontent"] 2body: //div[@id="maincontent"]
3date: //div[@id="maincontent"]/p[2] 3date: //div[@id="maincontent"]/p[2]
4author: //ul[@id="contributors"]/li/p/b 4author: //ul[@id="contributors"]/li/p/b
5 5
6strip: //p[@*] 6strip: //p[@*]
7strip: //h1 7strip: //h1
8strip: //div[@id="maincontent"]/div 8strip: //div[@id="maincontent"]/div
9test_url: http://newmatilda.com/2011/07/22/turnbull-makes-sense-climate \ No newline at end of file 9test_url: http://newmatilda.com/2011/07/22/turnbull-makes-sense-climate \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/newrepublic.com.txt b/inc/3rdparty/site_config/standard/newrepublic.com.txt
new file mode 100755
index 00000000..039f0385
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/newrepublic.com.txt
@@ -0,0 +1,8 @@
1author: //span[@class="authors"]
2date: //span[@class="date"]
3body: //div[@class="primary"]
4
5strip: //div[@id="controls"]
6strip: //div[@id="read-next"]
7
8test_url: http://www.newrepublic.com/article/112731/moocs-will-online-education-ruin-university-experience \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/news-gazette.com.txt b/inc/3rdparty/site_config/standard/news-gazette.com.txt
index 1f1e5d3a..2b352707 100644..100755
--- a/inc/3rdparty/site_config/standard/news-gazette.com.txt
+++ b/inc/3rdparty/site_config/standard/news-gazette.com.txt
@@ -1,8 +1,8 @@
1title: //div[@id="main-content"]//h2 1title: //div[@id="main-content"]//h2
2 2
3author: //div[@id="main-content"]//span[@class="authors"] 3author: //div[@id="main-content"]//span[@class="authors"]
4 4
5date: //div[@id="main-content"]//span[@class="timestamp"] 5date: //div[@id="main-content"]//span[@class="timestamp"]
6 6
7body: //div[@id="main-content"]//div[@class="content"] 7body: //div[@id="main-content"]//div[@class="content"]
8test_url: http://www.news-gazette.com/news/business/economy/2011-08-08/ibm-drops-out-blue-waters-project.html \ No newline at end of file 8test_url: http://www.news-gazette.com/news/business/economy/2011-08-08/ibm-drops-out-blue-waters-project.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/news.cnet.com.txt b/inc/3rdparty/site_config/standard/news.cnet.com.txt
index b7ab224a..78af70f4 100644..100755
--- a/inc/3rdparty/site_config/standard/news.cnet.com.txt
+++ b/inc/3rdparty/site_config/standard/news.cnet.com.txt
@@ -1,12 +1,12 @@
1#This should apply to *.cnet.com. Not just news.cnet.com. 1#This should apply to *.cnet.com. Not just news.cnet.com.
2title: //h1 2title: //h1
3author: //img[@class="mugshot"]/@alt 3author: //img[@class="mugshot"]/@alt
4strip: //h1 4strip: //h1
5strip_id_or_class: breadcrumb 5strip_id_or_class: breadcrumb
6strip: //p[@id="introP"] 6strip: //p[@id="introP"]
7strip: //div[@class="postByline"] 7strip: //div[@class="postByline"]
8strip: //div[@class="editorBio"] 8strip: //div[@class="editorBio"]
9strip: //div[@class="inline-slideshow"] 9strip: //div[@class="inline-slideshow"]
10strip: //div[@class="related"] 10strip: //div[@class="related"]
11body: //div[@class="postBody txtWrap"] 11body: //div[@class="postBody txtWrap"]
12test_url: http://news.cnet.com/8301-27076_3-57405303-248/apple-ipad-charging-fine-keep-it-plugged-in/?tag=mncol;posts \ No newline at end of file 12test_url: http://news.cnet.com/8301-27076_3-57405303-248/apple-ipad-charging-fine-keep-it-plugged-in/?tag=mncol;posts \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/news.detik.com.txt b/inc/3rdparty/site_config/standard/news.detik.com.txt
index 3ed1dc85..629bc917 100644..100755
--- a/inc/3rdparty/site_config/standard/news.detik.com.txt
+++ b/inc/3rdparty/site_config/standard/news.detik.com.txt
@@ -1,8 +1,8 @@
1title://div[@class="content_detail"]/h1 1title://div[@class="content_detail"]/h1
2 2
3author://div[@class="author"]/strong 3author://div[@class="author"]/strong
4 4
5date:substring-before(substring-after(//div[@class="content_detail"]/span[@class="date"], ','), ' WIB') 5date:substring-before(substring-after(//div[@class="content_detail"]/span[@class="date"], ','), ' WIB')
6 6
7body://div[@class="text_detail"] 7body://div[@class="text_detail"]
8test_url: http://news.detik.com/read/2012/05/22/225531/1922307/10/menkeu-cek-soal-lolosnya-315-kg-sabu-dari-bea-cukai \ No newline at end of file 8test_url: http://news.detik.com/read/2012/05/22/225531/1922307/10/menkeu-cek-soal-lolosnya-315-kg-sabu-dari-bea-cukai \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/news.kanaloco.jp.txt b/inc/3rdparty/site_config/standard/news.kanaloco.jp.txt
index 6fc86137..5754d47a 100644..100755
--- a/inc/3rdparty/site_config/standard/news.kanaloco.jp.txt
+++ b/inc/3rdparty/site_config/standard/news.kanaloco.jp.txt
@@ -1,9 +1,9 @@
1body: //div[@id='main'] 1body: //div[@id='main']
2strip: //div[@id='sbs'] 2strip: //div[@id='sbs']
3strip: //div[@id='fsizeSwitch'] 3strip: //div[@id='fsizeSwitch']
4strip: //div[@id='googleAd'] 4strip: //div[@id='googleAd']
5strip: //div[@id='detailFoot'] 5strip: //div[@id='detailFoot']
6strip_image_src: counter?key 6strip_image_src: counter?key
7convert_double_br_tags: yes 7convert_double_br_tags: yes
8 8
9test_url: http://news.kanaloco.jp/localnews/article/1105200018/ \ No newline at end of file 9test_url: http://news.kanaloco.jp/localnews/article/1105200018/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/news.mynavi.jp.txt b/inc/3rdparty/site_config/standard/news.mynavi.jp.txt
index ded680f1..1df47314 100644..100755
--- a/inc/3rdparty/site_config/standard/news.mynavi.jp.txt
+++ b/inc/3rdparty/site_config/standard/news.mynavi.jp.txt
@@ -1,11 +1,11 @@
1title: //h2[@class="lyt-hdg-02-04"] 1title: //h2[@class="lyt-hdg-02-04"]
2 2
3author: //div[@class="lyt-namearea"]/a 3author: //div[@class="lyt-namearea"]/a
4 4
5date: //div[@class="lyt-namearea"]/text() 5date: //div[@class="lyt-namearea"]/text()
6 6
7body: //div[@class="articleContent"] 7body: //div[@class="articleContent"]
8 8
9strip: //div[@id="tab-aside"] 9strip: //div[@id="tab-aside"]
10 10
11test_url: http://news.mynavi.jp/articles/2011/12/07/nico/index.html \ No newline at end of file 11test_url: http://news.mynavi.jp/articles/2011/12/07/nico/index.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/news.orf.at.txt b/inc/3rdparty/site_config/standard/news.orf.at.txt
index b60deea4..3b1d3ccb 100644..100755
--- a/inc/3rdparty/site_config/standard/news.orf.at.txt
+++ b/inc/3rdparty/site_config/standard/news.orf.at.txt
@@ -1,11 +1,11 @@
1single_page_link: //div[@id='content']//p[@class='readMore']/a 1single_page_link: //div[@id='content']//p[@class='readMore']/a
2 2
3title: //div[@class='hidden offscreen']/h2 3title: //div[@class='hidden offscreen']/h2
4body: //div[@id="storyText"] 4body: //div[@id="storyText"]
5move_into(//div[@id='storyText']): //div[@class='fact'] 5move_into(//div[@id='storyText']): //div[@class='fact']
6strip: //small[@class='credit'] 6strip: //small[@class='credit']
7strip: //small[@class='caption'] 7strip: //small[@class='caption']
8date: substring-after(//div[@id='storyMeta']//p[@class='date'],'Publiziert am') 8date: substring-after(//div[@id='storyMeta']//p[@class='date'],'Publiziert am')
9strip: //p[@class='toplink'] 9strip: //p[@class='toplink']
10 10
11test_url: http://news.orf.at/stories/2084731/ \ No newline at end of file 11test_url: http://news.orf.at/stories/2084731/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/news.rambler.ru.txt b/inc/3rdparty/site_config/standard/news.rambler.ru.txt
index 743245f8..1d547334 100644..100755
--- a/inc/3rdparty/site_config/standard/news.rambler.ru.txt
+++ b/inc/3rdparty/site_config/standard/news.rambler.ru.txt
@@ -1,9 +1,9 @@
1body: //article 1body: //article
2title: //h1 2title: //h1
3author: //span[@class='b-article-source-dropdown'] 3author: //span[@class='b-article-source-dropdown']
4strip: //span[@class='b-article-photo-incut__source'] 4strip: //span[@class='b-article-photo-incut__source']
5strip: //a[@class='b-read-more b-read-more_bottom'] 5strip: //a[@class='b-read-more b-read-more_bottom']
6 6
7 7
8tidy:no 8tidy:no
9test_url: http://news.rambler.ru/12972208/ \ No newline at end of file 9test_url: http://news.rambler.ru/12972208/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/news.techmeme.com.txt b/inc/3rdparty/site_config/standard/news.techmeme.com.txt
index c80c3327..ba4db828 100644..100755
--- a/inc/3rdparty/site_config/standard/news.techmeme.com.txt
+++ b/inc/3rdparty/site_config/standard/news.techmeme.com.txt
@@ -1,4 +1,4 @@
1body: //div[@class='main']/div[@class='item'] 1body: //div[@class='main']/div[@class='item']
2strip: //div[@class='right'] 2strip: //div[@class='right']
3 3
4test_url: http://news.techmeme.com/110516/fh-rip \ No newline at end of file 4test_url: http://news.techmeme.com/110516/fh-rip \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/news.yahoo.com.txt b/inc/3rdparty/site_config/standard/news.yahoo.com.txt
index 5ee04049..fc1739c8 100644..100755
--- a/inc/3rdparty/site_config/standard/news.yahoo.com.txt
+++ b/inc/3rdparty/site_config/standard/news.yahoo.com.txt
@@ -1,12 +1,12 @@
1title: //meta[@property='og:title']/@content 1title: //meta[@property='og:title']/@content
2title: //h1[@class='headline'] 2title: //h1[@class='headline']
3author: //cite[contains(@class, 'byline') and contains(@class, 'vcard')]//span[@class='fn'] 3author: //cite[contains(@class, 'byline') and contains(@class, 'vcard')]//span[@class='fn']
4date: //cite[contains(@class, 'byline') and contains(@class, 'vcard')]//abbr/@title 4date: //cite[contains(@class, 'byline') and contains(@class, 'vcard')]//abbr/@title
5body: //div[@id='mediaarticlelead']//a[@class='media'] | //div[contains(@class,'yom-art-content')] 5body: //div[@id='mediaarticlelead']//a[@class='media'] | //div[contains(@class,'yom-art-content')]
6#strip: //cite/abbr 6#strip: //cite/abbr
7strip_id_or_class: action 7strip_id_or_class: action
8strip_id_or_class: prefetch 8strip_id_or_class: prefetch
9tidy: no 9tidy: no
10prune: no 10prune: no
11 11
12test_url: http://news.yahoo.com/cold-la-nina-winter-forecast-west-coast-183535067.html \ No newline at end of file 12test_url: http://news.yahoo.com/cold-la-nina-winter-forecast-west-coast-183535067.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/news.ycombinator.com.txt b/inc/3rdparty/site_config/standard/news.ycombinator.com.txt
index 0b01f8a1..f7441d17 100644..100755
--- a/inc/3rdparty/site_config/standard/news.ycombinator.com.txt
+++ b/inc/3rdparty/site_config/standard/news.ycombinator.com.txt
@@ -1,3 +1,3 @@
1strip_comments: no 1strip_comments: no
2strip: //a[. = 'reply'] 2strip: //a[. = 'reply']
3test_url: http://news.ycombinator.com/item?id=1516461 \ No newline at end of file 3test_url: http://news.ycombinator.com/item?id=1516461 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/news.zing.vn.txt b/inc/3rdparty/site_config/standard/news.zing.vn.txt
new file mode 100755
index 00000000..af81e90e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/news.zing.vn.txt
@@ -0,0 +1,3 @@
1body://div[@class="newsdetail_wrapper"]
2strip://div[@class="more_news"]
3test_url: http://news.zing.vn/xa-hoi/s-phat-nang-xe-may-di-duong-tren-cao-ha-noi/a280838.html#home_noibat1 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/news247.gr.txt b/inc/3rdparty/site_config/standard/news247.gr.txt
new file mode 100755
index 00000000..87637bed
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/news247.gr.txt
@@ -0,0 +1,6 @@
1title: //h1[@class='title']
2
3body: //img[@id='relPicsMainPic'] | //div[contains(@class, 'storyContent')]
4
5test_url: http://news247.gr/eidiseis/katatheseis_fwtia_htan_apofasismenoi_akomh_kai_na_afairesoyn_zwes_an_thewrousan_oti_to_thuma_htan_antipalos_toys.2433351.html
6test_url: http://news247.gr/?widget=rssfeed&view=feed&contentId=38291 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/newsbomb.gr.txt b/inc/3rdparty/site_config/standard/newsbomb.gr.txt
index 0500890f..5eb0ea46 100644..100755
--- a/inc/3rdparty/site_config/standard/newsbomb.gr.txt
+++ b/inc/3rdparty/site_config/standard/newsbomb.gr.txt
@@ -1,9 +1,9 @@
1date: //meta[@name='og:article:published_time']/@value 1date: //meta[@name='og:article:published_time']/@value
2 2
3body: //div[@class='itemIntroText' or @class='itemImageBlock' or @class='itemFullText'] 3body: //div[@class='itemIntroText' or @class='itemImageBlock' or @class='itemFullText']
4 4
5strip_id_or_class: itemImageGallery 5strip_id_or_class: itemImageGallery
6 6
7prune: no 7prune: no
8 8
9test_url: http://www.newsbomb.gr/gossip/story/257234/i-proin-moy-protimoyse-na-serfarei-apo-to-na-kanoyme-sex \ No newline at end of file 9test_url: http://www.newsbomb.gr/gossip/story/257234/i-proin-moy-protimoyse-na-serfarei-apo-to-na-kanoyme-sex \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/newsle.com.txt b/inc/3rdparty/site_config/standard/newsle.com.txt
index e500ddcc..e500ddcc 100644..100755
--- a/inc/3rdparty/site_config/standard/newsle.com.txt
+++ b/inc/3rdparty/site_config/standard/newsle.com.txt
diff --git a/inc/3rdparty/site_config/standard/newsmill.se.txt b/inc/3rdparty/site_config/standard/newsmill.se.txt
index eb7d3350..1a990319 100644..100755
--- a/inc/3rdparty/site_config/standard/newsmill.se.txt
+++ b/inc/3rdparty/site_config/standard/newsmill.se.txt
@@ -1,12 +1,12 @@
1title: //h1 1title: //h1
2body: (//div[@class='articleImg']//img)[1] | //p[contains(@class, 'commentTextArticle') or contains(@class, 'articlePublished')] | //div[@id='articleLeftContent'] 2body: (//div[@class='articleImg']//img)[1] | //p[contains(@class, 'commentTextArticle') or contains(@class, 'articlePublished')] | //div[@id='articleLeftContent']
3author: //div[@class='byline']//a[contains(@href, '/user/')] 3author: //div[@class='byline']//a[contains(@href, '/user/')]
4 4
5strip_id_or_class: facts 5strip_id_or_class: facts
6strip_id_or_class: articleBlogsHolder 6strip_id_or_class: articleBlogsHolder
7strip_id_or_class: byline 7strip_id_or_class: byline
8 8
9prune: no 9prune: no
10tidy: no 10tidy: no
11 11
12test_url: http://www.newsmill.se/artikel/2012/05/06/medielogiken-v-ger-tyngre-n-reportrarnas-sikter \ No newline at end of file 12test_url: http://www.newsmill.se/artikel/2012/05/06/medielogiken-v-ger-tyngre-n-reportrarnas-sikter \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/newsunspun.org.txt b/inc/3rdparty/site_config/standard/newsunspun.org.txt
index 860ad66b..247bbebb 100644..100755
--- a/inc/3rdparty/site_config/standard/newsunspun.org.txt
+++ b/inc/3rdparty/site_config/standard/newsunspun.org.txt
@@ -1,10 +1,10 @@
1body: //div[@class='right']//div[@class='articles'] 1body: //div[@class='right']//div[@class='articles']
2author: //div[@id='artinfo']//a[contains(@href, '/author/')] 2author: //div[@id='artinfo']//a[contains(@href, '/author/')]
3strip: //div[@id='artinfo'] 3strip: //div[@id='artinfo']
4strip: //table[//a[contains(@href, 'twitter.com')]] 4strip: //table[//a[contains(@href, 'twitter.com')]]
5strip_id_or_class: twitter 5strip_id_or_class: twitter
6 6
7prune: no 7prune: no
8tidy: no 8tidy: no
9 9
10test_url: http://www.newsunspun.org/eotn/bbc-headline-change-iran-goes-from-not-building-to-undecided-on-nuclear-bomb \ No newline at end of file 10test_url: http://www.newsunspun.org/eotn/bbc-headline-change-iran-goes-from-not-building-to-undecided-on-nuclear-bomb \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/newsweek.com.txt b/inc/3rdparty/site_config/standard/newsweek.com.txt
new file mode 100755
index 00000000..565648ba
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/newsweek.com.txt
@@ -0,0 +1,6 @@
1body: //div[@class = 'article-body']
2title: //h1[@class = 'article-title']
3strip: //aside
4
5test_url: http://www.newsweek.com/day-steve-mcqueen-met-his-new-nazi-neighbor-keith-moon-229741
6test_url: http://www.newsweek.com/2014/06/13/how-greylock-partners-finds-next-facebook-253329.html
diff --git a/inc/3rdparty/site_config/standard/newswise.com.txt b/inc/3rdparty/site_config/standard/newswise.com.txt
new file mode 100755
index 00000000..10120ea1
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/newswise.com.txt
@@ -0,0 +1,17 @@
1prune: no
2tidy: no
3
4title: //h1/a[2]
5body: //div[@id="main"]
6author: //span[@id="articlesource"]
7date: //span[contains(@class, 'releasedate')]
8
9strip: //div[@class="inst-logo"]
10strip: //h1[1]
11
12strip_id_or_class: addthis
13strip_id_or_class: released
14strip_id_or_class: skiptranslate
15strip_id_or_class: flash
16
17test_url: http://www.newswise.com/articles/first-heat-wave-of-season-puts-elderly-at-risk
diff --git a/inc/3rdparty/site_config/standard/newyorker.com.txt b/inc/3rdparty/site_config/standard/newyorker.com.txt
index 5624aa8c..950324a3 100644..100755
--- a/inc/3rdparty/site_config/standard/newyorker.com.txt
+++ b/inc/3rdparty/site_config/standard/newyorker.com.txt
@@ -1,10 +1,11 @@
1title: //h1[@id='articlehed'] | //h2[@id="articleintro"] 1title: //h1[@id='articlehed'] | //h2[@id="articleintro"]
2body: //div[@id='articletext'] 2body: //div[@id='articletext']
3 3
4strip: //ul[@id="bc"] | //div[@id="yrail"] | //div[@class="entry-keywords"] | //div[@class="entry-categories"] | //div[@class="socialUtils"] | //div[@id="footer"] 4strip: //ul[@id="bc"] | //div[@id="yrail"] | //div[@class="entry-keywords"] | //div[@class="entry-categories"] | //div[@class="socialUtils"] | //div[@id="footer"] | //div[@class="cartoon"]
5 5
6date: //h4[@id='articleauthor']/span[@class='dd dds'] 6date: //h4[@id='articleauthor']/span[@class='dd dds']
7date: //div[@id="pagebody"]/div[@class='hentry entry']/div[@class='published'] 7date: //div[@id="pagebody"]/div[@class='hentry entry']/div[@class='published']
8 8
9single_page_link: //div[@class='paginationViewSinglePage']/a 9single_page_link: //div[@class='paginationViewSinglePage']/a
10test_url: http://www.newyorker.com/online/blogs/culture/2012/06/mug-shot-web-sites.html \ No newline at end of file 10test_url: http://www.newyorker.com/online/blogs/culture/2012/06/mug-shot-web-sites.html
11test_url: http://www.newyorker.com/reporting/2013/04/22/130422fa_fact_bilger?currentPage=all&mobify=0 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/next-gen.biz.txt b/inc/3rdparty/site_config/standard/next-gen.biz.txt
index 806a3dfd..b8d235db 100644..100755
--- a/inc/3rdparty/site_config/standard/next-gen.biz.txt
+++ b/inc/3rdparty/site_config/standard/next-gen.biz.txt
@@ -1,16 +1,16 @@
1# 2011-08-22 [carlo@...] initial version 1# 2011-08-22 [carlo@...] initial version
2# 2011-08-22 [carlo@...] removed comments & social links 2# 2011-08-22 [carlo@...] removed comments & social links
3 3
4tidy: no 4tidy: no
5 5
6single_page_link: //a[@class="single active"] 6single_page_link: //a[@class="single active"]
7 7
8body: //div[@id="main"]//div[@class="content-region"]/article 8body: //div[@id="main"]//div[@class="content-region"]/article
9author: //span[@class="author-name"] 9author: //span[@class="author-name"]
10date: //time/text() 10date: //time/text()
11 11
12strip_id_or_class: //aside[@id="related"] 12strip_id_or_class: //aside[@id="related"]
13strip: //footer 13strip: //footer
14 14
15title: //h1 15title: //h1
16test_url: http://www.next-gen.biz/reviews/deus-ex-human-revolution-review \ No newline at end of file 16test_url: http://www.next-gen.biz/reviews/deus-ex-human-revolution-review \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/nfl.com.txt b/inc/3rdparty/site_config/standard/nfl.com.txt
index 70f92473..956b288f 100644..100755
--- a/inc/3rdparty/site_config/standard/nfl.com.txt
+++ b/inc/3rdparty/site_config/standard/nfl.com.txt
@@ -1,11 +1,11 @@
1# doesn't look like selecting an attribute value works? 1# doesn't look like selecting an attribute value works?
2# author: //meta[@id="authorName"]@value 2# author: //meta[@id="authorName"]@value
3 3
4author: substring-after(//li[@id="article-hdr-meta-author"]/text(), "By ") 4author: substring-after(//li[@id="article-hdr-meta-author"]/text(), "By ")
5date: //abbr[@id="article-time"] 5date: //abbr[@id="article-time"]
6title: //div[@id="article-hdr"]/h1 6title: //div[@id="article-hdr"]/h1
7body: //div[@class="articleText"] 7body: //div[@class="articleText"]
8 8
9# strip miscellaneous teasers & etc 9# strip miscellaneous teasers & etc
10strip: //div[@class="removeformobile"] 10strip: //div[@class="removeformobile"]
11test_url: http://www.nfl.com/news/story/09000d5d82388707/article/close-shave-chiefs-haley-perseveres-through-rough-start?module=HP11_content_stream \ No newline at end of file 11test_url: http://www.nfl.com/news/story/09000d5d82388707/article/close-shave-chiefs-haley-perseveres-through-rough-start?module=HP11_content_stream \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ngm.nationalgeographic.com.txt b/inc/3rdparty/site_config/standard/ngm.nationalgeographic.com.txt
index 60834862..44a82a95 100644..100755
--- a/inc/3rdparty/site_config/standard/ngm.nationalgeographic.com.txt
+++ b/inc/3rdparty/site_config/standard/ngm.nationalgeographic.com.txt
@@ -1,7 +1,7 @@
1next_page_link: //div[@class='nextpage_continue']/a 1next_page_link: //div[@class='nextpage_continue']/a
2strip: //div[@class='nextpage_continue'] 2strip: //div[@class='nextpage_continue']
3strip_id_or_class: nextpage 3strip_id_or_class: nextpage
4title: //div[@class='article_title']//h1 4title: //div[@class='article_title']//h1
5body: //div[@class='article_title']/.. 5body: //div[@class='article_title']/..
6body: //div[@class='content'] 6body: //div[@class='content']
7test_url: http://ngm.nationalgeographic.com/2012/02/tsunami/folger-text \ No newline at end of file 7test_url: http://ngm.nationalgeographic.com/2012/02/tsunami/folger-text \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/nhk.or.jp.txt b/inc/3rdparty/site_config/standard/nhk.or.jp.txt
index 0a3bb913..0a3bb913 100644..100755
--- a/inc/3rdparty/site_config/standard/nhk.or.jp.txt
+++ b/inc/3rdparty/site_config/standard/nhk.or.jp.txt
diff --git a/inc/3rdparty/site_config/standard/nintendoworldreport.com.txt b/inc/3rdparty/site_config/standard/nintendoworldreport.com.txt
index 409a8977..f0e28afb 100644..100755
--- a/inc/3rdparty/site_config/standard/nintendoworldreport.com.txt
+++ b/inc/3rdparty/site_config/standard/nintendoworldreport.com.txt
@@ -1,13 +1,13 @@
1body: //div[@id="main"] 1body: //div[@id="main"]
2title: //div[@id="main"]/h3 2title: //div[@id="main"]/h3
3 3
4# Remove &lsquo;Review&rsquo; and &lsquo;Wii&rsquo;. 4# Remove &lsquo;Review&rsquo; and &lsquo;Wii&rsquo;.
5strip: //div[@class="badge"] 5strip: //div[@class="badge"]
6 6
7# Remove duplicate title and country flag. 7# Remove duplicate title and country flag.
8strip: //h3 8strip: //h3
9 9
10# Commented out below are attempts to extract the author and date, which did not work. 10# Commented out below are attempts to extract the author and date, which did not work.
11# author: //p[@class="extra "]/a 11# author: //p[@class="extra "]/a
12# date: //p[@class="extra "]/span[@class="when"] 12# date: //p[@class="extra "]/span[@class="when"]
13test_url: http://www.nintendoworldreport.com/review/28400 \ No newline at end of file 13test_url: http://www.nintendoworldreport.com/review/28400 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/nojesguiden.se.txt b/inc/3rdparty/site_config/standard/nojesguiden.se.txt
index ae2d7e41..b15f0612 100644..100755
--- a/inc/3rdparty/site_config/standard/nojesguiden.se.txt
+++ b/inc/3rdparty/site_config/standard/nojesguiden.se.txt
@@ -1,5 +1,5 @@
1author: //span[@class='meta']/span[@class='username'] 1author: //span[@class='meta']/span[@class='username']
2body: //div[@class='article-content'] 2body: //div[@class='article-content']
3 3
4strip_id_or_class: 'article-actions' 4strip_id_or_class: 'article-actions'
5test_url: http://nojesguiden.se/blogg/maja-bredberg/maja-laser-tidningen-en-helt-vanlig-lordag-i \ No newline at end of file 5test_url: http://nojesguiden.se/blogg/maja-bredberg/maja-laser-tidningen-en-helt-vanlig-lordag-i \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/northumberlandview.ca.txt b/inc/3rdparty/site_config/standard/northumberlandview.ca.txt
index 04a0a34d..88429a78 100644..100755
--- a/inc/3rdparty/site_config/standard/northumberlandview.ca.txt
+++ b/inc/3rdparty/site_config/standard/northumberlandview.ca.txt
@@ -1,11 +1,11 @@
1title: //h1 1title: //h1
2body: //div[@id='pn-maincontent'] 2body: //div[@id='pn-maincontent']
3strip_id_or_class: z-menu 3strip_id_or_class: z-menu
4strip_id_or_class: news_category 4strip_id_or_class: news_category
5strip_id_or_class: news_title 5strip_id_or_class: news_title
6strip_id_or_class: news_modify 6strip_id_or_class: news_modify
7strip_id_or_class: news_morearticlesincat 7strip_id_or_class: news_morearticlesincat
8strip_id_or_class: ezc_comments 8strip_id_or_class: ezc_comments
9strip_comments: yes 9strip_comments: yes
10 10
11test_url: http://www.northumberlandview.ca/index.php?module=news&func=display&sid=5972 \ No newline at end of file 11test_url: http://www.northumberlandview.ca/index.php?module=news&func=display&sid=5972 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/nosalty.hu.txt b/inc/3rdparty/site_config/standard/nosalty.hu.txt
new file mode 100755
index 00000000..7e20cadf
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/nosalty.hu.txt
@@ -0,0 +1,6 @@
1title: //div[@id='tab-recept']//h1
2body: //div[@id='tab-recept']//div[contains(@class, 'column-container')]
3strip_id_or_class: ajanlo-box
4prune: no
5
6test_url: http://www.nosalty.hu/recept/szupergyors-fank \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/nplusonemag.com.txt b/inc/3rdparty/site_config/standard/nplusonemag.com.txt
index 205b1af4..1b817c04 100644..100755
--- a/inc/3rdparty/site_config/standard/nplusonemag.com.txt
+++ b/inc/3rdparty/site_config/standard/nplusonemag.com.txt
@@ -1,6 +1,6 @@
1title: /html/body/div[3]/div/div/h1 1title: /html/body/div[3]/div/div/h1
2 2
3body: //*[@id="article-body"] 3body: //*[@id="article-body"]
4 4
5 5
6test_url: http://nplusonemag.com/the-outskirts-of-progress \ No newline at end of file 6test_url: http://nplusonemag.com/the-outskirts-of-progress \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/npr.org.txt b/inc/3rdparty/site_config/standard/npr.org.txt
index afab0eb3..acd73e48 100644..100755
--- a/inc/3rdparty/site_config/standard/npr.org.txt
+++ b/inc/3rdparty/site_config/standard/npr.org.txt
@@ -1,32 +1,34 @@
1title: //div[contains(@class, 'storytitle')]//h1 1title: //div[contains(@class, 'storytitle')]//h1
2author: //p[@class="byline"]/span 2author: //p[@class="byline"]/span
3body: //div[@id='storyspan02']//*[@class='duration' or @class='download' or contains(@class, 'photo')] | //div[@id='storytext'] | //div[@class='transcript'] 3body: //div[@id='primaryaudio']//*[@class='duration' or @class='download' or contains(@class, 'photo')] | //div[@id='storytext' or @id='supplementarycontent' or contains(@class, 'transcript')]
4date: //meta[@name="date"]/@content 4date: //meta[@name="date"]/@content
5 5
6strip: //div[@class='enlarge_measure'] 6strip_id_or_class: enlarge_measure
7strip: //div[@class='enlarge_html'] 7strip_id_or_class: enlarge_html
8strip: //a[@class='enlargeicon'] 8strip: //a[contains(@class, 'enlargeicon')]
9strip: //div[contains(@class, 'bookedition')] 9strip: //div[contains(@class, 'bookedition')]
10strip: //div[@class='textsize'] 10strip: //div[@class='textsize']
11strip: //ul[@class='genres'] 11strip: //ul[@class='genres']
12strip: //span[@class='bull'] 12strip: //span[@class='bull']
13strip_id_or_class: secondary 13strip_id_or_class: secondary
14strip_id_or_class: con1col 14strip_id_or_class: con1col
15strip: //h3[@class='conheader'] 15strip: //h3[@class='conheader']
16 16
17replace_string(<a name="more">&nbsp;</a>): <!-- no more --> 17replace_string(<a name="more">&nbsp;</a>): <!-- no more -->
18replace_string(<div class="transcript">): <div class="transcript"><h2>Transcript</h2> 18replace_string(<div class="transcript">): <div class="transcript"><h2>Transcript</h2>
19 19replace_string(<div class="transcript storytext">): <div class="transcript storytext"><h2>Transcript</h2>
20prune: no 20
21strip://div[@class="ecommercepop"] 21prune: no
22strip://span[@class="bull"] 22strip://div[@class="ecommercepop"]
23strip://span[@class="purchaseLink"] 23strip://span[@class="bull"]
24strip://div[@class="enlarge_html"] 24strip://span[@class="purchaseLink"]
25strip://div[@class="enlarge_measure"] 25strip://div[@class="enlarge_html"]
26strip://div[@class="container con1col small"] 26strip://div[@class="enlarge_measure"]
27strip://a[contains(@class, "enlargebtn")] 27strip://div[@class="container con1col small"]
28strip://div[contains(@class, "bucketwrap internallink")] 28strip://a[contains(@class, "enlargebtn")]
29 29strip://div[contains(@class, "bucketwrap internallink")]
30test_url: http://www.npr.org/blogs/thetwo-way/2011/07/12/137799301/sports-loses-its-escapist-gleam-in-a-summer-of-court-dates 30
31test_url: http://www.npr.org/2012/07/04/156190948/feeling-under-siege-catholic-leadership-shifts-right 31test_url: http://www.npr.org/blogs/thetwo-way/2011/07/12/137799301/sports-loses-its-escapist-gleam-in-a-summer-of-court-dates
32test_url: http://www.npr.org/2012/12/13/166480907/the-years-best-sci-fi-crosses-galaxies-and-genres \ No newline at end of file 32test_url: http://www.npr.org/2012/07/04/156190948/feeling-under-siege-catholic-leadership-shifts-right
33test_url: http://www.npr.org/2012/12/13/166480907/the-years-best-sci-fi-crosses-galaxies-and-genres
34test_url: http://www.npr.org/templates/story/story.php?storyId=229103221 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/nybooks.com.txt b/inc/3rdparty/site_config/standard/nybooks.com.txt
index 8ecb8961..d95ec68e 100644..100755
--- a/inc/3rdparty/site_config/standard/nybooks.com.txt
+++ b/inc/3rdparty/site_config/standard/nybooks.com.txt
@@ -1,13 +1,13 @@
1strip_id_or_class: sIFR-alternate 1strip_id_or_class: sIFR-alternate
2title: //div[@id='page-title-wrapper']/div[@id='page-title']/h2 2title: //div[@id='page-title-wrapper']/div[@id='page-title']/h2
3single_page_link: //a[contains(@href, 'pagination=false') and not(contains(@href, 'printpage=true'))] 3single_page_link: //a[contains(@href, 'pagination=false') and not(contains(@href, 'printpage=true'))]
4 4
5body: //div[@id = 'article-body'] 5body: //div[@id = 'article-body']
6strip_id_or_class:article-tools 6strip_id_or_class:article-tools
7strip_id_or_class:js_target 7strip_id_or_class:js_target
8strip_id_or_class:marker 8strip_id_or_class:marker
9author://div[@id = 'page-title']/h3 9author://div[@id = 'page-title']/h3
10date://div[@id = 'page-title']/h5/a[starts-with(@href,'/issues/')] 10date://div[@id = 'page-title']/h5/a[starts-with(@href,'/issues/')]
11 11
12 12
13test_url: http://www.nybooks.com/articles/archives/2012/feb/23/were-more-unequal-you-think/ \ No newline at end of file 13test_url: http://www.nybooks.com/articles/archives/2012/feb/23/were-more-unequal-you-think/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/nymag.com.txt b/inc/3rdparty/site_config/standard/nymag.com.txt
index f664c93d..7a1d62d9 100644..100755
--- a/inc/3rdparty/site_config/standard/nymag.com.txt
+++ b/inc/3rdparty/site_config/standard/nymag.com.txt
@@ -1,8 +1,8 @@
1title: //h2[contains(@class, 'primary')] 1title: //h2[contains(@class, 'primary')]
2body: //div[@id='story'] 2body: //div[@id='story']
3author: //*[@class='by']/a 3author: //*[@class='by']/a
4date: substring-after(//*[@class='date'], 'Published') 4date: substring-after(//*[@class='date'], 'Published')
5 5
6next_page_link: //div[@class='page-navigation']//li[@class='next']/a 6next_page_link: //div[@class='page-navigation']//li[@class='next']/a
7 7
8test_url: http://nymag.com/news/features/wall-street-2012-2/ \ No newline at end of file 8test_url: http://nymag.com/news/features/wall-street-2012-2/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/nyteknik.se.txt b/inc/3rdparty/site_config/standard/nyteknik.se.txt
index 8c9e37f4..f4bedb6a 100644..100755
--- a/inc/3rdparty/site_config/standard/nyteknik.se.txt
+++ b/inc/3rdparty/site_config/standard/nyteknik.se.txt
@@ -1,8 +1,8 @@
1title: //div[@class="article default-article"]/h1 1title: //div[@class="article default-article"]/h1
2author: //p[@class="author"]/a[2] 2author: //p[@class="author"]/a[2]
3 3
4# Article introduction: 4# Article introduction:
5#move_into(//div[@class="article-bread"]): //p[@class="lead"] 5#move_into(//div[@class="article-bread"]): //p[@class="lead"]
6 6
7body: //div[@class="article-bread"] 7body: //div[@class="article-bread"]
8test_url: http://www.nyteknik.se/nyheter/energi_miljo/energi/article3391426.ece \ No newline at end of file 8test_url: http://www.nyteknik.se/nyheter/energi_miljo/energi/article3391426.ece \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/nytimes.com.txt b/inc/3rdparty/site_config/standard/nytimes.com.txt
index 8d9a794a..23c9ad11 100644..100755
--- a/inc/3rdparty/site_config/standard/nytimes.com.txt
+++ b/inc/3rdparty/site_config/standard/nytimes.com.txt
@@ -1,36 +1,49 @@
1title://h1[@class="articleHeadline"] 1title://h1[@class="articleHeadline"]
2body://div[@id="article"] 2body://div[@id="article"]
3strip_id_or_class:articleTools 3body://*[@itemprop="articleBody"]
4strip_id_or_class:readerscomment 4strip_id_or_class:articleTools
5#strip://div[contains(@class, "articleInline runaroundLeft")] 5strip_id_or_class:readerscomment
6strip: //div[contains(@class, "doubleRule")] 6#strip://div[contains(@class, "articleInline runaroundLeft")]
7# strip image credit - appears as a bold heading 7strip: //div[contains(@class, "doubleRule")]
8strip: //div[contains(@class, "articleInline")]//h6 8# strip image credit - appears as a bold heading
9strip_id_or_class:enlargeThis 9strip: //div[contains(@class, "articleInline")]//h6
10strip_id_or_class:pageLinks 10strip_id_or_class:enlargeThis
11strip_id_or_class:memberTools 11strip_id_or_class:pageLinks
12strip_id_or_class:articleExtras 12strip_id_or_class:memberTools
13strip_id_or_class:singleAd 13strip_id_or_class:articleExtras
14strip_id_or_class:byline 14strip_id_or_class:singleAd
15strip_id_or_class:dateline 15strip_id_or_class:byline
16strip_id_or_class:articleheadline 16strip_id_or_class:dateline
17strip_id_or_class:articleBottomExtra 17strip_id_or_class:articleheadline
18strip://a[contains(@href, 'nytimes.com/adx/')] 18strip_id_or_class:articleBottomExtra
19strip: //nyt_byline 19strip_id_or_class:shareTools
20strip: //span[contains(@class, 'slideshow') or contains(@class, 'video')] 20strip://a[contains(@href, 'nytimes.com/adx/')]
21strip: //p[@class='caption']//a[contains(., 'More Photos')] 21strip: //nyt_byline
22 22strip: //span[contains(@class, 'slideshow') or contains(@class, 'video')]
23prune: no 23strip: //p[@class='caption']//a[contains(., 'More Photos')]
24tidy: no 24
25 25prune: no
26date: substring-after(//*[contains(@class, 'dateline')], 'Published:') 26tidy: no
27 27
28single_page_link: //link[contains(@href, 'pagewanted=all')] 28find_string: <script
29#single_page_link: //a[contains(@href, 'pagewanted=all') and not(contains(@href, 'login'))] 29replace_string: <div style="display:none"
30 30find_string: </script>
31strip://ul[@id = 'toolsList'] 31replace_string: </div>
32strip://h6[@class = 'kicker'] 32
33author:substring-after(//h6[@class='byline'],'By ') 33date: substring-after(//*[contains(@class, 'dateline')], 'Published:')
34 34
35test_url: http://www.nytimes.com/2011/07/24/books/review/an-academic-authors-unintentional-masterpiece.html 35single_page_link: //link[contains(@href, 'pagewanted=all')]
36test_url: http://www.nytimes.com/2012/06/10/arts/television/the-newsroom-aaron-sorkins-return-to-tv.html \ No newline at end of file 36single_page_link: //link[@rel='alternate' and contains(@href, 'mobile.nytimes.com')]/@href
37single_page_link: concat(substring-before(//div[@id='pageLinks']//a[contains(@href, 'pagewanted=')]/@href, 'pagewanted='), 'pagewanted=all')
38#single_page_link: //a[contains(@href, 'pagewanted=all') and not(contains(@href, 'login'))]
39
40strip://ul[@id = 'toolsList']
41strip://h6[@class = 'kicker']
42author:substring-after(//h6[@class='byline'],'By ')
43
44test_url: http://www.nytimes.com/2011/07/24/books/review/an-academic-authors-unintentional-masterpiece.html
45test_url: http://www.nytimes.com/2012/06/10/arts/television/the-newsroom-aaron-sorkins-return-to-tv.html
46test_url: http://www.nytimes.com/2013/03/25/world/middleeast/israeli-military-responds-after-patrols-come-under-fire-from-syria.html
47test_url: http://www.nytimes.com/2013/08/15/nyregion/when-the-new-york-city-subway-ran-without-rails.html
48test_url: http://www.nytimes.com/2004/02/29/weekinreview/correspondence-class-consciousness-china-s-wealthy-live-creed-hobbes-darwin-meet.html
49test_url: http://www.nytimes.com/2014/06/19/opinion/gail-collins-romney-and-the-2016-contenders-huddle.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/nzz.ch.txt b/inc/3rdparty/site_config/standard/nzz.ch.txt
index 81faabae..749f4f2a 100644..100755
--- a/inc/3rdparty/site_config/standard/nzz.ch.txt
+++ b/inc/3rdparty/site_config/standard/nzz.ch.txt
@@ -1,12 +1,12 @@
1body: //*[@class='article-full'] 1body: //*[@class='article-full']
2title: //h3 2title: //h3
3strip: //header[@class='group'] 3strip: //header[@class='group']
4#body: //p[@class='lead'] 4#body: //p[@class='lead']
5#move_into(//p[@class='lead']): //*[@class='article-full']/figure 5#move_into(//p[@class='lead']): //*[@class='article-full']/figure
6#move_into(//p[@class='lead']): //div[@id='articleBodyText'] 6#move_into(//p[@class='lead']): //div[@id='articleBodyText']
7strip: //div[@id='social-media-floater'] 7strip: //div[@id='social-media-floater']
8strip: //div[@class='advertisement'] 8strip: //div[@class='advertisement']
9strip: //div[@class='infobox'] 9strip: //div[@class='infobox']
10strip: //div[@id='articleComments'] 10strip: //div[@id='articleComments']
11 11
12test_url: http://www.nzz.ch/wissen/wissenschaft/sonnenschutz-fuer-die-erde-1.17282213 \ No newline at end of file 12test_url: http://www.nzz.ch/wissen/wissenschaft/sonnenschutz-fuer-die-erde-1.17282213 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/observer.com.txt b/inc/3rdparty/site_config/standard/observer.com.txt
index e409ca2e..0b107538 100644..100755
--- a/inc/3rdparty/site_config/standard/observer.com.txt
+++ b/inc/3rdparty/site_config/standard/observer.com.txt
@@ -1,7 +1,7 @@
1body: //article[contains(@class, 'instapaper_body')] 1body: //article[contains(@class, 'instapaper_body')]
2 2
3prune: no 3prune: no
4 4
5single_page_link: //a[@id='print-button'] 5single_page_link: //a[@id='print-button']
6 6
7test_url: http://www.observer.com/2008/would-you-take-tumblr-man \ No newline at end of file 7test_url: http://www.observer.com/2008/would-you-take-tumblr-man \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/off.net.mk.txt b/inc/3rdparty/site_config/standard/off.net.mk.txt
index a2fb5f21..bf107876 100644..100755
--- a/inc/3rdparty/site_config/standard/off.net.mk.txt
+++ b/inc/3rdparty/site_config/standard/off.net.mk.txt
@@ -1,7 +1,7 @@
1body: //div[(@id = "content")] 1body: //div[(@id = "content")]
2strip: //div[(@class = "links-bar")] 2strip: //div[(@class = "links-bar")]
3strip: //div[(@class = "povrzani")] 3strip: //div[(@class = "povrzani")]
4strip: //div[(@class = "povrzani-dolu")] 4strip: //div[(@class = "povrzani-dolu")]
5strip: //div[(@class = "tags")] 5strip: //div[(@class = "tags")]
6strip: //h1[(@id = "page-title")] 6strip: //h1[(@id = "page-title")]
7test_url: http://off.net.mk/zhivot-i-zabava/gadzheti/dzhabe-raboti-dzhabe-ne-dishi \ No newline at end of file 7test_url: http://off.net.mk/zhivot-i-zabava/gadzheti/dzhabe-raboti-dzhabe-ne-dishi \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/omaha.com.txt b/inc/3rdparty/site_config/standard/omaha.com.txt
index 53db061d..53db061d 100644..100755
--- a/inc/3rdparty/site_config/standard/omaha.com.txt
+++ b/inc/3rdparty/site_config/standard/omaha.com.txt
diff --git a/inc/3rdparty/site_config/standard/omiliya.org.txt b/inc/3rdparty/site_config/standard/omiliya.org.txt
index 1b39b625..4b3a7202 100644..100755
--- a/inc/3rdparty/site_config/standard/omiliya.org.txt
+++ b/inc/3rdparty/site_config/standard/omiliya.org.txt
@@ -1,9 +1,9 @@
1title: //div[@id='squeeze']/h1 1title: //div[@id='squeeze']/h1
2strip: //div[@id='squeeze']/h1 2strip: //div[@id='squeeze']/h1
3author: //div[@class='submitted']/a 3author: //div[@class='submitted']/a
4strip: //div[@class='submitted']/a 4strip: //div[@class='submitted']/a
5convert_double_br_tags: yes 5convert_double_br_tags: yes
6 6
7 7
8 8
9test_url: http://omiliya.org/content/predchuvstvie.html \ No newline at end of file 9test_url: http://omiliya.org/content/predchuvstvie.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/on.net.mk.txt b/inc/3rdparty/site_config/standard/on.net.mk.txt
index be7a17ef..a95c2b0f 100644..100755
--- a/inc/3rdparty/site_config/standard/on.net.mk.txt
+++ b/inc/3rdparty/site_config/standard/on.net.mk.txt
@@ -1,5 +1,5 @@
1body: //div[(@class = "statija")] 1body: //div[(@class = "statija")]
2strip: //div[(@class = "relatedBlock")] 2strip: //div[(@class = "relatedBlock")]
3strip: //div[(@class = "swftools")] 3strip: //div[(@class = "swftools")]
4strip: //table[(@class = "links")] 4strip: //table[(@class = "links")]
5test_url: http://on.net.mk/video/na-trkala/lamborghini-aventador-avionot-shto-ne-leta \ No newline at end of file 5test_url: http://on.net.mk/video/na-trkala/lamborghini-aventador-avionot-shto-ne-leta \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/online.wsj.com.txt b/inc/3rdparty/site_config/standard/online.wsj.com.txt
index edb52855..448bb7e1 100644..100755
--- a/inc/3rdparty/site_config/standard/online.wsj.com.txt
+++ b/inc/3rdparty/site_config/standard/online.wsj.com.txt
@@ -1,23 +1,25 @@
1title: //meta[@property="og:title"]/@content 1title: //meta[@property="og:title"]/@content
2body: //div[@id='article_story_body'] 2body: //div[@id='article_story_body']
3 3
4author: //h3[@class='byline']/a 4author: //h3[@class='byline']/a
5# for slid show content 5# for slide show content
6body: //ul[@id='imageSlide']//li[@class='firstSlide']//img | (//div[@class='txt_body']//p)[1] 6body: //ul[@id='imageSlide']//li[@class='firstSlide']//img | (//div[@class='txt_body']//p)[1]
7date: //li[@class='dateStamp']/small 7date: //li[@class='dateStamp']/small
8 8
9strip_id_or_class: insetFullBracket 9strip_id_or_class: insetFullBracket
10strip_id_or_class: insettipBox 10strip_id_or_class: insettipBox
11#strip_id_or_class: legacyInset 11#strip_id_or_class: legacyInset
12strip_id_or_class: recipeACShopAndBuyText 12strip_id_or_class: recipeACShopAndBuyText
13 13
14strip: //div[contains(@class, 'insetContent')]//cite 14strip: //div[contains(@class, 'insetContent')]//cite
15strip: //*[contains(@style, 'visibility: hidden;')] 15strip: //*[contains(@style, 'visibility: hidden;')]
16strip: //div[contains(@class, 'insetContent') and not(contains(@class, 'image'))] 16strip: //div[contains(@class, 'insetContent') and not(contains(@class, 'image'))]
17 17strip: //div[contains(@class, 'carousel')]
18prune: no 18
19tidy: no 19prune: no
20 20tidy: no
21test_url: http://online.wsj.com/article/SB10001424052970203363504577185322849515102.html 21
22# slide show 22test_url: http://online.wsj.com/news/articles/SB10001424052702304626304579509100018004342
23test_url: http://online.wsj.com/article/SB10001424052970204791104577110550376458164.html \ No newline at end of file 23test_url: http://online.wsj.com/article/SB10001424052970203363504577185322849515102.html
24# slide show
25test_url: http://online.wsj.com/article/SB10001424052970204791104577110550376458164.html
diff --git a/inc/3rdparty/site_config/standard/onlinewelten.com.txt b/inc/3rdparty/site_config/standard/onlinewelten.com.txt
index 1609fa83..1609fa83 100644..100755
--- a/inc/3rdparty/site_config/standard/onlinewelten.com.txt
+++ b/inc/3rdparty/site_config/standard/onlinewelten.com.txt
diff --git a/inc/3rdparty/site_config/standard/onstartups.com.txt b/inc/3rdparty/site_config/standard/onstartups.com.txt
index cccce8cd..cccce8cd 100644..100755
--- a/inc/3rdparty/site_config/standard/onstartups.com.txt
+++ b/inc/3rdparty/site_config/standard/onstartups.com.txt
diff --git a/inc/3rdparty/site_config/standard/ontologicalgeek.com.txt b/inc/3rdparty/site_config/standard/ontologicalgeek.com.txt
new file mode 100755
index 00000000..a9bf71ef
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/ontologicalgeek.com.txt
@@ -0,0 +1,8 @@
1title: //h1[@class='entry-title']
2
3author: //a[@rel='author']
4
5date: substring-before(//aside[@class='entry-meta'], '|')
6
7body: //div[@class='entry-content']
8test_url: http://ontologicalgeek.com/change-or-live-final-fantasy-x-as-catholic-dystopia/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/opensource.org.txt b/inc/3rdparty/site_config/standard/opensource.org.txt
index 2bd3ccdb..2bd3ccdb 100644..100755
--- a/inc/3rdparty/site_config/standard/opensource.org.txt
+++ b/inc/3rdparty/site_config/standard/opensource.org.txt
diff --git a/inc/3rdparty/site_config/standard/openthemagazine.com.txt b/inc/3rdparty/site_config/standard/openthemagazine.com.txt
index 510eb252..6913eb0e 100644..100755
--- a/inc/3rdparty/site_config/standard/openthemagazine.com.txt
+++ b/inc/3rdparty/site_config/standard/openthemagazine.com.txt
@@ -1,4 +1,4 @@
1body: //div[@id = 'content-inner'] 1body: //div[@id = 'content-inner']
2strip: //div[@id = 'content-bottom'] 2strip: //div[@id = 'content-bottom']
3strip_id_or_class: print_sharebutton 3strip_id_or_class: print_sharebutton
4test_url: http://openthemagazine.com/article/nation/sania-vs-saina \ No newline at end of file 4test_url: http://openthemagazine.com/article/nation/sania-vs-saina \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/openwebx.org.txt b/inc/3rdparty/site_config/standard/openwebx.org.txt
index b7663540..a5dcdb59 100644..100755
--- a/inc/3rdparty/site_config/standard/openwebx.org.txt
+++ b/inc/3rdparty/site_config/standard/openwebx.org.txt
@@ -1,4 +1,4 @@
1body: //div[@class="chapter"] 1body: //div[@class="chapter"]
2prune: no 2prune: no
3tidy: no 3tidy: no
4test_url: http://openwebx.org/docs/springext.html \ No newline at end of file 4test_url: http://openwebx.org/docs/springext.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/orf.at.txt b/inc/3rdparty/site_config/standard/orf.at.txt
index ff16ca79..fb4f2181 100644..100755
--- a/inc/3rdparty/site_config/standard/orf.at.txt
+++ b/inc/3rdparty/site_config/standard/orf.at.txt
@@ -1,11 +1,11 @@
1single_page_link: //div[@id='content']//p[@class='readMore']/a 1single_page_link: //div[@id='content']//p[@class='readMore']/a
2 2
3title: //div[@class='hidden offscreen']/h2 3title: //div[@class='hidden offscreen']/h2
4body: //div[@id="storyText"] 4body: //div[@id="storyText"]
5move_into(//div[@id='storyText']): //div[@class='fact'] 5move_into(//div[@id='storyText']): //div[@class='fact']
6strip: //small[@class='credit'] 6strip: //small[@class='credit']
7strip: //small[@class='caption'] 7strip: //small[@class='caption']
8date: substring-after(//div[@id='storyMeta']//p[@class='date'],'Publiziert am') 8date: substring-after(//div[@id='storyMeta']//p[@class='date'],'Publiziert am')
9strip: //p[@class='toplink'] 9strip: //p[@class='toplink']
10 10
11test_url: http://orf.at/stories/2084731/ \ No newline at end of file 11test_url: http://orf.at/stories/2084731/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/origo.hu.txt b/inc/3rdparty/site_config/standard/origo.hu.txt
index 0dedac3d..50717f25 100644..100755
--- a/inc/3rdparty/site_config/standard/origo.hu.txt
+++ b/inc/3rdparty/site_config/standard/origo.hu.txt
@@ -1,18 +1,18 @@
1title: /html/body/div[5]/div[2]/h1 1title: /html/body/div[5]/div[2]/h1
2body: /html/body/div[5]/div[2]/div[6]/div/div 2body: /html/body/div[5]/div[2]/div[6]/div/div
3body: //*[@id="cikk"] 3body: //*[@id="cikk"]
4strip: /html/body/div[5]/div[2]/h1 4strip: /html/body/div[5]/div[2]/h1
5strip: /html/body/div[5]/div[2]/div[4] 5strip: /html/body/div[5]/div[2]/div[4]
6strip: //*[@id="multidoboz"] 6strip: //*[@id="multidoboz"]
7strip: /html/body/div[5]/div[2]/div[6]/div[2] 7strip: /html/body/div[5]/div[2]/div[6]/div[2]
8strip: //*[@id="comments"] 8strip: //*[@id="comments"]
9strip: //*[@id="rating-doboz"] 9strip: //*[@id="rating-doboz"]
10strip: /html/body/div[5]/div[2]/div[10] 10strip: /html/body/div[5]/div[2]/div[10]
11strip: /html/body/div[5]/div[2]/a 11strip: /html/body/div[5]/div[2]/a
12strip: /html/body/div[5]/div[2]/span 12strip: /html/body/div[5]/div[2]/span
13strip: /html/body/div[5]/div[2]/span[2] 13strip: /html/body/div[5]/div[2]/span[2]
14strip: /html/body/div[5]/div[2]/span[3] 14strip: /html/body/div[5]/div[2]/span[3]
15strip: /html/body/div[5]/div[2]/span[4] 15strip: /html/body/div[5]/div[2]/span[4]
16strip: /html/body/div[5]/div[2]/span[5] 16strip: /html/body/div[5]/div[2]/span[5]
17strip: //*[@id="kommentszam"] 17strip: //*[@id="kommentszam"]
18test_url: http://www.origo.hu/itthon/20110119-lemondott-a-kulturaert-felelos-helyettes-allamtitkar.html \ No newline at end of file 18test_url: http://www.origo.hu/itthon/20110119-lemondott-a-kulturaert-felelos-helyettes-allamtitkar.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/oschina.net.txt b/inc/3rdparty/site_config/standard/oschina.net.txt
new file mode 100755
index 00000000..56451539
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/oschina.net.txt
@@ -0,0 +1,3 @@
1title: //h1
2strip_id_or_class: syntaxhighlighter
3test_url: http://www.oschina.net/translate/event-based-programming-what-async-has-over-sync?print \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pakistantvdekho.com.txt b/inc/3rdparty/site_config/standard/pakistantvdekho.com.txt
index f03c9551..7e2985e0 100644..100755
--- a/inc/3rdparty/site_config/standard/pakistantvdekho.com.txt
+++ b/inc/3rdparty/site_config/standard/pakistantvdekho.com.txt
@@ -1,11 +1,11 @@
1#body: (//div[@class='ftr-yt-vid'])[1] 1#body: (//div[@class='ftr-yt-vid'])[1]
2body: (//blockquote[contains(@class, 'postcontent')])[1] 2body: (//blockquote[contains(@class, 'postcontent')])[1]
3body: (//div[starts-with(@id, 'post_message')])[1] 3body: (//div[starts-with(@id, 'post_message')])[1]
4 4
5prune: no 5prune: no
6tidy: no 6tidy: no
7 7
8#replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player" 8#replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player"
9#replace_string(</iframe>): </iframe>&nbsp;</div> 9#replace_string(</iframe>): </iframe>&nbsp;</div>
10 10
11test_url: http://pakistantvdekho.com/showthread.php?647741-Sitam-Gar-by-HUM-TV-Episode-07&p=659080#post659080 \ No newline at end of file 11test_url: http://pakistantvdekho.com/showthread.php?647741-Sitam-Gar-by-HUM-TV-Episode-07&p=659080#post659080 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pakmedia.tv.txt b/inc/3rdparty/site_config/standard/pakmedia.tv.txt
new file mode 100755
index 00000000..5d6e4c8c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/pakmedia.tv.txt
@@ -0,0 +1,17 @@
1title: //h1[@class='entry-title']
2body: //article//div[@class='entry']
3strip_id_or_class: addthis
4strip_id_or_class: gdsrcacheloader
5strip_id_or_class: entry-meta
6strip_id_or_class: entry-tags
7strip_id_or_class: authorbox
8strip: //div[@class='entry']/p[1]
9strip: //img[@width='600' and @height='70']
10# related posts
11strip: //h3[contains(., 'Related posts')]
12strip: //div[contains(@style, 'border: 0pt none ; margin: 0pt; padding: 0pt;')]
13
14prune: no
15tidy: no
16
17test_url: http://pakmedia.tv/tv-one/feed \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pandagon.net.txt b/inc/3rdparty/site_config/standard/pandagon.net.txt
index d0d2a5d0..35121e14 100644..100755
--- a/inc/3rdparty/site_config/standard/pandagon.net.txt
+++ b/inc/3rdparty/site_config/standard/pandagon.net.txt
@@ -1,5 +1,5 @@
1title://h2 1title://h2
2author://div[@class="posted"]/a 2author://div[@class="posted"]/a
3date://div[@class="date"] 3date://div[@class="date"]
4body://div[@class="entry"] 4body://div[@class="entry"]
5test_url: http://pandagon.net/index.php/site/its-okay-to-admit-that-mass-hysteria-is-real \ No newline at end of file 5test_url: http://pandagon.net/index.php/site/its-okay-to-admit-that-mass-hysteria-is-real \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pandodaily.com.txt b/inc/3rdparty/site_config/standard/pandodaily.com.txt
index 7d1c2183..a5d427af 100644..100755
--- a/inc/3rdparty/site_config/standard/pandodaily.com.txt
+++ b/inc/3rdparty/site_config/standard/pandodaily.com.txt
@@ -1,5 +1,5 @@
1tidy: no 1tidy: no
2body: //article 2body: //article
3date: //time/@datetime 3date: //time/@datetime
4strip_id_or_class: sharedaddy 4strip_id_or_class: sharedaddy
5test_url: http://pandodaily.com/2012/01/19/ibooks-author-is-not-going-to-hurt-publishers-it-might-even-help-them/ \ No newline at end of file 5test_url: http://pandodaily.com/2012/01/19/ibooks-author-is-not-going-to-hurt-publishers-it-might-even-help-them/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/panic.com.txt b/inc/3rdparty/site_config/standard/panic.com.txt
index 0361f06d..e0e2595c 100644..100755
--- a/inc/3rdparty/site_config/standard/panic.com.txt
+++ b/inc/3rdparty/site_config/standard/panic.com.txt
@@ -1,3 +1,3 @@
1body: //div[@class='entry'] 1body: //div[@class='entry']
2date: //h3[@class='postDate'] 2date: //h3[@class='postDate']
3test_url: http://www.panic.com/blog/2011/07/panic-is-ready-for-lion/ \ No newline at end of file 3test_url: http://www.panic.com/blog/2011/07/panic-is-ready-for-lion/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/papodehomem.com.br.txt b/inc/3rdparty/site_config/standard/papodehomem.com.br.txt
new file mode 100755
index 00000000..2c522da4
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/papodehomem.com.br.txt
@@ -0,0 +1,6 @@
1title: //h2[@class="page_title"]
2body: //div[@class="entry arquivo"]
3author: //span[@class="author"]
4footnotes: yes
5prune: yes
6test_url: http://papodehomem.com.br/um-relato-confessional-sobre-a-maioridade-penal/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/parislemon.com.txt b/inc/3rdparty/site_config/standard/parislemon.com.txt
index a3bd4b0f..cd9bd55d 100644..100755
--- a/inc/3rdparty/site_config/standard/parislemon.com.txt
+++ b/inc/3rdparty/site_config/standard/parislemon.com.txt
@@ -1,6 +1,6 @@
1title: //h2[@class="post-title"] 1title: //h2[@class="post-title"]
2author: substring-after(//div[@class="description"],'Words by ') 2author: substring-after(//div[@class="description"],'Words by ')
3date: //li[@class="date"] 3date: //li[@class="date"]
4strip: //h2[@class="post-title"] 4strip: //h2[@class="post-title"]
5body: //div[@class="copy"] 5body: //div[@class="copy"]
6test_url: http://parislemon.com/post/13462682469/the-15-inch-air \ No newline at end of file 6test_url: http://parislemon.com/post/13462682469/the-15-inch-air \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/parliament.uk.txt b/inc/3rdparty/site_config/standard/parliament.uk.txt
index 478a669f..caaa2e94 100644..100755
--- a/inc/3rdparty/site_config/standard/parliament.uk.txt
+++ b/inc/3rdparty/site_config/standard/parliament.uk.txt
@@ -1,3 +1,3 @@
1title: //h1 1title: //h1
2body: //div[@id='news-article'] 2body: //div[@id='news-article']
3test_url: http://www.parliament.uk/business/committees/committees-a-z/commons-select/backbench-business-committee/news/guidance-for-e-petitioners/ \ No newline at end of file 3test_url: http://www.parliament.uk/business/committees/committees-a-z/commons-select/backbench-business-committee/news/guidance-for-e-petitioners/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pastebin.com.txt b/inc/3rdparty/site_config/standard/pastebin.com.txt
index 89d13b2a..03b67b7e 100644..100755
--- a/inc/3rdparty/site_config/standard/pastebin.com.txt
+++ b/inc/3rdparty/site_config/standard/pastebin.com.txt
@@ -1,6 +1,6 @@
1title://div[@class="paste_box_line1"]/h1 1title://div[@class="paste_box_line1"]/h1
2author://div[@class="paste_box_line2"]/a 2author://div[@class="paste_box_line2"]/a
3body://div[@class="text"] 3body://div[@class="text"]
4date:substring-before(substring-after(//div[@class="paste_box_line2"],'|'),'|') 4date:substring-before(substring-after(//div[@class="paste_box_line2"],'|'),'|')
5dissolve://li 5dissolve://li
6test_url: http://pastebin.com/LAykd1es \ No newline at end of file 6test_url: http://pastebin.com/LAykd1es \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pastepad.fivefilters.org.txt b/inc/3rdparty/site_config/standard/pastepad.fivefilters.org.txt
index 40a049e0..c535158d 100644..100755
--- a/inc/3rdparty/site_config/standard/pastepad.fivefilters.org.txt
+++ b/inc/3rdparty/site_config/standard/pastepad.fivefilters.org.txt
@@ -1,5 +1,5 @@
1title: //h1 1title: //h1
2body: //div[@id='ff-pastepad-content'] 2body: //div[@id='ff-pastepad-content']
3prune: no 3prune: no
4# todo: add test file 4# todo: add test file
5test_url: http://pastepad.fivefilters.org/test.html \ No newline at end of file 5test_url: http://pastepad.fivefilters.org/test.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pathawks.com.txt b/inc/3rdparty/site_config/standard/pathawks.com.txt
index 1a4cd25b..25042224 100644..100755
--- a/inc/3rdparty/site_config/standard/pathawks.com.txt
+++ b/inc/3rdparty/site_config/standard/pathawks.com.txt
@@ -1,8 +1,8 @@
1title://*[contains(@class,'post-title')] 1title://*[contains(@class,'post-title')]
2body://div[contains(@class,'post-body')] 2body://div[contains(@class,'post-body')]
3body://div[contains(@class,'entry-content')] 3body://div[contains(@class,'entry-content')]
4strip_comments:no 4strip_comments:no
5prune:no 5prune:no
6convert_double_br_tags:yes 6convert_double_br_tags:yes
7tidy:yes 7tidy:yes
8test_url: http://www.pathawks.com/2011/06/crazyawesomecoloradotrip.html \ No newline at end of file 8test_url: http://www.pathawks.com/2011/06/crazyawesomecoloradotrip.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pcast.me.txt b/inc/3rdparty/site_config/standard/pcast.me.txt
index ae38e8e1..ae38e8e1 100644..100755
--- a/inc/3rdparty/site_config/standard/pcast.me.txt
+++ b/inc/3rdparty/site_config/standard/pcast.me.txt
diff --git a/inc/3rdparty/site_config/standard/pcmag.com.txt b/inc/3rdparty/site_config/standard/pcmag.com.txt
index cebea4d7..96bdd95a 100644..100755
--- a/inc/3rdparty/site_config/standard/pcmag.com.txt
+++ b/inc/3rdparty/site_config/standard/pcmag.com.txt
@@ -1,10 +1,10 @@
1prune:yes 1prune:yes
2 2
3date://*[contains(@class,'date')] 3date://*[contains(@class,'date')]
4 4
5body://div[contains(@id,'content')] 5body://div[contains(@id,'content')]
6 6
7next_page_link://a[contains(.,'Next >')] 7next_page_link://a[contains(.,'Next >')]
8 8
9strip_id_or_class:sponsors 9strip_id_or_class:sponsors
10test_url: http://www.pcmag.com/article2/0,2817,2401676,00.asp \ No newline at end of file 10test_url: http://www.pcmag.com/article2/0,2817,2401676,00.asp \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pcworld.com.txt b/inc/3rdparty/site_config/standard/pcworld.com.txt
index 30ccbb5f..7193f87e 100644..100755
--- a/inc/3rdparty/site_config/standard/pcworld.com.txt
+++ b/inc/3rdparty/site_config/standard/pcworld.com.txt
@@ -1,19 +1,19 @@
1title: //div[@class='articleHead']//h1 1title: //div[@class='articleHead']//h1
2author: //div[@class="author-name"]/a[1] 2author: //div[@class="author-name"]/a[1]
3body: //div[@class="main"] 3body: //div[@class="main"]
4 4
5# remove 'From the Lab' and 'Recent posts' text 5# remove 'From the Lab' and 'Recent posts' text
6strip: //div[@class='blogLabel'] 6strip: //div[@class='blogLabel']
7 7
8# remove byline and meta info 8# remove byline and meta info
9strip: //h1 9strip: //h1
10strip: //div[@class="article-meta"] 10strip: //div[@class="article-meta"]
11strip: //div[@class="author-info"] 11strip: //div[@class="author-info"]
12 12
13#strip tags and categories 13#strip tags and categories
14strip: //div[@class="department"] 14strip: //div[@class="department"]
15 15
16#strip product cap links 16#strip product cap links
17strip: //div[@class="cap-main"] 17strip: //div[@class="cap-main"]
18strip: //div[@id="compare-lede"] 18strip: //div[@id="compare-lede"]
19test_url: http://www.pcworld.com/article/262034/are-printer-companies-gouging-us-on-laser-toner-pricing.html \ No newline at end of file 19test_url: http://www.pcworld.com/article/262034/are-printer-companies-gouging-us-on-laser-toner-pricing.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/penny-arcade.com.txt b/inc/3rdparty/site_config/standard/penny-arcade.com.txt
index f97615f1..a0d5099e 100644..100755
--- a/inc/3rdparty/site_config/standard/penny-arcade.com.txt
+++ b/inc/3rdparty/site_config/standard/penny-arcade.com.txt
@@ -1,23 +1,23 @@
1# 2012-01-14 carlo@... - fixed title, body; added author, date 1# 2012-01-14 carlo@... - fixed title, body; added author, date
2 2
3title: //div[@class="title"]/h2/a 3title: //div[@class="title"]/h2/a
4# body: //div[@class="post"] 4# body: //div[@class="post"]
5# author: //p[@class="iconEmail"]/a 5# author: //p[@class="iconEmail"]/a
6# date: //p[@class="iconDate"] 6# date: //p[@class="iconDate"]
7 7
8# 1/24/2013 yosoyju - fixed author, date, and body, added support for PA Report 8# 1/24/2013 yosoyju - fixed author, date, and body, added support for PA Report
9 9
10# Penny Arcade 10# Penny Arcade
11 11
12author: //li[@class="iconEmail"]/a 12author: //li[@class="iconEmail"]/a
13date: //li[@class="iconDate"] 13date: //li[@class="iconDate"]
14body: //div[@class="body"] 14body: //div[@class="body"]
15 15
16# PA Report 16# PA Report
17 17
18author: //div[@class="meta"]/p/a 18author: //div[@class="meta"]/p/a
19date: substring-after(//div[@class="meta"]/p, '/ ') 19date: substring-after(//div[@class="meta"]/p, '/ ')
20title: substring-after(//title, '- ') 20title: substring-after(//title, '- ')
21 21
22test_url: http://penny-arcade.com/2012/01/13/i-put-some-news-in-your-news 22test_url: http://penny-arcade.com/2012/01/13/i-put-some-news-in-your-news
23test_url: http://penny-arcade.com/report/editorial-article/the-dystopian-future-of-casual-games-personalized-targeted-pricing-and-mech \ No newline at end of file 23test_url: http://penny-arcade.com/report/editorial-article/the-dystopian-future-of-casual-games-personalized-targeted-pricing-and-mech \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pentaxforums.com.txt b/inc/3rdparty/site_config/standard/pentaxforums.com.txt
index 00f61a48..00f61a48 100644..100755
--- a/inc/3rdparty/site_config/standard/pentaxforums.com.txt
+++ b/inc/3rdparty/site_config/standard/pentaxforums.com.txt
diff --git a/inc/3rdparty/site_config/standard/philadelphiaeagles.com.txt b/inc/3rdparty/site_config/standard/philadelphiaeagles.com.txt
index a369fd65..5ba5f772 100644..100755
--- a/inc/3rdparty/site_config/standard/philadelphiaeagles.com.txt
+++ b/inc/3rdparty/site_config/standard/philadelphiaeagles.com.txt
@@ -1,6 +1,6 @@
1prune: no 1prune: no
2tidy: no 2tidy: no
3body: //div[@class='article-content'] 3body: //div[@class='article-content']
4dissolve: //nobr/a 4dissolve: //nobr/a
5dissolve: //nobr 5dissolve: //nobr
6test_url: http://www.philadelphiaeagles.com/news/article-1/Jacksons-Light-Shined-On-Sunday-Night/51a862de-42b4-40f1-a5a8-ba0fb8a435b7 \ No newline at end of file 6test_url: http://www.philadelphiaeagles.com/news/article-1/Jacksons-Light-Shined-On-Sunday-Night/51a862de-42b4-40f1-a5a8-ba0fb8a435b7 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/philly.com.txt b/inc/3rdparty/site_config/standard/philly.com.txt
index 41318f63..accbd60b 100644..100755
--- a/inc/3rdparty/site_config/standard/philly.com.txt
+++ b/inc/3rdparty/site_config/standard/philly.com.txt
@@ -1,10 +1,10 @@
1title: //h1[@class='entry-title'] 1title: //h1[@class='entry-title']
2author: //p[@class='byline']/span 2author: //p[@class='byline']/span
3body: //@id='body-content' 3body: //@id='body-content'
4date: //div[@class='article_timestamp']/span 4date: //div[@class='article_timestamp']/span
5 5
6strip: //@class=b-group 6strip: //@class=b-group
7strip: //*[contains(@style, 'none')] 7strip: //*[contains(@style, 'none')]
8strip: //a[contains(@href, 'comments')] 8strip: //a[contains(@href, 'comments')]
9strip: //*[contains(@class, 'comment')] 9strip: //*[contains(@class, 'comment')]
10test_url: http://www.philly.com/philly/sports/eagles/20120127_Ohio_State_s_Posey_didn_t_waste_time_lost_to_suspension.html \ No newline at end of file 10test_url: http://www.philly.com/philly/sports/eagles/20120127_Ohio_State_s_Posey_didn_t_waste_time_lost_to_suspension.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/photo.tutsplus.com.txt b/inc/3rdparty/site_config/standard/photo.tutsplus.com.txt
index 4e2ccb01..7f7e3830 100644..100755
--- a/inc/3rdparty/site_config/standard/photo.tutsplus.com.txt
+++ b/inc/3rdparty/site_config/standard/photo.tutsplus.com.txt
@@ -1,6 +1,6 @@
1author: substring-before(//div[@class='post_meta'],' on') 1author: substring-before(//div[@class='post_meta'],' on')
2date: substring-after(substring-before(//div[@class='post_meta'],'with'),' on') 2date: substring-after(substring-before(//div[@class='post_meta'],'with'),' on')
3title: //h1[class='post_title'] 3title: //h1[class='post_title']
4body: //div[@class='article'] 4body: //div[@class='article']
5 5
6test_url: http://photo.tutsplus.com/articles/news/a-brilliant-beginners-guide-to-architectural-photography/ \ No newline at end of file 6test_url: http://photo.tutsplus.com/articles/news/a-brilliant-beginners-guide-to-architectural-photography/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/php.net.txt b/inc/3rdparty/site_config/standard/php.net.txt
index 7c57a84d..cc643f05 100644..100755
--- a/inc/3rdparty/site_config/standard/php.net.txt
+++ b/inc/3rdparty/site_config/standard/php.net.txt
@@ -1,6 +1,6 @@
1body: //div[@id='content'] 1body: //div[@id='content']
2strip_id_or_class: manualnavbar 2strip_id_or_class: manualnavbar
3 3
4prune: no 4prune: no
5 5
6test_url: http://www.php.net/manual/en/migration5.incompatible.php \ No newline at end of file 6test_url: http://www.php.net/manual/en/migration5.incompatible.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/physicstoday.org.txt b/inc/3rdparty/site_config/standard/physicstoday.org.txt
index a8163995..624055b7 100644..100755
--- a/inc/3rdparty/site_config/standard/physicstoday.org.txt
+++ b/inc/3rdparty/site_config/standard/physicstoday.org.txt
@@ -1,7 +1,7 @@
1title: //div[@class='abstitle']//h1 1title: //div[@class='abstitle']//h1
2author: //div[@class='authorList'] 2author: //div[@class='authorList']
3body: //div[@id='fulltext_body'] 3body: //div[@id='fulltext_body']
4 4
5prune: no 5prune: no
6 6
7test_url: http://www.physicstoday.org/resource/1/phtoad/v64/i10/p48_s1?bypassSSO=1 \ No newline at end of file 7test_url: http://www.physicstoday.org/resource/1/phtoad/v64/i10/p48_s1?bypassSSO=1 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pinterest.com.txt b/inc/3rdparty/site_config/standard/pinterest.com.txt
new file mode 100755
index 00000000..01b6df41
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/pinterest.com.txt
@@ -0,0 +1,5 @@
1title: //title
2body: //div[contains(@class, 'imageContainer')]
3
4test_url: http://pinterest.com/pin/380906080954441188/
5test_url: http://pinterest.com/michaelsorm/architecture/rss \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pitchfork.com.txt b/inc/3rdparty/site_config/standard/pitchfork.com.txt
index 3decc538..eee96a9c 100644..100755
--- a/inc/3rdparty/site_config/standard/pitchfork.com.txt
+++ b/inc/3rdparty/site_config/standard/pitchfork.com.txt
@@ -1,16 +1,16 @@
1title:concat(//h1,' - ',//h2,' - ',//h3) 1title:concat(//h1,' - ',//h2,' - ',//h3)
2author://address 2author://address
3date://span[@class='pub-date'] 3date://span[@class='pub-date']
4body://div[@id='main'] 4body://div[@id='main']
5single_page_link://link[@rel='canonical'] 5single_page_link://link[@rel='canonical']
6strip://div[@class='info'] 6strip://div[@class='info']
7strip_id_or_class:'object-grid related-content' 7strip_id_or_class:'object-grid related-content'
8strip_id_or_class:'object-prevnext' 8strip_id_or_class:'object-prevnext'
9strip_id_or_class:'object-header' 9strip_id_or_class:'object-header'
10strip_id_or_class:'source' 10strip_id_or_class:'source'
11strip_id_or_class:'label' 11strip_id_or_class:'label'
12strip_id_or_class:'title' 12strip_id_or_class:'title'
13dissolve://ul 13dissolve://ul
14strip://li[@class='next'] 14strip://li[@class='next']
15strip://li[@class='prev'] 15strip://li[@class='prev']
16test_url: http://pitchfork.com/features/why-we-fight/8796-on-the-far-slope-of-the-uncanny-valley/ \ No newline at end of file 16test_url: http://pitchfork.com/features/why-we-fight/8796-on-the-far-slope-of-the-uncanny-valley/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pittnews.com.txt b/inc/3rdparty/site_config/standard/pittnews.com.txt
index 92777073..c302526d 100644..100755
--- a/inc/3rdparty/site_config/standard/pittnews.com.txt
+++ b/inc/3rdparty/site_config/standard/pittnews.com.txt
@@ -1,8 +1,8 @@
1title: //h2[@class='post-title'] 1title: //h2[@class='post-title']
2author: substring-before(substring-after(//h3[@class='post-byline'],'By:'),'/') 2author: substring-before(substring-after(//h3[@class='post-byline'],'By:'),'/')
3date: substring-before(substring-after(//p[@class='post-details'],'Posted on '),'in') 3date: substring-before(substring-after(//p[@class='post-details'],'Posted on '),'in')
4strip: //h2[@class='post-title'] 4strip: //h2[@class='post-title']
5strip: //p[@class='post-details'] 5strip: //p[@class='post-details']
6strip: //h3[@class='post-byline'] 6strip: //h3[@class='post-byline']
7body: //div[@id='content'] 7body: //div[@id='content']
8test_url: http://pittnews.com/newsstory/mens-basketball-pitt-recruit-robinson-to-bring-leadership/ \ No newline at end of file 8test_url: http://pittnews.com/newsstory/mens-basketball-pitt-recruit-robinson-to-bring-leadership/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pittsburgh.pirates.mlb.com.txt b/inc/3rdparty/site_config/standard/pittsburgh.pirates.mlb.com.txt
index 824cb064..f2948528 100644..100755
--- a/inc/3rdparty/site_config/standard/pittsburgh.pirates.mlb.com.txt
+++ b/inc/3rdparty/site_config/standard/pittsburgh.pirates.mlb.com.txt
@@ -1,15 +1,15 @@
1title: substring-before(//title,'pirates.com') 1title: substring-before(//title,'pirates.com')
2date: //span[@class='timeStamp'] 2date: //span[@class='timeStamp']
3author: substring-before(substring-after(//div[@class='byLine'],'By'),'/') 3author: substring-before(substring-after(//div[@class='byLine'],'By'),'/')
4body: //div[@id='article'] 4body: //div[@id='article']
5#strip: //div[@class='inner'] 5#strip: //div[@class='inner']
6strip: //div[@id='article_head'] 6strip: //div[@id='article_head']
7strip: //p[@class='tagLine'] 7strip: //p[@class='tagLine']
8strip: //div[@id='article_related_links'] 8strip: //div[@id='article_related_links']
9strip: //div[@id='article_related_mlb'] 9strip: //div[@id='article_related_mlb']
10strip: //div[@id='article_related_club'] 10strip: //div[@id='article_related_club']
11strip: //span[@class='more'] 11strip: //span[@class='more']
12strip: //div[@class='article_component'] 12strip: //div[@class='article_component']
13strip: //span[@class='screen_reader'] 13strip: //span[@class='screen_reader']
14strip: //ul[@class='columnists_blurb'] 14strip: //ul[@class='columnists_blurb']
15test_url: http://pittsburgh.pirates.mlb.com/news/article.jsp?ymd=20120330&content_id=27759040&vkey=news_pit&c_id=pit \ No newline at end of file 15test_url: http://pittsburgh.pirates.mlb.com/news/article.jsp?ymd=20120330&content_id=27759040&vkey=news_pit&c_id=pit \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pittsburghlive.com.txt b/inc/3rdparty/site_config/standard/pittsburghlive.com.txt
index b3e66166..cc7891f3 100644..100755
--- a/inc/3rdparty/site_config/standard/pittsburghlive.com.txt
+++ b/inc/3rdparty/site_config/standard/pittsburghlive.com.txt
@@ -1,7 +1,7 @@
1title: substring-before(//title,'- Pittsburgh Tribune') 1title: substring-before(//title,'- Pittsburgh Tribune')
2author: substring-before(substring-after(//div[@class='byline'],'By '),',') 2author: substring-before(substring-after(//div[@class='byline'],'By '),',')
3date: substring-after(substring-after(//div[@class='byline'],','),',') 3date: substring-after(substring-after(//div[@class='byline'],','),',')
4body: //div[@id='storyBody'] 4body: //div[@id='storyBody']
5strip: //div[@class='morestories'] 5strip: //div[@class='morestories']
6dissolve: //p[@class='subheader'] 6dissolve: //p[@class='subheader']
7test_url: http://www.pittsburghlive.com/x/pittsburghtrib/sports/columnists/s_785654.html \ No newline at end of file 7test_url: http://www.pittsburghlive.com/x/pittsburghtrib/sports/columnists/s_785654.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pittsburghmagazine.com.txt b/inc/3rdparty/site_config/standard/pittsburghmagazine.com.txt
index dd715d8f..4d02f6bb 100644..100755
--- a/inc/3rdparty/site_config/standard/pittsburghmagazine.com.txt
+++ b/inc/3rdparty/site_config/standard/pittsburghmagazine.com.txt
@@ -1,8 +1,8 @@
1title: //title 1title: //title
2author: substring-after(//div[@class='by-line'],'BY') 2author: substring-after(//div[@class='by-line'],'BY')
3 3
4body: //div[@id='article-body'] 4body: //div[@id='article-body']
5 5
6strip: //div[@class='by-line'] 6strip: //div[@class='by-line']
7strip: //div[@id='article-body']/h1 7strip: //div[@id='article-body']/h1
8test_url: http://www.pittsburghmagazine.com/Pittsburgh-Magazine/May-2012/Verde-Lights-the-Night/ \ No newline at end of file 8test_url: http://www.pittsburghmagazine.com/Pittsburgh-Magazine/May-2012/Verde-Lights-the-Night/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pittsburghpanthers.com.txt b/inc/3rdparty/site_config/standard/pittsburghpanthers.com.txt
index 6113b96e..c372284a 100644..100755
--- a/inc/3rdparty/site_config/standard/pittsburghpanthers.com.txt
+++ b/inc/3rdparty/site_config/standard/pittsburghpanthers.com.txt
@@ -1,4 +1,4 @@
1title: //span[@class='StoryHeadline'] 1title: //span[@class='StoryHeadline']
2strip: //div[@class='fivevert'] 2strip: //div[@class='fivevert']
3body: //div[@id='Content'] 3body: //div[@id='Content']
4test_url: http://www.pittsburghpanthers.com/sports/m-baskbl/recaps/031412aaa.html \ No newline at end of file 4test_url: http://www.pittsburghpanthers.com/sports/m-baskbl/recaps/031412aaa.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pittscriptblog.com.txt b/inc/3rdparty/site_config/standard/pittscriptblog.com.txt
index 3936310d..571874a4 100644..100755
--- a/inc/3rdparty/site_config/standard/pittscriptblog.com.txt
+++ b/inc/3rdparty/site_config/standard/pittscriptblog.com.txt
@@ -1,8 +1,8 @@
1title: //h1[@class='articletitle'] 1title: //h1[@class='articletitle']
2author: substring-after(//span[@class='author'],'by') 2author: substring-after(//span[@class='author'],'by')
3date: //span[@class='created'] 3date: //span[@class='created']
4body: //div[@class='article'] 4body: //div[@class='article']
5strip: //div[@class='headline'] 5strip: //div[@class='headline']
6strip: //p[@class='articleinfo'] 6strip: //p[@class='articleinfo']
7#dissolve: //p[@class='subheader'] 7#dissolve: //p[@class='subheader']
8test_url: http://www.pittscriptblog.com/2012-articles/march/2012-football-opponents-set-and-the-attendance-dilemma.html \ No newline at end of file 8test_url: http://www.pittscriptblog.com/2012-articles/march/2012-football-opponents-set-and-the-attendance-dilemma.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/planetvita.de.txt b/inc/3rdparty/site_config/standard/planetvita.de.txt
new file mode 100755
index 00000000..bfc3342d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/planetvita.de.txt
@@ -0,0 +1,5 @@
1title: //div[@id='frnRahmen']/div/div[@id='content']/div[2]/h2
2author: //div[@id='content']/div[1]/div/a
3body: //div[@id='content']/div[2]/span
4strip: //div[@id='commenthead']
5test_url: http://www.planetvita.de/news/10389-psn-store-update-vom-03-april-neue-inhalte-fuer-psvita.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/playboy.com.txt b/inc/3rdparty/site_config/standard/playboy.com.txt
index 07b347a0..92834947 100644..100755
--- a/inc/3rdparty/site_config/standard/playboy.com.txt
+++ b/inc/3rdparty/site_config/standard/playboy.com.txt
@@ -1,6 +1,6 @@
1author: //article//*[@class="author"] 1author: //article//*[@class="author"]
2date: //article//*[@class="publication-date"] 2date: //article//*[@class="publication-date"]
3body: //article 3body: //article
4strip: //article/header 4strip: //article/header
5strip: //article/section 5strip: //article/section
6test_url: http://www.playboy.com/playground/view/playboy-interview-jon-hamm \ No newline at end of file 6test_url: http://www.playboy.com/playground/view/playboy-interview-jon-hamm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/plus.google.com.txt b/inc/3rdparty/site_config/standard/plus.google.com.txt
index 50a5dbf5..4a7ea126 100644..100755
--- a/inc/3rdparty/site_config/standard/plus.google.com.txt
+++ b/inc/3rdparty/site_config/standard/plus.google.com.txt
@@ -1,17 +1,17 @@
1body: //div[@id='contentPane']//div[@class='vg'] 1body: //div[@id='contentPane']//div[@class='vg']
2body: //div[@id='contentPane'] 2body: //div[@id='contentPane']
3 3
4# Grab the author by finding the first profile pic, then backing up a node and getting the title of <a> tag which will be the author hopefully. Sorry can't test this due to parser errors, thanks google :( 4# Grab the author by finding the first profile pic, then backing up a node and getting the title of <a> tag which will be the author hopefully. Sorry can't test this due to parser errors, thanks google :(
5 5
6author: //div[@id='contentPane']//img[contains(@alt, 'profile photo')][1]/../@title 6author: //div[@id='contentPane']//img[contains(@alt, 'profile photo')][1]/../@title
7 7
8 8
9strip: //*[@title="People who +1'd this"]/../.. 9strip: //*[@title="People who +1'd this"]/../..
10strip: //*[contains(@class, 'a-b-f-i-Hg-Uf')] 10strip: //*[contains(@class, 'a-b-f-i-Hg-Uf')]
11strip: //*[@role='menu'] 11strip: //*[@role='menu']
12strip: //img[contains(@alt, 'profile photo')] 12strip: //img[contains(@alt, 'profile photo')]
13strip: //*[@class='a-f-i-Ad'] 13strip: //*[@class='a-f-i-Ad']
14 14
15tidy: no 15tidy: no
16 16
17test_url: http://plus.google.com/u/0/117840649766034848455/posts/FddaP6jeCqp \ No newline at end of file 17test_url: http://plus.google.com/u/0/117840649766034848455/posts/FddaP6jeCqp \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/plzkthxbai.com.txt b/inc/3rdparty/site_config/standard/plzkthxbai.com.txt
index bb9be0a9..ec151b42 100644..100755
--- a/inc/3rdparty/site_config/standard/plzkthxbai.com.txt
+++ b/inc/3rdparty/site_config/standard/plzkthxbai.com.txt
@@ -1,4 +1,4 @@
1title: //h2[@class='jcw-pagetitle' 1title: //h2[@class='jcw-pagetitle'
2date: //p[@class='postinfo'] 2date: //p[@class='postinfo']
3body: //div[@class='contenttext'] 3body: //div[@class='contenttext']
4test_url: http://plzkthxbai.com/blog/2011/06/28/1password-and-internet-security/ \ No newline at end of file 4test_url: http://plzkthxbai.com/blog/2011/06/28/1password-and-internet-security/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pogue.blogs.nytimes.com.txt b/inc/3rdparty/site_config/standard/pogue.blogs.nytimes.com.txt
index 880311d3..65ddba54 100644..100755
--- a/inc/3rdparty/site_config/standard/pogue.blogs.nytimes.com.txt
+++ b/inc/3rdparty/site_config/standard/pogue.blogs.nytimes.com.txt
@@ -1,4 +1,4 @@
1body: //div[@id="content"]/div[1] 1body: //div[@id="content"]/div[1]
2 2
3title: //h1[@class="entry-title"] 3title: //h1[@class="entry-title"]
4test_url: http://pogue.blogs.nytimes.com/2011/05/12/the-future-of-skype/ \ No newline at end of file 4test_url: http://pogue.blogs.nytimes.com/2011/05/12/the-future-of-skype/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/politico.com.txt b/inc/3rdparty/site_config/standard/politico.com.txt
index c5302d1b..d8f5e575 100755
--- a/inc/3rdparty/site_config/standard/politico.com.txt
+++ b/inc/3rdparty/site_config/standard/politico.com.txt
@@ -1,17 +1,13 @@
1title://div[contains(@class, "article")]/h1 1title://div[contains(@class, "article")]/h1
2body://div[contains(@class,"story-text")] 2body://div[contains(@class,"story-text")]
3 3
4# Why doesn't this work? next_page_link://ul[contains(@class,"pagination")]/li/a[@rel="next"] 4# Why doesn't this work? next_page_link://ul[contains(@class,"pagination")]/li/a[@rel="next"]
5 5
6next_page_link://ul[contains(@class,"pagination")]/li[contains(@class, "current")]/following-sibling::node()/a 6next_page_link://ul[contains(@class,"pagination")]/li[contains(@class, "current")]/following-sibling::node()/a
7next_page_link://div[contains(@class,"pagination")]/ol/li[contains(@class, "current")]/following-sibling::node()/a 7date://meta[@name="publish_date"]/@content
8date://meta[@name="publish_date"]/@content 8
9 9strip://div[contains(@class, "breadcrumbs")]
10strip://div[contains(@class, "breadcrumbs")] 10strip://a[contains(@class, "hidden")]
11strip://a[contains(@class, "hidden")] 11strip://div[contains(@class, "story-embed")]
12strip://div[contains(@class, "story-embed")]
13strip://div[contains(@class, "story-text")]//p/a[contains(text(), "Also on POLITICO:")]/.. 12strip://div[contains(@class, "story-text")]//p/a[contains(text(), "Also on POLITICO:")]/..
14strip://div[contains(@class, "story-interrupt")]
15strip://footer[contains(@class, "author-bio")]
16
17test_url: http://www.politico.com/news/stories/0712/78105.html \ No newline at end of file 13test_url: http://www.politico.com/news/stories/0712/78105.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/politifact.com.txt b/inc/3rdparty/site_config/standard/politifact.com.txt
index fd247b5b..65a8fc57 100644..100755
--- a/inc/3rdparty/site_config/standard/politifact.com.txt
+++ b/inc/3rdparty/site_config/standard/politifact.com.txt
@@ -1,4 +1,4 @@
1body: //div[@id="content"] 1body: //div[@id="content"]
2 2
3strip: //div[@class="pfcontentmid"]/div[position()>4]|//div[@class="pfad"] 3strip: //div[@class="pfcontentmid"]/div[position()>4]|//div[@class="pfad"]
4test_url: http://www.politifact.com/truth-o-meter/statements/2011/may/30/barbara-boxer/barbara-boxer-says-medicare-overhead-far-lower-pri/ \ No newline at end of file 4test_url: http://www.politifact.com/truth-o-meter/statements/2011/may/30/barbara-boxer/barbara-boxer-says-medicare-overhead-far-lower-pri/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/politiken.dk.txt b/inc/3rdparty/site_config/standard/politiken.dk.txt
index 8deecbca..b13f8f87 100644..100755
--- a/inc/3rdparty/site_config/standard/politiken.dk.txt
+++ b/inc/3rdparty/site_config/standard/politiken.dk.txt
@@ -1,13 +1,13 @@
1# 21/10-2011: 1# 21/10-2011:
2# Added Author+Date 2# Added Author+Date
3# Remove fakta-boks if found 3# Remove fakta-boks if found
4# Deleted 'Ls ogs...' filter 4# Deleted 'Læs også...' filter
5# - Change in markup caused it to strip too much. 5# - Change in markup caused it to strip too much.
6 6
7author://span[@class='autor-name'] 7author://span[@class='autor-name']
8date:substring-after(//div[@class='art-created'], ' ') 8date:substring-after(//div[@class='art-created'], ' ')
9title: //h1[contains(@class, 'stor-type')] 9title: //h1[contains(@class, 'stor-type')]
10body: //div[@id='art-body'] 10body: //div[@id='art-body']
11strip: //div[@class='art-fakta article-box'] 11strip: //div[@class='art-fakta article-box']
12 12
13test_url: http://politiken.dk/kultur/boger/skonlitteratur_boger/ECE1426386/makabre-tegneserie-zombier-aeder-alt-levende/ \ No newline at end of file 13test_url: http://politiken.dk/kultur/boger/skonlitteratur_boger/ECE1426386/makabre-tegneserie-zombier-aeder-alt-levende/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/polygon.com.txt b/inc/3rdparty/site_config/standard/polygon.com.txt
new file mode 100755
index 00000000..8fe9b1be
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/polygon.com.txt
@@ -0,0 +1,34 @@
1body: //div[@id='article-content']
2body: //article[@id='entry-top']/div[@class='float_wrapper']
3author: //header/p[@class='byline']/em/a
4date: //header/p[@class='byline']/span[@class='timestamp']
5
6strip: //div[@id='article-content']//header
7strip: //label
8
9#photos on left column (delete all)
10strip: //div[@class='big_photo']
11
12#photos on left column (remove extras used for scroll effect)
13#strip: //div[@class='big_photo']/div[./img]
14#strip: //div[@class='big_photo']/img[position()>1]
15
16strip_id_or_class: vox-lazy-load
17strip_id_or_class: social_buttons
18strip_id_or_class: feature_toc
19
20prune: no
21
22find_string: <noscript>
23replace_string: <div>
24find_string: </noscript>
25replace_string: </div>
26
27#find_string: <script
28#replace_string: <div style="display:none"
29#find_string: </script>
30#replace_string: </div>
31
32strip: //div[@class='float_wrapper']/header
33test_url: http://www.polygon.com/2013/4/5/4189028/donkey-kong-country-returns-3d-new-content
34test_url: http://www.polygon.com/features/2013/8/22/4602568/30-years-xbox-360-playstation-3-wii \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/popularmechanics.com.txt b/inc/3rdparty/site_config/standard/popularmechanics.com.txt
index 85b7656b..2582e6fb 100644..100755
--- a/inc/3rdparty/site_config/standard/popularmechanics.com.txt
+++ b/inc/3rdparty/site_config/standard/popularmechanics.com.txt
@@ -1,8 +1,8 @@
1next_page_link: //div[@id='longPagination']/a[@class='next'] 1next_page_link: //div[@id='longPagination']/a[@class='next']
2 2
3title: //div[@id='contentHeader']//h1 3title: //div[@id='contentHeader']//h1
4 4
5body: //div[@id='articleBody'] 5body: //div[@id='articleBody']
6# this is so sad 6# this is so sad
7body: //div[@id='intelliTXT'] 7body: //div[@id='intelliTXT']
8test_url: http://www.popularmechanics.com/technology/aviation/crashes/what-really-happened-aboard-air-france-447-6611877 \ No newline at end of file 8test_url: http://www.popularmechanics.com/technology/aviation/crashes/what-really-happened-aboard-air-france-447-6611877 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/portertech.ca.txt b/inc/3rdparty/site_config/standard/portertech.ca.txt
new file mode 100755
index 00000000..2897cb57
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/portertech.ca.txt
@@ -0,0 +1,3 @@
1author: //*[(@class = "author")]
2date: //*[(@class = "date")]
3test_url: http://portertech.ca/2012/12/10/iac-morning-market/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/positioningmag.com.txt b/inc/3rdparty/site_config/standard/positioningmag.com.txt
index 21cd833c..f8eeb0a3 100644..100755
--- a/inc/3rdparty/site_config/standard/positioningmag.com.txt
+++ b/inc/3rdparty/site_config/standard/positioningmag.com.txt
@@ -1,19 +1,19 @@
1title: //div[@id="newsDetailTitle"] 1title: //div[@id="newsDetailTitle"]
2author: //span[@id="showAuthor"] 2author: //span[@id="showAuthor"]
3date: //span[@id="showRefDate"] 3date: //span[@id="showRefDate"]
4 4
5strip: //div[@id="breadcrumbs"] 5strip: //div[@id="breadcrumbs"]
6strip: //span[@id="PageTitle"] 6strip: //span[@id="PageTitle"]
7strip: //div[@id="newsDetailAuthorPublish"] 7strip: //div[@id="newsDetailAuthorPublish"]
8 8
9strip: //div[@class="leadPix"] 9strip: //div[@class="leadPix"]
10 10
11strip: //span[@id="ctl00_PageTitle"] 11strip: //span[@id="ctl00_PageTitle"]
12strip: //div[@id="newsDetailTitle"] 12strip: //div[@id="newsDetailTitle"]
13convert_double_br_tags:yes 13convert_double_br_tags:yes
14 14
15strip: //div[@id="newsDetailCredential"] 15strip: //div[@id="newsDetailCredential"]
16strip: //div[@id="sidebar2"] 16strip: //div[@id="sidebar2"]
17strip: //div[@id="footer"] 17strip: //div[@id="footer"]
18 18
19test_url: http://www.positioningmag.com/magazine/details.aspx?id=41083 \ No newline at end of file 19test_url: http://www.positioningmag.com/magazine/details.aspx?id=41083 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/post-gazette.com.txt b/inc/3rdparty/site_config/standard/post-gazette.com.txt
index 1ea945a0..baa9d69d 100644..100755
--- a/inc/3rdparty/site_config/standard/post-gazette.com.txt
+++ b/inc/3rdparty/site_config/standard/post-gazette.com.txt
@@ -1,26 +1,26 @@
1title: //div[@class='story_headline'] 1title: //div[@class='story_headline']
2author: substring-before(substring-after(//div[@class='story_byline'],'By'),'/') 2author: substring-before(substring-after(//div[@class='story_byline'],'By'),'/')
3date: //div[@class='story_lastupdate'] 3date: //div[@class='story_lastupdate']
4body: //div[@id='story'] 4body: //div[@id='story']
5strip: //div[@class='story_byline'] 5strip: //div[@class='story_byline']
6strip: //div[@class='story_lastupdate'] 6strip: //div[@class='story_lastupdate']
7strip: //div[@class='story_headline'] 7strip: //div[@class='story_headline']
8strip: //div[@id='abuse'] 8strip: //div[@id='abuse']
9strip: //h2 9strip: //h2
10strip: //div[@class='pagenumbers_wrap'] 10strip: //div[@class='pagenumbers_wrap']
11strip: //ul[@class='pagenumbers'] 11strip: //ul[@class='pagenumbers']
12strip: //div[starts-with(., 'To report inappropriate comments')] 12strip: //div[starts-with(., 'To report inappropriate comments')]
13 13
14strip_id_or_class: story_share 14strip_id_or_class: story_share
15strip_id_or_class: OUTBRAIN 15strip_id_or_class: OUTBRAIN
16strip_id_or_class: story_box_right 16strip_id_or_class: story_box_right
17strip: //div[a[@href='http://www.post-gazette.com/pg/12062/1213990-42.stm']] 17strip: //div[a[@href='http://www.post-gazette.com/pg/12062/1213990-42.stm']]
18strip: //ul[@id='pikame']/li[position()>1] 18strip: //ul[@id='pikame']/li[position()>1]
19 19
20prune: no 20prune: no
21tidy: no 21tidy: no
22 22
23single_page_link: //a[contains(@href, '?p=0')] 23single_page_link: //a[contains(@href, '?p=0')]
24 24
25test_url: http://www.post-gazette.com/stories/sports/penguins/pens-crosby-expects-to-return-thursday-226648/ 25test_url: http://www.post-gazette.com/stories/sports/penguins/pens-crosby-expects-to-return-thursday-226648/
26test_url: http://www.post-gazette.com/stories/sports/pirates/pirates-fork-over-changes-for-fans-at-pnc-park-629789 \ No newline at end of file 26test_url: http://www.post-gazette.com/stories/sports/pirates/pirates-fork-over-changes-for-fans-at-pnc-park-629789 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/posta.com.tr.txt b/inc/3rdparty/site_config/standard/posta.com.tr.txt
index 86cb5d0b..0f01149c 100644..100755
--- a/inc/3rdparty/site_config/standard/posta.com.tr.txt
+++ b/inc/3rdparty/site_config/standard/posta.com.tr.txt
@@ -1,15 +1,15 @@
1title: //div[@id='divAdnetKeyword']/h1 1title: //div[@id='divAdnetKeyword']/h1
2body: //div[@id='_middle_content_bottom'] 2body: //div[@id='_middle_content_bottom']
3 3
4wrap_in(fieldset)://div[@id='_middle_content_bottom_child2']/img 4wrap_in(fieldset)://div[@id='_middle_content_bottom_child2']/img
5 5
6strip: //div[@id='_middle_content_bottom_child1'] 6strip: //div[@id='_middle_content_bottom_child1']
7strip: //div[@id='_middle_content_bottom_child4'] 7strip: //div[@id='_middle_content_bottom_child4']
8strip: //div[@class='cls'] 8strip: //div[@class='cls']
9strip: //div[@class='iphoneBox'] 9strip: //div[@class='iphoneBox']
10strip: //ul[@class='ilgiliHaber'] 10strip: //ul[@class='ilgiliHaber']
11strip: //div[@class='yorumlar'] 11strip: //div[@class='yorumlar']
12strip: //div[@class='kategoriler'] 12strip: //div[@class='kategoriler']
13strip: //div[@class='textSize'] 13strip: //div[@class='textSize']
14strip: //span[@class='tarih'] 14strip: //span[@class='tarih']
15test_url: http://www.posta.com.tr/yasam/teknoloji/HaberDetay/Fedailer_Istanbul_da.htm?ArticleID=101044 \ No newline at end of file 15test_url: http://www.posta.com.tr/yasam/teknoloji/HaberDetay/Fedailer_Istanbul_da.htm?ArticleID=101044 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/prb.org.txt b/inc/3rdparty/site_config/standard/prb.org.txt
index 7f7a5031..3952ea99 100644..100755
--- a/inc/3rdparty/site_config/standard/prb.org.txt
+++ b/inc/3rdparty/site_config/standard/prb.org.txt
@@ -1,8 +1,8 @@
1title: //h1 1title: //h1
2date: /html/head/meta[@name="date"]/@content 2date: /html/head/meta[@name="date"]/@content
3body: //div[@id="featuredlinksbox"] 3body: //div[@id="featuredlinksbox"]
4strip: //div[@class="relatedbox"] 4strip: //div[@class="relatedbox"]
5strip: //h1 5strip: //h1
6strip: //br 6strip: //br
7strip_image_src: "/images" 7strip_image_src: "/images"
8test_url: http://www.prb.org/Journalists/Webcasts/2011/military-families.aspx \ No newline at end of file 8test_url: http://www.prb.org/Journalists/Webcasts/2011/military-families.aspx \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/prog21.dadgum.com.txt b/inc/3rdparty/site_config/standard/prog21.dadgum.com.txt
index 906c27a0..9a49557e 100644..100755
--- a/inc/3rdparty/site_config/standard/prog21.dadgum.com.txt
+++ b/inc/3rdparty/site_config/standard/prog21.dadgum.com.txt
@@ -1,9 +1,9 @@
1title: //h1 1title: //h1
2body: //div[@id='left'] 2body: //div[@id='left']
3strip: //h1 3strip: //h1
4convert_double_br_tags: yes 4convert_double_br_tags: yes
5strip_id_or_class: entry-footer 5strip_id_or_class: entry-footer
6strip: //h1[. = 'Previously']/following::* 6strip: //h1[. = 'Previously']/following::*
7author: string('James Hague') 7author: string('James Hague')
8date: //div[@class = 'entry-footer']/text() 8date: //div[@class = 'entry-footer']/text()
9test_url: http://prog21.dadgum.com/105.html \ No newline at end of file 9test_url: http://prog21.dadgum.com/105.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/prolost.com.txt b/inc/3rdparty/site_config/standard/prolost.com.txt
index cef811d4..82ebf6bb 100644..100755
--- a/inc/3rdparty/site_config/standard/prolost.com.txt
+++ b/inc/3rdparty/site_config/standard/prolost.com.txt
@@ -1,4 +1,4 @@
1body: //div[@class='body'] 1body: //div[@class='body']
2title: //h2[@class='title'] 2title: //h2[@class='title']
3date: //span[@class='posted-on'] 3date: //span[@class='posted-on']
4test_url: http://prolost.com/blog/2011/10/13/real-men-comp-with-film.html \ No newline at end of file 4test_url: http://prolost.com/blog/2011/10/13/real-men-comp-with-film.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/propublica.org.txt b/inc/3rdparty/site_config/standard/propublica.org.txt
index 11e63bd0..d141ac90 100644..100755
--- a/inc/3rdparty/site_config/standard/propublica.org.txt
+++ b/inc/3rdparty/site_config/standard/propublica.org.txt
@@ -1,11 +1,11 @@
1title: //h1[@class="article-title"] 1title: //h1[@class="article-title"]
2author: //meta[@name="author"]/@content 2author: //meta[@name="author"]/@content
3body: //div[@class="article-full"] 3body: //div[@class="article-full"]
4strip_id_or_class: sidebar_inject 4strip_id_or_class: sidebar_inject
5strip_id_or_class: callout 5strip_id_or_class: callout
6strip_id_or_class: content-inset 6strip_id_or_class: content-inset
7strip_id_or_class: byline-block 7strip_id_or_class: byline-block
8strip_id_or_class: photo-caption 8strip_id_or_class: photo-caption
9strip_id_or_class: foot-tools 9strip_id_or_class: foot-tools
10 10
11test_url: http://www.propublica.org/article/pardon-applicants-benefit-from-friends-in-high-places \ No newline at end of file 11test_url: http://www.propublica.org/article/pardon-applicants-benefit-from-friends-in-high-places \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/prosa.dk.txt b/inc/3rdparty/site_config/standard/prosa.dk.txt
index dedd33d3..ba9ce8b8 100644..100755
--- a/inc/3rdparty/site_config/standard/prosa.dk.txt
+++ b/inc/3rdparty/site_config/standard/prosa.dk.txt
@@ -1,4 +1,4 @@
1author: //p[@class='name'] 1author: //p[@class='name']
2date: substring-before(//p[@class='date'], ' | ') 2date: substring-before(//p[@class='date'], ' | ')
3body: //div[@class='news_single_item'] 3body: //div[@class='news_single_item']
4test_url: http://www.prosa.dk/aktuelt/nyhed/artikel/internetaktivisten-uden-maske/ \ No newline at end of file 4test_url: http://www.prosa.dk/aktuelt/nyhed/artikel/internetaktivisten-uden-maske/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/prospectmagazine.co.uk.txt b/inc/3rdparty/site_config/standard/prospectmagazine.co.uk.txt
index 19059c4a..739d1b9e 100644..100755
--- a/inc/3rdparty/site_config/standard/prospectmagazine.co.uk.txt
+++ b/inc/3rdparty/site_config/standard/prospectmagazine.co.uk.txt
@@ -1,26 +1,26 @@
1#basics 1#basics
2author: (//div[contains(@class,'author')])[1] 2author: (//div[contains(@class,'author')])[1]
3date: substring-before(//a[@class='issue'], '&mdash;') 3date: substring-before(//a[@class='issue'], '&mdash;')
4#body://div[@class = 'entry'] 4#body://div[@class = 'entry']
5# use this until move_into support is ready 5# use this until move_into support is ready
6body: //div[@class = 'entry' or @class='standfirst' or @class='lead_image'] 6body: //div[@class = 'entry' or @class='standfirst' or @class='lead_image']
7 7
8#moves header image and tagline into body 8#moves header image and tagline into body
9move_into(//div[@class='entry']/div)://div[@class = 'lead_image'] 9move_into(//div[@class='entry']/div)://div[@class = 'lead_image']
10move_into(//div[@class='entry']/div)://div[@class = 'standfirst'] 10move_into(//div[@class='entry']/div)://div[@class = 'standfirst']
11 11
12 12
13# moves author info to end of text 13# moves author info to end of text
14move_into(//p[strong[string(.) = 'Follow Prospect on Twitter']])://div[@id='sidebar_content']/p/em 14move_into(//p[strong[string(.) = 'Follow Prospect on Twitter']])://div[@id='sidebar_content']/p/em
15 15
16prune: no 16prune: no
17 17
18# strips social links 18# strips social links
19strip_id_or_class:login-status 19strip_id_or_class:login-status
20strip_id_or_class:shareinpost 20strip_id_or_class:shareinpost
21strip_id_or_class:content_subscribe 21strip_id_or_class:content_subscribe
22strip_id_or_class:postinfo 22strip_id_or_class:postinfo
23strip_id_or_class:postutils 23strip_id_or_class:postutils
24strip_id_or_class:comments 24strip_id_or_class:comments
25strip://strong[string(.) = 'Follow Prospect on Twitter'] 25strip://strong[string(.) = 'Follow Prospect on Twitter']
26test_url: http://www.prospectmagazine.co.uk/2011/07/postmodernism-is-dead-va-exhibition-age-of-authenticism/ \ No newline at end of file 26test_url: http://www.prospectmagazine.co.uk/2011/07/postmodernism-is-dead-va-exhibition-age-of-authenticism/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/protothema.gr.txt b/inc/3rdparty/site_config/standard/protothema.gr.txt
new file mode 100755
index 00000000..fae261b0
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/protothema.gr.txt
@@ -0,0 +1,6 @@
1body: //a[contains(@rel, 'mainphotos')] | //div[contains(@class, 'article-content')]
2
3prune: no
4
5test_url: http://www.protothema.gr//politics/article/326464/diamadopoulou-floridis-kaminis-kai-boutaris-se-ekdilosi-ton-europaion-fileleutheron/
6test_url: http://www.protothema.gr/rss/news/politics/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/psychologytoday.com.txt b/inc/3rdparty/site_config/standard/psychologytoday.com.txt
index 3da3cea3..1bb63c29 100644..100755
--- a/inc/3rdparty/site_config/standard/psychologytoday.com.txt
+++ b/inc/3rdparty/site_config/standard/psychologytoday.com.txt
@@ -1,9 +1,9 @@
1title: //div[@class="page-title"]/h1 1title: //div[@class="page-title"]/h1
2author: //a[@title="View Bio"] 2author: //a[@title="View Bio"]
3date: substring-before(substring-after(//span[@class="submitted"], 'Published on '), ' by') 3date: substring-before(substring-after(//span[@class="submitted"], 'Published on '), ' by')
4strip://div[@class="page-title"]/h1 4strip://div[@class="page-title"]/h1
5strip://div[@class="article-abstract"] 5strip://div[@class="article-abstract"]
6strip://div[@class="article-meta"] 6strip://div[@class="article-meta"]
7strip://div[@id="rightColumn"] 7strip://div[@id="rightColumn"]
8strip://div[@id="inline-content-bottom-left"] 8strip://div[@id="inline-content-bottom-left"]
9test_url: http://www.psychologytoday.com/blog/how-happiness/201205/my-quibble-facebook \ No newline at end of file 9test_url: http://www.psychologytoday.com/blog/how-happiness/201205/my-quibble-facebook \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/publications.parliament.uk.txt b/inc/3rdparty/site_config/standard/publications.parliament.uk.txt
index fa099473..8f32d7a4 100644..100755
--- a/inc/3rdparty/site_config/standard/publications.parliament.uk.txt
+++ b/inc/3rdparty/site_config/standard/publications.parliament.uk.txt
@@ -1,4 +1,4 @@
1author: //meta[@name="Author"] 1author: //meta[@name="Author"]
2date: //meta[@name="Date"] 2date: //meta[@name="Date"]
3strip: //h5 3strip: //h5
4test_url: http://www.publications.parliament.uk/pa/ld201011/ldhansrd/text/111109-0003.htm \ No newline at end of file 4test_url: http://www.publications.parliament.uk/pa/ld201011/ldhansrd/text/111109-0003.htm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/publico.pt.txt b/inc/3rdparty/site_config/standard/publico.pt.txt
new file mode 100755
index 00000000..bb6a05e1
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/publico.pt.txt
@@ -0,0 +1,12 @@
1title: //h1[@class="entry-title"]
2author: //span[@class="author"]
3body: //article[@itemtype="http://schema.org/Article"]
4date: //time[@itemprop="dateCreated"]
5
6strip: //header[@class="entry-header single-header"]
7strip: //aside[@class="entry-assets"]
8strip: //div[@class="entry-options entry-options-above group"]
9strip: //div[@class="entry-options entry-options-below group"]
10
11convert_double_br_tags: yes
12test_url: http://www.publico.pt/politica/noticia/passos-diz-que-se-limitacao-de-mandatos-fosse-para-todos-os-concelhos-estaria-claro-na-lei-1577691 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/purpleplanetmedia.com.txt b/inc/3rdparty/site_config/standard/purpleplanetmedia.com.txt
index 126f9e27..0f1392a4 100644..100755
--- a/inc/3rdparty/site_config/standard/purpleplanetmedia.com.txt
+++ b/inc/3rdparty/site_config/standard/purpleplanetmedia.com.txt
@@ -1,4 +1,4 @@
1title: //div[@class='title'] 1title: //div[@class='title']
2body: //div[@class='body'] 2body: //div[@class='body']
3next_page_link: //div[@class='source']/text()[contains(., 'page')]/following-sibling::a 3next_page_link: //div[@class='source']/text()[contains(., 'page')]/following-sibling::a
4test_url: http://purpleplanetmedia.com/eye/inte/ngaiman.php \ No newline at end of file 4test_url: http://purpleplanetmedia.com/eye/inte/ngaiman.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/qctimes.com.txt b/inc/3rdparty/site_config/standard/qctimes.com.txt
new file mode 100755
index 00000000..3c3edfeb
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/qctimes.com.txt
@@ -0,0 +1,5 @@
1# This site seems to work OK in the web view, but only occasionally in the Instapaper app itself.
2
3body: //div[@class='entry-content']
4author: //span[@class='byline']
5test_url: http://qctimes.com/news/local/woman-faces-perjury-charges-in-meth-case/article_83f4c470-956a-11e2-a921-001a4bcf887a.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/quantumdiaries.org.txt b/inc/3rdparty/site_config/standard/quantumdiaries.org.txt
index a366c1b3..c17fb312 100644..100755
--- a/inc/3rdparty/site_config/standard/quantumdiaries.org.txt
+++ b/inc/3rdparty/site_config/standard/quantumdiaries.org.txt
@@ -1,14 +1,14 @@
1title: //div[contains(@class, "hentry")]/h3 1title: //div[contains(@class, "hentry")]/h3
2 2
3author: //div[contains(@class, "hentry")]/h2[contains(@class, "author_bio")] 3author: //div[contains(@class, "hentry")]/h2[contains(@class, "author_bio")]
4 4
5date: substring-before(substring-after(normalize-space(//p[contains(@class, "postmetadata")]/small), "was posted on "), " and is filed under") 5date: substring-before(substring-after(normalize-space(//p[contains(@class, "postmetadata")]/small), "was posted on "), " and is filed under")
6 6
7body: //div[contains(@class, "entry")] 7body: //div[contains(@class, "entry")]
8 8
9strip_id_or_class: addtoany_share_save_container 9strip_id_or_class: addtoany_share_save_container
10strip_id_or_class: postmetadata 10strip_id_or_class: postmetadata
11strip_id_or_class: author_bio 11strip_id_or_class: author_bio
12strip_id_or_class: author_bio_2 12strip_id_or_class: author_bio_2
13strip: //div[contains(@class, "hentry")]/h3 13strip: //div[contains(@class, "hentry")]/h3
14test_url: http://www.quantumdiaries.org/2011/10/25/piling-up/ \ No newline at end of file 14test_url: http://www.quantumdiaries.org/2011/10/25/piling-up/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/queerty.com.txt b/inc/3rdparty/site_config/standard/queerty.com.txt
index 655f8b80..fc7ab37f 100644..100755
--- a/inc/3rdparty/site_config/standard/queerty.com.txt
+++ b/inc/3rdparty/site_config/standard/queerty.com.txt
@@ -1,3 +1,3 @@
1body: //div[@class='copy'] 1body: //div[@class='copy']
2title: //h1[@class='hed'] 2title: //h1[@class='hed']
3test_url: http://www.queerty.com/rawhide-radicals-meet-five-heroes-from-the-leather-community-20120302/ \ No newline at end of file 3test_url: http://www.queerty.com/rawhide-radicals-meet-five-heroes-from-the-leather-community-20120302/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/quepasa.cl.txt b/inc/3rdparty/site_config/standard/quepasa.cl.txt
index fae4e6a3..fb09a8f3 100644..100755
--- a/inc/3rdparty/site_config/standard/quepasa.cl.txt
+++ b/inc/3rdparty/site_config/standard/quepasa.cl.txt
@@ -1,6 +1,6 @@
1title: //h1 1title: //h1
2 2
3body: //div[@class="cuerpoArticulo"] 3body: //div[@class="cuerpoArticulo"]
4 4
5 5
6test_url: http://www.quepasa.cl/magazine/articulo/print.html?id=5299 \ No newline at end of file 6test_url: http://www.quepasa.cl/magazine/articulo/print.html?id=5299 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/quora.com.txt b/inc/3rdparty/site_config/standard/quora.com.txt
index 3d34f2f8..732d12d7 100644..100755
--- a/inc/3rdparty/site_config/standard/quora.com.txt
+++ b/inc/3rdparty/site_config/standard/quora.com.txt
@@ -1,17 +1,17 @@
1tidy: no 1tidy: no
2prune: no 2prune: no
3body: //div[contains(@class, 'main_col')] 3body: //div[contains(@class, 'main_col')]
4title: //h1 4title: //h1
5 5
6strip_id_or_class: hidden 6strip_id_or_class: hidden
7strip_id_or_class: item_action_bar 7strip_id_or_class: item_action_bar
8strip_id_or_class: answer_voters 8strip_id_or_class: answer_voters
9strip_id_or_class: question_topics 9strip_id_or_class: question_topics
10strip_id_or_class: answer_header_text 10strip_id_or_class: answer_header_text
11strip_id_or_class: editor_link 11strip_id_or_class: editor_link
12strip_id_or_class: view_tag 12strip_id_or_class: view_tag
13strip_id_or_class: include_details 13strip_id_or_class: include_details
14strip_id_or_class: sig_edit 14strip_id_or_class: sig_edit
15strip_id_or_class: profile_photo_img 15strip_id_or_class: profile_photo_img
16 16
17test_url: http://www.quora.com/What-everyday-habit-do-you-wish-you-had-developed-earlier-in-life \ No newline at end of file 17test_url: http://www.quora.com/What-everyday-habit-do-you-wish-you-had-developed-earlier-in-life \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/racjonalista.pl.txt b/inc/3rdparty/site_config/standard/racjonalista.pl.txt
new file mode 100755
index 00000000..19c719d4
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/racjonalista.pl.txt
@@ -0,0 +1,5 @@
1author: /html/body/center/b
2date: /html/body/table/tr[2]/td/i
3single_page_link: //*[@id='oTxt']/table[3]/tr[2]/td/a[1]
4
5test_url: http://www.racjonalista.pl/kk.php/s,7214/q,Geneza.szubrawstwa \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/radar.oreilly.com.txt b/inc/3rdparty/site_config/standard/radar.oreilly.com.txt
index 99ab4bb1..fa66b815 100644..100755
--- a/inc/3rdparty/site_config/standard/radar.oreilly.com.txt
+++ b/inc/3rdparty/site_config/standard/radar.oreilly.com.txt
@@ -1,3 +1,3 @@
1date://span[@class='date'] 1date://span[@class='date']
2body://div[@class='entry-body'] 2body://div[@class='entry-body']
3test_url: http://radar.oreilly.com/2012/01/genome-cloud-digital-humanities-hadoop-world-strata.html \ No newline at end of file 3test_url: http://radar.oreilly.com/2012/01/genome-cloud-digital-humanities-hadoop-world-strata.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/radionz.co.nz.txt b/inc/3rdparty/site_config/standard/radionz.co.nz.txt
index e2617dc5..2496ddab 100644..100755
--- a/inc/3rdparty/site_config/standard/radionz.co.nz.txt
+++ b/inc/3rdparty/site_config/standard/radionz.co.nz.txt
@@ -1,3 +1,3 @@
1body: //div[@class='body'] 1body: //div[@class='body']
2title: //div[@class='newsstory']/h2 2title: //div[@class='newsstory']/h2
3test_url: http://www.radionz.co.nz/news/stories/2010/07/18/12481029a86d \ No newline at end of file 3test_url: http://www.radionz.co.nz/news/stories/2010/07/18/12481029a86d \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/randsinrepose.com.txt b/inc/3rdparty/site_config/standard/randsinrepose.com.txt
index f0c91c51..6970a744 100644..100755
--- a/inc/3rdparty/site_config/standard/randsinrepose.com.txt
+++ b/inc/3rdparty/site_config/standard/randsinrepose.com.txt
@@ -1,11 +1,11 @@
1title: //div[@id='center-col']/h4 1title: //div[@id='center-col']/h4
2author: substring-before(//title,'In') 2author: substring-before(//title,'In')
3date: substring-after(//div[@class='commenttext']/span,'#') 3date: substring-after(//div[@class='commenttext']/span,'#')
4body: //div[@id='center-col'] 4body: //div[@id='center-col']
5strip: //div[@id='center-col']/h4 5strip: //div[@id='center-col']/h4
6strip: //div[@class='graytext'] 6strip: //div[@class='graytext']
7 7
8# Anthony Perez-Sanz 2012.3.14 8# Anthony Perez-Sanz 2012.3.14
9# Removed long gif from the end 9# Removed long gif from the end
10strip: //img[@src='http://www.randsinrepose.com/spreader.gif'] 10strip: //img[@src='http://www.randsinrepose.com/spreader.gif']
11test_url: http://www.randsinrepose.com/archives/2012/03/13/hacking_is_important.html \ No newline at end of file 11test_url: http://www.randsinrepose.com/archives/2012/03/13/hacking_is_important.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/readability.com.txt b/inc/3rdparty/site_config/standard/readability.com.txt
index 80337291..2d5aba76 100644..100755
--- a/inc/3rdparty/site_config/standard/readability.com.txt
+++ b/inc/3rdparty/site_config/standard/readability.com.txt
@@ -1,3 +1,3 @@
1single_page_link: //link[@rel='canonical']/@href 1single_page_link: //link[@rel='canonical']/@href
2 2
3test_url: http://www.readability.com/read?url=http://feeds.gawker.com/~r/lifehacker/full/~3/jaxAjSay_Rw/add-a-rain-gutter-to-a-picnic-table-for-a-built+in-drink-cooler \ No newline at end of file 3test_url: http://www.readability.com/read?url=http://feeds.gawker.com/~r/lifehacker/full/~3/jaxAjSay_Rw/add-a-rain-gutter-to-a-picnic-table-for-a-built+in-drink-cooler \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/readwriteweb.com.txt b/inc/3rdparty/site_config/standard/readwriteweb.com.txt
index ff799aa0..e2aabda9 100644..100755
--- a/inc/3rdparty/site_config/standard/readwriteweb.com.txt
+++ b/inc/3rdparty/site_config/standard/readwriteweb.com.txt
@@ -1,8 +1,8 @@
1title: //h1[@class="titlelink"] 1title: //h1[@class="titlelink"]
2date: //span[@class="timestamp"]/@data-published 2date: //span[@class="timestamp"]/@data-published
3body: //div[@class="asset-content"] 3body: //div[@class="asset-content"]
4strip_id_or_class: related-entries 4strip_id_or_class: related-entries
5strip_id_or_class: like-and-retweet 5strip_id_or_class: like-and-retweet
6 6
7author: //div[@id="submeta"]/a[1] 7author: //div[@id="submeta"]/a[1]
8test_url: http://www.readwriteweb.com/archives/why_facebook_terrifies_google.php \ No newline at end of file 8test_url: http://www.readwriteweb.com/archives/why_facebook_terrifies_google.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/real.gr.txt b/inc/3rdparty/site_config/standard/real.gr.txt
index fe5ab672..1a33610d 100644..100755
--- a/inc/3rdparty/site_config/standard/real.gr.txt
+++ b/inc/3rdparty/site_config/standard/real.gr.txt
@@ -1,3 +1,3 @@
1body: //div[@id='_ctl12__ctl0_Article'] 1body: //div[@id='_ctl12__ctl0_Article']
2prune: no 2prune: no
3autodetect_on_failure: no \ No newline at end of file 3autodetect_on_failure: no \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/recipe.com.txt b/inc/3rdparty/site_config/standard/recipe.com.txt
index 8c8f0e0c..a01aaef4 100644..100755
--- a/inc/3rdparty/site_config/standard/recipe.com.txt
+++ b/inc/3rdparty/site_config/standard/recipe.com.txt
@@ -1,10 +1,10 @@
1body: //div[@class='recipedetailsleft' or @id='recipePrepAndServe' or @id='recipeingredients'] 1body: //div[@class='recipedetailsleft' or @id='recipePrepAndServe' or @id='recipeingredients']
2 2
3strip_id_or_class: location 3strip_id_or_class: location
4strip_id_or_class: savings 4strip_id_or_class: savings
5strip_id_or_class: recipeDetailDescButton 5strip_id_or_class: recipeDetailDescButton
6 6
7prune: no 7prune: no
8tidy: no 8tidy: no
9 9
10test_url: http://www.recipe.com/avocado-basil-pasta/ \ No newline at end of file 10test_url: http://www.recipe.com/avocado-basil-pasta/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/red-hot-girls.com.txt b/inc/3rdparty/site_config/standard/red-hot-girls.com.txt
index 3ae959b1..0403ee86 100644..100755
--- a/inc/3rdparty/site_config/standard/red-hot-girls.com.txt
+++ b/inc/3rdparty/site_config/standard/red-hot-girls.com.txt
@@ -1,5 +1,5 @@
1body: //div[@class='short-text' or starts-with(@id, 'news-id-')] 1body: //div[@class='short-text' or starts-with(@id, 'news-id-')]
2prune: no 2prune: no
3tidy: no 3tidy: no
4 4
5test_url: http://red-hot-girls.com/2011/06/10/the_red_hot_natalia_maria_53_pics.html \ No newline at end of file 5test_url: http://red-hot-girls.com/2011/06/10/the_red_hot_natalia_maria_53_pics.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/reddit.com.txt b/inc/3rdparty/site_config/standard/reddit.com.txt
index 58ca9ece..8871f564 100644..100755
--- a/inc/3rdparty/site_config/standard/reddit.com.txt
+++ b/inc/3rdparty/site_config/standard/reddit.com.txt
@@ -1,16 +1,20 @@
1# This setup grabs the text from a Reddit self post. It ignores all comments etc. 1# This setup grabs the text from a Reddit self post. It ignores all comments etc.
2 2
3title: //p[@class="title"]/a/text() 3title: //p[@class="title"]/a/text()
4 4
5author: //p[@class="tagline"]/a 5author: //p[@class="tagline"]/a
6 6
7# this doesn't work for some reason...? 7# this doesn't work for some reason...?
8date: //p[@class="tagline"]//@datetime 8date: //p[@class="tagline"]//@datetime
9 9
10body: //div[@class="expando"]//div[@class="usertext-body"] 10body: //div[@class="expando"]//div[@class="usertext-body"]
11 11
12strip_id_or_class: tagline 12strip_id_or_class: tagline
13strip_id_or_class: unvotable-message 13strip_id_or_class: unvotable-message
14strip_id_or_class: buttons 14strip_id_or_class: buttons
15 15
16test_url: http://www.reddit.com/r/truegaming/comments/wfe7r/i_wrote_about_the_problems_i_honestly_feel_that/ \ No newline at end of file 16# follow the posted link (unless it's a self post - relative URL, no http://)
17single_page_link: //p[@class="title"]/a[contains(@href, 'http://')]
18
19test_url: http://www.reddit.com/r/truegaming/comments/wfe7r/i_wrote_about_the_problems_i_honestly_feel_that/
20test_url: http://www.reddit.com/r/worldnews/comments/1as37r/twelve_north_korean_soldiers_attempting_to_defect/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/redmondpie.com.txt b/inc/3rdparty/site_config/standard/redmondpie.com.txt
index 12a96187..66cc1707 100644..100755
--- a/inc/3rdparty/site_config/standard/redmondpie.com.txt
+++ b/inc/3rdparty/site_config/standard/redmondpie.com.txt
@@ -1,13 +1,13 @@
1title: //div[@class='posthead']//h2 1title: //div[@class='posthead']//h2
2body: //div[contains(@class, 'postcontent') or @class='posthead'] 2body: //div[contains(@class, 'postcontent') or @class='posthead']
3author: //div[@class='posthead']//a[@rel='author'] 3author: //div[@class='posthead']//a[@rel='author']
4 4
5strip: //div[@class='posthead']//h2 5strip: //div[@class='posthead']//h2
6replace_string(>Advertisements</div>): ></div> 6replace_string(>Advertisements</div>): ></div>
7replace_string(<p>You can follow us on): <p style="display:none;"> 7replace_string(<p>You can follow us on): <p style="display:none;">
8strip_id_or_class: likeThisPost 8strip_id_or_class: likeThisPost
9 9
10prune: no 10prune: no
11tidy: no 11tidy: no
12 12
13test_url: http://www.redmondpie.com/how-to-play-music-directly-from-home-screen-folders-on-iphone/ \ No newline at end of file 13test_url: http://www.redmondpie.com/how-to-play-music-directly-from-home-screen-folders-on-iphone/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/redtape.msnbc.msn.com.txt b/inc/3rdparty/site_config/standard/redtape.msnbc.msn.com.txt
index 4f195a06..8541a0d4 100644..100755
--- a/inc/3rdparty/site_config/standard/redtape.msnbc.msn.com.txt
+++ b/inc/3rdparty/site_config/standard/redtape.msnbc.msn.com.txt
@@ -1,20 +1,20 @@
1# Think there might be something up with your parser that it strips out 'print' from the title :) 1# Think there might be something up with your parser that it strips out 'print' from the title :)
2 2
3title: //meta[@name='title']/@content 3title: //meta[@name='title']/@content
4author: //meta[@name='author']/@content 4author: //meta[@name='author']/@content
5date: //meta[@name='date']/@content 5date: //meta[@name='date']/@content
6 6
7body: //div[@class='articleText'] 7body: //div[@class='articleText']
8 8
9strip: //div[contains(@class, 'day')] 9strip: //div[contains(@class, 'day')]
10strip: //div[contains(@class, 'month')] 10strip: //div[contains(@class, 'month')]
11strip: //div[contains(@class, 'year')] 11strip: //div[contains(@class, 'year')]
12strip: //div[contains(@class, 'time')] 12strip: //div[contains(@class, 'time')]
13strip: //h1[@class='gl_headline'] 13strip: //h1[@class='gl_headline']
14strip: //div[@class='byline'] 14strip: //div[@class='byline']
15strip: //div[@id='left_ear'] 15strip: //div[@id='left_ear']
16strip: //div[@id='right_ear'] 16strip: //div[@id='right_ear']
17strip: //div[contains(@class, 'PopularPosts')] 17strip: //div[contains(@class, 'PopularPosts')]
18strip ://div[@class='discuss_page_break'] 18strip ://div[@class='discuss_page_break']
19strip ://div[contains(@class, 'p-content_TagList')] 19strip ://div[contains(@class, 'p-content_TagList')]
20test_url: http://redtape.msnbc.msn.com/_news/2011/09/28/8020661-sprint-raises-fee-but-wont-free-users-from-two-year-contracts?preview=true \ No newline at end of file 20test_url: http://redtape.msnbc.msn.com/_news/2011/09/28/8020661-sprint-raises-fee-but-wont-free-users-from-two-year-contracts?preview=true \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/reflets.info.txt b/inc/3rdparty/site_config/standard/reflets.info.txt
index 4a9fab67..98a2bbfc 100644..100755
--- a/inc/3rdparty/site_config/standard/reflets.info.txt
+++ b/inc/3rdparty/site_config/standard/reflets.info.txt
@@ -1,5 +1,5 @@
1body://div[@class='storycontent'] 1body://div[@class='storycontent']
2date://div[@class='date'] 2date://div[@class='date']
3strip://li[@class='sharing_label'] 3strip://li[@class='sharing_label']
4strip://a[@class='FlattrButton'] 4strip://a[@class='FlattrButton']
5test_url: http://reflets.info/orange-nokia-siemens-deep-packet-inspection/ \ No newline at end of file 5test_url: http://reflets.info/orange-nokia-siemens-deep-packet-inspection/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/renenekuda.cz.txt b/inc/3rdparty/site_config/standard/renenekuda.cz.txt
index 0b3dee1d..a5361fd0 100644..100755
--- a/inc/3rdparty/site_config/standard/renenekuda.cz.txt
+++ b/inc/3rdparty/site_config/standard/renenekuda.cz.txt
@@ -1,3 +1,3 @@
1title: //*[@class='entry-title'] 1title: //*[@class='entry-title']
2body: //div[@class='entry-content'] 2body: //div[@class='entry-content']
3test_url: http://www.renenekuda.cz/recept-na-produktivitu/ \ No newline at end of file 3test_url: http://www.renenekuda.cz/recept-na-produktivitu/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/resume.se.txt b/inc/3rdparty/site_config/standard/resume.se.txt
new file mode 100755
index 00000000..17122a9b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/resume.se.txt
@@ -0,0 +1,9 @@
1date: //meta[@name='bi3dPubDate']/@content
2body: //div[contains(@class, 'articleBody')]
3
4prune: no
5
6test_url: http://www.resume.se/nyheter/media/2013/09/18/kvallspress-och-tv-slass-om-playtittarna-men-youtube-ohotat-storst/
7test_url: http://www.resume.se/nyheter/media/2013/09/18/cecilia-blankens-lamnar-mama-for-konkurrent/
8test_url: http://www.resume.se/nyheter/reklam/2013/09/18/ravelli-trodde-jag-var-med-i-blasningen/
9test_url: http://www.resume.se/rss-nyheter \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/retrieverweekly.com.txt b/inc/3rdparty/site_config/standard/retrieverweekly.com.txt
index 1264ee3f..a0a23940 100644..100755
--- a/inc/3rdparty/site_config/standard/retrieverweekly.com.txt
+++ b/inc/3rdparty/site_config/standard/retrieverweekly.com.txt
@@ -1,6 +1,6 @@
1single_page_link://a[contains(@href, 'print')] 1single_page_link://a[contains(@href, 'print')]
2 2
3# Grab metadata from the "printer-friendly" page, after specifying single_page_link 3# Grab metadata from the "printer-friendly" page, after specifying single_page_link
4title://h2 4title://h2
5date://cite 5date://cite
6test_url: http://www.retrieverweekly.com/?cmd=displaystory&story_id=7548&format=html \ No newline at end of file 6test_url: http://www.retrieverweekly.com/?cmd=displaystory&story_id=7548&format=html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/reuters.com.txt b/inc/3rdparty/site_config/standard/reuters.com.txt
index c5c94a4f..7411e62b 100644..100755
--- a/inc/3rdparty/site_config/standard/reuters.com.txt
+++ b/inc/3rdparty/site_config/standard/reuters.com.txt
@@ -1,10 +1,10 @@
1title: //h1[@class='headline3'] 1title: //h1[@class='headline3']
2author: substring-after(//p[@class="byline"], 'By ') 2author: substring-after(//p[@class="byline"], 'By ')
3date: //meta[@name="REVISION_DATE"]/@content 3date: //meta[@name="REVISION_DATE"]/@content
4body: //div[@id='articleImage' or @id='frame_fd1fade'] | //span[@id='articleText'] | //div[@class='pageNavigation'] 4body: //div[@id='articleImage' or @id='frame_fd1fade'] | //span[@id='articleText'] | //div[@class='pageNavigation']
5strip: //li[@class='next'] 5strip: //li[@class='next']
6strip: //span[@class='articleLocation'] 6strip: //span[@class='articleLocation']
7prune: no 7prune: no
8tidy: no 8tidy: no
9 9
10test_url: http://www.reuters.com/article/2011/04/08/us-ivorycoast-killings-idUSTRE73732A20110408 \ No newline at end of file 10test_url: http://www.reuters.com/article/2011/04/08/us-ivorycoast-killings-idUSTRE73732A20110408 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/revistapiaui.estadao.com.br.txt b/inc/3rdparty/site_config/standard/revistapiaui.estadao.com.br.txt
index dbe42932..30e627dc 100644..100755
--- a/inc/3rdparty/site_config/standard/revistapiaui.estadao.com.br.txt
+++ b/inc/3rdparty/site_config/standard/revistapiaui.estadao.com.br.txt
@@ -1,10 +1,10 @@
1title: //div[@class="article_header"]/h3 1title: //div[@class="article_header"]/h3
2author: //div[@class="autor"]/p/* 2author: //div[@class="autor"]/p/*
3date: substring-after(substring-after(//div[@class="flt-left"],"> "), "> ") 3date: substring-after(substring-after(//div[@class="flt-left"],"> "), "> ")
4 4
5move_into(//div[@class="new_article"]): //div[@class="img_article"]/img 5move_into(//div[@class="new_article"]): //div[@class="img_article"]/img
6 6
7body: //div[@class="article_content"] 7body: //div[@class="article_content"]
8convert_double_br_tags: yes 8convert_double_br_tags: yes
9 9
10test_url: http://revistapiaui.estadao.com.br/edicao-68/questoes-latino-americanas/filhos-da-guerra-suja \ No newline at end of file 10test_url: http://revistapiaui.estadao.com.br/edicao-68/questoes-latino-americanas/filhos-da-guerra-suja \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/rezeptwelt.de.txt b/inc/3rdparty/site_config/standard/rezeptwelt.de.txt
new file mode 100644
index 00000000..2093573b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/rezeptwelt.de.txt
@@ -0,0 +1,5 @@
1body: //div[@class='step-content'] | //div[@class='global-active ingredients-box']
2title: //div[@class='step-1-container']
3
4tidy: no
5test_url: http://www.rezeptwelt.de/backen-herzhaft-rezepte/w%C3%BCrstchen-schlangen/530372 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/richardmuscat.wordpress.com.txt b/inc/3rdparty/site_config/standard/richardmuscat.wordpress.com.txt
index 904a11dd..b0ee92dc 100644..100755
--- a/inc/3rdparty/site_config/standard/richardmuscat.wordpress.com.txt
+++ b/inc/3rdparty/site_config/standard/richardmuscat.wordpress.com.txt
@@ -1,5 +1,5 @@
1body: //div[@id="post"] 1body: //div[@id="post"]
2strip: //div[@id="author-description"] 2strip: //div[@id="author-description"]
3date: //span[@class="entry-date"] 3date: //span[@class="entry-date"]
4author: //span[@class="author vcard"] 4author: //span[@class="author vcard"]
5test_url: http://richardmuscat.wordpress.com/2011/06/20/the-price-of-free/?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+TheBrooksReview+%28The+Brooks+Review%29 \ No newline at end of file 5test_url: http://richardmuscat.wordpress.com/2011/06/20/the-price-of-free/?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+TheBrooksReview+%28The+Brooks+Review%29 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ritemail.blogspot.com.txt b/inc/3rdparty/site_config/standard/ritemail.blogspot.com.txt
index 82cfaf27..ed72915c 100644..100755
--- a/inc/3rdparty/site_config/standard/ritemail.blogspot.com.txt
+++ b/inc/3rdparty/site_config/standard/ritemail.blogspot.com.txt
@@ -1,5 +1,5 @@
1body: //div[@class='post-body entry-content'] 1body: //div[@class='post-body entry-content']
2strip: //div[@id='lws_0'] 2strip: //div[@id='lws_0']
3prune: no 3prune: no
4 4
5test_url: http://ritemail.blogspot.com/2011/06/hayden-panettiere-candids-in-los.html \ No newline at end of file 5test_url: http://ritemail.blogspot.com/2011/06/hayden-panettiere-candids-in-los.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ritholtz.com.txt b/inc/3rdparty/site_config/standard/ritholtz.com.txt
new file mode 100755
index 00000000..d598479e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/ritholtz.com.txt
@@ -0,0 +1,5 @@
1title: //div[@class='post']/h2
2author: substring-before(substring-after(//div[@class='alignright']/small, 'By '),'-')
3date: substring-after(//div[@class='alignright']/small, '-')
4strip: //div[@class='alignleft']
5test_url: http://www.ritholtz.com/blog/2012/09/situational-awareness/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/robertsspaceindustries.com.txt b/inc/3rdparty/site_config/standard/robertsspaceindustries.com.txt
new file mode 100755
index 00000000..b0b90fb7
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/robertsspaceindustries.com.txt
@@ -0,0 +1,4 @@
1strip_id_or_class: 'sharedaddy'
2strip_id_or_class: 'respond'
3strip_id_or_class: 'meta'
4test_url: http://www.robertsspaceindustries.com/news-update-ai-pilots/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/robots.thoughtbot.com.txt b/inc/3rdparty/site_config/standard/robots.thoughtbot.com.txt
new file mode 100755
index 00000000..da5b7bd8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/robots.thoughtbot.com.txt
@@ -0,0 +1,5 @@
1body: //section[@class='post text']
2title: //h1[@class='title']
3date: //p[@class='post-date']
4strip: //section[@class='meta-info']
5test_url: http://robots.thoughtbot.com/post/32455387133/four-phase-test \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/rockpapershotgun.com.txt b/inc/3rdparty/site_config/standard/rockpapershotgun.com.txt
index 3035527c..f8c9541f 100644..100755
--- a/inc/3rdparty/site_config/standard/rockpapershotgun.com.txt
+++ b/inc/3rdparty/site_config/standard/rockpapershotgun.com.txt
@@ -1,8 +1,8 @@
1title: //h2 1title: //h2
2 2
3strip: //div[ contains(@class, 'respond') ] | //h2 | //h1 3strip: //div[ contains(@class, 'respond') ] | //h2 | //h1
4 4
5date: substring-after(//p[@class='info'], ' on ') 5date: substring-after(//p[@class='info'], ' on ')
6 6
7author: //p[@class='info']//a 7author: //p[@class='info']//a
8test_url: http://www.rockpapershotgun.com/2010/07/29/rps-half-verdict-starcraft-2/ \ No newline at end of file 8test_url: http://www.rockpapershotgun.com/2010/07/29/rps-half-verdict-starcraft-2/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/rodrigo.sharpcube.com.txt b/inc/3rdparty/site_config/standard/rodrigo.sharpcube.com.txt
index abe70351..eef8b11c 100644..100755
--- a/inc/3rdparty/site_config/standard/rodrigo.sharpcube.com.txt
+++ b/inc/3rdparty/site_config/standard/rodrigo.sharpcube.com.txt
@@ -1,7 +1,7 @@
1author: //article/header/span[@class='author'] 1author: //article/header/span[@class='author']
2title://article/header/h1 2title://article/header/h1
3body: //article 3body: //article
4strip: //article/header 4strip: //article/header
5strip: //article/p[@class='metadata'] 5strip: //article/p[@class='metadata']
6footnotes: yes 6footnotes: yes
7test_url: http://rodrigo.sharpcube.com/2010/06/20/using-and-sharing-a-vpn-connection-on-your-mac/ \ No newline at end of file 7test_url: http://rodrigo.sharpcube.com/2010/06/20/using-and-sharing-a-vpn-connection-on-your-mac/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/rogerebert.com.txt b/inc/3rdparty/site_config/standard/rogerebert.com.txt
index 26792330..da215109 100644..100755
--- a/inc/3rdparty/site_config/standard/rogerebert.com.txt
+++ b/inc/3rdparty/site_config/standard/rogerebert.com.txt
@@ -1,8 +1,8 @@
1title: substring-before(//title,':') 1title: substring-before(//title,':')
2author: substring-after(substring-before(//div[@class='text']/b,'/'),'BY') 2author: substring-after(substring-before(//div[@class='text']/b,'/'),'BY')
3 3
4body: //div[@class='text'] 4body: //div[@class='text']
5 5
6strip: //a[contains(@href,'printart')] 6strip: //a[contains(@href,'printart')]
7strip_id_or_class: enlarge_photo 7strip_id_or_class: enlarge_photo
8test_url: http://rogerebert.com/apps/pbcs.dll/article?AID=/20120411/REVIEWS/120419998/1005/GLOSSARY \ No newline at end of file 8test_url: http://rogerebert.com/apps/pbcs.dll/article?AID=/20120411/REVIEWS/120419998/1005/GLOSSARY \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/rolfinjapan.nl.txt b/inc/3rdparty/site_config/standard/rolfinjapan.nl.txt
index d618c23f..2365c42a 100644..100755
--- a/inc/3rdparty/site_config/standard/rolfinjapan.nl.txt
+++ b/inc/3rdparty/site_config/standard/rolfinjapan.nl.txt
@@ -1,6 +1,6 @@
1body: //div[contains(@class, 'inhoud')] 1body: //div[contains(@class, 'inhoud')]
2date: //span[@class ='published'] 2date: //span[@class ='published']
3author: //span[@class ='author'] 3author: //span[@class ='author']
4strip: //div[@class = 'grid_2'] 4strip: //div[@class = 'grid_2']
5strip: //div[@class = 'block-citation-text'] 5strip: //div[@class = 'block-citation-text']
6test_url: http://www.rolfinjapan.nl/2011/06/duizend-kraanvogels/ \ No newline at end of file 6test_url: http://www.rolfinjapan.nl/2011/06/duizend-kraanvogels/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/rollingstone.com.txt b/inc/3rdparty/site_config/standard/rollingstone.com.txt
index 9a10a69e..9a10a69e 100644..100755
--- a/inc/3rdparty/site_config/standard/rollingstone.com.txt
+++ b/inc/3rdparty/site_config/standard/rollingstone.com.txt
diff --git a/inc/3rdparty/site_config/standard/rottentomatoes.com.txt b/inc/3rdparty/site_config/standard/rottentomatoes.com.txt
index b5b29fe4..ef327691 100644..100755
--- a/inc/3rdparty/site_config/standard/rottentomatoes.com.txt
+++ b/inc/3rdparty/site_config/standard/rottentomatoes.com.txt
@@ -1,11 +1,11 @@
1body: //div[@class='movie_content_area'] 1body: //div[@class='movie_content_area']
2strip_id_or_class: tomatometer_bar_help 2strip_id_or_class: tomatometer_bar_help
3strip_id_or_class: critic-links 3strip_id_or_class: critic-links
4strip_id_or_class: top-critics-numbers 4strip_id_or_class: top-critics-numbers
5strip_id_or_class: fan_side 5strip_id_or_class: fan_side
6strip_id_or_class: fblike 6strip_id_or_class: fblike
7strip_id_or_class: rating_widget 7strip_id_or_class: rating_widget
8strip_id_or_class: friend_reviews 8strip_id_or_class: friend_reviews
9prune: no 9prune: no
10 10
11test_url: http://www.rottentomatoes.com/m/thor/ \ No newline at end of file 11test_url: http://www.rottentomatoes.com/m/thor/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/roughtype.com.txt b/inc/3rdparty/site_config/standard/roughtype.com.txt
index f2f00392..a012a67d 100644..100755
--- a/inc/3rdparty/site_config/standard/roughtype.com.txt
+++ b/inc/3rdparty/site_config/standard/roughtype.com.txt
@@ -1,5 +1,5 @@
1body: //div[@class='content'] 1body: //div[@class='content']
2strip: //p[@class='postmeta']/following::* 2strip: //p[@class='postmeta']/following::*
3strip: //p[@class='postmeta'] 3strip: //p[@class='postmeta']
4strip: //p[@align='left'] 4strip: //p[@align='left']
5test_url: http://www.roughtype.com/archives/2012/01/power_to_the_da.php \ No newline at end of file 5test_url: http://www.roughtype.com/archives/2012/01/power_to_the_da.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/roy.gbiv.com.txt b/inc/3rdparty/site_config/standard/roy.gbiv.com.txt
index 6ff03de8..6ff03de8 100644..100755
--- a/inc/3rdparty/site_config/standard/roy.gbiv.com.txt
+++ b/inc/3rdparty/site_config/standard/roy.gbiv.com.txt
diff --git a/inc/3rdparty/site_config/standard/rpgsite.net.txt b/inc/3rdparty/site_config/standard/rpgsite.net.txt
index e7f29bbe..9ddbf0f2 100644..100755
--- a/inc/3rdparty/site_config/standard/rpgsite.net.txt
+++ b/inc/3rdparty/site_config/standard/rpgsite.net.txt
@@ -1,4 +1,4 @@
1body: //div[@id='news-text'] 1body: //div[@id='news-text']
2prune: no 2prune: no
3test_url: http://www.rpgsite.net/news/1964-tetsuya-nomura-says-hell-soon-show-the-future-of-final-fantasy 3test_url: http://www.rpgsite.net/news/1964-tetsuya-nomura-says-hell-soon-show-the-future-of-final-fantasy
4test_url: http://www.rpgsite.net/news/1965-new-atelier-totori-plus-screens-and-artwork \ No newline at end of file 4test_url: http://www.rpgsite.net/news/1965-new-atelier-totori-plus-screens-and-artwork \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/rubysfera.pl.txt b/inc/3rdparty/site_config/standard/rubysfera.pl.txt
index d9df7684..d9d9a431 100644..100755
--- a/inc/3rdparty/site_config/standard/rubysfera.pl.txt
+++ b/inc/3rdparty/site_config/standard/rubysfera.pl.txt
@@ -1,9 +1,9 @@
1author: //div[contains(@class, 'author_text')]/h4/text() 1author: //div[contains(@class, 'author_text')]/h4/text()
2date: //li[@class='date'] 2date: //li[@class='date']
3 3
4# stripping excessive tags 4# stripping excessive tags
5strip: //div[contains(@class, 'entry_meta')] 5strip: //div[contains(@class, 'entry_meta')]
6strip: //div[contains(@class, 'single_meta')] 6strip: //div[contains(@class, 'single_meta')]
7strip: //br[contains(@class, 'clear')] 7strip: //br[contains(@class, 'clear')]
8strip: //h3[contains(., 'Komentarz')] 8strip: //h3[contains(., 'Komentarz')]
9test_url: http://rubysfera.pl/2011/09/10-porad-o-rvm/ \ No newline at end of file 9test_url: http://rubysfera.pl/2011/09/10-porad-o-rvm/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ruhlman.com.txt b/inc/3rdparty/site_config/standard/ruhlman.com.txt
index 7a21c4af..e54b0f0e 100644..100755
--- a/inc/3rdparty/site_config/standard/ruhlman.com.txt
+++ b/inc/3rdparty/site_config/standard/ruhlman.com.txt
@@ -1,6 +1,6 @@
1title: //h1[@class='entry-title'] 1title: //h1[@class='entry-title']
2author: ///span[@class='author vcard'] 2author: ///span[@class='author vcard']
3date: //abbr[@class='published'] 3date: //abbr[@class='published']
4body: //div[@class='entry-content'] 4body: //div[@class='entry-content']
5 5
6test_url: http://ruhlman.com/2009/05/cookbooks-that-teach/ \ No newline at end of file 6test_url: http://ruhlman.com/2009/05/cookbooks-that-teach/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ruttloff.org.txt b/inc/3rdparty/site_config/standard/ruttloff.org.txt
index c036dcf8..43e130a4 100644..100755
--- a/inc/3rdparty/site_config/standard/ruttloff.org.txt
+++ b/inc/3rdparty/site_config/standard/ruttloff.org.txt
@@ -1,3 +1,3 @@
1author: //a[@class='author'] 1author: //a[@class='author']
2tidy: no 2tidy: no
3test_url: http://ruttloff.org/2012/06/13/intervention \ No newline at end of file 3test_url: http://ruttloff.org/2012/06/13/intervention \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/salon.com.txt b/inc/3rdparty/site_config/standard/salon.com.txt
index 04f8afd5..2b47f744 100644..100755
--- a/inc/3rdparty/site_config/standard/salon.com.txt
+++ b/inc/3rdparty/site_config/standard/salon.com.txt
@@ -1,11 +1,11 @@
1title: //meta[@property='og:title']/@content 1title: //meta[@property='og:title']/@content
2author: (//span[@class="byline"]/a)[1] 2author: (//span[@class="byline"]/a)[1]
3date: //span[contains(@class, "toLocalTime")] 3date: //span[contains(@class, "toLocalTime")]
4body: (//div[contains(@class, "articleInner")]//img[contains(@src, 'media.salon.com') and contains(@src, '460x')])[1] | //div[contains(@class, "articleContent") or contains(@class, "writerMeta")] 4body: (//div[contains(@class, "articleInner")]//img[contains(@src, 'media.salon.com') and contains(@src, '460x')])[1] | //div[contains(@class, "articleContent") or contains(@class, "writerMeta")]
5 5
6prune: no 6prune: no
7 7
8# deal with singleton links 8# deal with singleton links
9single_page_link: (//h1/a[contains(@href, '/singleton')])[1] 9single_page_link: (//h1/a[contains(@href, '/singleton')])[1]
10 10
11test_url: http://www.salon.com/2011/10/25/occupying_the_rust_belt/singleton/ \ No newline at end of file 11test_url: http://www.salon.com/2011/10/25/occupying_the_rust_belt/singleton/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/salzburg.com.txt b/inc/3rdparty/site_config/standard/salzburg.com.txt
index 31067481..464f99f1 100644..100755
--- a/inc/3rdparty/site_config/standard/salzburg.com.txt
+++ b/inc/3rdparty/site_config/standard/salzburg.com.txt
@@ -1,6 +1,6 @@
1body: //p[@class='teaser1 darkgrey myriad'] 1body: //p[@class='teaser1 darkgrey myriad']
2move_into(//p[@class='teaser1 darkgrey myriad']): //div[@class='artikel clear'] 2move_into(//p[@class='teaser1 darkgrey myriad']): //div[@class='artikel clear']
3strip: //div[@class='hidden'] 3strip: //div[@class='hidden']
4strip: //div[@id='article_related_source'] 4strip: //div[@id='article_related_source']
5 5
6test_url: http://www.salzburg.com/nachrichten/oesterreich/politik/sn/artikel/deutliche-nachbesserungen-bei-lehrerdienstrecht-19469/ \ No newline at end of file 6test_url: http://www.salzburg.com/nachrichten/oesterreich/politik/sn/artikel/deutliche-nachbesserungen-bei-lehrerdienstrecht-19469/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sanpedrosun.com.txt b/inc/3rdparty/site_config/standard/sanpedrosun.com.txt
new file mode 100755
index 00000000..3f19cced
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/sanpedrosun.com.txt
@@ -0,0 +1,10 @@
1title: //div[contains(@class, 'post')]//h1
2date: //div[contains(@class, 'post')]//h6
3body: //div[contains(@class, 'entry')]
4strip_id_or_class: post_stats
5strip_id_or_class: related-posts
6strip_id_or_class: after_story
7prune: no
8
9test_url: http://www.sanpedrosun.com/community-and-society/2013/06/05/little-angelspre-school-talent-show/
10test_url: http://www.sanpedrosun.com/feed/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/saveyourself.ca.txt b/inc/3rdparty/site_config/standard/saveyourself.ca.txt
index 354f5911..5a5605d9 100644..100755
--- a/inc/3rdparty/site_config/standard/saveyourself.ca.txt
+++ b/inc/3rdparty/site_config/standard/saveyourself.ca.txt
@@ -1,25 +1,25 @@
1title://h1 1title://h1
2 2
3# my section divs seem to interfere with the Instapaper parser, so I ditch 'em 3# my section divs seem to interfere with the Instapaper parser, so I ditch 'em
4dissolve://div[contains(@class, 'section')] 4dissolve://div[contains(@class, 'section')]
5 5
6#these don't seem to be necessary, but just in case 6#these don't seem to be necessary, but just in case
7strip_id_or_class:'masthead' 7strip_id_or_class:'masthead'
8strip_id_or_class:'footer' 8strip_id_or_class:'footer'
9 9
10#again, Instapaper seems to understand where my content is, but just in case 10#again, Instapaper seems to understand where my content is, but just in case
11body://div[@id='content'] 11body://div[@id='content']
12 12
13# in general, I want the Instapaper view to look like my print CSS, so I remove things specified for the screen or non-printing 13# in general, I want the Instapaper view to look like my print CSS, so I remove things specified for the screen or non-printing
14strip_id_or_class:'screen-only' 14strip_id_or_class:'screen-only'
15strip_id_or_class:'no-print' 15strip_id_or_class:'no-print'
16 16
17#other misc removals and simplifications 17#other misc removals and simplifications
18strip_id_or_class:'popup' 18strip_id_or_class:'popup'
19strip_id_or_class:'ZoomSpin' 19strip_id_or_class:'ZoomSpin'
20 20
21#I have a lot of content in sidebars and "meta" asides that can work inline just fine, but has to be distinguished somehow with some minimal formatting, so I put them in blockquotes 21#I have a lot of content in sidebars and "meta" asides that can work inline just fine, but has to be distinguished somehow with some minimal formatting, so I put them in blockquotes
22wrap_in(blockquote)://div[contains(@class, 'sidebar')] 22wrap_in(blockquote)://div[contains(@class, 'sidebar')]
23wrap_in(blockquote)://div[contains(@class, 'meta')] 23wrap_in(blockquote)://div[contains(@class, 'meta')]
24wrap_in(blockquote)://p[contains(@class, 'meta')] 24wrap_in(blockquote)://p[contains(@class, 'meta')]
25test_url: http://saveyourself.ca/tutorials/low-back-pain.php \ No newline at end of file 25test_url: http://saveyourself.ca/tutorials/low-back-pain.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sayidaty.net.txt b/inc/3rdparty/site_config/standard/sayidaty.net.txt
new file mode 100755
index 00000000..2d9f1884
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/sayidaty.net.txt
@@ -0,0 +1,4 @@
1date: //meta[@property='article:published_time']/@content
2body: (//div[contains(@class, 'article-slider')]//img)[1] | //div[contains(@class, 'bottom-article-con')]
3
4test_url: http://www.sayidaty.net/taxonomy/term/10/all/feed \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sbnation.com.txt b/inc/3rdparty/site_config/standard/sbnation.com.txt
index c213843c..41b36755 100644..100755
--- a/inc/3rdparty/site_config/standard/sbnation.com.txt
+++ b/inc/3rdparty/site_config/standard/sbnation.com.txt
@@ -1,28 +1,28 @@
1title: //h1[@id='stream_title'] 1title: //h1[@id='stream_title']
2 2
3# Author and date don't work 3# Author and date don't work
4author: //div[@class='byline'] 4author: //div[@class='byline']
5date: //div[@class='date-stamp'] 5date: //div[@class='date-stamp']
6 6
7body: //div[@class='node-article'] 7body: //div[@class='node-article']
8 8
9strip_id_or_class: fb-like-box 9strip_id_or_class: fb-like-box
10strip_id_or_class: stream-fb-like 10strip_id_or_class: stream-fb-like
11strip_id_or_class: social-meta 11strip_id_or_class: social-meta
12strip_id_or_class: social-spoken 12strip_id_or_class: social-spoken
13strip_id_or_class: twitter-share-button 13strip_id_or_class: twitter-share-button
14strip_id_or_class: twitter-follow-button 14strip_id_or_class: twitter-follow-button
15strip_id_or_class: spinner_node_list 15strip_id_or_class: spinner_node_list
16strip_id_or_class: node-sort-link 16strip_id_or_class: node-sort-link
17strip_id_or_class: stream_title 17strip_id_or_class: stream_title
18strip_id_or_class: stream_summary 18strip_id_or_class: stream_summary
19strip_id_or_class: update-count-container 19strip_id_or_class: update-count-container
20strip_id_or_class: major-updates 20strip_id_or_class: major-updates
21strip_id_or_class: newsletter-slide 21strip_id_or_class: newsletter-slide
22strip_id_or_class: author-mini-profile 22strip_id_or_class: author-mini-profile
23strip_id_or_class: byline 23strip_id_or_class: byline
24strip_id_or_class: header 24strip_id_or_class: header
25strip_id_or_class: footer 25strip_id_or_class: footer
26 26
27# Works, but "no text" errors on: http://www.sbnation.com/nba/2012/3/9/2856780/nba-scores-dwight-howard-bulls-magic-mavs-suns 27# Works, but "no text" errors on: http://www.sbnation.com/nba/2012/3/9/2856780/nba-scores-dwight-howard-bulls-magic-mavs-suns
28test_url: http://www.sbnation.com/nba/2012/3/13/2867226/dwight-howard-trade-rumors-2012-faq-orlando-magic \ No newline at end of file 28test_url: http://www.sbnation.com/nba/2012/3/13/2867226/dwight-howard-trade-rumors-2012-faq-orlando-magic \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/schneier.com.txt b/inc/3rdparty/site_config/standard/schneier.com.txt
index 67181b65..0074a86a 100644..100755
--- a/inc/3rdparty/site_config/standard/schneier.com.txt
+++ b/inc/3rdparty/site_config/standard/schneier.com.txt
@@ -1,25 +1,25 @@
1author: //p[@class='mastname'] 1author: //p[@class='mastname']
2 2
3body: //div[@class='indivbody'] 3body: //div[@class='indivbody']
4date: //div[@class='indivbody']/h2[1] 4date: //div[@class='indivbody']/h2[1]
5 5
6# Remove blog title. Specify first occurrence in case h1 is used in article 6# Remove blog title. Specify first occurrence in case h1 is used in article
7strip: //div[@class='indivbody']/h1[1] 7strip: //div[@class='indivbody']/h1[1]
8 8
9# Remove blog description (the first p element) 9# Remove blog description (the first p element)
10strip: //div[@class='indivbody']/p[1] 10strip: //div[@class='indivbody']/p[1]
11 11
12# Remove navigation (second p element) 12# Remove navigation (second p element)
13strip: //div[@class='indivbody']/p[2] 13strip: //div[@class='indivbody']/p[2]
14 14
15# Remove duplicate of article title. Specify first occurrence in case h3 is used in article 15# Remove duplicate of article title. Specify first occurrence in case h3 is used in article
16strip: //div[@class='indivbody']/h3[1] 16strip: //div[@class='indivbody']/h3[1]
17 17
18# Remove publishing date, it's extracted by rule above 18# Remove publishing date, it's extracted by rule above
19strip: //div[@class='indivbody']/h2[1] 19strip: //div[@class='indivbody']/h2[1]
20 20
21# Remove duplicate of date at end, and newsletter signup 21# Remove duplicate of date at end, and newsletter signup
22strip: //p[@class='posted'] 22strip: //p[@class='posted']
23 23
24# Leave date at top 24# Leave date at top
25test_url: http://www.schneier.com/blog/archives/2010/12/security_in_202.html \ No newline at end of file 25test_url: http://www.schneier.com/blog/archives/2010/12/security_in_202.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/science.orf.at.txt b/inc/3rdparty/site_config/standard/science.orf.at.txt
index 89ebfe08..c4b21834 100644..100755
--- a/inc/3rdparty/site_config/standard/science.orf.at.txt
+++ b/inc/3rdparty/site_config/standard/science.orf.at.txt
@@ -1,11 +1,11 @@
1body: //div[@class="storybox"] 1body: //div[@class="storybox"]
2title: //div[@class="storybox"]//h1 2title: //div[@class="storybox"]//h1
3strip: //p[@class='metaline'] 3strip: //p[@class='metaline']
4date: substring-after(//*[@class='time'],'Erstellt am') 4date: substring-after(//*[@class='time'],'Erstellt am')
5strip: //div[@class='fact'] 5strip: //div[@class='fact']
6strip: //p[@class='backlink'] 6strip: //p[@class='backlink']
7strip: //div[@class='mailto'] 7strip: //div[@class='mailto']
8strip: //div[@id='forumDisclaimer'] 8strip: //div[@id='forumDisclaimer']
9strip: //div[@class='forum'] 9strip: //div[@class='forum']
10 10
11test_url: http://science.orf.at/stories/1700900/ \ No newline at end of file 11test_url: http://science.orf.at/stories/1700900/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/scienceblogs.de.txt b/inc/3rdparty/site_config/standard/scienceblogs.de.txt
index 08c16842..b0dec3d2 100644..100755
--- a/inc/3rdparty/site_config/standard/scienceblogs.de.txt
+++ b/inc/3rdparty/site_config/standard/scienceblogs.de.txt
@@ -1,12 +1,12 @@
1single_page_link: //div[@class='c2c1']/div[@class='toptheme further line']//ul//li/a 1single_page_link: //div[@class='c2c1']/div[@class='toptheme further line']//ul//li/a
2 2
3author: //div[@class='details clear']//a[@class='hi'] 3author: //div[@class='details clear']//a[@class='hi']
4body: //div[@class='title'] 4body: //div[@class='title']
5strip: //p[@class='entrypagination'] 5strip: //p[@class='entrypagination']
6strip: //p[@class='details_top'] 6strip: //p[@class='details_top']
7date: //p[@class='details_top'] 7date: //p[@class='details_top']
8title: //div[@class='title']/h1 8title: //div[@class='title']/h1
9strip: //p[@class='details'] 9strip: //p[@class='details']
10strip: //p[@class='details_bottom'] 10strip: //p[@class='details_bottom']
11 11
12test_url: http://www.scienceblogs.de/astrodicticum-simplex/2011/10/weltuntergang-reloaded-das-jungste-gericht-findet-am-21-oktober-statt.php \ No newline at end of file 12test_url: http://www.scienceblogs.de/astrodicticum-simplex/2011/10/weltuntergang-reloaded-das-jungste-gericht-findet-am-21-oktober-statt.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/scienceticker.info.txt b/inc/3rdparty/site_config/standard/scienceticker.info.txt
index 75a52824..2a06f734 100644..100755
--- a/inc/3rdparty/site_config/standard/scienceticker.info.txt
+++ b/inc/3rdparty/site_config/standard/scienceticker.info.txt
@@ -1,11 +1,11 @@
1body: //div[@class='post'] 1body: //div[@class='post']
2title: //h1[@id='singlePageTitle'] 2title: //h1[@id='singlePageTitle']
3date: substring-before(//small,'&bull; Rubrik') 3date: substring-before(//small,'&bull; Rubrik')
4 4
5strip: //div[@class='post-ratings'] 5strip: //div[@class='post-ratings']
6strip: //div[@class='post-ratings-loading'] 6strip: //div[@class='post-ratings-loading']
7strip: //a[@title='Empfehlen Sie den Text weiter!'] 7strip: //a[@title='Empfehlen Sie den Text weiter!']
8strip: //a[@title='Drucken'] 8strip: //a[@title='Drucken']
9strip: //div[@class='share'] 9strip: //div[@class='share']
10 10
11test_url: http://www.scienceticker.info/2011/11/24/forscher-finden-gedachtnismolekul/ \ No newline at end of file 11test_url: http://www.scienceticker.info/2011/11/24/forscher-finden-gedachtnismolekul/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/scientificamerican.com.txt b/inc/3rdparty/site_config/standard/scientificamerican.com.txt
index d510407d..1b3f31cf 100644..100755
--- a/inc/3rdparty/site_config/standard/scientificamerican.com.txt
+++ b/inc/3rdparty/site_config/standard/scientificamerican.com.txt
@@ -1,25 +1,25 @@
1# 1#
2# After site revisions at SciAm, this configuration does 2# After site revisions at SciAm, this configuration does
3# not work, especially for multi-page articles. For 3# not work, especially for multi-page articles. For
4# every article there is now a "Print" link which 4# every article there is now a "Print" link which
5# is far more reliable. So this configuration should be 5# is far more reliable. So this configuration should be
6# removed or disabled. 6# removed or disabled.
7# 2/3/13 7# 2/3/13
8# 8#
9 9
10# meta data 10# meta data
11title://h1[@class = 'articleTitle'] 11title://h1[@class = 'articleTitle']
12author:substring-after(//span[@class = 'byline'],'By ') 12author:substring-after(//span[@class = 'byline'],'By ')
13date:substring-before(//span[@class = 'datestamp'],'|') 13date:substring-before(//span[@class = 'datestamp'],'|')
14 14
15#body content 15#body content
16body://div[@id = 'articleContent'] 16body://div[@id = 'articleContent']
17#next_page_link://li[@id = 'flairPagination']/a[last()] 17#next_page_link://li[@id = 'flairPagination']/a[last()]
18 18
19single_page_link: //a[contains(@href, 'print=true')] 19single_page_link: //a[contains(@href, 'print=true')]
20 20
21#cleanup 21#cleanup
22strip://div[@class = 'fsgBooks'] 22strip://div[@class = 'fsgBooks']
23 23
24test_url: http://www.scientificamerican.com/article.cfm?id=do-brain-scans-comatose-patients-reveal-conscious-state 24test_url: http://www.scientificamerican.com/article.cfm?id=do-brain-scans-comatose-patients-reveal-conscious-state
25test_url: http://www.scientificamerican.com/article.cfm?id=solar-wind-transforms-venus-into-shape-of-comet \ No newline at end of file 25test_url: http://www.scientificamerican.com/article.cfm?id=solar-wind-transforms-venus-into-shape-of-comet \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/scilogs.de.txt b/inc/3rdparty/site_config/standard/scilogs.de.txt
new file mode 100755
index 00000000..b24d7844
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/scilogs.de.txt
@@ -0,0 +1,15 @@
1title: //h1
2author: //div[@class='date']/a
3date: substring-after(//div[@class='date'], ',')
4body: //div[@class='entrybody']
5
6strip_id_or_class: socialshareprivacy
7strip: //div[@class='entrybody']/br[1]
8
9# Strip related articles
10# 'p'-Tag strips 'Ähnliche Artikel: ' (<br> tags become <p>)
11strip: //div[@class='entrybody']/p[last()]
12strip: //div[@class='entrybody']/ul[last()]
13
14convert_double_br_tags: yes
15test_url: http://www.scilogs.de/wblogs/blog/formbar/fusion/2012-10-08/rundgang-durch-deutschlands-gr-tes-fusionsexperiment \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/scotusblog.com.txt b/inc/3rdparty/site_config/standard/scotusblog.com.txt
index f29e37f9..8881bb45 100644..100755
--- a/inc/3rdparty/site_config/standard/scotusblog.com.txt
+++ b/inc/3rdparty/site_config/standard/scotusblog.com.txt
@@ -1,8 +1,8 @@
1title: //title 1title: //title
2author: //p[@id='author-name-role']/a 2author: //p[@id='author-name-role']/a
3date: substring-after(//p[@class='time'],'Posted') 3date: substring-after(//p[@class='time'],'Posted')
4body: //div[@id='main'] 4body: //div[@id='main']
5strip: //div[@id='author-info'] 5strip: //div[@id='author-info']
6strip: //div[@id='author-links'] 6strip: //div[@id='author-links']
7strip: //h1 7strip: //h1
8test_url: http://www.scotusblog.com/2012/04/shaken-baby-case-an-update/ \ No newline at end of file 8test_url: http://www.scotusblog.com/2012/04/shaken-baby-case-an-update/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/scraplab.net.txt b/inc/3rdparty/site_config/standard/scraplab.net.txt
index 84be27f9..ca7ec195 100644..100755
--- a/inc/3rdparty/site_config/standard/scraplab.net.txt
+++ b/inc/3rdparty/site_config/standard/scraplab.net.txt
@@ -1,3 +1,3 @@
1title: //h2 1title: //h2
2body: //div[@class='body'] 2body: //div[@class='body']
3test_url: http://scraplab.net/2010/10/26/please-keep-your-belongings-with-you-at-all-times/ \ No newline at end of file 3test_url: http://scraplab.net/2010/10/26/please-keep-your-belongings-with-you-at-all-times/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/scripting.com.txt b/inc/3rdparty/site_config/standard/scripting.com.txt
index d8b969b1..5fb0ee79 100644..100755
--- a/inc/3rdparty/site_config/standard/scripting.com.txt
+++ b/inc/3rdparty/site_config/standard/scripting.com.txt
@@ -1,8 +1,8 @@
1strip: //a[starts-with(@href, '#')] 1strip: //a[starts-with(@href, '#')]
2strip: //*[@class='storyByline'] 2strip: //*[@class='storyByline']
3body: //*[@class='storyPageText']/.. 3body: //*[@class='storyPageText']/..
4author: string('Dave Winer') 4author: string('Dave Winer')
5date: substring-before(substring-after(//*[@class='storyByline'], 'on'), 'at') 5date: substring-before(substring-after(//*[@class='storyByline'], 'on'), 'at')
6title: //h1 6title: //h1
7footnotes: no 7footnotes: no
8test_url: http://scripting.com/stories/2011/07/08/yeahImStillYawning.html \ No newline at end of file 8test_url: http://scripting.com/stories/2011/07/08/yeahImStillYawning.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sct.temple.edu.txt b/inc/3rdparty/site_config/standard/sct.temple.edu.txt
index 9927675b..55f24173 100644..100755
--- a/inc/3rdparty/site_config/standard/sct.temple.edu.txt
+++ b/inc/3rdparty/site_config/standard/sct.temple.edu.txt
@@ -1,5 +1,5 @@
1body: //*[@class="entry-content"] 1body: //*[@class="entry-content"]
2title: //h1[@class="entry-title"] 2title: //h1[@class="entry-title"]
3date: //*[@class="entry-date"] 3date: //*[@class="entry-date"]
4author: //*[@class="author vcard"] 4author: //*[@class="author vcard"]
5test_url: http://sct.temple.edu/blogs/news-events/2011/05/congratulations-sct-class-of-2011/ \ No newline at end of file 5test_url: http://sct.temple.edu/blogs/news-events/2011/05/congratulations-sct-class-of-2011/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/searchenginejournal.com.txt b/inc/3rdparty/site_config/standard/searchenginejournal.com.txt
new file mode 100755
index 00000000..dc98af3c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/searchenginejournal.com.txt
@@ -0,0 +1,5 @@
1strip: //ul[contains(@id, "social")]
2strip: //div[contains(@class, "ts-fab-wrapper")]
3strip: //div[contains(@id, 'gpt-ad')]
4
5test_url: http://www.searchenginejournal.com/web-design-vs-seo-it-doesnt-make-much-sense/62294/
diff --git a/inc/3rdparty/site_config/standard/searchengineland.com.txt b/inc/3rdparty/site_config/standard/searchengineland.com.txt
index f176d7c7..fb6a1074 100644..100755
--- a/inc/3rdparty/site_config/standard/searchengineland.com.txt
+++ b/inc/3rdparty/site_config/standard/searchengineland.com.txt
@@ -1,20 +1,20 @@
1body: //div[@class="storyBox"] 1body: //div[@class="storyBox"]
2title: //div[@class="storyBox"]/h1 2title: //div[@class="storyBox"]/h1
3author: //a[@rel="author"] 3author: //a[@rel="author"]
4date: substring-before(//span[@class="dateline"], 'by') 4date: substring-before(//span[@class="dateline"], 'by')
5 5
6#Removes related content but cleans up article text 6#Removes related content but cleans up article text
7strip: //h1 7strip: //h1
8strip: //p[@class="homeStory tdmSideInfo"] 8strip: //p[@class="homeStory tdmSideInfo"]
9strip: //div[@id="bylineShare"] 9strip: //div[@id="bylineShare"]
10strip: //script 10strip: //script
11strip: //hr 11strip: //hr
12 12
13strip_id_or_class: homeStory 13strip_id_or_class: homeStory
14strip_id_or_class: authorpic 14strip_id_or_class: authorpic
15strip_id_or_class: insideComments 15strip_id_or_class: insideComments
16strip_id_or_class: authorbio 16strip_id_or_class: authorbio
17strip_id_or_class: gpt-ad-sel-cube 17strip_id_or_class: gpt-ad-sel-cube
18strip_id_or_class: smxTextAd 18strip_id_or_class: smxTextAd
19 19
20test_url: http://searchengineland.com/googles-jaw-dropping-sponsored-post-campaign-for-chrome-106348 \ No newline at end of file 20test_url: http://searchengineland.com/googles-jaw-dropping-sponsored-post-campaign-for-chrome-106348 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/seattletransitblog.com.txt b/inc/3rdparty/site_config/standard/seattletransitblog.com.txt
index 5129c069..5129c069 100644..100755
--- a/inc/3rdparty/site_config/standard/seattletransitblog.com.txt
+++ b/inc/3rdparty/site_config/standard/seattletransitblog.com.txt
diff --git a/inc/3rdparty/site_config/standard/sebbo.net.txt b/inc/3rdparty/site_config/standard/sebbo.net.txt
index 3e800a16..b6d9c92d 100644..100755
--- a/inc/3rdparty/site_config/standard/sebbo.net.txt
+++ b/inc/3rdparty/site_config/standard/sebbo.net.txt
@@ -1,4 +1,4 @@
1title: substring-before(//title, '') 1title: substring-before(//title, '')
2body: //div[@class = 'entry'] 2body: //div[@class = 'entry']
3strip_id_or_class: 'postmetabox' 3strip_id_or_class: 'postmetabox'
4test_url: http://sebbo.net/2010/12/akkus/ \ No newline at end of file 4test_url: http://sebbo.net/2010/12/akkus/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/select.yeeyan.org.txt b/inc/3rdparty/site_config/standard/select.yeeyan.org.txt
new file mode 100755
index 00000000..6e98b149
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/select.yeeyan.org.txt
@@ -0,0 +1,18 @@
1# This filter is tested on:
2# http://select.yeeyan.org/view/18312/332365
3# http://select.yeeyan.org/view/365295/333788
4# http://select.yeeyan.org/view/174464/332336
5
6tidy:no
7prune:no
8title://h1
9author: //div[@class='sa_author']/span/a
10date: substring-after(//div[@class='sa_author']/span/following-sibling::span, ':')
11body: //div[@class='sa_left closetag']
12wrap_in(b)://div[@class='sa_abstract']
13
14strip://ul[@class='sa_next clearfix']
15strip: //div[@class='sa_author']
16strip: //div[@class='sa_title_box']
17
18test_url: http://select.yeeyan.org/view/258033/333481 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/seriouseats.com.txt b/inc/3rdparty/site_config/standard/seriouseats.com.txt
index d7b4788c..5e633470 100644..100755
--- a/inc/3rdparty/site_config/standard/seriouseats.com.txt
+++ b/inc/3rdparty/site_config/standard/seriouseats.com.txt
@@ -1,15 +1,15 @@
1body: //div[@id='content'] 1body: //div[@id='content']
2 2
3# clean up recipe pages 3# clean up recipe pages
4strip: //h2[@class='fn'] | //h2[@class='double-lined'] | //h3 | //div[@id='threeColumn2'] | //div[@id='threeColumn3'] 4strip: //h2[@class='fn'] | //h2[@class='double-lined'] | //h3 | //div[@id='threeColumn2'] | //div[@id='threeColumn3']
5 5
6#recipe pages 6#recipe pages
7strip_id_or_class: "recipe-feedback" 7strip_id_or_class: "recipe-feedback"
8strip_id_or_class: "comments" 8strip_id_or_class: "comments"
9strip_id_or_class: "procedure-number" 9strip_id_or_class: "procedure-number"
10strip_id_or_class: "more-with-author" 10strip_id_or_class: "more-with-author"
11 11
12#slice 12#slice
13strip_id_or_class: "inner" 13strip_id_or_class: "inner"
14 14
15test_url: http://www.seriouseats.com/recipes/2010/09/peking-duck-mandarin-pancakes-plum-sauce-recipe.html \ No newline at end of file 15test_url: http://www.seriouseats.com/recipes/2010/09/peking-duck-mandarin-pancakes-plum-sauce-recipe.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sf.curbed.com.txt b/inc/3rdparty/site_config/standard/sf.curbed.com.txt
index 9f443d5c..4c10e9c7 100644..100755
--- a/inc/3rdparty/site_config/standard/sf.curbed.com.txt
+++ b/inc/3rdparty/site_config/standard/sf.curbed.com.txt
@@ -1,7 +1,7 @@
1title: //h1[@class='post-title'] 1title: //h1[@class='post-title']
2author: //div[@class='post-byline']/a 2author: //div[@class='post-byline']/a
3date: substring-before(//div[@class='post-byline'], ', by') 3date: substring-before(//div[@class='post-byline'], ', by')
4 4
5body: //div[@class='post-body'] 5body: //div[@class='post-body']
6dissolve: //noscript 6dissolve: //noscript
7test_url: http://sf.curbed.com/archives/2011/10/17/lower_haight_loft_would_really_really_really_like_a_buyer.php \ No newline at end of file 7test_url: http://sf.curbed.com/archives/2011/10/17/lower_haight_loft_would_really_really_really_like_a_buyer.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sf.eater.com.txt b/inc/3rdparty/site_config/standard/sf.eater.com.txt
index fca656d2..1e7c85a0 100644..100755
--- a/inc/3rdparty/site_config/standard/sf.eater.com.txt
+++ b/inc/3rdparty/site_config/standard/sf.eater.com.txt
@@ -1,7 +1,7 @@
1title: //h1[@class="post-title"] 1title: //h1[@class="post-title"]
2author: //div[@class="post-byline"]/a 2author: //div[@class="post-byline"]/a
3date: substring-before(//div[@class='post-byline'], ', by') 3date: substring-before(//div[@class='post-byline'], ', by')
4 4
5body: //div[@class='post-body'] 5body: //div[@class='post-body']
6strip_id_or_class: post-kicker 6strip_id_or_class: post-kicker
7test_url: http://sf.eater.com/archives/2012/05/22/nate_pollack_talks_about_the_american_grilled_cheese_kitchen_moving_into_the_mission.php \ No newline at end of file 7test_url: http://sf.eater.com/archives/2012/05/22/nate_pollack_talks_about_the_american_grilled_cheese_kitchen_moving_into_the_mission.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sfgate.com.txt b/inc/3rdparty/site_config/standard/sfgate.com.txt
index 5f73fbcb..54691122 100644..100755
--- a/inc/3rdparty/site_config/standard/sfgate.com.txt
+++ b/inc/3rdparty/site_config/standard/sfgate.com.txt
@@ -1,12 +1,12 @@
1title: /html/head/title 1title: /html/head/title
2 2
3body: //div[@id = 'articlecontent']/div[contains(@class, 'bodytext')] 3body: //div[@id = 'articlecontent']/div[contains(@class, 'bodytext')]
4author: //div[@class = 'articleheadings']/p[contains(@class,'author')]/span[@class = 'fn'] 4author: //div[@class = 'articleheadings']/p[contains(@class,'author')]/span[@class = 'fn']
5date: //div[@class = 'articleheadings']/span[@class = 'updated'] 5date: //div[@class = 'articleheadings']/span[@class = 'updated']
6strip: //div[div[contains(@class, 'imgbox')]] 6strip: //div[div[contains(@class, 'imgbox')]]
7 7
8body: //div[@class = 'blogitem'] 8body: //div[@class = 'blogitem']
9author: //p[@class="credit"]/span[@class="author"]/a[position() = 1] 9author: //p[@class="credit"]/span[@class="author"]/a[position() = 1]
10date: //span[@class = 'pubdate'] 10date: //span[@class = 'pubdate']
11 11
12test_url: http://www.sfgate.com/columnists/garchik/ \ No newline at end of file 12test_url: http://www.sfgate.com/columnists/garchik/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sfweekly.com.txt b/inc/3rdparty/site_config/standard/sfweekly.com.txt
index a11fe4cb..73c3017e 100644..100755
--- a/inc/3rdparty/site_config/standard/sfweekly.com.txt
+++ b/inc/3rdparty/site_config/standard/sfweekly.com.txt
@@ -1,3 +1,3 @@
1body: //div[contains(@class, 'content_body')] 1body: //div[contains(@class, 'content_body')]
2strip_id_or_class: det_rel 2strip_id_or_class: det_rel
3test_url: http://www.sfweekly.com/2012-03-14/news/cia-lsd-wayne-ritchie-george-h-white-mk-ultra/ \ No newline at end of file 3test_url: http://www.sfweekly.com/2012-03-14/news/cia-lsd-wayne-ritchie-george-h-white-mk-ultra/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/shabayek.com.txt b/inc/3rdparty/site_config/standard/shabayek.com.txt
index b175720e..9a0d60ae 100644..100755
--- a/inc/3rdparty/site_config/standard/shabayek.com.txt
+++ b/inc/3rdparty/site_config/standard/shabayek.com.txt
@@ -1,3 +1,3 @@
1date: //span[@class='date'] 1date: //span[@class='date']
2body: //div[@class='post_content'] 2body: //div[@class='post_content']
3test_url: http://www.shabayek.com/blog/2011/10/16/%D8%AF%D8%B1%D9%88%D8%B3-%D9%85%D9%86-%D9%82%D8%B5%D8%A9-%D8%AA%D8%A3%D8%B3%D9%8A%D8%B3-%D8%AA%D9%88%D9%8A%D8%AA%D8%B1-%E2%80%93%D8%AC3/ \ No newline at end of file 3test_url: http://www.shabayek.com/blog/2011/10/16/%D8%AF%D8%B1%D9%88%D8%B3-%D9%85%D9%86-%D9%82%D8%B5%D8%A9-%D8%AA%D8%A3%D8%B3%D9%8A%D8%B3-%D8%AA%D9%88%D9%8A%D8%AA%D8%B1-%E2%80%93%D8%AC3/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/shawnblanc.net.txt b/inc/3rdparty/site_config/standard/shawnblanc.net.txt
index b536fc3a..bd8438f7 100644..100755
--- a/inc/3rdparty/site_config/standard/shawnblanc.net.txt
+++ b/inc/3rdparty/site_config/standard/shawnblanc.net.txt
@@ -1,11 +1,11 @@
1title://*[@class='primary']/h1 1title://*[@class='primary']/h1
2date: //*[@class='articledate'] 2date: //*[@class='articledate']
3author: substring-before(substring-after(//*[@class='block first']/p,'2012 '),'.') 3author: substring-before(substring-after(//*[@class='block first']/p,'2012 '),'.')
4body: //div[@class='primary'] 4body: //div[@class='primary']
5footnotes: yes 5footnotes: yes
6strip: //*[@class='primary']/h1 6strip: //*[@class='primary']/h1
7strip: //*[@class='articledate'] 7strip: //*[@class='articledate']
8strip: //*[@class='detailsarticle'] 8strip: //*[@class='detailsarticle']
9strip: //*[@class='endnav'] 9strip: //*[@class='endnav']
10strip: //*[@class='endmeta'] 10strip: //*[@class='endmeta']
11test_url: http://shawnblanc.net/2011/11/kindle-touch-review/ \ No newline at end of file 11test_url: http://shawnblanc.net/2011/11/kindle-touch-review/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/shifteleven.com.txt b/inc/3rdparty/site_config/standard/shifteleven.com.txt
index 68059ae1..43fd871d 100644..100755
--- a/inc/3rdparty/site_config/standard/shifteleven.com.txt
+++ b/inc/3rdparty/site_config/standard/shifteleven.com.txt
@@ -1,6 +1,6 @@
1body: //div[ @class='entry-content' ] 1body: //div[ @class='entry-content' ]
2 2
3strip: //div[ contains(@class, 'sharing') ] 3strip: //div[ contains(@class, 'sharing') ]
4 4
5date: //div[ @class='entry-meta' ]/a 5date: //div[ @class='entry-meta' ]/a
6test_url: http://shifteleven.com/articles/2008/05/10/issue-tracking-git-ticgit \ No newline at end of file 6test_url: http://shifteleven.com/articles/2008/05/10/issue-tracking-git-ticgit \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/siasat.pk.txt b/inc/3rdparty/site_config/standard/siasat.pk.txt
index a82ce69c..b10e12de 100644..100755
--- a/inc/3rdparty/site_config/standard/siasat.pk.txt
+++ b/inc/3rdparty/site_config/standard/siasat.pk.txt
@@ -1,11 +1,11 @@
1#body: (//div[@class='ftr-yt-vid'])[1] 1#body: (//div[@class='ftr-yt-vid'])[1]
2body: (//blockquote[contains(@class, 'postcontent')])[1] 2body: (//blockquote[contains(@class, 'postcontent')])[1]
3body: (//div[starts-with(@id, 'post_message')])[1] 3body: (//div[starts-with(@id, 'post_message')])[1]
4 4
5prune: no 5prune: no
6tidy: no 6tidy: no
7 7
8#replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player" 8#replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player"
9#replace_string(</iframe>): </iframe>&nbsp;</div> 9#replace_string(</iframe>): </iframe>&nbsp;</div>
10 10
11test_url: http://www.siasat.pk/forum/showthread.php?107668-Policy-Matters-17th-March-2012-Dr-Shahid-Masood-Gen-Hameed-gul-amp-Fawad-Chudhary-Pak-US-Relationship&p=787733 \ No newline at end of file 11test_url: http://www.siasat.pk/forum/showthread.php?107668-Policy-Matters-17th-March-2012-Dr-Shahid-Masood-Gen-Hameed-gul-amp-Fawad-Chudhary-Pak-US-Relationship&p=787733 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/signalscv.com.txt b/inc/3rdparty/site_config/standard/signalscv.com.txt
new file mode 100755
index 00000000..2d3c388e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/signalscv.com.txt
@@ -0,0 +1,10 @@
1author: //span[contains(@class, 'byline_1')]
2date: //span[@class='posted_date']
3body: //*[contains(@class, 'bigimage_container') or contains(@class, 'overlay_text') or contains(@id, 'articlebody')]
4
5strip_id_or_class: leftWrapper
6
7prune: no
8
9test_url: http://www.signalscv.com/section/46/article/102948/
10test_url: http://www.signalscv.com/syndication/feeds/rss/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/simonwillison.net.txt b/inc/3rdparty/site_config/standard/simonwillison.net.txt
index e3ad6e41..69999698 100644..100755
--- a/inc/3rdparty/site_config/standard/simonwillison.net.txt
+++ b/inc/3rdparty/site_config/standard/simonwillison.net.txt
@@ -1,5 +1,5 @@
1body: //div[contains(@class, "entry")] 1body: //div[contains(@class, "entry")]
2 2
3date: //div[contains(@class, "entryFooter")]/a 3date: //div[contains(@class, "entryFooter")]/a
4 4
5test_url: http://simonwillison.net/2009/Oct/22/redis/ \ No newline at end of file 5test_url: http://simonwillison.net/2009/Oct/22/redis/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/singaporeanstocksinvestor.blogspot.com.txt b/inc/3rdparty/site_config/standard/singaporeanstocksinvestor.blogspot.com.txt
index a1b6b673..46e2d5f2 100644..100755
--- a/inc/3rdparty/site_config/standard/singaporeanstocksinvestor.blogspot.com.txt
+++ b/inc/3rdparty/site_config/standard/singaporeanstocksinvestor.blogspot.com.txt
@@ -1,5 +1,5 @@
1body: //div[@class='post-body'] 1body: //div[@class='post-body']
2strip: //div[@id='lws_0'] 2strip: //div[@id='lws_0']
3prune: no 3prune: no
4 4
5test_url: http://singaporeanstocksinvestor.blogspot.com/2011/04/aims-amp-capital-industrial-reit.html \ No newline at end of file 5test_url: http://singaporeanstocksinvestor.blogspot.com/2011/04/aims-amp-capital-industrial-reit.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/singularityhub.com.txt b/inc/3rdparty/site_config/standard/singularityhub.com.txt
index 3999d4d4..3999d4d4 100644..100755
--- a/inc/3rdparty/site_config/standard/singularityhub.com.txt
+++ b/inc/3rdparty/site_config/standard/singularityhub.com.txt
diff --git a/inc/3rdparty/site_config/standard/sintagoulis.gr.txt b/inc/3rdparty/site_config/standard/sintagoulis.gr.txt
index 822bbeb0..0d05c40e 100644..100755
--- a/inc/3rdparty/site_config/standard/sintagoulis.gr.txt
+++ b/inc/3rdparty/site_config/standard/sintagoulis.gr.txt
@@ -1,6 +1,6 @@
1title: //div[@class='headline']//h2 1title: //div[@class='headline']//h2
2body: //div[contains(@class, 'storycontent')] 2body: //div[contains(@class, 'storycontent')]
3 3
4prune: no 4prune: no
5 5
6test_url: http://sintagoulis.gr/sokolatenia/sokolatenia-mpompa-me-amaretti- \ No newline at end of file 6test_url: http://sintagoulis.gr/sokolatenia/sokolatenia-mpompa-me-amaretti- \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sivers.org.txt b/inc/3rdparty/site_config/standard/sivers.org.txt
new file mode 100755
index 00000000..a88f30d7
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/sivers.org.txt
@@ -0,0 +1,6 @@
1title: //article[@class='post']/header[@class='wrapper']//h1/a
2author: //header[@id='masthead']//h1/a
3date: //article[@class='post']/header[@class='wrapper']//p[@class='postdate']
4body: //div[@id='body-content']
5
6test_url: http://sivers.org/delegate/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/skanesfria.se.txt b/inc/3rdparty/site_config/standard/skanesfria.se.txt
new file mode 100755
index 00000000..a0ddac79
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/skanesfria.se.txt
@@ -0,0 +1,7 @@
1body: //div[contains(@class, 'layout__inner')]//div[contains(@class, 'file-image') or contains(@class, 'node__content')]
2author: //article//div[contains(@class, 'field-byline')]
3strip_id_or_class: rekommenderade
4strip_id_or_class: disqus
5strip_id_or_class: annonser
6
7test_url: http://www.skanesfria.se/artikel/112045 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/slashfilm.com.txt b/inc/3rdparty/site_config/standard/slashfilm.com.txt
index 78d38ecf..4d17176a 100644..100755
--- a/inc/3rdparty/site_config/standard/slashfilm.com.txt
+++ b/inc/3rdparty/site_config/standard/slashfilm.com.txt
@@ -1,15 +1,15 @@
1title: substring-before(//title,'| /Film') 1title: substring-before(//title,'| /Film')
2date: substring-before(substring-after(//p[@class='post-info'],'Posted on '),'by') 2date: substring-before(substring-after(//p[@class='post-info'],'Posted on '),'by')
3strip: //div[@class='pm-left'] 3strip: //div[@class='pm-left']
4strip: //div[@class='pm-right'] 4strip: //div[@class='pm-right']
5strip: //h2/span 5strip: //h2/span
6next_page_link: //h2/strong/a 6next_page_link: //h2/strong/a
7strip: //h2/strong/a 7strip: //h2/strong/a
8strip: //p[contains(text(),'we have to split this post over')] 8strip: //p[contains(text(),'we have to split this post over')]
9strip: //p[@class='post-info'] 9strip: //p[@class='post-info']
10strip: //h1/a 10strip: //h1/a
11strip: //img[contains(@src,'siteimages/authors')] 11strip: //img[contains(@src,'siteimages/authors')]
12strip: //div[@id='header'] 12strip: //div[@id='header']
13strip: //div[@class='topad-right'] 13strip: //div[@class='topad-right']
14strip: //strong[contains(text(),'Cool Posts From Around the Web:')] 14strip: //strong[contains(text(),'Cool Posts From Around the Web:')]
15test_url: http://www.slashfilm.com/superhero-bits-206/ \ No newline at end of file 15test_url: http://www.slashfilm.com/superhero-bits-206/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/slate.com.txt b/inc/3rdparty/site_config/standard/slate.com.txt
index e92f6a06..d5798e01 100644..100755
--- a/inc/3rdparty/site_config/standard/slate.com.txt
+++ b/inc/3rdparty/site_config/standard/slate.com.txt
@@ -1,19 +1,19 @@
1title: //h1[@class="sl-art-head-dek"] 1title: //h1[@class="sl-art-head-dek"]
2body: //article//div[@class='sl-art-body']/div[contains(@class, 'body')] 2body: //article//div[@class='sl-art-body']/div[contains(@class, 'body')]
3strip: //div[@class="department_kicker"] 3strip: //div[@class="department_kicker"]
4strip: //div[@id="insider_ad_wrapper" or @id="insider_ad_inner"] 4strip: //div[@id="insider_ad_wrapper" or @id="insider_ad_inner"]
5strip: //div[@id="bottom_sponsored_links"] 5strip: //div[@id="bottom_sponsored_links"]
6strip: //div[@class="sl-art-ad-midflex"] 6strip: //div[@class="sl-art-ad-midflex"]
7#strip: //dl 7#strip: //dl
8#strip: //p[em/a[contains(@href, 'facebook.com')]] 8#strip: //p[em/a[contains(@href, 'facebook.com')]]
9prune: no 9prune: no
10 10
11author: //div[@id='author_bio']//a[contains(@href, '/author/')] 11author: //div[@id='author_bio']//a[contains(@href, '/author/')]
12author: //a[contains(@href, '/authors.')] 12author: //a[contains(@href, '/authors.')]
13 13
14date: substring-before(substring-after(//span[@class='sl-art-byline'], 'Posted '), ', at ') 14date: substring-before(substring-after(//span[@class='sl-art-byline'], 'Posted '), ', at ')
15 15
16single_page_link: //a[@class='sl-art-sinpage'] 16single_page_link: //a[@class='sl-art-sinpage']
17 17
18test_url: http://www.slate.com/id/2274583/pagenum/all/ 18test_url: http://www.slate.com/id/2274583/pagenum/all/
19test_url: http://www.slate.com/id/2293116/ \ No newline at end of file 19test_url: http://www.slate.com/id/2293116/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/slice.seriouseats.com.txt b/inc/3rdparty/site_config/standard/slice.seriouseats.com.txt
index 1a902b96..e62a3966 100644..100755
--- a/inc/3rdparty/site_config/standard/slice.seriouseats.com.txt
+++ b/inc/3rdparty/site_config/standard/slice.seriouseats.com.txt
@@ -1,15 +1,15 @@
1body: //div[@id='content'] 1body: //div[@id='content']
2 2
3# clean up recipe pages 3# clean up recipe pages
4strip: //h2[@class='fn'] | //h2[@class='double-lined'] | //h3 | //div[@id='threeColumn2'] | //div[@id='threeColumn3'] 4strip: //h2[@class='fn'] | //h2[@class='double-lined'] | //h3 | //div[@id='threeColumn2'] | //div[@id='threeColumn3']
5 5
6#recipe pages 6#recipe pages
7strip_id_or_class: "recipe-feedback" 7strip_id_or_class: "recipe-feedback"
8strip_id_or_class: "comments" 8strip_id_or_class: "comments"
9strip_id_or_class: "procedure-number" 9strip_id_or_class: "procedure-number"
10strip_id_or_class: "more-with-author" 10strip_id_or_class: "more-with-author"
11 11
12#slice 12#slice
13strip_id_or_class: "inner" 13strip_id_or_class: "inner"
14 14
15test_url: http://slice.seriouseats.com/archives/2010/10/the-pizza-lab-how-to-make-great-new-york-style-pizza.html \ No newline at end of file 15test_url: http://slice.seriouseats.com/archives/2010/10/the-pizza-lab-how-to-make-great-new-york-style-pizza.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/slog.thestranger.com.txt b/inc/3rdparty/site_config/standard/slog.thestranger.com.txt
index daa5e31b..f9526945 100644..100755
--- a/inc/3rdparty/site_config/standard/slog.thestranger.com.txt
+++ b/inc/3rdparty/site_config/standard/slog.thestranger.com.txt
@@ -1,4 +1,4 @@
1strip_id_or_class: postCategory 1strip_id_or_class: postCategory
2title: //h3[@class='postTitle'] 2title: //h3[@class='postTitle']
3body: //div[@class='postBody'] 3body: //div[@class='postBody']
4test_url: http://slog.thestranger.com/slog/archives/2010/10/12/sl-letter-of-the-day-leave-it-alone \ No newline at end of file 4test_url: http://slog.thestranger.com/slog/archives/2010/10/12/sl-letter-of-the-day-leave-it-alone \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/smartinvestor.de.txt b/inc/3rdparty/site_config/standard/smartinvestor.de.txt
index ec6c55c8..85ca46de 100644..100755
--- a/inc/3rdparty/site_config/standard/smartinvestor.de.txt
+++ b/inc/3rdparty/site_config/standard/smartinvestor.de.txt
@@ -1,5 +1,5 @@
1title: //td[@class='hweissblau2'] 1title: //td[@class='hweissblau2']
2body: //p[@class='copy'] | //div[@class='Section1'] 2body: //p[@class='copy'] | //div[@class='Section1']
3prune: no 3prune: no
4 4
5test_url: http://www.smartinvestor.de/news/smartinvestor/detail.hbs?itemid=item949496655&recnr=14593 \ No newline at end of file 5test_url: http://www.smartinvestor.de/news/smartinvestor/detail.hbs?itemid=item949496655&recnr=14593 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sme.sk.txt b/inc/3rdparty/site_config/standard/sme.sk.txt
index c3d01ffb..d41612cc 100644..100755
--- a/inc/3rdparty/site_config/standard/sme.sk.txt
+++ b/inc/3rdparty/site_config/standard/sme.sk.txt
@@ -1,3 +1,3 @@
1title: //meta[@property='og:title']/@content 1title: //meta[@property='og:title']/@content
2date: //p[@class='autor_line']/b/text() 2date: //p[@class='autor_line']/b/text()
3test_url: http://www.sme.sk/c/6268206/lipsic-vidi-malcharkove-uplatky.html \ No newline at end of file 3test_url: http://www.sme.sk/c/6268206/lipsic-vidi-malcharkove-uplatky.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/smithsonianmag.com.txt b/inc/3rdparty/site_config/standard/smithsonianmag.com.txt
index 10a3f717..3e8fee95 100644..100755
--- a/inc/3rdparty/site_config/standard/smithsonianmag.com.txt
+++ b/inc/3rdparty/site_config/standard/smithsonianmag.com.txt
@@ -1,20 +1,20 @@
1# meta data 1# meta data
2title://h1[@id = 'articleTitle'] 2title://h1[@id = 'articleTitle']
3author:substring-after(//ul[@id = 'byLine']/li[1],'By ') 3author:substring-after(//ul[@id = 'byLine']/li[1],'By ')
4date:substring-before(substring-after(//ul[@id = 'byLine']/li[last()],','),',') 4date:substring-before(substring-after(//ul[@id = 'byLine']/li[last()],','),',')
5body://div[@id = 'article-body'] 5body://div[@id = 'article-body']
6 6
7# full content 7# full content
8single_page_link://td/li[@class = 'article-singlepage']/a 8single_page_link://td/li[@class = 'article-singlepage']/a
9 9
10# caption clean up 10# caption clean up
11wrap_in(i)://span[@class='articleImageCaptionwide'] 11wrap_in(i)://span[@class='articleImageCaptionwide']
12move_into (//span[@class='articleImageCaptionwide'])://div[@id = 'articleImage']/p 12move_into (//span[@class='articleImageCaptionwide'])://div[@id = 'articleImage']/p
13 13
14 14
15# clean up 15# clean up
16strip://p[@id = 'articlePaginationWrapper'] 16strip://p[@id = 'articlePaginationWrapper']
17strip://ul[contains(@class, 'cat-breadcrumb')] 17strip://ul[contains(@class, 'cat-breadcrumb')]
18strip://div [@class= 'viewMorePhotos'] 18strip://div [@class= 'viewMorePhotos']
19 19
20test_url: http://www.smithsonianmag.com/history-archaeology/The-Goddess-Goes-Home.html \ No newline at end of file 20test_url: http://www.smithsonianmag.com/history-archaeology/The-Goddess-Goes-Home.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/smokingapples.com.txt b/inc/3rdparty/site_config/standard/smokingapples.com.txt
index e22af7a9..c68c1321 100644..100755
--- a/inc/3rdparty/site_config/standard/smokingapples.com.txt
+++ b/inc/3rdparty/site_config/standard/smokingapples.com.txt
@@ -1,5 +1,5 @@
1title: //h2[@class='custom-entry-title'] 1title: //h2[@class='custom-entry-title']
2author: substring-after(//span[@class='author vcard'],'by ') 2author: substring-after(//span[@class='author vcard'],'by ')
3date: substring-after(//span[@class='publ'],'Published on ') 3date: substring-after(//span[@class='publ'],'Published on ')
4body: //div[@class='postentry-content'] 4body: //div[@class='postentry-content']
5test_url: http://smokingapples.com/software/popclip-for-mac/ \ No newline at end of file 5test_url: http://smokingapples.com/software/popclip-for-mac/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/somethingawful.com.txt b/inc/3rdparty/site_config/standard/somethingawful.com.txt
new file mode 100755
index 00000000..48547948
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/somethingawful.com.txt
@@ -0,0 +1,17 @@
1title: //h1
2body: //div[@id = 'content-area']
3author: //p[contains(@class, 'byline')]/a
4autodetect_next_page: yes
5tidy: no
6
7strip_id_or_class: articleid
8strip_id_or_class: logo
9strip_id_or_class: pagebar
10strip_id_or_class: featurenavlinks
11strip_id_or_class: featured_frontpage
12strip_id_or_class: sidebar
13strip_id_or_class: footer
14strip_id_or_class: byline
15strip_id_or_class: logo
16strip_id_or_class: nav_network
17test_url: http://www.somethingawful.com/d/dungeons-and-dragons/wtf-monster-manual.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/songshuhui.net.txt b/inc/3rdparty/site_config/standard/songshuhui.net.txt
new file mode 100755
index 00000000..a9233593
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/songshuhui.net.txt
@@ -0,0 +1,7 @@
1# This filter is tested on:
2# http://songshuhui.net/archives/65522
3# http://songshuhui.net/archives/75760
4title://h2/span/a
5date:substring-before(substring-after(//div[@class='atrctitle']/div, '发表于'),' |')
6body://div[@class='entry']
7test_url: http://songshuhui.net/archives/74819 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sourcebooks.com.txt b/inc/3rdparty/site_config/standard/sourcebooks.com.txt
index 668fc44a..b52169da 100644..100755
--- a/inc/3rdparty/site_config/standard/sourcebooks.com.txt
+++ b/inc/3rdparty/site_config/standard/sourcebooks.com.txt
@@ -1,4 +1,4 @@
1#grab the actual content div 1#grab the actual content div
2body: //div[@class='rt-article'] 2body: //div[@class='rt-article']
3 3
4test_url: http://www.sourcebooks.com/next/sourcebooks-next-our-blog/1601-another-piece-of-the-e-puzzle-or-when-good-ebook-promotions-go-bad.html \ No newline at end of file 4test_url: http://www.sourcebooks.com/next/sourcebooks-next-our-blog/1601-another-piece-of-the-e-puzzle-or-when-good-ebook-promotions-go-bad.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/spectator.co.uk.txt b/inc/3rdparty/site_config/standard/spectator.co.uk.txt
index a05c8395..d0605ed2 100644..100755
--- a/inc/3rdparty/site_config/standard/spectator.co.uk.txt
+++ b/inc/3rdparty/site_config/standard/spectator.co.uk.txt
@@ -1,7 +1,7 @@
1author: /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body']/a[@class='author-link']/child::text() 1author: /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body']/a[@class='author-link']/child::text()
2 2
3body: /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body'] 3body: /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body']
4 4
5# Not very helpful, the title and author are contained by the same element that contains the body 5# Not very helpful, the title and author are contained by the same element that contains the body
6strip: /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body']/h2 | /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body']/a[@class='author-link'] 6strip: /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body']/h2 | /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body']/a[@class='author-link']
7test_url: http://www.spectator.co.uk/arts-and-culture/night-and-day/7449683/spotify-sunday-my-personal-soundtrack.thtml \ No newline at end of file 7test_url: http://www.spectator.co.uk/arts-and-culture/night-and-day/7449683/spotify-sunday-my-personal-soundtrack.thtml \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/spectrum.ieee.org.txt b/inc/3rdparty/site_config/standard/spectrum.ieee.org.txt
index 4b0704a8..aea3627e 100644..100755
--- a/inc/3rdparty/site_config/standard/spectrum.ieee.org.txt
+++ b/inc/3rdparty/site_config/standard/spectrum.ieee.org.txt
@@ -1,3 +1,3 @@
1body://div[@class="articleBody"] 1body://div[@class="articleBody"]
2author://p[@class="articleBodyTtl"] 2author://p[@class="articleBodyTtl"]
3test_url: http://spectrum.ieee.org/semiconductors/processors/behind-intels-new-randomnumber-generator/ \ No newline at end of file 3test_url: http://spectrum.ieee.org/semiconductors/processors/behind-intels-new-randomnumber-generator/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/speirs.org.txt b/inc/3rdparty/site_config/standard/speirs.org.txt
index 3bf859e3..3bf859e3 100644..100755
--- a/inc/3rdparty/site_config/standard/speirs.org.txt
+++ b/inc/3rdparty/site_config/standard/speirs.org.txt
diff --git a/inc/3rdparty/site_config/standard/spiegel.de.txt b/inc/3rdparty/site_config/standard/spiegel.de.txt
index 390c075c..413e0155 100644..100755
--- a/inc/3rdparty/site_config/standard/spiegel.de.txt
+++ b/inc/3rdparty/site_config/standard/spiegel.de.txt
@@ -1,75 +1,75 @@
1# A. Niepel, narya.de@... 1# A. Niepel, narya.de@...
2# - added single_page_link 2# - added single_page_link
3# - added author for default and single page view 3# - added author for default and single page view
4# - added date for single page view 4# - added date for single page view
5# fforst@... 5# fforst@...
6# - Fixed it 6# - Fixed it
7# bode2104@... 7# bode2104@...
8# - Fixed single_page_link 8# - Fixed single_page_link
9# - Included intro text in single page view 9# - Included intro text in single page view
10# - Added body in default view 10# - Added body in default view
11 11
12# set body 12# set body
13tidy: no 13tidy: no
14# body in single page view 14# body in single page view
15body: //div[@id="spArticleContent"] 15body: //div[@id="spArticleContent"]
16# body in default view 16# body in default view
17body: //div[@id="spArticleSection"] 17body: //div[@id="spArticleSection"]
18# body in "Fotostrecke" 18# body in "Fotostrecke"
19body: //div[@id="spBigaContent"] 19body: //div[@id="spBigaContent"]
20 20
21# set date in single page view 21# set date in single page view
22date: //div[@id="spArticleContent"]/h3 22date: //div[@id="spArticleContent"]/h3
23# strip date 23# strip date
24strip: //div[@id="spArticleContent"]/h3 24strip: //div[@id="spArticleContent"]/h3
25# set date in "Fotostrecke" 25# set date in "Fotostrecke"
26date: //div[@id="spBigaDatum"] 26date: //div[@id="spBigaDatum"]
27 27
28#set title in single page view 28#set title in single page view
29title: //div[@id='spArticleContent']/h2 29title: //div[@id='spArticleContent']/h2
30# strip title 30# strip title
31strip: //div[@id='spArticleContent']/h1 31strip: //div[@id='spArticleContent']/h1
32strip: //div[@id='spArticleContent']/h2 32strip: //div[@id='spArticleContent']/h2
33#set title in "Fotostrecke" 33#set title in "Fotostrecke"
34title: //div[@class='spBigaHeadline'] 34title: //div[@class='spBigaHeadline']
35 35
36# set author 36# set author
37author: //p[@class="spAuthor"]/a 37author: //p[@class="spAuthor"]/a
38author: substring-after(//p[@class="spAuthor"], 'Von ') 38author: substring-after(//p[@class="spAuthor"], 'Von ')
39# strip author 39# strip author
40strip: //p[@class='spAuthor'] 40strip: //p[@class='spAuthor']
41 41
42# remove captions 42# remove captions
43strip: //*/span[@class='spPicLayerText'] 43strip: //*/span[@class='spPicLayerText']
44strip: //*/div[@class='spPanoPlayerPaneControl'] 44strip: //*/div[@class='spPanoPlayerPaneControl']
45strip: //*/div[@class='spCredit'] 45strip: //*/div[@class='spCredit']
46strip: //*/div[@class='spCredit']/following-sibling::p 46strip: //*/div[@class='spCredit']/following-sibling::p
47 47
48# remove ads 48# remove ads
49strip: //div[@class='spMInline'] 49strip: //div[@class='spMInline']
50 50
51# remove photogalleries and extras 51# remove photogalleries and extras
52strip: //div[@class='spPhotoGallery'] 52strip: //div[@class='spPhotoGallery']
53strip: //div[@class='spPhotoGallery']/following-sibling::br 53strip: //div[@class='spPhotoGallery']/following-sibling::br
54strip: //div[@class='spAssetAlignleft'] 54strip: //div[@class='spAssetAlignleft']
55strip: //div[contains(@class,'spAsset')] 55strip: //div[contains(@class,'spAsset')]
56strip: //br[@clear='all'] 56strip: //br[@clear='all']
57 57
58# remove community functions 58# remove community functions
59strip: //div[@id='spSocialBookmark'] 59strip: //div[@id='spSocialBookmark']
60strip: //div[contains(@class, 'spCommunityBox')] 60strip: //div[contains(@class, 'spCommunityBox')]
61strip: //div[contains(@class, 'spArticleNewsfeedBox')] 61strip: //div[contains(@class, 'spArticleNewsfeedBox')]
62strip: //div[@class='spArticleCredit'] 62strip: //div[@class='spArticleCredit']
63 63
64# remove clutter in "Fotostrecke" 64# remove clutter in "Fotostrecke"
65strip: //div[@id='spBreadcrumb'] 65strip: //div[@id='spBreadcrumb']
66strip: //div[@id='spBigaLatestEntries'] 66strip: //div[@id='spBigaLatestEntries']
67strip: //div[contains(@class, 'spBigaNavi')] 67strip: //div[contains(@class, 'spBigaNavi')]
68strip: //div[@class='spDottedLine'] 68strip: //div[@class='spDottedLine']
69 69
70# Use link to print article for single page view 70# Use link to print article for single page view
71single_page_link: //a[contains(@href, '-druck')] 71single_page_link: //a[contains(@href, '-druck')]
72 72
73# use next link in "Fotostrecke" 73# use next link in "Fotostrecke"
74next_page_link: //a[@class='spBigaControlForw'] 74next_page_link: //a[@class='spBigaControlForw']
75test_url: http://www.spiegel.de/politik/deutschland/0,1518,787602,00.html \ No newline at end of file 75test_url: http://www.spiegel.de/politik/deutschland/0,1518,787602,00.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/spiked-online.com.txt b/inc/3rdparty/site_config/standard/spiked-online.com.txt
new file mode 100755
index 00000000..7ec39c2b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/spiked-online.com.txt
@@ -0,0 +1,7 @@
1title: //div[@id='articleTitleWrapper' or @id='mainFeature']//h1
2author: //*[@id='authorNameJob']//a
3date: //div[@id='articleMeta']/p
4body: //div[@id='mainFeature']//img | //div[contains(@class, 'fullText')]
5
6test_url: http://www.spiked-online.com/newsite/article/standing_up_to_the_white-coated_gods_of_fortune/13785
7test_url: http://www.spiked-online.com/newsite/article/sex_box_and_the_crisis_of_intimacy/14168 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/spin.com.txt b/inc/3rdparty/site_config/standard/spin.com.txt
index 66f6192b..88eb454c 100644..100755
--- a/inc/3rdparty/site_config/standard/spin.com.txt
+++ b/inc/3rdparty/site_config/standard/spin.com.txt
@@ -1,5 +1,5 @@
1tidy: no 1tidy: no
2body: //section[contains(@class, 'main')] 2body: //section[contains(@class, 'main')]
3strip: //footer 3strip: //footer
4strip: //a[@class='paginated'] 4strip: //a[@class='paginated']
5test_url: http://www.spin.com/articles/bathlands-deep-heart-americas-new-drug-nightmare \ No newline at end of file 5test_url: http://www.spin.com/articles/bathlands-deep-heart-americas-new-drug-nightmare \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/splatf.com.txt b/inc/3rdparty/site_config/standard/splatf.com.txt
index d5671652..3e05a225 100644..100755
--- a/inc/3rdparty/site_config/standard/splatf.com.txt
+++ b/inc/3rdparty/site_config/standard/splatf.com.txt
@@ -1,5 +1,5 @@
1author:string('Dan Frommer/SplatF') 1author:string('Dan Frommer/SplatF')
2date://div[@class='postdate'] 2date://div[@class='postdate']
3body://div[@class='entry'] 3body://div[@class='entry']
4title://div[@class='post']/h1 4title://div[@class='post']/h1
5test_url: http://www.splatf.com/2012/02/month-six/ \ No newline at end of file 5test_url: http://www.splatf.com/2012/02/month-six/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/splitsider.com.txt b/inc/3rdparty/site_config/standard/splitsider.com.txt
index d1d392e7..4bbc7aac 100644..100755
--- a/inc/3rdparty/site_config/standard/splitsider.com.txt
+++ b/inc/3rdparty/site_config/standard/splitsider.com.txt
@@ -1,4 +1,4 @@
1author: //div[@class='byline']/a 1author: //div[@class='byline']/a
2date: //div[@id='date'] 2date: //div[@id='date']
3body: //div[@class='entry'] 3body: //div[@class='entry']
4test_url: http://splitsider.com/2011/10/saturday-nights-children-rob-riggle-2004-2005/ \ No newline at end of file 4test_url: http://splitsider.com/2011/10/saturday-nights-children-rob-riggle-2004-2005/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sport.detik.com.txt b/inc/3rdparty/site_config/standard/sport.detik.com.txt
index b404b829..18552d1e 100644..100755
--- a/inc/3rdparty/site_config/standard/sport.detik.com.txt
+++ b/inc/3rdparty/site_config/standard/sport.detik.com.txt
@@ -1,8 +1,8 @@
1title://div[@class="content_detail"]/h1 1title://div[@class="content_detail"]/h1
2 2
3author://div[@class="author"]/strong 3author://div[@class="author"]/strong
4 4
5date:substring-before(substring-after(//div[@class="content_detail"]/*[@class="date"], ','), ' WIB') 5date:substring-before(substring-after(//div[@class="content_detail"]/*[@class="date"], ','), ' WIB')
6 6
7body://div[@class='text_detail'] 7body://div[@class='text_detail']
8test_url: http://sport.detik.com/sepakbola/read/2012/05/23/065011/1922350/71/agen-silva-ingin-bertahan-di-milan?b99220270 \ No newline at end of file 8test_url: http://sport.detik.com/sepakbola/read/2012/05/23/065011/1922350/71/agen-silva-ingin-bertahan-di-milan?b99220270 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sport.orf.at.txt b/inc/3rdparty/site_config/standard/sport.orf.at.txt
index a794ded9..f0be85c7 100644..100755
--- a/inc/3rdparty/site_config/standard/sport.orf.at.txt
+++ b/inc/3rdparty/site_config/standard/sport.orf.at.txt
@@ -1,11 +1,11 @@
1single_page_link: //div[@id='content']//p[@class='readMore']/a 1single_page_link: //div[@id='content']//p[@class='readMore']/a
2 2
3title: //div[@class='hidden offscreen']/h2 3title: //div[@class='hidden offscreen']/h2
4body: //div[@id="storyText"] 4body: //div[@id="storyText"]
5move_into(//div[@id='storyText']): //div[@class='fact'] 5move_into(//div[@id='storyText']): //div[@class='fact']
6strip: //small[@class='credit'] 6strip: //small[@class='credit']
7strip: //small[@class='caption'] 7strip: //small[@class='caption']
8date: substring-after(//div[@id='storyMeta']//p[@class='date'],'Publiziert am') 8date: substring-after(//div[@id='storyMeta']//p[@class='date'],'Publiziert am')
9strip: //p[@class='toplink'] 9strip: //p[@class='toplink']
10 10
11test_url: http://sport.orf.at/stories/2084851/ \ No newline at end of file 11test_url: http://sport.orf.at/stories/2084851/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sport365.fr.txt b/inc/3rdparty/site_config/standard/sport365.fr.txt
new file mode 100755
index 00000000..8688f40b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/sport365.fr.txt
@@ -0,0 +1,8 @@
1body: //h2[contains(@class, 'body_head')] | //div[@id='img_article' or contains(@class, 'body_content')]
2body: //div[contains(@class, 'cpanel')]//div[contains(@class, 'thumbnails')]
3prune: no
4strip: //div[starts-with(@class, 'actu_')]
5strip: //div[contains(@class, 'data')]
6
7test_url: http://www.sport365.fr/basketball/nba/new-york-accord-avec-toronto-pour-bargnani-1038773.shtml
8test_url: http://www.sport365.fr/rss.xml \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sports.espn.go.com.txt b/inc/3rdparty/site_config/standard/sports.espn.go.com.txt
index e0f8223c..8c21ef2b 100644..100755
--- a/inc/3rdparty/site_config/standard/sports.espn.go.com.txt
+++ b/inc/3rdparty/site_config/standard/sports.espn.go.com.txt
@@ -1,12 +1,12 @@
1title: //div[@class='headline'] | //div[@class='mod-header']/h3 1title: //div[@class='headline'] | //div[@class='mod-header']/h3
2body: //div[contains(@class, 'article')] 2body: //div[contains(@class, 'article')]
3strip: //div[contains(@class, 'mod-inline')] 3strip: //div[contains(@class, 'mod-inline')]
4strip: //*/span[@class='page-actions']/a 4strip: //*/span[@class='page-actions']/a
5strip: //*/span[@class='page-actions']/a 5strip: //*/span[@class='page-actions']/a
6strip: //div[@class='page-actions']/* 6strip: //div[@class='page-actions']/*
7strip: //div[@class='headline'] | //div[@class='mod-header']/h3 7strip: //div[@class='headline'] | //div[@class='mod-header']/h3
8strip: //div[@class='mod-blog-navigation'] 8strip: //div[@class='mod-blog-navigation']
9strip: //div[@class='monthday'] 9strip: //div[@class='monthday']
10strip: //div[@class='time'] 10strip: //div[@class='time']
11strip: //div[@class='timeofday'] 11strip: //div[@class='timeofday']
12test_url: http://sports.espn.go.com/espn/page2/story?page=simmonsnfl2010/lebron_james_return_clevelend&sportCat=nba \ No newline at end of file 12test_url: http://sports.espn.go.com/espn/page2/story?page=simmonsnfl2010/lebron_james_return_clevelend&sportCat=nba \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sports.yahoo.com.txt b/inc/3rdparty/site_config/standard/sports.yahoo.com.txt
index 96a3bb71..b0f57e2c 100644..100755
--- a/inc/3rdparty/site_config/standard/sports.yahoo.com.txt
+++ b/inc/3rdparty/site_config/standard/sports.yahoo.com.txt
@@ -1,9 +1,9 @@
1title: //div[@id='article']/div[@class='hd']/h1 1title: //div[@id='article']/div[@class='hd']/h1
2body: //p[@id='byline'] | //div[@id='article']//div[@class='body_copy 0'] 2body: //p[@id='byline'] | //div[@id='article']//div[@class='body_copy 0']
3strip: //div[@class='foot'] 3strip: //div[@class='foot']
4strip: //div[@id='sidebar']//div[@class='ft'] 4strip: //div[@id='sidebar']//div[@class='ft']
5strip: //p[@id='byline']//em 5strip: //p[@id='byline']//em
6tidy: no 6tidy: no
7prune: no 7prune: no
8 8
9test_url: http://sports.yahoo.com/nba/news?slug=ap-nbafinals \ No newline at end of file 9test_url: http://sports.yahoo.com/nba/news?slug=ap-nbafinals \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sportschau.de.txt b/inc/3rdparty/site_config/standard/sportschau.de.txt
index 6500e75c..1e58b520 100644..100755
--- a/inc/3rdparty/site_config/standard/sportschau.de.txt
+++ b/inc/3rdparty/site_config/standard/sportschau.de.txt
@@ -1,22 +1,22 @@
1title://div[@id='ardContent']/h1 1title://div[@id='ardContent']/h1
2 2
3author://p[@id='ardAutor'] 3author://p[@id='ardAutor']
4author://span[@id='ardQuelle'] 4author://span[@id='ardQuelle']
5author:string('sportschau.de') 5author:string('sportschau.de')
6 6
7date:substring-after(//span[@id='ardStand'], 'Stand: ') 7date:substring-after(//span[@id='ardStand'], 'Stand: ')
8 8
9body://div[@id='ardContent'] 9body://div[@id='ardContent']
10 10
11strip://div[@id='ardContent']/h1 11strip://div[@id='ardContent']/h1
12strip://p[@id='ardAutor'] 12strip://p[@id='ardAutor']
13strip: //div[@class='embeddedPlayer_clipinfo'] 13strip: //div[@class='embeddedPlayer_clipinfo']
14strip: //div[@class='ardMehrZumThemaRechts'] 14strip: //div[@class='ardMehrZumThemaRechts']
15strip: //*[contains(@class, 'inv')] 15strip: //*[contains(@class, 'inv')]
16 16
17strip: //p[@id='ardAbbinder'] 17strip: //p[@id='ardAbbinder']
18strip: //div[@class='socialBookmarks'] 18strip: //div[@class='socialBookmarks']
19strip: //div[@id='ardContentEnd'] 19strip: //div[@id='ardContentEnd']
20strip: //div[@id='ardDisclaimer'] 20strip: //div[@id='ardDisclaimer']
21strip: //div[@id='ardRechteSpalte'] 21strip: //div[@id='ardRechteSpalte']
22test_url: http://www.sportschau.de/sp/fussball/news201203/17/analyse_leverkusen_gladbach.jsp \ No newline at end of file 22test_url: http://www.sportschau.de/sp/fussball/news201203/17/analyse_leverkusen_gladbach.jsp \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sportsillustrated.cnn.com.txt b/inc/3rdparty/site_config/standard/sportsillustrated.cnn.com.txt
index afc5879f..b3da8138 100644..100755
--- a/inc/3rdparty/site_config/standard/sportsillustrated.cnn.com.txt
+++ b/inc/3rdparty/site_config/standard/sportsillustrated.cnn.com.txt
@@ -1,26 +1,26 @@
1# main sportsillustrated.com articles 1# main sportsillustrated.com articles
2# 2#
3body: //div[@id="cnnStoryContent"] 3body: //div[@id="cnnStoryContent"]
4title: //div[@id="cnnStoryHeadline"]//h1 4title: //div[@id="cnnStoryHeadline"]//h1
5author: //div[@id="cnnSubBanner"]//strong 5author: //div[@id="cnnSubBanner"]//strong
6date: substring-after(//div[@id="cnnTimeStamp"], "Updated: ") 6date: substring-after(//div[@id="cnnTimeStamp"], "Updated: ")
7date: substring-after(//div[@id="cnnTimeStamp"], "Posted: ") 7date: substring-after(//div[@id="cnnTimeStamp"], "Posted: ")
8 8
9# kill ugly font buttons 9# kill ugly font buttons
10strip: //div[@id="cnnSCFontButtons"] 10strip: //div[@id="cnnSCFontButtons"]
11 11
12# kill misc filler videos & etc 12# kill misc filler videos & etc
13strip: //div[@class="cnnDivideContent"] 13strip: //div[@class="cnnDivideContent"]
14strip: //*[@class="cnnTMbox"] 14strip: //*[@class="cnnTMbox"]
15 15
16# si vault articles 16# si vault articles
17# ------------- 17# -------------
18body: //div[@class="siv_artPara"] 18body: //div[@class="siv_artPara"]
19title: //div[@class="siv_artHeader"]//h1 19title: //div[@class="siv_artHeader"]//h1
20author: //div[@class="byline"] 20author: //div[@class="byline"]
21date: //div[@class="date"] 21date: //div[@class="date"]
22 22
23next_page_link: //div[@id='cnnStoryContinue']/a 23next_page_link: //div[@id='cnnStoryContinue']/a
24strip_id_or_class: cnnstorypagination 24strip_id_or_class: cnnstorypagination
25 25
26test_url: http://sportsillustrated.cnn.com/2012/writers/peter_king/02/27/combine/index.html \ No newline at end of file 26test_url: http://sportsillustrated.cnn.com/2012/writers/peter_king/02/27/combine/index.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sprengsatz.de.txt b/inc/3rdparty/site_config/standard/sprengsatz.de.txt
index 16636bc5..5b683811 100644..100755
--- a/inc/3rdparty/site_config/standard/sprengsatz.de.txt
+++ b/inc/3rdparty/site_config/standard/sprengsatz.de.txt
@@ -1,5 +1,5 @@
1title: //h2 1title: //h2
2author: string('Michael Spreng') 2author: string('Michael Spreng')
3date: //div[@class='date'] 3date: //div[@class='date']
4body: //div[@class='entry'] 4body: //div[@class='entry']
5test_url: http://www.sprengsatz.de/?p=3691 \ No newline at end of file 5test_url: http://www.sprengsatz.de/?p=3691 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sqlite.org.txt b/inc/3rdparty/site_config/standard/sqlite.org.txt
index 4872519a..15763c32 100644..100755
--- a/inc/3rdparty/site_config/standard/sqlite.org.txt
+++ b/inc/3rdparty/site_config/standard/sqlite.org.txt
@@ -1,7 +1,7 @@
1body: //div[@id='ff-body'] 1body: //div[@id='ff-body']
2 2
3replace_string(<h1 align=center>): <div id="ff-body"><h1 align=center> 3replace_string(<h1 align=center>): <div id="ff-body"><h1 align=center>
4 4
5prune: no 5prune: no
6 6
7test_url: http://www.sqlite.org/fileformat2.html \ No newline at end of file 7test_url: http://www.sqlite.org/fileformat2.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/squashed.tumblr.com.txt b/inc/3rdparty/site_config/standard/squashed.tumblr.com.txt
index 388209a9..8eae13ed 100644..100755
--- a/inc/3rdparty/site_config/standard/squashed.tumblr.com.txt
+++ b/inc/3rdparty/site_config/standard/squashed.tumblr.com.txt
@@ -1,4 +1,4 @@
1body: //div[@class='content'] 1body: //div[@class='content']
2date: substring-before( //div[@class='unit dateAndNotes'], 'with') 2date: substring-before( //div[@class='unit dateAndNotes'], 'with')
3title: //h3 3title: //h3
4test_url: http://squashed.tumblr.com/post/17613522228/lets-stop-blaming-the-victims-of-predatory-lending \ No newline at end of file 4test_url: http://squashed.tumblr.com/post/17613522228/lets-stop-blaming-the-victims-of-predatory-lending \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/stackoverflow.com.txt b/inc/3rdparty/site_config/standard/stackoverflow.com.txt
index e5317bac..bb95e93a 100644..100755
--- a/inc/3rdparty/site_config/standard/stackoverflow.com.txt
+++ b/inc/3rdparty/site_config/standard/stackoverflow.com.txt
@@ -1,14 +1,14 @@
1body: //div[@class='post-text' or @class='user-action-time' or @class='user-details' or @class='vote'] | //div[@id='answers-header']//h2 1body: //div[@class='post-text' or @class='user-action-time' or @class='user-details' or @class='vote'] | //div[@id='answers-header']//h2
2 2
3replace_string(<div class="user-details"><br></div>): <!-- nothing --> 3replace_string(<div class="user-details"><br></div>): <!-- nothing -->
4replace_string(<div class="vote">): <div class="vote"><h3>Vote count: 4replace_string(<div class="vote">): <div class="vote"><h3>Vote count:
5 5
6strip_id_or_class: vote-up 6strip_id_or_class: vote-up
7strip_id_or_class: vote-down 7strip_id_or_class: vote-down
8strip_id_or_class: star-off 8strip_id_or_class: star-off
9strip_id_or_class: favoritecount 9strip_id_or_class: favoritecount
10strip_id_or_class: -share 10strip_id_or_class: -share
11strip_id_or_class: badgecount 11strip_id_or_class: badgecount
12 12
13 13
14test_url: http://stackoverflow.com/questions/4484289/id-like-to-understand-the-jquery-plugin-syntax \ No newline at end of file 14test_url: http://stackoverflow.com/questions/4484289/id-like-to-understand-the-jquery-plugin-syntax \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/stalbansreview.co.uk.txt b/inc/3rdparty/site_config/standard/stalbansreview.co.uk.txt
index bde14217..a0f1587c 100644..100755
--- a/inc/3rdparty/site_config/standard/stalbansreview.co.uk.txt
+++ b/inc/3rdparty/site_config/standard/stalbansreview.co.uk.txt
@@ -1,14 +1,14 @@
1title: //div[@class='articleLeft']/h3 1title: //div[@class='articleLeft']/h3
2 2
3author: substring-after(//span[@class='articleAuthor']/a,'By ') 3author: substring-after(//span[@class='articleAuthor']/a,'By ')
4 4
5date: substring-before(//span[@class='articleDateTime'],'in ') 5date: substring-before(//span[@class='articleDateTime'],'in ')
6 6
7body: //div[@class='articleLeft'] 7body: //div[@class='articleLeft']
8strip: //div[@class='articleMoreNews'] 8strip: //div[@class='articleMoreNews']
9strip: //div[@class='articleLeft']/h3 9strip: //div[@class='articleLeft']/h3
10strip: //div[@class='articleLeft']/p[@class='articleInfo clearfix'] 10strip: //div[@class='articleLeft']/p[@class='articleInfo clearfix']
11 11
12# Remove duplicate title from text 12# Remove duplicate title from text
13strip: //div[@id='site']/div[5][@class='holder']/div[1][@class='hBlock']/div[1][@class='sglCol article']/h3 13strip: //div[@id='site']/div[5][@class='holder']/div[1][@class='hBlock']/div[1][@class='sglCol article']/h3
14test_url: http://www.stalbansreview.co.uk/news/9581446.New_roundabout_in_King_Harry_Lane/r/?ref=rss \ No newline at end of file 14test_url: http://www.stalbansreview.co.uk/news/9581446.New_roundabout_in_King_Harry_Lane/r/?ref=rss \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/standard.co.uk.txt b/inc/3rdparty/site_config/standard/standard.co.uk.txt
index 22a33484..71a2bda1 100644..100755
--- a/inc/3rdparty/site_config/standard/standard.co.uk.txt
+++ b/inc/3rdparty/site_config/standard/standard.co.uk.txt
@@ -1,16 +1,16 @@
1autodetect_next_page: no 1autodetect_next_page: no
2footnotes: no 2footnotes: no
3dissolve: //div[@class="column-2"]//div[@class="widget"] 3dissolve: //div[@class="column-2"]//div[@class="widget"]
4dissolve: //div[@class="column-2"]//div 4dissolve: //div[@class="column-2"]//div
5 5
6author: //div[@class="innerbyline"]/a 6author: //div[@class="innerbyline"]/a
7strip: //div[@class="innerbyline"]/a 7strip: //div[@class="innerbyline"]/a
8 8
9strip: //p[@class="dateline"] 9strip: //p[@class="dateline"]
10date: //p[@class="dateline"] 10date: //p[@class="dateline"]
11 11
12title: //h1[@class="title"] 12title: //h1[@class="title"]
13author: //div[@class="innerbyline"]/a 13author: //div[@class="innerbyline"]/a
14date: //p[@class="dateline"] 14date: //p[@class="dateline"]
15body: //div[@class="column-2"] 15body: //div[@class="column-2"]
16test_url: http://www.standard.co.uk/lifestyle/esmagazine/grace-and-flavour-pizarro-7938350.html \ No newline at end of file 16test_url: http://www.standard.co.uk/lifestyle/esmagazine/grace-and-flavour-pizarro-7938350.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/staradvertiser.com.txt b/inc/3rdparty/site_config/standard/staradvertiser.com.txt
index 0579455f..254e2c2b 100644..100755
--- a/inc/3rdparty/site_config/standard/staradvertiser.com.txt
+++ b/inc/3rdparty/site_config/standard/staradvertiser.com.txt
@@ -1,11 +1,11 @@
1title: //h1[@id='storyTitle'] 1title: //h1[@id='storyTitle']
2author: substring-after(//span[@class='hsa_postCredit'], 'By ') 2author: substring-after(//span[@class='hsa_postCredit'], 'By ')
3date://span[@class='hsa_dateStamp'] 3date://span[@class='hsa_dateStamp']
4body: //div[@class='storytext'] 4body: //div[@class='storytext']
5strip_id_or_class: insideStoryAd 5strip_id_or_class: insideStoryAd
6strip_id_or_class: printDesc 6strip_id_or_class: printDesc
7strip_id_or_class: sb_2010_story_tools 7strip_id_or_class: sb_2010_story_tools
8strip_id_or_class: FBConnectButton_Text 8strip_id_or_class: FBConnectButton_Text
9strip_id_or_class: breadcrumbs 9strip_id_or_class: breadcrumbs
10prune: no 10prune: no
11test_url: http://www.staradvertiser.com/news/20111112_World_leaders_step_onto_isle_stage.html \ No newline at end of file 11test_url: http://www.staradvertiser.com/news/20111112_World_leaders_step_onto_isle_stage.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/stephenfry.com.txt b/inc/3rdparty/site_config/standard/stephenfry.com.txt
index 1169984f..efd1ec2b 100644..100755
--- a/inc/3rdparty/site_config/standard/stephenfry.com.txt
+++ b/inc/3rdparty/site_config/standard/stephenfry.com.txt
@@ -1,8 +1,8 @@
1title: /html/head/meta[@name='title']/@content 1title: /html/head/meta[@name='title']/@content
2author: //span[contains(concat(' ',normalize-space(@class),' '),' article_author ')]/a 2author: //span[contains(concat(' ',normalize-space(@class),' '),' article_author ')]/a
3date: //span[contains(concat(' ',normalize-space(@class),' '),' article_date ')] 3date: //span[contains(concat(' ',normalize-space(@class),' '),' article_date ')]
4 4
5body: //div[@class='entry-content'] 5body: //div[@class='entry-content']
6 6
7single_page_link: //p[@class='pagination']/a 7single_page_link: //p[@class='pagination']/a
8test_url: http://www.stephenfry.com/2011/10/06/steve-jobs/ \ No newline at end of file 8test_url: http://www.stephenfry.com/2011/10/06/steve-jobs/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/stlbeacon.org.txt b/inc/3rdparty/site_config/standard/stlbeacon.org.txt
index d66fee9f..75379a9c 100644..100755
--- a/inc/3rdparty/site_config/standard/stlbeacon.org.txt
+++ b/inc/3rdparty/site_config/standard/stlbeacon.org.txt
@@ -1,5 +1,5 @@
1title: article/h1 1title: article/h1
2author: //p[@class='byline'] 2author: //p[@class='byline']
3date: //p[@class='date'] 3date: //p[@class='date']
4body: //div[@class='body'] 4body: //div[@class='body']
5test_url: https://www.stlbeacon.org/#!/content/23404/mogop_caucus_031712 \ No newline at end of file 5test_url: https://www.stlbeacon.org/#!/content/23404/mogop_caucus_031712 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/stockholm.etc.se.txt b/inc/3rdparty/site_config/standard/stockholm.etc.se.txt
index 073043d5..2f4f8cb8 100644..100755
--- a/inc/3rdparty/site_config/standard/stockholm.etc.se.txt
+++ b/inc/3rdparty/site_config/standard/stockholm.etc.se.txt
@@ -1,5 +1,5 @@
1strip_id_or_class: 'left' 1strip_id_or_class: 'left'
2strip_id_or_class: 'right' 2strip_id_or_class: 'right'
3strip_id_or_class: 'block-belowcontent' 3strip_id_or_class: 'block-belowcontent'
4 4
5test_url: http://stockholm.etc.se/reportage/bakom-stangda-dorrar-pa-fas-3-massa \ No newline at end of file 5test_url: http://stockholm.etc.se/reportage/bakom-stangda-dorrar-pa-fas-3-massa \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/stockholmsfria.nu.txt b/inc/3rdparty/site_config/standard/stockholmsfria.nu.txt
new file mode 100755
index 00000000..cc8c28b8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/stockholmsfria.nu.txt
@@ -0,0 +1,7 @@
1body: //div[contains(@class, 'layout__inner')]//div[contains(@class, 'file-image') or contains(@class, 'node__content')]
2author: //article//div[contains(@class, 'field-byline')]
3strip_id_or_class: rekommenderade
4strip_id_or_class: disqus
5strip_id_or_class: annonser
6
7test_url: http://www.stockholmsfria.nu/artikel/112068 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/straightdope.com.txt b/inc/3rdparty/site_config/standard/straightdope.com.txt
new file mode 100755
index 00000000..f01d7ad1
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/straightdope.com.txt
@@ -0,0 +1,6 @@
1body: //div[@id='article' or @id='current_illustration']
2title: //div[@id='article']//h1
3date: //div[@id='article']//div[@class='date']
4prune: no
5
6test_url: http://www.straightdope.com/columns/read/947/whatever-happened-to-adoption-of-the-metric-system-in-the-u-s \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/streetsblog.net.txt b/inc/3rdparty/site_config/standard/streetsblog.net.txt
index 0b62a3d6..6cf03ca6 100644..100755
--- a/inc/3rdparty/site_config/standard/streetsblog.net.txt
+++ b/inc/3rdparty/site_config/standard/streetsblog.net.txt
@@ -1,7 +1,7 @@
1title: //h2[@class="post-title"] 1title: //h2[@class="post-title"]
2date: //span[@class="post-date"] 2date: //span[@class="post-date"]
3body: //div[@class="post-entry"] 3body: //div[@class="post-entry"]
4 4
5#This is also good for *.streetsblog.org, for example: 5#This is also good for *.streetsblog.org, for example:
6#http://dc.streetsblog.org/2011/10/21/friday-job-market/ 6#http://dc.streetsblog.org/2011/10/21/friday-job-market/
7test_url: http://streetsblog.net/2011/10/20/look-out-below-one-in-nine-bridges-structurally-deficient-reports-t4a/ \ No newline at end of file 7test_url: http://streetsblog.net/2011/10/20/look-out-below-one-in-nine-bridges-structurally-deficient-reports-t4a/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/stuff.co.nz.txt b/inc/3rdparty/site_config/standard/stuff.co.nz.txt
index 12fd0939..3756092c 100644..100755
--- a/inc/3rdparty/site_config/standard/stuff.co.nz.txt
+++ b/inc/3rdparty/site_config/standard/stuff.co.nz.txt
@@ -1,22 +1,22 @@
1title://div[@id='left_col']/h1 1title://div[@id='left_col']/h1
2author:substring-after(//span[contains(@class,'storycredit')],'BY ') 2author:substring-after(//span[contains(@class,'storycredit')],'BY ')
3author://span[contains(@class,'storycredit')] 3author://span[contains(@class,'storycredit')]
4date:substring-after(//div[contains(@class,'toolbox_date')],'Last updated ') 4date:substring-after(//div[contains(@class,'toolbox_date')],'Last updated ')
5date://div[contains(@class,'toolbox_date')] 5date://div[contains(@class,'toolbox_date')]
6body://div[@id='left_col'] 6body://div[@id='left_col']
7 7
8strip_id_or_class: toolbox 8strip_id_or_class: toolbox
9strip_id_or_class: story_features 9strip_id_or_class: story_features
10strip_id_or_class: sharebox_new 10strip_id_or_class: sharebox_new
11strip_id_or_class: related_box 11strip_id_or_class: related_box
12strip_id_or_class: sponsored_links 12strip_id_or_class: sponsored_links
13strip_id_or_class: hidden_ad 13strip_id_or_class: hidden_ad
14strip_id_or_class: story_content_top 14strip_id_or_class: story_content_top
15strip_id_or_class: total_number 15strip_id_or_class: total_number
16strip_id_or_class: sort_order 16strip_id_or_class: sort_order
17strip_id_or_class: subscribe_order 17strip_id_or_class: subscribe_order
18 18
19strip://div[contains(@class,'ad_story')] 19strip://div[contains(@class,'ad_story')]
20 20
21test_url: http://www.stuff.co.nz/national/politics/3930344/PM-issues-challenge 21test_url: http://www.stuff.co.nz/national/politics/3930344/PM-issues-challenge
22test_url: http://www.stuff.co.nz/entertainment/7045944/International-praise-for-Ladyhawke \ No newline at end of file 22test_url: http://www.stuff.co.nz/entertainment/7045944/International-praise-for-Ladyhawke \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/stumbleupon.com.txt b/inc/3rdparty/site_config/standard/stumbleupon.com.txt
index 85682166..9adc3c50 100644..100755
--- a/inc/3rdparty/site_config/standard/stumbleupon.com.txt
+++ b/inc/3rdparty/site_config/standard/stumbleupon.com.txt
@@ -1,3 +1,3 @@
1single_page_link: //iframe[@id='stumbleFrame']/@src 1single_page_link: //iframe[@id='tb-stumble-frame']/@src
2 2
3test_url: www.stumbleupon.com/su/35V0wB/zouchmagazine.com/poetry-violet/ \ No newline at end of file 3test_url: http://www.stumbleupon.com/su/35V0wB/zouchmagazine.com/poetry-violet/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/subtraction.com.txt b/inc/3rdparty/site_config/standard/subtraction.com.txt
index 454e37b1..9ba6eb77 100644..100755
--- a/inc/3rdparty/site_config/standard/subtraction.com.txt
+++ b/inc/3rdparty/site_config/standard/subtraction.com.txt
@@ -1,17 +1,17 @@
1title: //*[@id='posts']/div[1]/h2 1title: //*[@id='posts']/div[1]/h2
2author: //*[@id='posts']/div[1]/div[2]/span[2]/a 2author: //*[@id='posts']/div[1]/div[2]/span[2]/a
3date: //*[@class='date'] 3date: //*[@class='date']
4body: //div[@class='body-lead'] 4body: //div[@class='body-lead']
5 5
6# take out the bit saying 'body' 6# take out the bit saying 'body'
7strip: //div[@class='body-lead']/div[@class='info-label'] 7strip: //div[@class='body-lead']/div[@class='info-label']
8 8
9 9
10 10
11 11
12 12
13 13
14 14
15 15
16 16
17test_url: http://www.subtraction.com/2011/02/01/unnecessary-explanations \ No newline at end of file 17test_url: http://www.subtraction.com/2011/02/01/unnecessary-explanations \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sueddeutsche.de.txt b/inc/3rdparty/site_config/standard/sueddeutsche.de.txt
index 4aa9410c..74b8d451 100644..100755
--- a/inc/3rdparty/site_config/standard/sueddeutsche.de.txt
+++ b/inc/3rdparty/site_config/standard/sueddeutsche.de.txt
@@ -1,18 +1,18 @@
1# 2012-12-04: complete rewrite after Sddeutsche.de relaunch - carlo@... 1# 2012-12-04: complete rewrite after Süddeutsche.de relaunch - carlo@...
2 2
3single_page_link: //a[ contains( @href, "/2.220/" ) ] 3single_page_link: //a[ contains( @href, "/2.220/" ) ]
4 4
5body: //article[@id="sitecontent"]/section[@class="body"] 5body: //article[@id="sitecontent"]/section[@class="body"]
6author: //address[@class="author"] 6author: //address[@class="author"]
7date: //div[@class="header"]//h1//span[@class="updated"] 7date: //div[@class="header"]//h1//span[@class="updated"]
8wrap_in(small): //div[@class="footer"] 8wrap_in(small): //div[@class="footer"]
9wrap_in(i): //figcaption/h3 9wrap_in(i): //figcaption/h3
10dissolve: //figcaption//h3 10dissolve: //figcaption//h3
11dissolve: //figure/div[@class="body"] 11dissolve: //figure/div[@class="body"]
12dissolve: //figure/a 12dissolve: //figure/a
13 13
14strip: //figure[ not( contains(@class, "zoomimage" ) ) ] 14strip: //figure[ not( contains(@class, "zoomimage" ) ) ]
15strip: //div[@data-onlineonly="true"] 15strip: //div[@data-onlineonly="true"]
16strip: //address[@class="author"] 16strip: //address[@class="author"]
17 17
18test_url: http://www.sueddeutsche.de/muenchen/mietshaus-am-gaertnerplatz-alles-muss-raus-1.1556693 \ No newline at end of file 18test_url: http://www.sueddeutsche.de/muenchen/mietshaus-am-gaertnerplatz-alles-muss-raus-1.1556693 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/summify.com.txt b/inc/3rdparty/site_config/standard/summify.com.txt
index 1128e1bb..1128e1bb 100644..100755
--- a/inc/3rdparty/site_config/standard/summify.com.txt
+++ b/inc/3rdparty/site_config/standard/summify.com.txt
diff --git a/inc/3rdparty/site_config/standard/suntimes.com.txt b/inc/3rdparty/site_config/standard/suntimes.com.txt
index 13390e4f..6d4594cf 100644..100755
--- a/inc/3rdparty/site_config/standard/suntimes.com.txt
+++ b/inc/3rdparty/site_config/standard/suntimes.com.txt
@@ -1,14 +1,14 @@
1title: //div[@class='story-details']/h1 1title: //div[@class='story-details']/h1
2date: //span[@class='date-time'] 2date: //span[@class='date-time']
3Author: substring-after(//p[@class='by-line'], 'By ') 3Author: substring-after(//p[@class='by-line'], 'By ')
4 4
5strip: //div[@class='videoThumbnails'] 5strip: //div[@class='videoThumbnails']
6strip: //div[@class='ad-square2-container'] 6strip: //div[@class='ad-square2-container']
7strip: //div[@class='homeDeliveryContainer5'] 7strip: //div[@class='homeDeliveryContainer5']
8 8
9strip: //div[@class='image-description'] 9strip: //div[@class='image-description']
10strip: //div[@id='internal-side-bar'] 10strip: //div[@id='internal-side-bar']
11 11
12strip: //span[@class='hide'] 12strip: //span[@class='hide']
13strip: //div[@class='date'] 13strip: //div[@class='date']
14test_url: http://www.suntimes.com/technology/ihnatko/8816567-452/review-kindle-fire-is-no-ipad-killer-but-it-is-a-killer-device.html \ No newline at end of file 14test_url: http://www.suntimes.com/technology/ihnatko/8816567-452/review-kindle-fire-is-no-ipad-killer-but-it-is-a-killer-device.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/svd.se.txt b/inc/3rdparty/site_config/standard/svd.se.txt
index 02b5b8ca..bc0a1ca0 100644..100755
--- a/inc/3rdparty/site_config/standard/svd.se.txt
+++ b/inc/3rdparty/site_config/standard/svd.se.txt
@@ -1,4 +1,14 @@
1# Ads 1body: //div[@id='article-content']
2strip_id_or_class: articlead 2author: //div[@id='article']//div[@class='byline']/p
3 3
4test_url: http://www.svd.se/nyheter/inrikes/manga-huggormsbitna-golfare_5004031.svd \ No newline at end of file 4# Ads
5strip_id_or_class: articlead
6
7# Sharing
8strip_id_or_class: share
9
10prune: no
11
12test_url: http://www.svd.se/nyheter/inrikes/oppositionen-stoppar-skattesankning_8531228.svd
13test_url: http://www.svd.se/nyheter/inrikes/manga-huggormsbitna-golfare_5004031.svd
14test_url: http://www.svd.se/?service=rss&type=senastenytt \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/svt.se.txt b/inc/3rdparty/site_config/standard/svt.se.txt
new file mode 100755
index 00000000..ba35f7d1
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/svt.se.txt
@@ -0,0 +1,16 @@
1title: //article[@role='main']//h1
2body: //article[@role='main']
3strip: //aside
4replace_string(<noscript>): <div>
5replace_string(</noscript>): </div>
6strip_id_or_class: svtHide-No-Js
7strip_id_or_class: aside
8strip_id_or_class: Aside
9strip_id_or_class: hidden
10strip_id_or_class: Share
11tidy: no
12prune: no
13
14test_url: http://www.svt.se/ug/framtidsdrommar-om-jobb-blev-lackande-gifthal
15test_url: http://www.svt.se/nyheter/het-debatt-mellan-borg-och-andersson
16test_url: http://www.svt.se/nyheter/regionalt/svtsormland/sj-tag-evakuerades-efter-rokdrama \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sydsvenskan.se.txt b/inc/3rdparty/site_config/standard/sydsvenskan.se.txt
index da6772aa..24ba1426 100644..100755
--- a/inc/3rdparty/site_config/standard/sydsvenskan.se.txt
+++ b/inc/3rdparty/site_config/standard/sydsvenskan.se.txt
@@ -1,11 +1,18 @@
1title: //h1 1title: //h1
2 2
3author: //a[contains(@href, '/sok/?')]/text() 3author: //a[contains(@href, '/sok/?')]/text()
4 4
5date: substring-after(//span[@class='date'], 'Publicerad ') 5date: //meta[@name='bi3dPubDate']/@content
6 6
7body: //div[@class='two_column_left'] 7body: (//div[contains(@class, 'slider_wrapper')])[1] | //div[@id='article_image' or @class='two_column_left']
8strip_id_or_class: story 8strip_id_or_class: story
9strip: //div[@class='leadText saplo:lead']/h5 9strip_id_or_class: article_body_ad
10 10strip: //div[@class='leadText saplo:lead']/h5
11test_url: http://www.sydsvenskan.se/kultur-och-nojen/-jag-vill-garna--stanna-- \ No newline at end of file 11
12replace_string(<br />): <br /><br />
13
14prune: no
15
16test_url: http://www.sydsvenskan.se/malmo/allt-jag-ager-ligger-pa-botten/
17test_url: http://www.sydsvenskan.se/kultur-och-nojen/-jag-vill-garna--stanna--
18test_url: http://www.sydsvenskan.se/rss.xml \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/symmetrymagazine.org.txt b/inc/3rdparty/site_config/standard/symmetrymagazine.org.txt
index 3109c0e7..5bcfb9ef 100644..100755
--- a/inc/3rdparty/site_config/standard/symmetrymagazine.org.txt
+++ b/inc/3rdparty/site_config/standard/symmetrymagazine.org.txt
@@ -1,12 +1,12 @@
1title: //div[contains(@class, "post")]/h2 1title: //div[contains(@class, "post")]/h2
2 2
3author: //div[contains(@class, "post")]/p[position()=last()]/text()[1] 3author: //div[contains(@class, "post")]/p[position()=last()]/text()[1]
4 4
5date: //div[contains(@class, "post")]/p[1] 5date: //div[contains(@class, "post")]/p[1]
6 6
7body: //div[contains(@class, "post")] 7body: //div[contains(@class, "post")]
8 8
9strip: //div[contains(@class, "post")]/h2[1] 9strip: //div[contains(@class, "post")]/h2[1]
10strip: //div[contains(@class, "post")]/p[1] 10strip: //div[contains(@class, "post")]/p[1]
11strip: //div[contains(@class, "post")]/p[position()=last()] 11strip: //div[contains(@class, "post")]/p[position()=last()]
12test_url: http://www.symmetrymagazine.org/breaking/?p=12784 \ No newline at end of file 12test_url: http://www.symmetrymagazine.org/breaking/?p=12784 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sz-magazin.sueddeutsche.de.txt b/inc/3rdparty/site_config/standard/sz-magazin.sueddeutsche.de.txt
index c3e34977..e058032c 100644..100755
--- a/inc/3rdparty/site_config/standard/sz-magazin.sueddeutsche.de.txt
+++ b/inc/3rdparty/site_config/standard/sz-magazin.sueddeutsche.de.txt
@@ -1,15 +1,15 @@
1title: //h1 1title: //h1
2body://div[@class='drucken'] 2body://div[@class='drucken']
3author: substring-after(//span[@class='autor'], 'Von ') 3author: substring-after(//span[@class='autor'], 'Von ')
4author: //span[@class='autor'] 4author: //span[@class='autor']
5 5
6single_page_link://a[contains(@href, '/drucken/')] 6single_page_link://a[contains(@href, '/drucken/')]
7convert_double_br_tags:yes 7convert_double_br_tags:yes
8 8
9dissolve://div[@class='vorspann'] 9dissolve://div[@class='vorspann']
10 10
11strip://h1 11strip://h1
12strip_id_or_class: klassifizierung 12strip_id_or_class: klassifizierung
13strip_id_or_class: source 13strip_id_or_class: source
14strip_id_or_class: autor 14strip_id_or_class: autor
15test_url: http://sz-magazin.sueddeutsche.de/texte/anzeigen/37567 \ No newline at end of file 15test_url: http://sz-magazin.sueddeutsche.de/texte/anzeigen/37567 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sz.de.txt b/inc/3rdparty/site_config/standard/sz.de.txt
new file mode 100755
index 00000000..f67637d2
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/sz.de.txt
@@ -0,0 +1,18 @@
1# 2012-12-04: complete rewrite after Süddeutsche.de relaunch - carlo@...
2
3single_page_link: //a[ contains( @href, "/2.220/" ) ]
4
5body: //article[@id="sitecontent"]/section[@class="body"]
6author: //address[@class="author"]
7date: //div[@class="header"]//h1//span[@class="updated"]
8wrap_in(small): //div[@class="footer"]
9wrap_in(i): //figcaption/h3
10dissolve: //figcaption//h3
11dissolve: //figure/div[@class="body"]
12dissolve: //figure/a
13
14strip: //figure[ not( contains(@class, "zoomimage" ) ) ]
15strip: //div[@data-onlineonly="true"]
16strip: //address[@class="author"]
17
18test_url: http://sz.de/1.1556693 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tagesschau.de.txt b/inc/3rdparty/site_config/standard/tagesschau.de.txt
index 8ce8a90e..be76cd05 100644..100755
--- a/inc/3rdparty/site_config/standard/tagesschau.de.txt
+++ b/inc/3rdparty/site_config/standard/tagesschau.de.txt
@@ -1,23 +1,23 @@
1title://h1[1] 1title://h1[1]
2 2
3author: substring-after(//em, 'Von ') 3author: substring-after(//em, 'Von ')
4author:string('tagesschau.de') 4author:string('tagesschau.de')
5 5
6date:substring-after(//div[@class='standDatum'], 'Stand: ') 6date:substring-after(//div[@class='standDatum'], 'Stand: ')
7 7
8body://div[contains(@class, 'article')] | //div[contains(@class, 'centerCol')] 8body://div[contains(@class, 'article')] | //div[contains(@class, 'centerCol')]
9 9
10strip://h1[1] 10strip://h1[1]
11strip: //div[contains(@class, 'directLinks')] 11strip: //div[contains(@class, 'directLinks')]
12strip: //div[contains(@class, 'zitatBox')] 12strip: //div[contains(@class, 'zitatBox')]
13strip: //div[contains(@class, 'teaserBox metaBlock')] 13strip: //div[contains(@class, 'teaserBox metaBlock')]
14strip: //*[contains(@class, 'inv')] 14strip: //*[contains(@class, 'inv')]
15strip: //span[@class='imgSubline'] 15strip: //span[@class='imgSubline']
16strip: //*[contains(@class, 'topline')][1] 16strip: //*[contains(@class, 'topline')][1]
17strip: //div[@id='rightCol'][1] 17strip: //div[@id='rightCol'][1]
18strip: //div[@id="footer"][1] 18strip: //div[@id="footer"][1]
19strip: //div[@class="fPlayer"] 19strip: //div[@class="fPlayer"]
20strip: //div[@id='seitenanfang'] 20strip: //div[@id='seitenanfang']
21strip: //div[@class='standDatum'] 21strip: //div[@class='standDatum']
22strip: //em 22strip: //em
23test_url: http://www.tagesschau.de/ausland/wahlkampffrankreich102.html \ No newline at end of file 23test_url: http://www.tagesschau.de/ausland/wahlkampffrankreich102.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tampabay.com.txt b/inc/3rdparty/site_config/standard/tampabay.com.txt
index bfe841c6..47a6ffab 100644..100755
--- a/inc/3rdparty/site_config/standard/tampabay.com.txt
+++ b/inc/3rdparty/site_config/standard/tampabay.com.txt
@@ -1,5 +1,5 @@
1title: //span[@class="entry-title"] 1title: //span[@class="entry-title"]
2author: //*[contains(@class, 'item')]/p/a/text() 2author: //*[contains(@class, 'item')]/p/a/text()
3date: substring-after(//*[contains(@class, 'item')]/p/text()[3], 'Posted:') 3date: substring-after(//*[contains(@class, 'item')]/p/text()[3], 'Posted:')
4body: //div[@class="entry-content"] 4body: //div[@class="entry-content"]
5test_url: http://www.tampabay.com/news/salvador-dali-leaders-want-st-petersburg-city-council-to-put-brakes-on/1236349 \ No newline at end of file 5test_url: http://www.tampabay.com/news/salvador-dali-leaders-want-st-petersburg-city-council-to-put-brakes-on/1236349 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/taptaptap.com.txt b/inc/3rdparty/site_config/standard/taptaptap.com.txt
index 13de70e9..e1e79428 100644..100755
--- a/inc/3rdparty/site_config/standard/taptaptap.com.txt
+++ b/inc/3rdparty/site_config/standard/taptaptap.com.txt
@@ -1,4 +1,4 @@
1title: //h3[@class="storytitle"] 1title: //h3[@class="storytitle"]
2body: //div[@class="post"] 2body: //div[@class="post"]
3strip: //div[@class="blurbBox"] 3strip: //div[@class="blurbBox"]
4test_url: http://taptaptap.com/blog/apples-precedents-vs-apples-guidelines/ \ No newline at end of file 4test_url: http://taptaptap.com/blog/apples-precedents-vs-apples-guidelines/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tasteofhome.com.txt b/inc/3rdparty/site_config/standard/tasteofhome.com.txt
index 77773363..f3234f34 100644..100755
--- a/inc/3rdparty/site_config/standard/tasteofhome.com.txt
+++ b/inc/3rdparty/site_config/standard/tasteofhome.com.txt
@@ -1,15 +1,11 @@
1title: //span[@id='ctl00_ctl00_MainContent_MainContent_RecipeImage1_lblRecipeTitle'] 1title: //div[@id='ctl00_MainContent_ctl00_Div1']//h2
2body: //div[@id='RDNEW']//*[@class='Recipe-imgCon' or @class='Recipe-Intro' or @class='recipeDetails'] 2body: //div[@id='ctl00_MainContent_ctl00_Div1']
3strip_id_or_class: rec-ExRightPanel 3
4strip_id_or_class: divCarousel 4single_page_link: //div[contains(@class, 'recipeHeader')]//a[contains(@href, '/print')]
5strip_id_or_class: preptimeOuter 5
6strip_id_or_class: cooktimeOuter 6strip_image_src: tohPrintL.png
7strip_id_or_class: durationOuter 7
8strip_id_or_class: divImageFooter 8prune: no
9strip_id_or_class: microFormatFnIngred 9
10strip: //span[@class='Recipe-Intro']//*[@class='link' or @class='rating'] 10test_url: http://www.tasteofhome.com/recipes/Grinch-Punch
11 11test_url: http://www.tasteofhome.com/recipes/lactose-free-chocolate-chip-cookies \ No newline at end of file
12prune: no
13tidy: no
14
15test_url: http://www.tasteofhome.com/recipes/Grinch-Punch \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/taz.de.txt b/inc/3rdparty/site_config/standard/taz.de.txt
index 6e84527b..cf853662 100644..100755
--- a/inc/3rdparty/site_config/standard/taz.de.txt
+++ b/inc/3rdparty/site_config/standard/taz.de.txt
@@ -1,8 +1,8 @@
1date: //div[@class='secthead'] 1date: //div[@class='secthead']
2body: //div[@class='sectbody'] 2body: //div[@class='sectbody']
3title: concat(//div[@class='sectbody']/h4,': ',//div[@class='sectbody']/h1) 3title: concat(//div[@class='sectbody']/h4,': ',//div[@class='sectbody']/h1)
4author: //span[@class='author'] 4author: //span[@class='author']
5strip: //p[@class='caption'] 5strip: //p[@class='caption']
6strip_id_or_class: rack 6strip_id_or_class: rack
7 7
8test_url: http://www.taz.de/Protestbewegung-Occupy/!80188/ \ No newline at end of file 8test_url: http://www.taz.de/Protestbewegung-Occupy/!80188/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tbray.org.txt b/inc/3rdparty/site_config/standard/tbray.org.txt
index fbe94fa4..558dc9c8 100644..100755
--- a/inc/3rdparty/site_config/standard/tbray.org.txt
+++ b/inc/3rdparty/site_config/standard/tbray.org.txt
@@ -1,5 +1,5 @@
1body: //div[@id='centercontent'] 1body: //div[@id='centercontent']
2strip: //div[@id='rightcontent'] 2strip: //div[@id='rightcontent']
3date: substring-before( //div[@id='cats'], '') 3date: substring-before( //div[@id='cats'], '')
4title: //h1 4title: //h1
5test_url: http://www.tbray.org/ongoing/When/201x/2012/03/04/Mobile-Money \ No newline at end of file 5test_url: http://www.tbray.org/ongoing/When/201x/2012/03/04/Mobile-Money \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tcmanila.tk.txt b/inc/3rdparty/site_config/standard/tcmanila.tk.txt
new file mode 100755
index 00000000..f6032ec3
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/tcmanila.tk.txt
@@ -0,0 +1,7 @@
1title: //h2
2body: //div[@class="post_content"]
3author: //span[@class="fn"]
4date: //time[@class="updated"]
5strip_comments: //yes
6footnotes: //yes
7test_url: http://tcmanila.tk/post/29189064358/my-2012-roadmap-is-almost-complete-look-at-the \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tcng.org.txt b/inc/3rdparty/site_config/standard/tcng.org.txt
index 765224e4..4873b50d 100644..100755
--- a/inc/3rdparty/site_config/standard/tcng.org.txt
+++ b/inc/3rdparty/site_config/standard/tcng.org.txt
@@ -1,4 +1,4 @@
1title: //div[@id='main-content']/h1 1title: //div[@id='main-content']/h1
2body: //div[@id='main-content'] 2body: //div[@id='main-content']
3strip: //div[@id='main-content']/h1 3strip: //div[@id='main-content']/h1
4test_url: http://www.tcng.org/index.php/blog/view/teaching-basic-health-cutting-down-costs \ No newline at end of file 4test_url: http://www.tcng.org/index.php/blog/view/teaching-basic-health-cutting-down-costs \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tech.fortune.cnn.com.txt b/inc/3rdparty/site_config/standard/tech.fortune.cnn.com.txt
index b6d17da4..da198622 100644..100755
--- a/inc/3rdparty/site_config/standard/tech.fortune.cnn.com.txt
+++ b/inc/3rdparty/site_config/standard/tech.fortune.cnn.com.txt
@@ -1,4 +1,4 @@
1title: //h1[@class='storyheadline'] 1title: //h1[@class='storyheadline']
2body: //div[@class='storytext'] 2body: //div[@class='storytext']
3strip: //strong 3strip: //strong
4test_url: http://tech.fortune.cnn.com/2011/03/17/why-startups-dont-go-public-anymore/?section=money_topstories&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fmoney_topstories+%28Top+Stories%29 \ No newline at end of file 4test_url: http://tech.fortune.cnn.com/2011/03/17/why-startups-dont-go-public-anymore/?section=money_topstories&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fmoney_topstories+%28Top+Stories%29 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tech.gilt.com.txt b/inc/3rdparty/site_config/standard/tech.gilt.com.txt
new file mode 100755
index 00000000..ab564606
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/tech.gilt.com.txt
@@ -0,0 +1,5 @@
1title: //div[@class="title"]/h1
2title: //div[@class="caption"]/h1
3author: substring-after(//div[@class="metadata"]/div[@class="date"]/a[2], 'by ')
4date: //div[@class="metadata"]/div[@class="date"]/a
5test_url: http://tech.gilt.com/post/46359463184/26-3-13-todays-noon-outage-and-what-were-doing-to \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tech.sina.com.cn.txt b/inc/3rdparty/site_config/standard/tech.sina.com.cn.txt
index f7228ebf..75126f9c 100644..100755
--- a/inc/3rdparty/site_config/standard/tech.sina.com.cn.txt
+++ b/inc/3rdparty/site_config/standard/tech.sina.com.cn.txt
@@ -1,11 +1,11 @@
1title://h1[contains(@id,'artibodyTitle')] 1title://h1[contains(@id,'artibodyTitle')]
2 2
3date://span[contains(@id,'pub_date')] 3date://span[contains(@id,'pub_date')]
4 4
5body://div[contains(@id,'artibody')] 5body://div[contains(@id,'artibody')]
6 6
7strip://div[contains(@class,'otherContent')] 7strip://div[contains(@class,'otherContent')]
8 8
9next_page_link://p[@class='page']/a[contains(.,'下一页')] 9next_page_link://p[@class='page']/a[contains(.,'下一页')]
10 10
11test_url: http://tech.sina.com.cn/mobile/n/2012-03-22/07476863046.shtml \ No newline at end of file 11test_url: http://tech.sina.com.cn/mobile/n/2012-03-22/07476863046.shtml \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/techcrunch.com.txt b/inc/3rdparty/site_config/standard/techcrunch.com.txt
index f436acb5..1509c46e 100644..100755
--- a/inc/3rdparty/site_config/standard/techcrunch.com.txt
+++ b/inc/3rdparty/site_config/standard/techcrunch.com.txt
@@ -1,18 +1,18 @@
1body: //div[contains(@class, 'media-container') or contains(@class, 'body-copy')] 1body: //div[contains(@class, 'media-container') or contains(@class, 'body-copy')]
2 2
3author: //a[@class="name"] 3author: //a[@class="name"]
4 4
5date: //div[@class="post-time"] 5date: //div[@class="post-time"]
6 6
7title: //h1[@class="headline"] 7title: //h1[@class="headline"]
8strip_id_or_class: module-crunchbase 8strip_id_or_class: module-crunchbase
9 9
10# The following is for the mobile site 10# The following is for the mobile site
11body: //div[@id="singlentry"] 11body: //div[@id="singlentry"]
12author: substring-after(//span[@class="single-post-meta-top"],'rsaquo; ') 12author: substring-after(//span[@class="single-post-meta-top"],'rsaquo; ')
13date: substring-before(//div[@class="single-post-meta-top"],' @') 13date: substring-before(//div[@class="single-post-meta-top"],' @')
14title: //a[@class="sh2"] 14title: //a[@class="sh2"]
15 15
16prune: no 16prune: no
17 17
18test_url: http://techcrunch.com/2011/10/18/apples-insanely-great-q1-2012/ \ No newline at end of file 18test_url: http://techcrunch.com/2011/10/18/apples-insanely-great-q1-2012/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/techdirt.com.txt b/inc/3rdparty/site_config/standard/techdirt.com.txt
index 727f3701..7db2f95b 100644..100755
--- a/inc/3rdparty/site_config/standard/techdirt.com.txt
+++ b/inc/3rdparty/site_config/standard/techdirt.com.txt
@@ -1,12 +1,12 @@
1body: //div[@class='story'] 1body: //div[@class='story']
2title: //div[@class='story']/h1 2title: //div[@class='story']/h1
3strip: //div[@class='story']/h1 3strip: //div[@class='story']/h1
4 4
5author: //div[@class='details']/p[contains(., 'by ')]/a 5author: //div[@class='details']/p[contains(., 'by ')]/a
6date: //p[@class='storydate'] 6date: //p[@class='storydate']
7 7
8strip: //p[a[contains(., 'Leave a Comment')]] 8strip: //p[a[contains(., 'Leave a Comment')]]
9strip_id_or_class: share 9strip_id_or_class: share
10strip_id_or_class: maincolumn_head 10strip_id_or_class: maincolumn_head
11strip_id_or_class: maincolmod 11strip_id_or_class: maincolmod
12test_url: http://www.techdirt.com/articles/20120112/17455117394/sega-gets-it-right-about-sopa-its-time-hard-reset-copyright-law-congress.shtml \ No newline at end of file 12test_url: http://www.techdirt.com/articles/20120112/17455117394/sega-gets-it-right-about-sopa-its-time-hard-reset-copyright-law-congress.shtml \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/techhive.com.txt b/inc/3rdparty/site_config/standard/techhive.com.txt
new file mode 100755
index 00000000..29720b0b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/techhive.com.txt
@@ -0,0 +1,18 @@
1title: //div[@class='articleHead']//h1
2author: //div[@class="author-name"]/a[1]
3body: //div[@class="main"]
4
5# remove 'From the Lab' and 'Recent posts' text
6strip: //div[@class='blogLabel']
7
8# remove byline and meta info
9strip: //div[@class="article-meta"]
10strip: //div[@class="author-info"]
11
12#strip tags and categories
13strip: //div[@class="department"]
14
15#strip product cap links
16strip: //div[@class="cap-main"]
17strip: //div[@id="compare-lede"]
18test_url: http://www.techhive.com/article/2010549/up-close-with-blackberry-10.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/techmeme.com.txt b/inc/3rdparty/site_config/standard/techmeme.com.txt
index 8644e00f..0b4bfbd6 100644..100755
--- a/inc/3rdparty/site_config/standard/techmeme.com.txt
+++ b/inc/3rdparty/site_config/standard/techmeme.com.txt
@@ -1,3 +1,3 @@
1single_page_link_in_feed: //b/a 1single_page_link_in_feed: //b/a
2 2
3test_url_feed: http://www.techmeme.com/feed.xml \ No newline at end of file 3test_url_feed: http://www.techmeme.com/feed.xml \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/technicallyjordan.tumblr.com.txt b/inc/3rdparty/site_config/standard/technicallyjordan.tumblr.com.txt
index cc26ee4c..d871b603 100644..100755
--- a/inc/3rdparty/site_config/standard/technicallyjordan.tumblr.com.txt
+++ b/inc/3rdparty/site_config/standard/technicallyjordan.tumblr.com.txt
@@ -1,8 +1,8 @@
1title: //h2 1title: //h2
2author: //meta[@name="author"]/@content 2author: //meta[@name="author"]/@content
3date: //h3 3date: //h3
4body: //div[@class="postBody"] 4body: //div[@class="postBody"]
5strip: //h1 5strip: //h1
6strip: //h2 6strip: //h2
7strip: //h3 7strip: //h3
8test_url: http://technicallyjordan.tumblr.com/post/22914659822/facebook-to-launch-app-store-knock-off \ No newline at end of file 8test_url: http://technicallyjordan.tumblr.com/post/22914659822/facebook-to-launch-app-store-knock-off \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/technologizer.com.txt b/inc/3rdparty/site_config/standard/technologizer.com.txt
new file mode 100755
index 00000000..179bf5a6
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/technologizer.com.txt
@@ -0,0 +1,5 @@
1next_page_link: //a[contains(., 'NEXT PAGE')]
2# following::node() selects text nodes too whereas following::* selects only elements.
3strip: //span[@class='pageo']/following::node()
4strip: //span[@class='pageo']
5test_url: http://technologizer.com/2010/03/08/the-secret-origin-of-windows/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/technologyreview.com.txt b/inc/3rdparty/site_config/standard/technologyreview.com.txt
index 41f21d46..d405eb18 100644..100755
--- a/inc/3rdparty/site_config/standard/technologyreview.com.txt
+++ b/inc/3rdparty/site_config/standard/technologyreview.com.txt
@@ -1,16 +1,16 @@
1title: //header[@class='article-meta']/h1 1title: //header[@class='article-meta']/h1
2title: substring-before(//title, '|') 2title: substring-before(//title, '|')
3 3
4body: //section[contains(@class, 'body')] 4body: //section[contains(@class, 'body')]
5 5
6# Author & Date for News and Featured Stories 6# Author & Date for News and Featured Stories
7author: //ul[@class='byline']/li/a 7author: //ul[@class='byline']/li/a
8author: substring-before(substring-after(//ul[@class='byline']/li, 'By '), ' on') 8author: substring-before(substring-after(//ul[@class='byline']/li, 'By '), ' on')
9date: substring-after(//ul[@class='byline']/li, 'on ') 9date: substring-after(//ul[@class='byline']/li, 'on ')
10 10
11# Author & Date for "Views" 11# Author & Date for "Views"
12author: //div[@class='view-byline']/div[@class='meta']/h2[1] 12author: //div[@class='view-byline']/div[@class='meta']/h2[1]
13date: //div[@class='view-byline']/div[@class='meta']/h2[2] 13date: //div[@class='view-byline']/div[@class='meta']/h2[2]
14 14
15next_page_link: //section[@class='pagination']/a[contains(@class, 'continue')] 15next_page_link: //section[@class='pagination']/a[contains(@class, 'continue')]
16test_url: http://www.technologyreview.com/news/427567/facebooks-telescope-on-human-behavior/ \ No newline at end of file 16test_url: http://www.technologyreview.com/news/427567/facebooks-telescope-on-human-behavior/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/techpinions.com.txt b/inc/3rdparty/site_config/standard/techpinions.com.txt
index 89ed8349..8e1aa96c 100644..100755
--- a/inc/3rdparty/site_config/standard/techpinions.com.txt
+++ b/inc/3rdparty/site_config/standard/techpinions.com.txt
@@ -1,7 +1,7 @@
1body: //div[@class="post"] 1body: //div[@class="post"]
2 2
3strip: //div[@class="post-meta"] 3strip: //div[@class="post-meta"]
4strip: //div[@id="socialicons"] 4strip: //div[@id="socialicons"]
5strip: //div[@id="authorbox"] 5strip: //div[@id="authorbox"]
6 6
7test_url: http://techpinions.com/why-google-and-microsoft-hate-siri/3572 \ No newline at end of file 7test_url: http://techpinions.com/why-google-and-microsoft-hate-siri/3572 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/techradar.com.txt b/inc/3rdparty/site_config/standard/techradar.com.txt
index ed92a974..0a0ca619 100644..100755
--- a/inc/3rdparty/site_config/standard/techradar.com.txt
+++ b/inc/3rdparty/site_config/standard/techradar.com.txt
@@ -1,12 +1,12 @@
1# Title without news/reviews etc. appended 1# Title without news/reviews etc. appended
2title: //div[@id='subColumn1Pad']/div[1][@class='article']/div[1][@class='articleHead']/h1 2title: //div[@id='subColumn1Pad']/div[1][@class='article']/div[1][@class='articleHead']/h1
3 3
4# Remove home link 4# Remove home link
5strip: //div[@id='page_logo']/a 5strip: //div[@id='page_logo']/a
6 6
7# Remove utilities 7# Remove utilities
8strip: //*[(@id = "utilities")] 8strip: //*[(@id = "utilities")]
9 9
10# Remove comments link 10# Remove comments link
11strip: //div[@id='subColumn1Pad']/div[1][@class='article']/div[1][@class='articleHead']/p[@class='tiny'] 11strip: //div[@id='subColumn1Pad']/div[1][@class='article']/div[1][@class='articleHead']/p[@class='tiny']
12test_url: http://www.techradar.com/news/television/sky-to-rebrand-living-as-sky-living-903105 \ No newline at end of file 12test_url: http://www.techradar.com/news/television/sky-to-rebrand-living-as-sky-living-903105 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/telegraaf.nl.txt b/inc/3rdparty/site_config/standard/telegraaf.nl.txt
index ff3cd06e..91b5baf9 100644..100755
--- a/inc/3rdparty/site_config/standard/telegraaf.nl.txt
+++ b/inc/3rdparty/site_config/standard/telegraaf.nl.txt
@@ -1,9 +1,9 @@
1body: //div[@id='artikelKolom'] 1body: //div[@id='artikelKolom']
2strip: //div[@class='broodMediaBox']/div[@class='docbox' or @class='artBannerWrapper'] 2strip: //div[@class='broodMediaBox']/div[@class='docbox' or @class='artBannerWrapper']
3strip: //div[@id='artikeltoolbar'] 3strip: //div[@id='artikeltoolbar']
4strip: //div[@class='reactiebalk artspacer' or @class='bannercenter clearfix artspacer'] 4strip: //div[@class='reactiebalk artspacer' or @class='bannercenter clearfix artspacer']
5strip: //div[@id='artikelKolomRechts' or @id='TMGTweetWidget'] 5strip: //div[@id='artikelKolomRechts' or @id='TMGTweetWidget']
6tidy: no 6tidy: no
7prune: no 7prune: no
8 8
9test_url: http://www.telegraaf.nl/binnenland/10275097/__Identiteit_man_in_sloot_onbekend__.html?cid=rss \ No newline at end of file 9test_url: http://www.telegraaf.nl/binnenland/10275097/__Identiteit_man_in_sloot_onbekend__.html?cid=rss \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/telegraph.co.uk.txt b/inc/3rdparty/site_config/standard/telegraph.co.uk.txt
index e1faf23b..8dcdb42b 100644..100755
--- a/inc/3rdparty/site_config/standard/telegraph.co.uk.txt
+++ b/inc/3rdparty/site_config/standard/telegraph.co.uk.txt
@@ -1,10 +1,10 @@
1body: //div[@class='byline' or @id='storyEmbSlide' or @id='mainBodyArea'] 1body: //div[@class='byline' or @id='storyEmbSlide' or @id='mainBodyArea']
2strip: //p[@class='comments'] 2strip: //p[@class='comments']
3strip: //div[@id='storyEmbSlide']//div[contains(@class, "hide")] 3strip: //div[@id='storyEmbSlide']//div[contains(@class, "hide")]
4strip: //div[@id='tmg-related-links' or @id='outbrain-related-links' or @id='onespot-related-links'] 4strip: //div[@id='tmg-related-links' or @id='outbrain-related-links' or @id='onespot-related-links']
5strip: //p[@class='bbpTweet']/span[@class='timestamp'] 5strip: //p[@class='bbpTweet']/span[@class='timestamp']
6strip: //p[@class='bbpTweet']/span[@class='metadata']//img 6strip: //p[@class='bbpTweet']/span[@class='metadata']//img
7tidy: no 7tidy: no
8prune: no 8prune: no
9 9
10test_url: http://www.telegraph.co.uk/news/worldnews/europe/ireland/8663451/Is-Ireland-divorcing-from-the-Catholic-Church.html \ No newline at end of file 10test_url: http://www.telegraph.co.uk/news/worldnews/europe/ireland/8663451/Is-Ireland-divorcing-from-the-Catholic-Church.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thanhnien.com.vn.txt b/inc/3rdparty/site_config/standard/thanhnien.com.vn.txt
new file mode 100755
index 00000000..596ecc90
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/thanhnien.com.vn.txt
@@ -0,0 +1,4 @@
1body://div[@id="print-news"]
2strip://a
3strip://span[@class="date-line"]
4test_url: http://www.thanhnien.com.vn/pages/20121006/hon-90-trieu-usd-nang-cap-do-thi-can-tho.aspx \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/the-magazine.org.txt b/inc/3rdparty/site_config/standard/the-magazine.org.txt
new file mode 100755
index 00000000..08864657
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/the-magazine.org.txt
@@ -0,0 +1,3 @@
1tidy: no
2
3test_url: http://the-magazine.org/1/alone-together-again \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/theage.com.au.txt b/inc/3rdparty/site_config/standard/theage.com.au.txt
new file mode 100755
index 00000000..ea27c314
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/theage.com.au.txt
@@ -0,0 +1,5 @@
1author: //h3[@class='authorName']
2date: //time
3body: //div[@class='articleBody']
4strip_id_or_class: adspot
5test_url: http://www.theage.com.au/victoria/top-cops-warns-outlaw-bikies-we-have-a-gang-too-20130331-2h1l8.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/theamericanscholar.org.txt b/inc/3rdparty/site_config/standard/theamericanscholar.org.txt
new file mode 100755
index 00000000..38b96672
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/theamericanscholar.org.txt
@@ -0,0 +1,13 @@
1# Article Metadata
2title: //meta[@property="og:title"]/@content
3author: substring-after(//h3, 'By ')
4date: //h4/a[2]
5
6# Content Pruning
7strip: //h4
8strip: //a[@id="print_button"]
9strip: //p[@class="excerpt"]
10strip: //h3
11strip: //div[@class="caption"]
12strip: //center/a/img
13test_url: http://theamericanscholar.org/too-big-to-fail-and-too-risky-to-exist/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/theappleblog.com.txt b/inc/3rdparty/site_config/standard/theappleblog.com.txt
index 3bd555f1..caa5ae0c 100644..100755
--- a/inc/3rdparty/site_config/standard/theappleblog.com.txt
+++ b/inc/3rdparty/site_config/standard/theappleblog.com.txt
@@ -1,3 +1,3 @@
1# Remove home link 1# Remove home link
2strip: //div[@id='blog-title']/a 2strip: //div[@id='blog-title']/a
3test_url: http://theappleblog.com/2010/10/21/the-new-macbook-air-is-underwhelming/ \ No newline at end of file 3test_url: http://theappleblog.com/2010/10/21/the-new-macbook-air-is-underwhelming/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/theatlantic.com.txt b/inc/3rdparty/site_config/standard/theatlantic.com.txt
index 267fd39c..aa41b153 100644..100755
--- a/inc/3rdparty/site_config/standard/theatlantic.com.txt
+++ b/inc/3rdparty/site_config/standard/theatlantic.com.txt
@@ -1,18 +1,20 @@
1title: //div[@id='article']/h1 1title: //div[contains(@class, 'articleHead')]//h1
2title: //h1 2
3 3body: //div[@class='articleText']
4body: //div[@class='articleText'] 4body: //div[@class='articleContent']
5body: //div[@class='articleContent'] 5body: //div[@id='article']
6body: //div[@id='article'] 6date: //*[contains(@class, 'date')]
7date: //*[contains(@class, 'date')] 7author: //div[@id='profile']//*[@class='authors']//a[1]
8author: //div[@id='profile']//*[@class='authors']//a[1] 8author: //*[@class='author']/span
9author: //*[@class='author']/span 9prune: no
10prune: no 10
11 11strip: //div[@class='moreOnBoxWithImages']
12strip: //div[@class='moreOnBoxWithImages'] 12strip: //p[contains(., 'This article available online at:')]
13 13strip: //p[contains(., 'This article available online at:')]/following::*
14single_page_link: //a[@class='print'] 14strip: //div[@class='earthbox']
15 15
16test_url: http://www.theatlantic.com/technology/archive/2011/04/want-to-see-how-crazy-a-bot-run-market-can-be/237773/ 16single_page_link: //article//a[contains(@class, 'print')]
17test_url: http://www.theatlantic.com/magazine/archive/2007/11/the-autumn-of-the-multitaskers/6342/ 17
18test_url: http://www.theatlantic.com/technology/archive/2011/04/want-to-see-how-crazy-a-bot-run-market-can-be/237773/
19test_url: http://www.theatlantic.com/magazine/archive/2007/11/the-autumn-of-the-multitaskers/6342/
18test_url: http://www.theatlantic.com/entertainment/archive/2012/04/30-rock-live-a-funny-reminder-of-why-sitcoms-arent-shot-live-anymore/256447/ \ No newline at end of file 20test_url: http://www.theatlantic.com/entertainment/archive/2012/04/30-rock-live-a-funny-reminder-of-why-sitcoms-arent-shot-live-anymore/256447/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/theatlanticcities.com.txt b/inc/3rdparty/site_config/standard/theatlanticcities.com.txt
new file mode 100755
index 00000000..880f207d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/theatlanticcities.com.txt
@@ -0,0 +1,17 @@
1# To administrator:
2# Please replace the hostname with "*.theatlanticcities.com"
3
4# This filter is tested on:
5# http://m.theatlanticcities.com/arts-and-lifestyle/2012/04/invisible-borders-define-american-culture/1839/
6# http://www.theatlanticcities.com/housing/2012/11/chinas-holdouts/3981/
7# http://www.theatlanticcities.com/arts-and-lifestyle/2012/12/christmas-time-here/4133/
8
9title://h1
10author: //ul[@class='meta']/li/a
11date: //ul[@class='meta']/li/following-sibling::li
12body://article[@class='post']
13
14strip://h1
15strip://ul[@class='meta']
16strip://div[@class='newsletter-slug']
17test_url: http://www.theatlanticcities.com/arts-and-lifestyle/2012/12/christmas-time-here/4133/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thebostonchannel.com.txt b/inc/3rdparty/site_config/standard/thebostonchannel.com.txt
index 64df90c1..b74442de 100644..100755
--- a/inc/3rdparty/site_config/standard/thebostonchannel.com.txt
+++ b/inc/3rdparty/site_config/standard/thebostonchannel.com.txt
@@ -1,7 +1,7 @@
1title: //meta[@name='og:title']/@content 1title: //meta[@name='og:title']/@content
2date: //meta[@name='created']/@content 2date: //meta[@name='created']/@content
3body: //div[@class="StoryBody" or @class="storyTeaser"] 3body: //div[@class="StoryBody" or @class="storyTeaser"]
4 4
5replace_string(<p></p>): <br /><br /> 5replace_string(<p></p>): <br /><br />
6 6
7test_url: http://www.thebostonchannel.com/slideshow/news/28210648/detail.html \ No newline at end of file 7test_url: http://www.thebostonchannel.com/slideshow/news/28210648/detail.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thebrowser.com.txt b/inc/3rdparty/site_config/standard/thebrowser.com.txt
index c3c20504..807e7dad 100644..100755
--- a/inc/3rdparty/site_config/standard/thebrowser.com.txt
+++ b/inc/3rdparty/site_config/standard/thebrowser.com.txt
@@ -1,10 +1,10 @@
1title: //h2[contains(@class, 'page-title')] 1title: //h2[contains(@class, 'page-title')]
2body: //div[@id='content']/div[contains(@id, 'node-')]/div[@class='content'] 2body: //div[@id='content']/div[contains(@id, 'node-')]/div[@class='content']
3 3
4prune: no 4prune: no
5 5
6strip: //div[contains(@class, 'node-book')]//a[@class='button'] 6strip: //div[contains(@class, 'node-book')]//a[@class='button']
7 7
8single_page_link: //a[@class='tool-print'] 8single_page_link: //a[@class='tool-print']
9 9
10test_url: http://thebrowser.com/interviews/yotam-ottolenghi-on-his-favourite-cookery-books \ No newline at end of file 10test_url: http://thebrowser.com/interviews/yotam-ottolenghi-on-his-favourite-cookery-books \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thecarton.net.txt b/inc/3rdparty/site_config/standard/thecarton.net.txt
index 9ef4ed8b..13fa35a0 100644..100755
--- a/inc/3rdparty/site_config/standard/thecarton.net.txt
+++ b/inc/3rdparty/site_config/standard/thecarton.net.txt
@@ -1,10 +1,10 @@
1title: substring-before(//title, ' &ndash; ') 1title: substring-before(//title, ' &ndash; ')
2author:string('Shawn') 2author:string('Shawn')
3date: //*/time/@pubdate 3date: //*/time/@pubdate
4 4
5 5
6strip: //header 6strip: //header
7strip: //div[@id='prev_next'] 7strip: //div[@id='prev_next']
8strip: //div[@id='masthead'] 8strip: //div[@id='masthead']
9 9
10test_url: http://thecarton.net/2012/12/20/imdb \ No newline at end of file 10test_url: http://thecarton.net/2012/12/20/imdb \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thedaily.com.txt b/inc/3rdparty/site_config/standard/thedaily.com.txt
index 24ebbbac..e255e6a8 100644..100755
--- a/inc/3rdparty/site_config/standard/thedaily.com.txt
+++ b/inc/3rdparty/site_config/standard/thedaily.com.txt
@@ -1,24 +1,24 @@
1#keep all body text 1#keep all body text
2prune: no 2prune: no
3 3
4#title, body, metadata 4#title, body, metadata
5title: //div[@class='story_header']/h1 5title: //div[@class='story_header']/h1
6body: //div[@id='content'] 6body: //div[@id='content']
7author: substring-after(//span[@class='byline'], "by ") 7author: substring-after(//span[@class='byline'], "by ")
8author: substring-after(//span[@class='byline'], "By ") 8author: substring-after(//span[@class='byline'], "By ")
9author: //span[@class='byline'] 9author: //span[@class='byline']
10date: //span[@class='date'] 10date: //span[@class='date']
11 11
12#formatting 12#formatting
13convert_double_br_tags: yes 13convert_double_br_tags: yes
14dissolve: //div[@class='slides_full']/ul/li 14dissolve: //div[@class='slides_full']/ul/li
15 15
16# cleanup 16# cleanup
17strip: //a[@id='story_note'] 17strip: //a[@id='story_note']
18strip: //br 18strip: //br
19strip: //div[@class='intro'] 19strip: //div[@class='intro']
20strip: //div[@class='share-block'] 20strip: //div[@class='share-block']
21strip: //div[@class='sidebar-social'] 21strip: //div[@class='sidebar-social']
22strip: //div[@class='top-stories'] 22strip: //div[@class='top-stories']
23strip: //div[@class='prevnext'] 23strip: //div[@class='prevnext']
24test_url: http://www.thedaily.com/page/2012/01/09/010912-news-college-costs-1-5/ \ No newline at end of file 24test_url: http://www.thedaily.com/page/2012/01/09/010912-news-college-costs-1-5/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thedailybeast.com.txt b/inc/3rdparty/site_config/standard/thedailybeast.com.txt
index 4781c65a..f5e938ae 100644..100755
--- a/inc/3rdparty/site_config/standard/thedailybeast.com.txt
+++ b/inc/3rdparty/site_config/standard/thedailybeast.com.txt
@@ -1,7 +1,7 @@
1title: //h1 1title: //h1
2body: //article/div[contains(@class, 'article-body')] 2body: //article/div[contains(@class, 'article-body')]
3#strip: //header/hgroup/h1 3#strip: //header/hgroup/h1
4strip: //footer[@class='storyFooter'] 4strip: //footer[@class='storyFooter']
5single_page_link: //li[@class='print']/a 5single_page_link: //li[@class='print']/a
6prune: no 6prune: no
7test_url: http://www.thedailybeast.com/articles/2010/04/06/how-mastercard-predicts-divorce.html \ No newline at end of file 7test_url: http://www.thedailybeast.com/articles/2010/04/06/how-mastercard-predicts-divorce.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thedailymash.co.uk.txt b/inc/3rdparty/site_config/standard/thedailymash.co.uk.txt
index 0f15558d..a83a6cf6 100644..100755
--- a/inc/3rdparty/site_config/standard/thedailymash.co.uk.txt
+++ b/inc/3rdparty/site_config/standard/thedailymash.co.uk.txt
@@ -1,14 +1,14 @@
1# Remove duplicated title 1# Remove duplicated title
2strip: //div[@id='content']/div[1][@class='full_intro']/h2 2strip: //div[@id='content']/div[1][@class='full_intro']/h2
3 3
4# Remove links, ads etc. 4# Remove links, ads etc.
5strip: //*[(@class= "aside")] 5strip: //*[(@class= "aside")]
6 6
7# Remove the date and add it to the date published field in Instapaper 7# Remove the date and add it to the date published field in Instapaper
8strip: //div[@class="date"] 8strip: //div[@class="date"]
9date: //div[@class="date"] 9date: //div[@class="date"]
10 10
11# There is no byline on The Daily Mash. 11# There is no byline on The Daily Mash.
12 12
13convert_double_br_tags: yes 13convert_double_br_tags: yes
14test_url: http://www.thedailymash.co.uk/index.php?option=com_content&task=view&id=4994&Itemid=81&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+thedailymash+%28The+Daily+Mash.+It%27s+news+to+us.%29 \ No newline at end of file 14test_url: http://www.thedailymash.co.uk/index.php?option=com_content&task=view&id=4994&Itemid=81&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+thedailymash+%28The+Daily+Mash.+It%27s+news+to+us.%29 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thedisneyblog.com.txt b/inc/3rdparty/site_config/standard/thedisneyblog.com.txt
new file mode 100755
index 00000000..57b3254a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/thedisneyblog.com.txt
@@ -0,0 +1,7 @@
1title: //h1[contains(@class, 'entry-title')]
2author: //span[contains(@class, 'author vcard')]
3date: //span[@class = 'entry-date']
4body: //div[@class='entry-content']
5strip_id_or_class: bottomcontainerBox
6strip_id_or_class: lightsocial_container
7test_url: http://thedisneyblog.com/2012/11/17/videopolis-one-woman-disney-musical-beauty-and-the-beast/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/theeuropean-magazine.com.txt b/inc/3rdparty/site_config/standard/theeuropean-magazine.com.txt
new file mode 100755
index 00000000..a19bae15
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/theeuropean-magazine.com.txt
@@ -0,0 +1,17 @@
1# Tested on:
2# http://theeuropean-magazine.com/352-dyson-george/353-evolution-and-innovation
3# http://theeuropean-magazine.com/522-casertano-stefano/919-morsi-and-the-future-of-egypt
4
5title://h2[@class='article-title']
6author:substring-before(substring-after(//p[@class='article-meta'], 'by'), '&mdash;')
7date:substring-after(//p[@class='article-meta'], '&mdash;')
8body://div[@class='article']
9
10wrap_in(strong)://p[@class='article-teaser']
11move_into(//div[@class='article-head'])://li/img
12
13strip://h2[@class='article-title']
14strip://p[@class='article-meta']
15strip://div[@class='copyright']
16strip://div[@class='opinions-of-readers']
17test_url: http://theeuropean-magazine.com/522-casertano-stefano/919-morsi-and-the-future-of-egypt \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thefilmexperience.net.txt b/inc/3rdparty/site_config/standard/thefilmexperience.net.txt
index e6b5115a..e6b5115a 100644..100755
--- a/inc/3rdparty/site_config/standard/thefilmexperience.net.txt
+++ b/inc/3rdparty/site_config/standard/thefilmexperience.net.txt
diff --git a/inc/3rdparty/site_config/standard/thegamedesignforum.com.txt b/inc/3rdparty/site_config/standard/thegamedesignforum.com.txt
new file mode 100755
index 00000000..849ede77
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/thegamedesignforum.com.txt
@@ -0,0 +1,14 @@
1## ERROR: Removes all images. Please fix, have no idea why (bad HTML?)
2
3title: //h1[@class='featuretitle']
4body: //div[@id='nobordercontentarea']
5
6# remove Twitter badge
7strip: //img[@alt='Follow tgdfweb on Twitter']
8
9# fix for headers not showing for some reason
10wrap_in(h2): //h2[@class='sectionheader']
11dissolve: //h2[@class='sectionheader']
12
13tidy: yes
14test_url: http://thegamedesignforum.com/features/acceleration_flow_1.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/theglobalmail.org.txt b/inc/3rdparty/site_config/standard/theglobalmail.org.txt
index fae0fb29..da1c84f9 100644..100755
--- a/inc/3rdparty/site_config/standard/theglobalmail.org.txt
+++ b/inc/3rdparty/site_config/standard/theglobalmail.org.txt
@@ -1,41 +1,41 @@
1title: //h1[@id="headline"] 1title: //h1[@id="headline"]
2author: //div[contains(@class, "editorial-byline-author")]/a 2author: //div[contains(@class, "editorial-byline-author")]/a
3date: substring-after(//div[contains(@class, "editorial-byline-meta")], " | ") 3date: substring-after(//div[contains(@class, "editorial-byline-meta")], " | ")
4 4
5# The article body contains a mix or article and non-article elements, so lot of manual tweaks are needed 5# The article body contains a mix or article and non-article elements, so lot of manual tweaks are needed
6body: //div[@id="template"] 6body: //div[@id="template"]
7strip_id_or_class: editorial-byline-pic 7strip_id_or_class: editorial-byline-pic
8strip_id_or_class: editorial-byline 8strip_id_or_class: editorial-byline
9strip_id_or_class: headline 9strip_id_or_class: headline
10 10
11# Include the leadin paragraph in the body text, but remove quotes because they're out of context 11# Include the leadin paragraph in the body text, but remove quotes because they're out of context
12dissolve: //div[contains(@id, "leadin")] 12dissolve: //div[contains(@id, "leadin")]
13strip_id_or_class: pullquote 13strip_id_or_class: pullquote
14 14
15# Image captions removed because they're confusing in body text 15# Image captions removed because they're confusing in body text
16strip_id_or_class: image-caption-content 16strip_id_or_class: image-caption-content
17 17
18# Remove header and footer 18# Remove header and footer
19strip_id_or_class: header 19strip_id_or_class: header
20strip_id_or_class: footer 20strip_id_or_class: footer
21 21
22# Remove the hidden logo that seems to be used to cause Facebook to show the logo instead of a random article image 22# Remove the hidden logo that seems to be used to cause Facebook to show the logo instead of a random article image
23strip: /html/body/span[contains(@style, "display: none")] 23strip: /html/body/span[contains(@style, "display: none")]
24 24
25# Remove search box 25# Remove search box
26strip_id_or_class: searchContainer 26strip_id_or_class: searchContainer
27strip: //div[contains(@class, "searchInstruction")] 27strip: //div[contains(@class, "searchInstruction")]
28strip: //div[contains(@class, "searchResults")]/h4 28strip: //div[contains(@class, "searchResults")]/h4
29 29
30# Remove the 'Letters to the Editor' section 30# Remove the 'Letters to the Editor' section
31strip_id_or_class: letter-text 31strip_id_or_class: letter-text
32strip_id_or_class: letter-from 32strip_id_or_class: letter-from
33strip_id_or_class: letter-date 33strip_id_or_class: letter-date
34 34
35# Remove Like/Tweet links 35# Remove Like/Tweet links
36strip_id_or_class: social-tab 36strip_id_or_class: social-tab
37 37
38# Remove 'divider' which causes an inexplicable slash to appear in the article body 38# Remove 'divider' which causes an inexplicable slash to appear in the article body
39strip_id_or_class: divider 39strip_id_or_class: divider
40 40
41test_url: http://www.theglobalmail.org/feature/tiramisu-time-in-pyongyang/88/ \ No newline at end of file 41test_url: http://www.theglobalmail.org/feature/tiramisu-time-in-pyongyang/88/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/theglobeandmail.com.txt b/inc/3rdparty/site_config/standard/theglobeandmail.com.txt
index 90634a08..750f8473 100644..100755
--- a/inc/3rdparty/site_config/standard/theglobeandmail.com.txt
+++ b/inc/3rdparty/site_config/standard/theglobeandmail.com.txt
@@ -1,5 +1,5 @@
1single_page_link: //div[contains(@class, 'pagination')]//a[contains(@title, 'ingle page')] 1single_page_link: //div[contains(@class, 'pagination')]//a[contains(@title, 'ingle page')]
2tidy: no 2tidy: no
3prune: no 3prune: no
4 4
5test_url: http://www.theglobeandmail.com/report-on-business/rob-magazine/how-a-novice-miner-survived-a-summer-in-the-klondike/article2345350/ \ No newline at end of file 5test_url: http://www.theglobeandmail.com/report-on-business/rob-magazine/how-a-novice-miner-survived-a-summer-in-the-klondike/article2345350/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thegreatdiscontent.com.txt b/inc/3rdparty/site_config/standard/thegreatdiscontent.com.txt
new file mode 100755
index 00000000..12442b40
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/thegreatdiscontent.com.txt
@@ -0,0 +1,6 @@
1title: //h1[@id='headline']
2author: substring-after(//section[@class="credits"]/ul/li[1],"Interview by ")
3date: //time[@pubdate]
4body: //article[@class='interview']
5strip: //article[@class='interview']/footer
6test_url: http://thegreatdiscontent.com/jeffrey-zeldman \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/theguardian.com.txt b/inc/3rdparty/site_config/standard/theguardian.com.txt
new file mode 100755
index 00000000..c803e4e4
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/theguardian.com.txt
@@ -0,0 +1,13 @@
1title: //div[@id='main-article-info']//h1
2body: //div[@id='article-wrapper']
3date: //li[@class='publication']//time[@pubdate] | //li[@class='publication']//data[@pubdate]
4strip: //div[contains(@class, 'email-subscription')]
5strip: //div[contains(@class, 'kindleWidget')]
6#strip: //a[not(text())]
7strip_id_or_class: pocket-btn
8author: //li[@class='byline']
9prune: no
10tidy: no
11test_url: http://www.theguardian.com/world/2013/oct/04/nsa-gchq-attack-tor-network-encryption
12test_url: http://www.theguardian.com/world/2013/oct/03/edward-snowden-files-john-lanchester
13test_url: http://www.theguardian.com/commentisfree/2014/jun/15/britishness-search-identity-my-part-in-camerons-odyssey \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/theindychannel.com.txt b/inc/3rdparty/site_config/standard/theindychannel.com.txt
index 3544f247..2cd865bb 100644..100755
--- a/inc/3rdparty/site_config/standard/theindychannel.com.txt
+++ b/inc/3rdparty/site_config/standard/theindychannel.com.txt
@@ -1,13 +1,13 @@
1title: //h1[@class="Headline"] 1title: //h1[@class="Headline"]
2date: substring-after(//div[@class="posted"], 'EDT ') 2date: substring-after(//div[@class="posted"], 'EDT ')
3body: //div[@class="storyBody"] 3body: //div[@class="storyBody"]
4 4
5strip: //td[@class="AssocContentTD"] 5strip: //td[@class="AssocContentTD"]
6strip: //div[@id="pageTitle"] 6strip: //div[@id="pageTitle"]
7strip: //div[@class="posted"] 7strip: //div[@class="posted"]
8strip: //div[@class="updated"] 8strip: //div[@class="updated"]
9strip: //div[@class="js-kit-disclaimer"] 9strip: //div[@class="js-kit-disclaimer"]
10strip: //table[@class="row3table"] 10strip: //table[@class="row3table"]
11strip: //div[@class="container2"] 11strip: //div[@class="container2"]
12strip: //div[@id="delta"] 12strip: //div[@id="delta"]
13test_url: http://www.theindychannel.com/news/31050840/detail.html \ No newline at end of file 13test_url: http://www.theindychannel.com/news/31050840/detail.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/themarker.com.txt b/inc/3rdparty/site_config/standard/themarker.com.txt
new file mode 100755
index 00000000..141b1a3b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/themarker.com.txt
@@ -0,0 +1,11 @@
1title: //h1[contains(@class, 'mainTitle')]
2author: //ul[@class='author']//a[@rel='author']
3body: //div[@id='article-box']
4prune: no
5tidy: no
6strip_id_or_class: head
7strip_id_or_class: social-nav
8strip_id_or_class: rate
9strip_id_or_class: video
10
11test_url: http://www.themarker.com/markerweek/1.2093167 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/themillions.com.txt b/inc/3rdparty/site_config/standard/themillions.com.txt
index e3e57fea..4d46daee 100644..100755
--- a/inc/3rdparty/site_config/standard/themillions.com.txt
+++ b/inc/3rdparty/site_config/standard/themillions.com.txt
@@ -1,10 +1,10 @@
1title: /html/body/div/div[2]/div/div/div/h3 1title: /html/body/div/div[2]/div/div/div/h3
2 2
3body: /html/body/div/div[2]/div/div/div/div[2] 3body: /html/body/div/div[2]/div/div/div/div[2]
4 4
5strip: /html/body/div/div[2]/div/div/div/div[6]/div[3]/div/div/div 5strip: /html/body/div/div[2]/div/div/div/div[6]/div[3]/div/div/div
6 6
7tidy: no 7tidy: no
8 8
9# any way to get rid of this word character garbage? 9# any way to get rid of this word character garbage?
10test_url: http://www.themillions.com/2010/07/at-the-movies-with-david-mitchell-the-thousand-autumns-of-jacob-de-zoet.html \ No newline at end of file 10test_url: http://www.themillions.com/2010/07/at-the-movies-with-david-mitchell-the-thousand-autumns-of-jacob-de-zoet.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/themuseumofinnocence.com.txt b/inc/3rdparty/site_config/standard/themuseumofinnocence.com.txt
index 518bff93..80aba441 100644..100755
--- a/inc/3rdparty/site_config/standard/themuseumofinnocence.com.txt
+++ b/inc/3rdparty/site_config/standard/themuseumofinnocence.com.txt
@@ -1,7 +1,7 @@
1body: single-review 1body: single-review
2strip_id_or_class: featured-review 2strip_id_or_class: featured-review
3strip_id_or_class: resources 3strip_id_or_class: resources
4strip_id_or_class: rate-the-book 4strip_id_or_class: rate-the-book
5strip_id_or_class: write-review 5strip_id_or_class: write-review
6 6
7test_url: http://themuseumofinnocence.com/review.php?id=1179 \ No newline at end of file 7test_url: http://themuseumofinnocence.com/review.php?id=1179 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thenation.com.txt b/inc/3rdparty/site_config/standard/thenation.com.txt
index d88bcdd6..dab17f0b 100644..100755
--- a/inc/3rdparty/site_config/standard/thenation.com.txt
+++ b/inc/3rdparty/site_config/standard/thenation.com.txt
@@ -1,11 +1,13 @@
1title: //h1[@class='print-title'] 1title: //h2[@property='dc:title']
2body: //div[@class='print-content'] 2#body: //div[@class='print-content']
3author: //a[contains(@href, '/authors')] 3body: //div[@id='wysiwyg']
4author: substring-before(//div[@class='print-created'], '|') 4author: //a[contains(@href, '/authors')]
5date: //span[@class='article-date'] 5author: substring-before(//div[@class='print-created'], '|')
6date: substring-after(//div[@class='print-created'], '|') 6date: //span[@class='article-date']
7prune: no 7date: substring-after(//div[@class='print-created'], '|')
8 8prune: no
9single_page_link: //ul[contains(@class, 'article-actions-bar')]//a[contains(@href, '/print/article/')] 9
10 10#single_page_link: //ul[contains(@class, 'article-actions-bar')]//a[contains(@href, '/print/article/')]
11single_page_link: //ul[contains(@class, 'article-actions-bar')]//a[contains(@href, '?page=full')]
12
11test_url: http://www.thenation.com/article/162331/hard-against-time-roy-fisher \ No newline at end of file 13test_url: http://www.thenation.com/article/162331/hard-against-time-roy-fisher \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thenetworkgarden.blogs.com.txt b/inc/3rdparty/site_config/standard/thenetworkgarden.blogs.com.txt
index 846b8a8a..b7f5f0f0 100644..100755
--- a/inc/3rdparty/site_config/standard/thenetworkgarden.blogs.com.txt
+++ b/inc/3rdparty/site_config/standard/thenetworkgarden.blogs.com.txt
@@ -1,4 +1,4 @@
1body: //div[@id="beta-inner"] 1body: //div[@id="beta-inner"]
2title: //h3[@class="entry-header"] 2title: //h3[@class="entry-header"]
3 3
4test_url: http://thenetworkgarden.blogs.com/weblog/2011/09/microsoft-metro-and-the-next-wave-in-computing.html \ No newline at end of file 4test_url: http://thenetworkgarden.blogs.com/weblog/2011/09/microsoft-metro-and-the-next-wave-in-computing.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thenextgeneration.org.txt b/inc/3rdparty/site_config/standard/thenextgeneration.org.txt
new file mode 100755
index 00000000..dedd989f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/thenextgeneration.org.txt
@@ -0,0 +1,8 @@
1title: //h1[@class='interior-page-title']
2author: //span[@class='author']/a
3date: //div[@class='byline']/time
4body: //div[@class='rich-text-body']
5
6strip: //div[@class='byline']
7strip: //div[@class='offscreen-menu']
8test_url: http://thenextgeneration.org/blog/post/rebrand-announce/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thenextweb.com.txt b/inc/3rdparty/site_config/standard/thenextweb.com.txt
index fdc70005..684fe82d 100644..100755
--- a/inc/3rdparty/site_config/standard/thenextweb.com.txt
+++ b/inc/3rdparty/site_config/standard/thenextweb.com.txt
@@ -1,12 +1,12 @@
1body: //div[@class= 'article-body'] 1body: //div[@class= 'article-body']
2author: //div[@class='featured mb-1']//a[starts-with(@href,'/author')] 2author: //div[@class='featured mb-1']//a[starts-with(@href,'/author')]
3 3
4strip: //div[@class = 'bargo'] 4strip: //div[@class = 'bargo']
5strip: //div[@class = 'tf'] 5strip: //div[@class = 'tf']
6strip: //div[@class = 'article']/div[@class = 'blue-box'] 6strip: //div[@class = 'article']/div[@class = 'blue-box']
7strip_id_or_class: respond 7strip_id_or_class: respond
8 8
9tidy: no 9tidy: no
10next_page_link: //div[@class='pages-wrapper']//span/following-sibling::a/@href 10next_page_link: //div[@class='pages-wrapper']//span/following-sibling::a/@href
11 11
12test_url: http://thenextweb.com/apple/2011/10/12/tnw-review-a-complete-guide-to-apples-ios-5-with-icloud-an-os-14-years-in-the-making/ \ No newline at end of file 12test_url: http://thenextweb.com/apple/2011/10/12/tnw-review-a-complete-guide-to-apples-ios-5-with-icloud-an-os-14-years-in-the-making/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/theoaklandpress.com.txt b/inc/3rdparty/site_config/standard/theoaklandpress.com.txt
index c7132321..c9abda71 100644..100755
--- a/inc/3rdparty/site_config/standard/theoaklandpress.com.txt
+++ b/inc/3rdparty/site_config/standard/theoaklandpress.com.txt
@@ -1,3 +1,3 @@
1body: //div[@id='fullstory'] 1body: //div[@id='fullstory']
2strip: //div[@id='page_leftbar'] 2strip: //div[@id='page_leftbar']
3test_url: http://theoaklandpress.com/articles/2011/04/25/news/doc4db5330e0bce9220005852.txt \ No newline at end of file 3test_url: http://theoaklandpress.com/articles/2011/04/25/news/doc4db5330e0bce9220005852.txt \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/theonion.com.txt b/inc/3rdparty/site_config/standard/theonion.com.txt
index 12918b88..90e8d658 100644..100755
--- a/inc/3rdparty/site_config/standard/theonion.com.txt
+++ b/inc/3rdparty/site_config/standard/theonion.com.txt
@@ -1,11 +1,11 @@
1title: //h2[@class='title'] 1title: //h2[@class='title']
2date: substring-before(//p[@class='meta'], '|') 2date: substring-before(//p[@class='meta'], '|')
3body: //div[@class='story'] 3body: //div[@class='story']
4#body: //div[@class='article_body'] 4#body: //div[@class='article_body']
5 5
6strip: //h2[@class='title'] 6strip: //h2[@class='title']
7strip: //p[@class='meta'] 7strip: //p[@class='meta']
8strip: //div[@class='ga_section'] 8strip: //div[@class='ga_section']
9strip: //div[@id='recent_slider'] 9strip: //div[@id='recent_slider']
10 10
11test_url: http://www.theonion.com/articles/pathetic-bobcats-owner-again-regaling-players-with,27572/ \ No newline at end of file 11test_url: http://www.theonion.com/articles/pathetic-bobcats-owner-again-regaling-players-with,27572/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thepioneerwoman.com.txt b/inc/3rdparty/site_config/standard/thepioneerwoman.com.txt
index f89f3a87..75583cd3 100644..100755
--- a/inc/3rdparty/site_config/standard/thepioneerwoman.com.txt
+++ b/inc/3rdparty/site_config/standard/thepioneerwoman.com.txt
@@ -1,11 +1,11 @@
1title: //h1[@class='post-title'] 1title: //h1[@class='post-title']
2body: //div[@class='post'] 2body: //div[@class='post']
3author: //p[@class='posted-by'] 3author: //p[@class='posted-by']
4date: //div[@class='sprite post-date'] 4date: //div[@class='sprite post-date']
5 5
6# The body of the post doesn't have it's own div so we have to strip out the metadata 6# The body of the post doesn't have it's own div so we have to strip out the metadata
7strip: //div[@class='author_avatar'] 7strip: //div[@class='author_avatar']
8strip: //div[@class='sprite post-date'] 8strip: //div[@class='sprite post-date']
9strip: //h1[@class='post-title'] 9strip: //h1[@class='post-title']
10strip: //p[@class='posted-by'] 10strip: //p[@class='posted-by']
11test_url: http://thepioneerwoman.com/cooking/2011/08/pie-fats-a-comparison/ \ No newline at end of file 11test_url: http://thepioneerwoman.com/cooking/2011/08/pie-fats-a-comparison/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/theregister.co.uk.txt b/inc/3rdparty/site_config/standard/theregister.co.uk.txt
index ebcc55d5..5d30230d 100644..100755
--- a/inc/3rdparty/site_config/standard/theregister.co.uk.txt
+++ b/inc/3rdparty/site_config/standard/theregister.co.uk.txt
@@ -1,5 +1,8 @@
1title: //div[@id="article"]/h2 1# Updated 25-Jan-2014
2author: //div[@id="article"]/p[@class="byline"]/a[1] 2single_page_link: //a[contains(@href, '/Print/')]
3date: //div[@id="article"]/p[@class="dateline"]/a[2] 3
4body: //div[@id="article"]/div[@id="body"] 4title: //div[@id="article"]/h2
5test_url: http://www.theregister.co.uk/2011/10/06/gas_bill_shocker/ \ No newline at end of file 5author: //p[@class="byline"]/a
6date: //p[@class="dateline"]/a[last()]
7
8test_url: http://www.theregister.co.uk/2014/01/24/thirty_years_of_the_apple_macintosh_part_2/
diff --git a/inc/3rdparty/site_config/standard/theroot.com.txt b/inc/3rdparty/site_config/standard/theroot.com.txt
index ebff662d..1f56316d 100644..100755
--- a/inc/3rdparty/site_config/standard/theroot.com.txt
+++ b/inc/3rdparty/site_config/standard/theroot.com.txt
@@ -1,3 +1,3 @@
1body: //div[@id='node-content'] 1body: //div[@id='node-content']
2strip_id_or_class: pager 2strip_id_or_class: pager
3test_url: http://www.theroot.com/views/why-i-am-male-feminist \ No newline at end of file 3test_url: http://www.theroot.com/views/why-i-am-male-feminist \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/therumpus.net.txt b/inc/3rdparty/site_config/standard/therumpus.net.txt
index d01a89bb..84d0e783 100644..100755
--- a/inc/3rdparty/site_config/standard/therumpus.net.txt
+++ b/inc/3rdparty/site_config/standard/therumpus.net.txt
@@ -1,4 +1,4 @@
1title: /html/body/div/div[2]/div/div/h1 1title: /html/body/div/div[2]/div/div/h1
2 2
3body: /html/body/div/div[2]/div/div/div[2] 3body: /html/body/div/div[2]/div/div/div[2]
4test_url: http://therumpus.net/2010/07/the-rumpus-interview-with-david-means/?full=yes \ No newline at end of file 4test_url: http://therumpus.net/2010/07/the-rumpus-interview-with-david-means/?full=yes \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thesiasat.com.txt b/inc/3rdparty/site_config/standard/thesiasat.com.txt
index ab9a99e8..68a8bc8e 100644..100755
--- a/inc/3rdparty/site_config/standard/thesiasat.com.txt
+++ b/inc/3rdparty/site_config/standard/thesiasat.com.txt
@@ -1,11 +1,11 @@
1#body: (//div[@class='ftr-yt-vid'])[1] 1#body: (//div[@class='ftr-yt-vid'])[1]
2body: (//blockquote[contains(@class, 'postcontent')])[1] 2body: (//blockquote[contains(@class, 'postcontent')])[1]
3body: (//div[starts-with(@id, 'post_message')])[1] 3body: (//div[starts-with(@id, 'post_message')])[1]
4 4
5prune: no 5prune: no
6tidy: no 6tidy: no
7 7
8#replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player" 8#replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player"
9#replace_string(</iframe>): </iframe>&nbsp;</div> 9#replace_string(</iframe>): </iframe>&nbsp;</div>
10 10
11test_url: http://www.thesiasat.com/showthread.php?19220-Dunya-News-HASB-E-HAAL-16-06-2012-Part-1-5 \ No newline at end of file 11test_url: http://www.thesiasat.com/showthread.php?19220-Dunya-News-HASB-E-HAAL-16-06-2012-Part-1-5 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thesimpledollar.com.txt b/inc/3rdparty/site_config/standard/thesimpledollar.com.txt
index d5c6c9e0..dcdf2572 100644..100755
--- a/inc/3rdparty/site_config/standard/thesimpledollar.com.txt
+++ b/inc/3rdparty/site_config/standard/thesimpledollar.com.txt
@@ -1,4 +1,4 @@
1title: //h3[@class='post-title']/a[@class='post-title-link'] 1title: //h3[@class='post-title']/a[@class='post-title-link']
2body: //div[@class='post-content'] 2body: //div[@class='post-content']
3author: //div[@class='post-meta-under-title']/a 3author: //div[@class='post-meta-under-title']/a
4test_url: http://www.thesimpledollar.com/2011/09/13/determining-the-size-of-your-emergency-fund/ \ No newline at end of file 4test_url: http://www.thesimpledollar.com/2011/09/13/determining-the-size-of-your-emergency-fund/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thespoiler.co.uk.txt b/inc/3rdparty/site_config/standard/thespoiler.co.uk.txt
index e2ed1e63..ca983281 100644..100755
--- a/inc/3rdparty/site_config/standard/thespoiler.co.uk.txt
+++ b/inc/3rdparty/site_config/standard/thespoiler.co.uk.txt
@@ -1,3 +1,3 @@
1strip: //*[(@id = "content")]/h2 1strip: //*[(@id = "content")]/h2
2strip: //*[(@class = "wp-notable-line")] 2strip: //*[(@class = "wp-notable-line")]
3test_url: http://www.thespoiler.co.uk/index.php/2010/10/21/wayne-rooney-tells-man-utd-its-not-me-its-you \ No newline at end of file 3test_url: http://www.thespoiler.co.uk/index.php/2010/10/21/wayne-rooney-tells-man-utd-its-not-me-its-you \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thespoof.com.txt b/inc/3rdparty/site_config/standard/thespoof.com.txt
index 409dc0c9..f71cfb6b 100644..100755
--- a/inc/3rdparty/site_config/standard/thespoof.com.txt
+++ b/inc/3rdparty/site_config/standard/thespoof.com.txt
@@ -1,9 +1,9 @@
1title: //h1[contains(@class, 'cTitle')] 1title: //h1[contains(@class, 'cTitle')]
2body: //div[contains(@class, 'KonaBody') or @id='articleimageright'] 2body: //div[contains(@class, 'KonaBody') or @id='articleimageright']
3author: //meta[@name='Author']/@content 3author: //meta[@name='Author']/@content
4date: //meta[@name='OriginalPublicationDate']/@content 4date: //meta[@name='OriginalPublicationDate']/@content
5 5
6prune: no 6prune: no
7tidy: no 7tidy: no
8 8
9test_url: http://www.thespoof.com/news/spoof.cfm?headline=s8i108389 \ No newline at end of file 9test_url: http://www.thespoof.com/news/spoof.cfm?headline=s8i108389 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thestranger.com.txt b/inc/3rdparty/site_config/standard/thestranger.com.txt
index 0f9855c8..6fcf4fdf 100644..100755
--- a/inc/3rdparty/site_config/standard/thestranger.com.txt
+++ b/inc/3rdparty/site_config/standard/thestranger.com.txt
@@ -1,12 +1,12 @@
1# savage* filtering is for Savage Love, such as: http://www.thestranger.com/seattle/SavageLove?oid=5135029 1# savage* filtering is for Savage Love, such as: http://www.thestranger.com/seattle/SavageLove?oid=5135029
2 2
3#other filtering are plain articles, such as: http://www.thestranger.com/seattle/the-stranger-election-control-board/Content?oid=5142885 3#other filtering are plain articles, such as: http://www.thestranger.com/seattle/the-stranger-election-control-board/Content?oid=5142885
4 4
5title: //div[@id='savageColumn_head']/h1 5title: //div[@id='savageColumn_head']/h1
6title: //h1[@class="headlineLarge"] 6title: //h1[@class="headlineLarge"]
7 7
8strip: //div[@id='savage_right'] | //div[@id='savageColumn_head'] | //div[@id='savageArticleRight'] | //div[@id='articleRight'] | //div[@class='savAppBanner'] 8strip: //div[@id='savage_right'] | //div[@id='savageColumn_head'] | //div[@id='savageArticleRight'] | //div[@id='articleRight'] | //div[@class='savAppBanner']
9 9
10body: //div[@id='savageColumn'] 10body: //div[@id='savageColumn']
11body: //div[@id='story_text'] 11body: //div[@id='story_text']
12test_url: http://www.thestranger.com/seattle/SavageLove?oid=5135029 \ No newline at end of file 12test_url: http://www.thestranger.com/seattle/SavageLove?oid=5135029 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thestreet.com.txt b/inc/3rdparty/site_config/standard/thestreet.com.txt
index 5de75637..58eabf00 100644..100755
--- a/inc/3rdparty/site_config/standard/thestreet.com.txt
+++ b/inc/3rdparty/site_config/standard/thestreet.com.txt
@@ -1,25 +1,25 @@
1title: //div[@id='storyHdr']/h1 1title: //div[@id='storyHdr']/h1
2title: //div[@id='print']//h2 2title: //div[@id='print']//h2
3body: //div[@class="virtualpage"] 3body: //div[@class="virtualpage"]
4body: //div[@id='print']//div[@id='bd'] 4body: //div[@id='print']//div[@id='bd']
5author: //meta[@name="AUTHOR"]/@content 5author: //meta[@name="AUTHOR"]/@content
6author: (//div[@id='print']//div[@id='bd']/h4)[1] 6author: (//div[@id='print']//div[@id='bd']/h4)[1]
7date: //meta[@name="DATE"]/@content 7date: //meta[@name="DATE"]/@content
8date: //div[@id='print']//div[@id='dte'] 8date: //div[@id='print']//div[@id='dte']
9 9
10strip_id_or_class: articleFooter 10strip_id_or_class: articleFooter
11strip_id_or_class: sidebar 11strip_id_or_class: sidebar
12strip_id_or_class: ie6PrintSubhead 12strip_id_or_class: ie6PrintSubhead
13strip_id_or_class: subHdr 13strip_id_or_class: subHdr
14 14
15 15
16replace_string(<P/>): </p><p> 16replace_string(<P/>): </p><p>
17 17
18prune: no 18prune: no
19 19
20#TODO: redirects back - perhaps needs referer to work 20#TODO: redirects back - perhaps needs referer to work
21single_page_link: //div[@id='storyDetail']//a[contains(@href, '/print/')] 21single_page_link: //div[@id='storyDetail']//a[contains(@href, '/print/')]
22 22
23test_url: http://www.thestreet.com/story/11386556/1/which-of-these-10-dividend-stocks-is-worth-the-risk.html 23test_url: http://www.thestreet.com/story/11386556/1/which-of-these-10-dividend-stocks-is-worth-the-risk.html
24# multi page 24# multi page
25test_url: http://www.thestreet.com/story/11387090/1/7-ubs-stock-picks-for-2012.html \ No newline at end of file 25test_url: http://www.thestreet.com/story/11387090/1/7-ubs-stock-picks-for-2012.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thethaovanhoa.vn.txt b/inc/3rdparty/site_config/standard/thethaovanhoa.vn.txt
index 6b3277eb..6b3277eb 100644..100755
--- a/inc/3rdparty/site_config/standard/thethaovanhoa.vn.txt
+++ b/inc/3rdparty/site_config/standard/thethaovanhoa.vn.txt
diff --git a/inc/3rdparty/site_config/standard/theverge.com.txt b/inc/3rdparty/site_config/standard/theverge.com.txt
index 11c5c153..1e1ce58f 100644..100755
--- a/inc/3rdparty/site_config/standard/theverge.com.txt
+++ b/inc/3rdparty/site_config/standard/theverge.com.txt
@@ -1,31 +1,48 @@
1title: //h1[contains(@class, "headline")] 1author: //p[contains(@class, "byline")]/a[contains(@class, "author")]
2 2
3author: //p[contains(@class, "byline")]/a[contains(@class, "author")] 3date: //span[contains(@class, "publish-date")]/time[@pubdate]/@datetime
4 4
5date: substring-after(normalize-space(//p[contains(@class, "byline")]/span[contains(@class, "publish-date")]), "on ") 5body: //div[contains(@class, 'entry-content')]
6 6# for vergecasts, e.g. http://www.theverge.com/2013/8/22/4648566/the-vergecast-090-august-22th-2013-video
7body: //article[contains(@class, 'feature-entry')] 7body: //article
8body: //article 8prune: no
9prune: no 9#tidy: no
10tidy: no 10
11 11strip: //article/header
12strip: //article/header 12strip: //*[@id='sticky-menu']
13strip: //*[@id='sticky-menu'] 13strip: //aside
14strip: //aside 14strip: //nav
15strip: //nav 15strip: //img[contains(@class, 'vox-lazy-load')]
16 16# deal with bad parsing
17strip_id_or_class: gallery 17strip: //div[contains(@class, 'story-image')]//div[contains(., 'function(')]
18strip_id_or_class: article-meta 18
19strip_id_or_class: story-navigation 19strip_id_or_class: gallery
20strip_id_or_class: slegend 20strip_id_or_class: article-meta
21strip_id_or_class: related-product-meta 21strip_id_or_class: story-navigation
22strip_id_or_class: comments 22strip_id_or_class: slegend
23strip_id_or_class: ui-jump-list 23strip_id_or_class: related-product-meta
24strip_id_or_class: pullquote 24strip_id_or_class: comments
25 25strip_id_or_class: ui-jump-list
26strip: //q 26strip_id_or_class: pullquote
27 27strip_id_or_class: m-ad
28strip: //a[contains(@class, 'entry-section-title')] 28strip_id_or_class: social-sharing
29 29strip_id_or_class: m-video-entry__excerpt
30test_url: http://www.theverge.com/2012/2/29/2821763/lytro-review 30strip_id_or_class: hidden
31test_url: http://www.theverge.com/2011/11/3/2534861/nokia-lumia-800-review \ No newline at end of file 31
32replace_string(<noscript>): <div>
33replace_string(</noscript>): </div>
34
35find_string: <script
36replace_string: <div style="display:none"
37find_string: </script>
38replace_string: </div>
39
40strip: //q
41
42strip: //a[contains(@class, 'entry-section-title')]
43
44test_url: http://www.theverge.com/2012/2/29/2821763/lytro-review
45test_url: http://www.theverge.com/2011/11/3/2534861/nokia-lumia-800-review
46test_url: http://www.theverge.com/2013/2/24/4026114/barnes-noble-shifting-focus-away-from-nook-hardware
47test_url: http://www.theverge.com/2014/6/19/5824072/top-shelf-living-the-dream
48test_url: http://www.theverge.com/rss/frontpage \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/theweek.com.txt b/inc/3rdparty/site_config/standard/theweek.com.txt
index 27281ceb..f98749e2 100644..100755
--- a/inc/3rdparty/site_config/standard/theweek.com.txt
+++ b/inc/3rdparty/site_config/standard/theweek.com.txt
@@ -1,4 +1,4 @@
1body: //div[@class="briefingEntry"] 1body: //div[@class="briefingEntry"]
2prune: no 2prune: no
3 3
4test_url: http://theweek.com/article/index/215763/insider-trading-on-capitol-hill \ No newline at end of file 4test_url: http://theweek.com/article/index/215763/insider-trading-on-capitol-hill \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thinkprogress.org.txt b/inc/3rdparty/site_config/standard/thinkprogress.org.txt
index 8934b68e..1eec4e3c 100644..100755
--- a/inc/3rdparty/site_config/standard/thinkprogress.org.txt
+++ b/inc/3rdparty/site_config/standard/thinkprogress.org.txt
@@ -1,4 +1,4 @@
1author: //p[@class="byline"]/a 1author: //p[@class="byline"]/a
2body: //div[@class="post"] 2body: //div[@class="post"]
3 3
4test_url: http://thinkprogress.org/special/2011/11/12/367040/harvard-law-professor-criticizes-homeland-security-feel-of-overreaction-to-occupy-harvard/ \ No newline at end of file 4test_url: http://thinkprogress.org/special/2011/11/12/367040/harvard-law-professor-criticizes-homeland-security-feel-of-overreaction-to-occupy-harvard/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thisdaylive.com.txt b/inc/3rdparty/site_config/standard/thisdaylive.com.txt
index 958d4b27..73b3c9ed 100644..100755
--- a/inc/3rdparty/site_config/standard/thisdaylive.com.txt
+++ b/inc/3rdparty/site_config/standard/thisdaylive.com.txt
@@ -1,2 +1,2 @@
1body: //div[@class='main-content-panel']/div[@class='img'] | //div[@id='page_content_Content9_oModuleContent_2_div_Body'] 1body: //div[@class='main-content-panel']/div[@class='img'] | //div[@id='page_content_Content9_oModuleContent_2_div_Body']
2test_url: http://www.thisdaylive.com/articles/australia-pm-talks-human-rights-with-chinas-wen/90394/ \ No newline at end of file 2test_url: http://www.thisdaylive.com/articles/australia-pm-talks-human-rights-with-chinas-wen/90394/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thisismynext.com.txt b/inc/3rdparty/site_config/standard/thisismynext.com.txt
index 6850b4be..70b53995 100644..100755
--- a/inc/3rdparty/site_config/standard/thisismynext.com.txt
+++ b/inc/3rdparty/site_config/standard/thisismynext.com.txt
@@ -1,8 +1,8 @@
1author: //div[@class='meta clearfix']/a 1author: //div[@class='meta clearfix']/a
2body: //div[@class='post'] 2body: //div[@class='post']
3 3
4strip: //div[@class='metaCat'] 4strip: //div[@class='metaCat']
5strip: //div[@class='post']/h1 5strip: //div[@class='post']/h1
6strip: //div[@class='post']/div[@class='meta clearfix'] 6strip: //div[@class='post']/div[@class='meta clearfix']
7strip: //div[@class='post']/div[@class='social-bar clearfix'] 7strip: //div[@class='post']/div[@class='social-bar clearfix']
8test_url: http://thisismynext.com/2011/10/18/galaxy-nexus-android-ice-cream-sandwich-pictures-video-hands-on/ \ No newline at end of file 8test_url: http://thisismynext.com/2011/10/18/galaxy-nexus-android-ice-cream-sandwich-pictures-video-hands-on/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tidbits.com.txt b/inc/3rdparty/site_config/standard/tidbits.com.txt
index 8bcf2ec1..1950e58e 100644..100755
--- a/inc/3rdparty/site_config/standard/tidbits.com.txt
+++ b/inc/3rdparty/site_config/standard/tidbits.com.txt
@@ -1,3 +1,3 @@
1author: //span[@class='fn'] 1author: //span[@class='fn']
2date: substring-before(substring-after(//*[@id='center_ajax_sub']/div/div[3],'|'),'|') 2date: substring-before(substring-after(//*[@id='center_ajax_sub']/div/div[3],'|'),'|')
3test_url: http://tidbits.com/article/12651 \ No newline at end of file 3test_url: http://tidbits.com/article/12651 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/time.com.txt b/inc/3rdparty/site_config/standard/time.com.txt
index fd3fe08c..f3f886bc 100644..100755
--- a/inc/3rdparty/site_config/standard/time.com.txt
+++ b/inc/3rdparty/site_config/standard/time.com.txt
@@ -1,14 +1,12 @@
1# 2011-10-25 - carlo@... - Initial setup. 1title: //h1[contains(@class, 'article-title')]
2 2author: //article//span[contains(@class, 'byline')]
3single_page_link: //li[@class='print']/a/@href 3date: //time[@pubdate]/@datetime
4 4body: //section[contains(@class, 'article-body')]
5title: //h1 5prune: no
6author: //meta[@name="byline"]/@content 6tidy: no
7date: //meta[@name="date"]/@content 7
8 8strip: //figcaption
9strip: //span[@class="see"] 9strip: //p[contains(., 'MORE:') and ./a]
10strip: //div[@class="byline"] 10strip: //aside
11strip: //div[@id="date2"] 11
12strip: //h1 12test_url: http://time.com/14478/emotions-may-not-be-so-universal-after-all/ \ No newline at end of file
13
14test_url: http://www.time.com/time/specials/packages/article/0,28804,2094921_2094923_2094924,00.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/timeshighereducation.co.uk.txt b/inc/3rdparty/site_config/standard/timeshighereducation.co.uk.txt
index 17297732..af1c23ce 100644..100755
--- a/inc/3rdparty/site_config/standard/timeshighereducation.co.uk.txt
+++ b/inc/3rdparty/site_config/standard/timeshighereducation.co.uk.txt
@@ -1,6 +1,6 @@
1title: //h1 1title: //h1
2body: //div[@class="storytext"] 2body: //div[@class="storytext"]
3strip: //div[@id="thelogin"] 3strip: //div[@id="thelogin"]
4strip: //*[@class="hide"] 4strip: //*[@class="hide"]
5strip: //div[@id="anchored"] 5strip: //div[@id="anchored"]
6test_url: http://www.timeshighereducation.co.uk/story.asp?sectioncode=26&storycode=416124&c=1 \ No newline at end of file 6test_url: http://www.timeshighereducation.co.uk/story.asp?sectioncode=26&storycode=416124&c=1 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tipb.com.txt b/inc/3rdparty/site_config/standard/tipb.com.txt
index 9533eb0f..b8474d97 100644..100755
--- a/inc/3rdparty/site_config/standard/tipb.com.txt
+++ b/inc/3rdparty/site_config/standard/tipb.com.txt
@@ -1,9 +1,9 @@
1body: //div[@id='content'] 1body: //div[@id='content']
2 2
3strip_id_or_class: featured-box 3strip_id_or_class: featured-box
4strip_id_or_class: postmeta 4strip_id_or_class: postmeta
5strip_id_or_class: respond 5strip_id_or_class: respond
6 6
7author: //a[contains(@href, '/author/') and contains(@title, 'Posts by')] 7author: //a[contains(@href, '/author/') and contains(@title, 'Posts by')]
8date: substring-before(//a[contains(@href, '/author/') and contains(@title, 'Posts by')]/.., ' by ') 8date: substring-before(//a[contains(@href, '/author/') and contains(@title, 'Posts by')]/.., ' by ')
9test_url: http://www.tipb.com/2011/10/17/iphone-4s-review/ \ No newline at end of file 9test_url: http://www.tipb.com/2011/10/17/iphone-4s-review/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tnr.com.txt b/inc/3rdparty/site_config/standard/tnr.com.txt
index 65a1899f..199f5d13 100644..100755
--- a/inc/3rdparty/site_config/standard/tnr.com.txt
+++ b/inc/3rdparty/site_config/standard/tnr.com.txt
@@ -1,17 +1,17 @@
1title: //div[contains(@class, 'article_detail')]/div[@class='entry_header']/h1 1title: //div[contains(@class, 'article_detail')]/div[@class='entry_header']/h1
2title: //div[contains(@class, 'article_detail')]//h1 2title: //div[contains(@class, 'article_detail')]//h1
3title: //h1 3title: //h1
4 4
5body: //div[contains(@class, 'article_detail')] 5body: //div[contains(@class, 'article_detail')]
6 6
7author: //div[@class='article_detail']/div[@class='entry_header']/li/div[@class='author']//h3 7author: //div[@class='article_detail']/div[@class='entry_header']/li/div[@class='author']//h3
8author: div[@class='author']//h3 8author: div[@class='author']//h3
9strip: //div[contains(@class, 'field-field-book-cover')] 9strip: //div[contains(@class, 'field-field-book-cover')]
10 10
11date: translate(//*[@class='post_date' and contains(., ' 20')], '|', '') 11date: translate(//*[@class='post_date' and contains(., ' 20')], '|', '')
12 12
13prune: no 13prune: no
14 14
15single_page_link: //a[@class='print-page'] 15single_page_link: //a[@class='print-page']
16 16
17test_url: http://www.tnr.com/blog/jonathan-chait/92991/did-obama-get-rolled \ No newline at end of file 17test_url: http://www.tnr.com/blog/jonathan-chait/92991/did-obama-get-rolled \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tomdispatch.com.txt b/inc/3rdparty/site_config/standard/tomdispatch.com.txt
index d8548c78..701a2122 100644..100755
--- a/inc/3rdparty/site_config/standard/tomdispatch.com.txt
+++ b/inc/3rdparty/site_config/standard/tomdispatch.com.txt
@@ -1,6 +1,6 @@
1title: //div[@id='maincontent']//div[@class='title'] 1title: //div[@id='maincontent']//div[@class='title']
2body: //div[@id='maincontent']//div[@class='byline'] | //div[@id='maincontent']//div[@class='meat'] 2body: //div[@id='maincontent']//div[@class='byline'] | //div[@id='maincontent']//div[@class='meat']
3 3
4tidy: no 4tidy: no
5 5
6test_url: http://www.tomdispatch.com/post/175436/tomgram:_noam_chomsky%2C_the_imperial_mentality_and_9_11/ \ No newline at end of file 6test_url: http://www.tomdispatch.com/post/175436/tomgram:_noam_chomsky%2C_the_imperial_mentality_and_9_11/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tomshardware.com.txt b/inc/3rdparty/site_config/standard/tomshardware.com.txt
index 2bba6de8..2b437574 100644..100755
--- a/inc/3rdparty/site_config/standard/tomshardware.com.txt
+++ b/inc/3rdparty/site_config/standard/tomshardware.com.txt
@@ -1,8 +1,8 @@
1tidy: no 1tidy: no
2title: //title 2title: //title
3author: //a[@itemprop = 'author'] 3author: //a[@itemprop = 'author']
4date: //time[@itemprop = 'datePublished'] 4date: //time[@itemprop = 'datePublished']
5body: //div[@id = 'intelliTXT'] 5body: //div[@id = 'intelliTXT']
6 6
7next_page_link: //li[@class="pagin next"]/a 7next_page_link: //li[@class="pagin next"]/a
8test_url: http://www.tomshardware.com/reviews/gaming-graphics-card-review,3107.html \ No newline at end of file 8test_url: http://www.tomshardware.com/reviews/gaming-graphics-card-review,3107.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tomshardware.de.txt b/inc/3rdparty/site_config/standard/tomshardware.de.txt
index e910003c..eee57ccf 100644..100755
--- a/inc/3rdparty/site_config/standard/tomshardware.de.txt
+++ b/inc/3rdparty/site_config/standard/tomshardware.de.txt
@@ -1,12 +1,12 @@
1body://div[@id="news-content"]/div[@id="intelliTXT"][1] 1body://div[@id="news-content"]/div[@id="intelliTXT"][1]
2 2
3author://div[@id="header-news-infos"]/a[1] 3author://div[@id="header-news-infos"]/a[1]
4 4
5date: //div[@id="header-news-infos"]/span[1] 5date: //div[@id="header-news-infos"]/span[1]
6 6
7title://h1[@id="header-news-title" and @class="hardwareTitle"][1] 7title://h1[@id="header-news-title" and @class="hardwareTitle"][1]
8 8
9strip://div[@id="news-content"]/div[@id="intelliTXT"]/table 9strip://div[@id="news-content"]/div[@id="intelliTXT"]/table
10 10
11footnotes: no 11footnotes: no
12test_url: http://www.tomshardware.de/DDR4-DDR3-ISSCC-Samsung-Hynix,news-247133.html \ No newline at end of file 12test_url: http://www.tomshardware.de/DDR4-DDR3-ISSCC-Samsung-Hynix,news-247133.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/toolsandtoys.net.txt b/inc/3rdparty/site_config/standard/toolsandtoys.net.txt
index dbe60b15..bb45d890 100644..100755
--- a/inc/3rdparty/site_config/standard/toolsandtoys.net.txt
+++ b/inc/3rdparty/site_config/standard/toolsandtoys.net.txt
@@ -1,6 +1,6 @@
1body: //div[@class='post'] 1body: //div[@class='post']
2 2
3strip: //div[@class='social'] 3strip: //div[@class='social']
4strip: //span[@class='next'] 4strip: //span[@class='next']
5strip: //span[@class='previous'] 5strip: //span[@class='previous']
6test_url: http://toolsandtoys.net/noble-tonic-02/ \ No newline at end of file 6test_url: http://toolsandtoys.net/noble-tonic-02/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tracks.ranea.org.txt b/inc/3rdparty/site_config/standard/tracks.ranea.org.txt
new file mode 100755
index 00000000..5a386470
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/tracks.ranea.org.txt
@@ -0,0 +1,14 @@
1# Metadata
2title: substring-after(//title, 'Coyote Tracks - ')
3author: //meta[@name="author"]/@content
4date: //div[@class="post_header"]/a
5
6# Content Pruning
7strip: //div[@class="column left"]
8strip: //div[@class="pages"]
9strip: //a[@class="text_title"]
10strip: //ol[@class="notes"]
11
12dissolve: //div[@class='column right']/ul
13dissolve: //li[@class='post']
14test_url: http://tracks.ranea.org/post/31431060205/the-next-big-uh-slightly-taller-thing \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/trailer.web-view.net.txt b/inc/3rdparty/site_config/standard/trailer.web-view.net.txt
index e7a9c82d..e7a9c82d 100644..100755
--- a/inc/3rdparty/site_config/standard/trailer.web-view.net.txt
+++ b/inc/3rdparty/site_config/standard/trailer.web-view.net.txt
diff --git a/inc/3rdparty/site_config/standard/trailerzone.de.txt b/inc/3rdparty/site_config/standard/trailerzone.de.txt
new file mode 100755
index 00000000..02151a63
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/trailerzone.de.txt
@@ -0,0 +1,9 @@
1body: //div[@id='video' or @id='main']
2
3strip_id_or_class: socialshareprivacy2
4strip_id_or_class: wp_rp_first
5
6find_string: Genre</strong>
7replace_string: </strong></p><p><strong>Genre</strong>
8
9test_url: http://www.trailerzone.de/g-i-joe-2-die-abrechnung/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/traningslara.se.txt b/inc/3rdparty/site_config/standard/traningslara.se.txt
index 96e491fa..d6cfb6db 100644..100755
--- a/inc/3rdparty/site_config/standard/traningslara.se.txt
+++ b/inc/3rdparty/site_config/standard/traningslara.se.txt
@@ -1,8 +1,8 @@
1title: //div[@class="Post-body"]//span[@class="PostHeader"] 1title: //div[@class="Post-body"]//span[@class="PostHeader"]
2author: //div[@class="PostHeaderIcons metadata"]/a[@title="Author"] 2author: //div[@class="PostHeaderIcons metadata"]/a[@title="Author"]
3date: substring-before(//div[@class="PostHeaderIcons metadata"], '|') 3date: substring-before(//div[@class="PostHeaderIcons metadata"], '|')
4body: //div[@class="Post-body"] 4body: //div[@class="Post-body"]
5strip_id_or_class: print1 5strip_id_or_class: print1
6strip_id_or_class: metadata 6strip_id_or_class: metadata
7strip_id_or_class: authorbox 7strip_id_or_class: authorbox
8test_url: http://traningslara.se/skoinlagg-och-skador-finns-det-nagot-samband/ \ No newline at end of file 8test_url: http://traningslara.se/skoinlagg-och-skador-finns-det-nagot-samband/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/triblive.com.txt b/inc/3rdparty/site_config/standard/triblive.com.txt
index 82797db9..663cafe1 100644..100755
--- a/inc/3rdparty/site_config/standard/triblive.com.txt
+++ b/inc/3rdparty/site_config/standard/triblive.com.txt
@@ -1,13 +1,13 @@
1title: //title 1title: //title
2author: //span/a 2author: //span/a
3date: substring-after(//small,'Published:') 3date: substring-after(//small,'Published:')
4 4
5strip: //h1[@class='vert_class'] 5strip: //h1[@class='vert_class']
6strip: //h1[@class='headline'] 6strip: //h1[@class='headline']
7strip: //img[contains(@src,'logo_triblive.gif')] 7strip: //img[contains(@src,'logo_triblive.gif')]
8 8
9#strip: //h6 9#strip: //h6
10#strip_img_src: logo_triblive.gif 10#strip_img_src: logo_triblive.gif
11 11
12single_page_link: //a[@class='stprint'] 12single_page_link: //a[@class='stprint']
13test_url: http://triblive.com/sports/2819913-85/lemieux-deal-penguins-burkle-nhl-owners-team-mario-bettman-case \ No newline at end of file 13test_url: http://triblive.com/sports/2819913-85/lemieux-deal-penguins-burkle-nhl-owners-team-mario-bettman-case \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/truthdig.com.txt b/inc/3rdparty/site_config/standard/truthdig.com.txt
index e7c1a4bc..9e0663b0 100644..100755
--- a/inc/3rdparty/site_config/standard/truthdig.com.txt
+++ b/inc/3rdparty/site_config/standard/truthdig.com.txt
@@ -1,10 +1,12 @@
1title: //div[@class='printbody']/h1 1title: //div[@class='printbody']/h1
2body: //div[@class='printbody'] 2body: //div[@class='printbody']
3prune: no 3prune: no
4 4
5strip: //div[@class='printbody']/a[@href='http://www.truthdig.com/'] 5strip: //div[@class='printbody']/a[@href='http://www.truthdig.com/']
6strip: //table[@class='footer'] 6strip: //table[@class='footer']
7 7strip: //h6[contains(., 'http://')]
8single_page_link: //div[@class='article_tools']//a[contains(@href, '/print/')] 8
9 9single_page_link: //a[contains(@href, '/print/')]
10test_url: http://www.truthdig.com/report/item/the_election_march_of_the_trolls_20110829/ \ No newline at end of file 10
11test_url: http://www.truthdig.com/report/item/the_election_march_of_the_trolls_20110829/
12test_url: http://www.truthdig.com/dig/item/the_death_of_truth_20130505/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tthfanfic.org.txt b/inc/3rdparty/site_config/standard/tthfanfic.org.txt
index 0dab5b0f..63537c10 100644..100755
--- a/inc/3rdparty/site_config/standard/tthfanfic.org.txt
+++ b/inc/3rdparty/site_config/standard/tthfanfic.org.txt
@@ -1,4 +1,4 @@
1title: //h2 1title: //h2
2author: //a[starts-with(@href, '/AuthorStories')] 2author: //a[starts-with(@href, '/AuthorStories')]
3body: //div[@id='storyinnerbody'] 3body: //div[@id='storyinnerbody']
4test_url: http://www.tthfanfic.org/Story-6512/Kudra+Journeys.htm \ No newline at end of file 4test_url: http://www.tthfanfic.org/Story-6512/Kudra+Journeys.htm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tthor.com.txt b/inc/3rdparty/site_config/standard/tthor.com.txt
index 902fcd13..902fcd13 100644..100755
--- a/inc/3rdparty/site_config/standard/tthor.com.txt
+++ b/inc/3rdparty/site_config/standard/tthor.com.txt
diff --git a/inc/3rdparty/site_config/standard/tuaw.com.txt b/inc/3rdparty/site_config/standard/tuaw.com.txt
index b86f8ccb..2af00c27 100644..100755
--- a/inc/3rdparty/site_config/standard/tuaw.com.txt
+++ b/inc/3rdparty/site_config/standard/tuaw.com.txt
@@ -1,6 +1,6 @@
1title: //h1[@class='posttitle'] 1title: //h1[@class='posttitle']
2author: //span[@class='author']/a 2author: //span[@class='author']/a
3date: //span[@class='timestamp'] 3date: //span[@class='timestamp']
4body: //div[@class='body'] 4body: //div[@class='body']
5 5
6test_url: http://www.tuaw.com/2011/10/19/apple-posts-fans-memories-of-steve-jobs/ \ No newline at end of file 6test_url: http://www.tuaw.com/2011/10/19/apple-posts-fans-memories-of-steve-jobs/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tuckreview.com.txt b/inc/3rdparty/site_config/standard/tuckreview.com.txt
index a3946cbc..6e18e3da 100644..100755
--- a/inc/3rdparty/site_config/standard/tuckreview.com.txt
+++ b/inc/3rdparty/site_config/standard/tuckreview.com.txt
@@ -1,6 +1,6 @@
1title: //h1[@class='post-title'] 1title: //h1[@class='post-title']
2author: //div[@class='display-name'] 2author: //div[@class='display-name']
3date: //div[@class='date'] 3date: //div[@class='date']
4body: //div[@class='body'] 4body: //div[@class='body']
5footnotes: no 5footnotes: no
6test_url: http://tuckreview.com/2012/8/14/migrating-to-v6 \ No newline at end of file 6test_url: http://tuckreview.com/2012/8/14/migrating-to-v6 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tvtropes.org.txt b/inc/3rdparty/site_config/standard/tvtropes.org.txt
index 08dbba59..3cc3a9cf 100644..100755
--- a/inc/3rdparty/site_config/standard/tvtropes.org.txt
+++ b/inc/3rdparty/site_config/standard/tvtropes.org.txt
@@ -1,20 +1,20 @@
1# Google Custom Search 1# Google Custom Search
2strip_id_or_class: google_branding_style 2strip_id_or_class: google_branding_style
3 3
4# Avoid double title 4# Avoid double title
5strip_id_or_class: pagetitle 5strip_id_or_class: pagetitle
6 6
7# external links are labelled 7# external links are labelled
8strip_image_src: http://static.mediatropes.info/pmwiki/pub/external_link.gif 8strip_image_src: http://static.mediatropes.info/pmwiki/pub/external_link.gif
9 9
10title: //div[@class="pagetitle"] 10title: //div[@class="pagetitle"]
11body: //div[@id="wikitext"] 11body: //div[@id="wikitext"]
12 12
13# don't get clever. 13# don't get clever.
14strip_comments: no 14strip_comments: no
15prune: no 15prune: no
16 16
17# navigation in footer lives inside the wikitext div, annoyingly. 17# navigation in footer lives inside the wikitext div, annoyingly.
18strip_id_or_class: pathholder 18strip_id_or_class: pathholder
19 19
20test_url: http://tvtropes.org/pmwiki/pmwiki.php/Main/WithinParameters \ No newline at end of file 20test_url: http://tvtropes.org/pmwiki/pmwiki.php/Main/WithinParameters \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/twitter.com.txt b/inc/3rdparty/site_config/standard/twitter.com.txt
index 12ab1546..520ebd85 100644..100755
--- a/inc/3rdparty/site_config/standard/twitter.com.txt
+++ b/inc/3rdparty/site_config/standard/twitter.com.txt
@@ -1,9 +1,9 @@
1title: //title 1title: //title
2body: (//p[contains(@class, 'js-tweet-text')])[1] 2body: (//p[contains(@class, 'js-tweet-text')])[1]
3author: (//strong[contains(@class, 'fullname')])[1] 3author: (//strong[contains(@class, 'fullname')])[1]
4date: //span[contains(@class, 'js-short-timestamp')]/@data-time 4date: //span[contains(@class, 'js-short-timestamp')]/@data-time
5 5
6prune: no 6prune: no
7tidy: no 7tidy: no
8 8
9test_url: https://twitter.com/medialens/status/216883678582804480 \ No newline at end of file 9test_url: https://twitter.com/medialens/status/216883678582804480 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/uefa.com.txt b/inc/3rdparty/site_config/standard/uefa.com.txt
index 088d6586..3469be03 100644..100755
--- a/inc/3rdparty/site_config/standard/uefa.com.txt
+++ b/inc/3rdparty/site_config/standard/uefa.com.txt
@@ -1,6 +1,6 @@
1body: //div[@class='d3cmsCBody']//div[@class='pubText pubDate' or @class='newsComment' or contains(@class, 'newsPhoto') or @class='newsText'] 1body: //div[@class='d3cmsCBody']//div[@class='pubText pubDate' or @class='newsComment' or contains(@class, 'newsPhoto') or @class='newsText']
2strip: //div[contains(@class, 'mpindex')] 2strip: //div[contains(@class, 'mpindex')]
3prune: no 3prune: no
4tidy: no 4tidy: no
5 5
6test_url: http://www.uefa.com/uefaeuropaleague/news/newsid=1617320.html \ No newline at end of file 6test_url: http://www.uefa.com/uefaeuropaleague/news/newsid=1617320.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/uk.xbox360.ign.com.txt b/inc/3rdparty/site_config/standard/uk.xbox360.ign.com.txt
index 29e19565..cd9c1361 100644..100755
--- a/inc/3rdparty/site_config/standard/uk.xbox360.ign.com.txt
+++ b/inc/3rdparty/site_config/standard/uk.xbox360.ign.com.txt
@@ -1,23 +1,23 @@
1# applies to uk.ds.ign.com, uk.wii.ign.com etc. 1# applies to uk.ds.ign.com, uk.wii.ign.com etc.
2# possibly to non-UK versions, but I can&rsquo;t test that 2# possibly to non-UK versions, but I can&rsquo;t test that
3 3
4title: //h1[@class="headline"] 4title: //h1[@class="headline"]
5author: //div[@class="hdr-sub byline"]/a 5author: //div[@class="hdr-sub byline"]/a
6date: //h2[@class="publish-date"]/span 6date: //h2[@class="publish-date"]/span
7body: //div[@id="main-article-content"] 7body: //div[@id="main-article-content"]
8 8
9strip: //ul[@class="lnks-readmore"] 9strip: //ul[@class="lnks-readmore"]
10 10
11strip: //div[@class="inlineImageCaption"] 11strip: //div[@class="inlineImageCaption"]
12# can&rsquo;t make the images appear, so remove the captions 12# can&rsquo;t make the images appear, so remove the captions
13 13
14strip: //div[@style="width:468px"] 14strip: //div[@style="width:468px"]
15# video caption links 15# video caption links
16 16
17convert_double_br_tags: yes 17convert_double_br_tags: yes
18 18
19strip_comments: no 19strip_comments: no
20# otherwise the &lsquo;Closing Comments&rsquo; are removed 20# otherwise the &lsquo;Closing Comments&rsquo; are removed
21 21
22# Ratings box could do with some rearranging, but it&rsquo;s tricky 22# Ratings box could do with some rearranging, but it&rsquo;s tricky
23test_url: http://uk.xbox360.ign.com/articles/121/1210717p1.html \ No newline at end of file 23test_url: http://uk.xbox360.ign.com/articles/121/1210717p1.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/uni-watch.com.txt b/inc/3rdparty/site_config/standard/uni-watch.com.txt
index cbe87d19..4a5ae344 100644..100755
--- a/inc/3rdparty/site_config/standard/uni-watch.com.txt
+++ b/inc/3rdparty/site_config/standard/uni-watch.com.txt
@@ -1,17 +1,17 @@
1author: substring-before(substring-after(//div[@class='post-byline'], 'By '), ', on') 1author: substring-before(substring-after(//div[@class='post-byline'], 'By '), ', on')
2date: substring-after(//div[@class='post-byline'], ', on') 2date: substring-after(//div[@class='post-byline'], ', on')
3 3
4# for some reason, the following is producing a "no text [48]" error 4# for some reason, the following is producing a "no text [48]" error
5#title: //div[@class='post-headline'] 5#title: //div[@class='post-headline']
6 6
7# for some reason, the following doesn't appear to isolate just the body copy 7# for some reason, the following doesn't appear to isolate just the body copy
8body: //div[@class='post-bodycopy'] 8body: //div[@class='post-bodycopy']
9 9
10# we solve the above issue by stripping out everything else we don't want 10# we solve the above issue by stripping out everything else we don't want
11# these can probably all be removed if the body: command above worked 11# these can probably all be removed if the body: command above worked
12strip_id_or_class: reply 12strip_id_or_class: reply
13strip_id_or_class: left 13strip_id_or_class: left
14strip_id_or_class: post-headline 14strip_id_or_class: post-headline
15strip_id_or_class: post-byline 15strip_id_or_class: post-byline
16strip_id_or_class: footer 16strip_id_or_class: footer
17test_url: http://www.uni-watch.com/2011/10/18/the-curious-case-of-steve-debergs-microphone-and-speaker/ \ No newline at end of file 17test_url: http://www.uni-watch.com/2011/10/18/the-curious-case-of-steve-debergs-microphone-and-speaker/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/unwinnable.com.txt b/inc/3rdparty/site_config/standard/unwinnable.com.txt
new file mode 100755
index 00000000..05ad86a5
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/unwinnable.com.txt
@@ -0,0 +1,9 @@
1title: //h1[@class='postTitle']
2author: //a[@rel='author']
3date: substring-before(//h4[@class='postAuthor'], '|')
4body: //div[@class='postContent']
5
6strip: //div[@class='simplePullQuote']
7
8wrap_in(figure): //img
9test_url: http://www.unwinnable.com/2013/04/23/gratifying-play/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/uppsalafria.se.txt b/inc/3rdparty/site_config/standard/uppsalafria.se.txt
new file mode 100755
index 00000000..79c59ece
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/uppsalafria.se.txt
@@ -0,0 +1,7 @@
1body: //div[contains(@class, 'layout__inner')]//div[contains(@class, 'file-image') or contains(@class, 'node__content')]
2author: //article//div[contains(@class, 'field-byline')]
3strip_id_or_class: rekommenderade
4strip_id_or_class: disqus
5strip_id_or_class: annonser
6
7test_url: http://www.uppsalafria.se/artikel/97167 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/urbandictionary.com.txt b/inc/3rdparty/site_config/standard/urbandictionary.com.txt
index 86061f77..385c95ca 100644..100755
--- a/inc/3rdparty/site_config/standard/urbandictionary.com.txt
+++ b/inc/3rdparty/site_config/standard/urbandictionary.com.txt
@@ -1,3 +1,3 @@
1title: //title 1title: //title
2body: //td[@id='content'] 2body: //table[@id='entries']
3test_url: http://www.urbandictionary.com/define.php?term=Grown-Ass \ No newline at end of file 3test_url: http://www.urbandictionary.com/define.php?term=Grown-Ass
diff --git a/inc/3rdparty/site_config/standard/usatoday.com.txt b/inc/3rdparty/site_config/standard/usatoday.com.txt
new file mode 100755
index 00000000..710a7b37
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/usatoday.com.txt
@@ -0,0 +1,8 @@
1date: //meta[@itemprop="datePublished"]/@content
2author: //div[@itemprop="author"]
3body: //div[@itemprop='articleBody']
4
5strip_id_or_class: share-tools
6
7test_url: http://www.usatoday.com/story/news/world/2014/03/18/malaysia-plane-search/6552429/
8test_url: http://rssfeeds.usatoday.com/usatoday-NewsTopStories \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/usccb.org.txt b/inc/3rdparty/site_config/standard/usccb.org.txt
index eb10a48f..30c28823 100644..100755
--- a/inc/3rdparty/site_config/standard/usccb.org.txt
+++ b/inc/3rdparty/site_config/standard/usccb.org.txt
@@ -1,6 +1,6 @@
1body: //div[@id='CS_Element_maincontent'] 1body: //div[@id='CS_Element_maincontent']
2 2
3tidy: no 3tidy: no
4prune: no 4prune: no
5 5
6test_url: http://www.usccb.org/bible/readings/072412.cfm \ No newline at end of file 6test_url: http://www.usccb.org/bible/readings/072412.cfm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/useit.com.txt b/inc/3rdparty/site_config/standard/useit.com.txt
index f6be84c4..b8511c7c 100644..100755
--- a/inc/3rdparty/site_config/standard/useit.com.txt
+++ b/inc/3rdparty/site_config/standard/useit.com.txt
@@ -1,8 +1,8 @@
1title: //h1 1title: //h1
2 2
3date: substring-after(//p[@class='overline']/strong, ',') 3date: substring-after(//p[@class='overline']/strong, ',')
4body: //div[@class="maintext"] 4body: //div[@class="maintext"]
5strip: //p[@class='overline'] 5strip: //p[@class='overline']
6strip: //h1 6strip: //h1
7tidy: no 7tidy: no
8test_url: http://www.useit.com/alertbox/mobile-startup-screen.html \ No newline at end of file 8test_url: http://www.useit.com/alertbox/mobile-startup-screen.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/usfirst.org.txt b/inc/3rdparty/site_config/standard/usfirst.org.txt
new file mode 100755
index 00000000..f02b2d3e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/usfirst.org.txt
@@ -0,0 +1,6 @@
1title: //meta[@property='dc:title']/@content
2date: //div[@class='content']//span[@property='dc:date']/@content
3body: //div[@property='content:encoded']
4prune: no
5
6test_url: http://www.usfirst.org/roboticsprograms/frc/Photo-From-Kickoff-Filming \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/utdailybeacon.com.txt b/inc/3rdparty/site_config/standard/utdailybeacon.com.txt
new file mode 100755
index 00000000..d37911bc
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/utdailybeacon.com.txt
@@ -0,0 +1,5 @@
1title: //h1
2author: //*[@class='byline']
3date: substring-after(//*[@class='pubdatetime'], 'Published: ')
4body: //*[@class='body-block']
5test_url: http://utdailybeacon.com/news/2012/oct/8/energy-forum-continues/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ux.artu.tv.txt b/inc/3rdparty/site_config/standard/ux.artu.tv.txt
index a893bda0..c69f2df9 100644..100755
--- a/inc/3rdparty/site_config/standard/ux.artu.tv.txt
+++ b/inc/3rdparty/site_config/standard/ux.artu.tv.txt
@@ -1,7 +1,7 @@
1author: ("Arturo Toledo") 1author: ("Arturo Toledo")
2title: //div[@class="post"]/h2 2title: //div[@class="post"]/h2
3body: //div[@class="entry"] 3body: //div[@class="entry"]
4 4
5# Remove Twitter button 5# Remove Twitter button
6strip: //div[@class="entry"]/p[2]/a/img 6strip: //div[@class="entry"]/p[2]/a/img
7test_url: http://ux.artu.tv/?p=192 \ No newline at end of file 7test_url: http://ux.artu.tv/?p=192 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/uzivatelsketestovani.cz.txt b/inc/3rdparty/site_config/standard/uzivatelsketestovani.cz.txt
index 3661b06a..3661b06a 100644..100755
--- a/inc/3rdparty/site_config/standard/uzivatelsketestovani.cz.txt
+++ b/inc/3rdparty/site_config/standard/uzivatelsketestovani.cz.txt
diff --git a/inc/3rdparty/site_config/standard/vanityfair.com.txt b/inc/3rdparty/site_config/standard/vanityfair.com.txt
index bfc47d1f..efa38224 100644..100755
--- a/inc/3rdparty/site_config/standard/vanityfair.com.txt
+++ b/inc/3rdparty/site_config/standard/vanityfair.com.txt
@@ -1,30 +1,30 @@
1title: //meta[@property="og:title"]/@content 1title: //meta[@property="og:title"]/@content
2author: //div[contains(@class, 'byline')]//span[contains(@class, 'name')] 2author: //div[contains(@class, 'byline')]//span[contains(@class, 'name')]
3date: //div[contains(@class, 'cn_date_time')] 3date: //div[contains(@class, 'cn_date_time')]
4body: //div[contains(@class, 'pageContainers')] 4body: //div[contains(@class, 'pageContainers')]
5body: //article[@id='items-container'] 5body: //article[@id='items-container']
6#body: //h2[@class='sub-header'] | //div[contains(@class, 'contributor-type') or @class='display-date' or @class='content-container'] 6#body: //h2[@class='sub-header'] | //div[contains(@class, 'contributor-type') or @class='display-date' or @class='content-container']
7 7
8strip_id_or_class: bc 8strip_id_or_class: bc
9strip_id_or_class: utilities 9strip_id_or_class: utilities
10strip_id_or_class: list-supporting 10strip_id_or_class: list-supporting
11strip_id_or_class: yrail 11strip_id_or_class: yrail
12strip_id_or_class: urail 12strip_id_or_class: urail
13 13
14prune: no 14prune: no
15#tidy: no 15#tidy: no
16 16
17strip_id_or_class: super-rubric-section 17strip_id_or_class: super-rubric-section
18strip_id_or_class: cn_date_time 18strip_id_or_class: cn_date_time
19strip_id_or_class: cn_contributors 19strip_id_or_class: cn_contributors
20strip_id_or_class: cn_pagination_controls 20strip_id_or_class: cn_pagination_controls
21strip_id_or_class: cn_features_container 21strip_id_or_class: cn_features_container
22strip_id_or_class: global-footer 22strip_id_or_class: global-footer
23strip_id_or_class: cn_ecom_placement 23strip_id_or_class: cn_ecom_placement
24strip: //li[@class='blogNavPrev'] 24strip: //li[@class='blogNavPrev']
25 25
26single_page_link: //a[@title='Print this page'] 26single_page_link: //a[@title='Print this page']
27 27
28test_url: http://www.vanityfair.com/politics/features/2011/05/egypt-revolutionaries-201105 28test_url: http://www.vanityfair.com/politics/features/2011/05/egypt-revolutionaries-201105
29test_url: http://www.vanityfair.com/politics/features/2008/08/hitchens200808 29test_url: http://www.vanityfair.com/politics/features/2008/08/hitchens200808
30test_url: http://www.vanityfair.com/style/2012/01/prisoners-of-style-201201 \ No newline at end of file 30test_url: http://www.vanityfair.com/style/2012/01/prisoners-of-style-201201 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/varingen.no.txt b/inc/3rdparty/site_config/standard/varingen.no.txt
index 6b5e0ae0..c0133c95 100644..100755
--- a/inc/3rdparty/site_config/standard/varingen.no.txt
+++ b/inc/3rdparty/site_config/standard/varingen.no.txt
@@ -1,5 +1,5 @@
1title: //div[@class='ArticleHeadlineDetailedView'] 1title: //div[@class='ArticleHeadlineDetailedView']
2date: //span[@class='ArticlePublicationDateTimeDetailedView'] 2date: //span[@class='ArticlePublicationDateTimeDetailedView']
3author://span[@class='ArticleBylineDetailedView'] 3author://span[@class='ArticleBylineDetailedView']
4body: //div[@class='ArticleTextDetailedView'] 4body: //div[@class='ArticleTextDetailedView']
5test_url: http://www.varingen.no/Nyheter/tabid/392/Default.aspx?ModuleId=56651&articleView=true \ No newline at end of file 5test_url: http://www.varingen.no/Nyheter/tabid/392/Default.aspx?ModuleId=56651&articleView=true \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/varsity.co.uk.txt b/inc/3rdparty/site_config/standard/varsity.co.uk.txt
index b1db4c35..dfbf69cf 100644..100755
--- a/inc/3rdparty/site_config/standard/varsity.co.uk.txt
+++ b/inc/3rdparty/site_config/standard/varsity.co.uk.txt
@@ -1,4 +1,4 @@
1# FB comments are inside an h2. Weird. Without this, the line 'Comments' is preserved by the text parser 1# FB comments are inside an h2. Weird. Without this, the line 'Comments' is preserved by the text parser
2 2
3strip: //h2 3strip: //h2
4test_url: http://www.varsity.co.uk/reviews/2662 \ No newline at end of file 4test_url: http://www.varsity.co.uk/reviews/2662 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/vea.gov.vn.txt b/inc/3rdparty/site_config/standard/vea.gov.vn.txt
new file mode 100755
index 00000000..9c8420ce
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/vea.gov.vn.txt
@@ -0,0 +1,7 @@
1title://div[@class="detail-new-title"]
2body://div[@class="innerpad"]
3strip://div[@class="ArticleUtility"]
4strip://div[@class="commentPost"]
5strip://div[@class="comment-box"]
6strip://div[@id="TinLienQuan"]
7test_url: http://vea.gov.vn/vn/tintuc/tintuchangngay/Pages/T%C4%83ng-c%C6%B0%E1%BB%9Dng-b%E1%BA%A3o-t%E1%BB%93n-%C4%91%E1%BB%99ng-v%E1%BA%ADt-hoang-d%C3%A3-%E1%BB%9F-Vi%E1%BB%87t-Nam.aspx \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/vedomosti.ru.txt b/inc/3rdparty/site_config/standard/vedomosti.ru.txt
index ba999171..265f9fc7 100644..100755
--- a/inc/3rdparty/site_config/standard/vedomosti.ru.txt
+++ b/inc/3rdparty/site_config/standard/vedomosti.ru.txt
@@ -1,3 +1,3 @@
1title: //td[@class='second_content']/h1 1title: //td[@class='second_content']/h1
2body: //td[@class='second_content']/div[@class='article_text'] 2body: //td[@class='second_content']/div[@class='article_text']
3test_url: http://www.vedomosti.ru/newspaper/article/259377/rasprodazha_mailru \ No newline at end of file 3test_url: http://www.vedomosti.ru/newspaper/article/259377/rasprodazha_mailru \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/veggbilder.no.txt b/inc/3rdparty/site_config/standard/veggbilder.no.txt
index 14144c0f..2a44c317 100644..100755
--- a/inc/3rdparty/site_config/standard/veggbilder.no.txt
+++ b/inc/3rdparty/site_config/standard/veggbilder.no.txt
@@ -1,5 +1,5 @@
1author: //div[@class="blogginnleggForfatter"] 1author: //div[@class="blogginnleggForfatter"]
2date: concat(//div[@class='blogginnleggDatoDag'],' ',//div[@class='blogginnleggDatoMnd']) 2date: concat(//div[@class='blogginnleggDatoDag'],' ',//div[@class='blogginnleggDatoMnd'])
3strip: //div[contains(@id,"bloggDelingslenker")] 3strip: //div[contains(@id,"bloggDelingslenker")]
4strip: //div[contains(@id,"bloggDelingslenker")] 4strip: //div[contains(@id,"bloggDelingslenker")]
5test_url: http://veggbilder.no/blogginnlegg/fristelser \ No newline at end of file 5test_url: http://veggbilder.no/blogginnlegg/fristelser \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/vemedio.com.txt b/inc/3rdparty/site_config/standard/vemedio.com.txt
index 294ace9c..d22fc5cf 100644..100755
--- a/inc/3rdparty/site_config/standard/vemedio.com.txt
+++ b/inc/3rdparty/site_config/standard/vemedio.com.txt
@@ -1,6 +1,6 @@
1title: //h2 1title: //h2
2date: substring-before(//small," &bull; Permalink") 2date: substring-before(//small," &bull; Permalink")
3author:string('Martin Hering') 3author:string('Martin Hering')
4 4
5Strip: //p/small 5Strip: //p/small
6test_url: http://vemedio.com/blog/posts/state-of-support-and-icloud \ No newline at end of file 6test_url: http://vemedio.com/blog/posts/state-of-support-and-icloud \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/venturebeat.com.txt b/inc/3rdparty/site_config/standard/venturebeat.com.txt
index 41bfa8c5..d6321d79 100644..100755
--- a/inc/3rdparty/site_config/standard/venturebeat.com.txt
+++ b/inc/3rdparty/site_config/standard/venturebeat.com.txt
@@ -1,6 +1,6 @@
1title: //h1[@class="entry-title"] 1title: //h1[@class="entry-title"]
2author: //div[@class="author-name"] 2author: //div[@class="author-name"]
3date: //span[@class="the-time"] 3date: //span[@class="the-time"]
4body: //div[@class="entry-content"] 4body: //div[@class="entry-content"]
5strip: //div[@class="vb-gallery"] 5strip: //div[@class="vb-gallery"]
6test_url: http://venturebeat.com/2012/07/17/marissa-mayer-yahoo/#s:mayer-1 \ No newline at end of file 6test_url: http://venturebeat.com/2012/07/17/marissa-mayer-yahoo/#s:mayer-1 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/version2.dk.txt b/inc/3rdparty/site_config/standard/version2.dk.txt
index 74203cad..418b83a1 100644..100755
--- a/inc/3rdparty/site_config/standard/version2.dk.txt
+++ b/inc/3rdparty/site_config/standard/version2.dk.txt
@@ -1,12 +1,12 @@
1title: //article/header/h1 1title: //article/header/h1
2 2
3author: //article/header/section[@class='byline']/span[contains(@class, 'author')]/a 3author: //article/header/section[@class='byline']/span[contains(@class, 'author')]/a
4date: //article/header/section[@class='byline']/span[@class='published']/span 4date: //article/header/section[@class='byline']/span[@class='published']/span
5 5
6body: //article/section[@class='body'] 6body: //article/section[@class='body']
7 7
8convert_double_br_tags: yes 8convert_double_br_tags: yes
9 9
10# This is required, because Tidy chokes on the HTML5 tags... 10# This is required, because Tidy chokes on the HTML5 tags...
11tidy: no 11tidy: no
12test_url: http://www.version2.dk/artikel/17069-amerikansk-hit-investor-er-vild-med-danske-net-ivaerksaettere \ No newline at end of file 12test_url: http://www.version2.dk/artikel/17069-amerikansk-hit-investor-er-vild-med-danske-net-ivaerksaettere \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/verybestbaking.com.txt b/inc/3rdparty/site_config/standard/verybestbaking.com.txt
index 4cdd0c0f..ad0fec66 100644..100755
--- a/inc/3rdparty/site_config/standard/verybestbaking.com.txt
+++ b/inc/3rdparty/site_config/standard/verybestbaking.com.txt
@@ -1,7 +1,7 @@
1title: //title 1title: //title
2body: //div[contains(@class, 'printRecipe')] 2body: //div[contains(@class, 'printRecipe')]
3strip: //div[@class='recipeHeader'] 3strip: //div[@class='recipeHeader']
4prune: no 4prune: no
5tidy: no 5tidy: no
6single_page_link: //ul[@class='printOptions']//a[contains(@href, 'detail.aspx?p=1&showphoto=true')] 6single_page_link: //ul[@class='printOptions']//a[contains(@href, 'detail.aspx?p=1&showphoto=true')]
7test_url: http://www.verybestbaking.com/recipes/143190/Penne-Pasta-with-Sun-dried-Tomato-Cream-Sauce/detail.aspx \ No newline at end of file 7test_url: http://www.verybestbaking.com/recipes/143190/Penne-Pasta-with-Sun-dried-Tomato-Cream-Sauce/detail.aspx \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/vg.no.txt b/inc/3rdparty/site_config/standard/vg.no.txt
index fceeea09..bfadb4a7 100644..100755
--- a/inc/3rdparty/site_config/standard/vg.no.txt
+++ b/inc/3rdparty/site_config/standard/vg.no.txt
@@ -1,3 +1,3 @@
1body: //div[@id='artikkelspalte'] 1body: //div[@id='artikkelspalte']
2strip_id_or_class: 'breadcrumb' 2strip_id_or_class: 'breadcrumb'
3test_url: http://www.vg.no/spill/artikkel.php?artid=10003628 \ No newline at end of file 3test_url: http://www.vg.no/spill/artikkel.php?artid=10003628 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/video.forbes.com.txt b/inc/3rdparty/site_config/standard/video.forbes.com.txt
index 1dca55a3..5db77463 100644..100755
--- a/inc/3rdparty/site_config/standard/video.forbes.com.txt
+++ b/inc/3rdparty/site_config/standard/video.forbes.com.txt
@@ -1,9 +1,9 @@
1title: concat("Video: ", //div[@id='currentVideoTitleDivId']) 1title: concat("Video: ", //div[@id='currentVideoTitleDivId'])
2body: //div[@id='currentVideoDescriptionId'] 2body: //div[@id='currentVideoDescriptionId']
3author: //meta[@name='author']/@content 3author: //meta[@name='author']/@content
4 4
5replace_string(<div id="currentVideoDescriptionId" style="display): <div id="currentVideoDescriptionId" style="displayitplease 5replace_string(<div id="currentVideoDescriptionId" style="display): <div id="currentVideoDescriptionId" style="displayitplease
6 6
7replace_string(<div id="currentVideoTitleDivId" style="display): <div id="currentVideoTitleDivId" style="displayitplease 7replace_string(<div id="currentVideoTitleDivId" style="display): <div id="currentVideoTitleDivId" style="displayitplease
8 8
9test_url: http://video.forbes.com/fvn/business/wells-fargo-inside-the-bank-that-works \ No newline at end of file 9test_url: http://video.forbes.com/fvn/business/wells-fargo-inside-the-bank-that-works \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/videogum.com.txt b/inc/3rdparty/site_config/standard/videogum.com.txt
index a1663813..d93780ca 100644..100755
--- a/inc/3rdparty/site_config/standard/videogum.com.txt
+++ b/inc/3rdparty/site_config/standard/videogum.com.txt
@@ -1,6 +1,6 @@
1title: //h2[@class='posttitle'] 1title: //h2[@class='posttitle']
2date: substring-before(substring-after(//span[@class='postdate'], 'on '), ' by') 2date: substring-before(substring-after(//span[@class='postdate'], 'on '), ' by')
3date: //span[@class='postdate'] 3date: //span[@class='postdate']
4author: //span[@class='postdate']/a 4author: //span[@class='postdate']/a
5body: //div[@class='entry line_top'] 5body: //div[@class='entry line_top']
6test_url: http://videogum.com/395042/here-are-some-afternoon-links-92/list/ \ No newline at end of file 6test_url: http://videogum.com/395042/here-are-some-afternoon-links-92/list/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/villagevoice.com.txt b/inc/3rdparty/site_config/standard/villagevoice.com.txt
index df374602..36e4a2f5 100644..100755
--- a/inc/3rdparty/site_config/standard/villagevoice.com.txt
+++ b/inc/3rdparty/site_config/standard/villagevoice.com.txt
@@ -1,9 +1,9 @@
1title: //h2[@class='headline'] 1title: //h2[@class='headline']
2 2
3body: //div[@class='ContentPrint'] 3body: //div[@class='ContentPrint']
4 4
5prune: no 5prune: no
6 6
7single_page_link: //a[contains(@href, '/printVersion/')] 7single_page_link: //a[contains(@href, '/printVersion/')]
8 8
9test_url: http://www.villagevoice.com/2010-03-16/news/new-york-s-ten-worst-landlords/ \ No newline at end of file 9test_url: http://www.villagevoice.com/2010-03-16/news/new-york-s-ten-worst-landlords/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/vimeo.com.txt b/inc/3rdparty/site_config/standard/vimeo.com.txt
index d6c6701a..f36c9c57 100644..100755
--- a/inc/3rdparty/site_config/standard/vimeo.com.txt
+++ b/inc/3rdparty/site_config/standard/vimeo.com.txt
@@ -1,17 +1,17 @@
1title: //title 1title: //title
2body: //iframe 2body: //iframe
3 3
4find_string: <html>&lt;iframe 4find_string: <html>&lt;iframe
5replace_string: <iframe id="video" 5replace_string: <iframe id="video"
6 6
7find_string: &gt;&lt;/iframe&gt;</html> 7find_string: &gt;&lt;/iframe&gt;</html>
8replace_string: ></iframe> 8replace_string: ></iframe>
9 9
10replace_string(&quot;): " 10replace_string(&quot;): "
11 11
12single_page_link: //link[@type='text/xml+oembed'] 12single_page_link: //link[@type='text/xml+oembed']
13 13
14prune: no 14prune: no
15tidy: no 15tidy: no
16 16
17test_url: http://vimeo.com/35941909 \ No newline at end of file 17test_url: http://vimeo.com/35941909 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/viply.de.txt b/inc/3rdparty/site_config/standard/viply.de.txt
new file mode 100755
index 00000000..e3599c9d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/viply.de.txt
@@ -0,0 +1,12 @@
1title: //div[@id='singletext']//h1
2body: //div[contains(@class, 'mypictureborder')] | //div[@id='singletext']
3prune: no
4
5strip_id_or_class: singletostart
6strip_id_or_class: navigation
7strip_id_or_class: social
8strip_id_or_class: single_topwrapper
9strip: //a[contains(., 'Nächster Artikel')]
10
11test_url: http://www.viply.de/?p=87973
12test_url: http://www.viply.de/?feed=rss2 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/visir.is.txt b/inc/3rdparty/site_config/standard/visir.is.txt
index 0f03198e..04e09102 100644..100755
--- a/inc/3rdparty/site_config/standard/visir.is.txt
+++ b/inc/3rdparty/site_config/standard/visir.is.txt
@@ -1,14 +1,14 @@
1# Author's name, when present, has 'skrifar:' ('writes:') appended to it. 1# Author's name, when present, has 'skrifar:' ('writes:') appended to it.
2# In case of multiple authors, this would be 'skrifa:', hence only 7 characters 2# In case of multiple authors, this would be 'skrifa:', hence only 7 characters
3# are stripped off. 3# are stripped off.
4author: substring(//div[@class='paragraph']/div[@class='meta'], 0, string-length(//div[@class='paragraph']/div[@class='meta']) - 7) 4author: substring(//div[@class='paragraph']/div[@class='meta'], 0, string-length(//div[@class='paragraph']/div[@class='meta']) - 7)
5 5
6date: //span[@class='date'] 6date: //span[@class='date']
7title: //h1 7title: //h1
8body: //div[@class='paragraph'] 8body: //div[@class='paragraph']
9 9
10# Strip out author string when present 10# Strip out author string when present
11strip: //div[@class='paragraph']/div[@class='meta'] 11strip: //div[@class='paragraph']/div[@class='meta']
12 12
13convert_double_br_tags: yes 13convert_double_br_tags: yes
14test_url: http://visir.is/esb,-ipa,-bhm-og-bsrb/article/2012701319997 \ No newline at end of file 14test_url: http://visir.is/esb,-ipa,-bhm-og-bsrb/article/2012701319997 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/vitispr.com.txt b/inc/3rdparty/site_config/standard/vitispr.com.txt
index 8b2a300e..f2d11c7c 100644..100755
--- a/inc/3rdparty/site_config/standard/vitispr.com.txt
+++ b/inc/3rdparty/site_config/standard/vitispr.com.txt
@@ -1,6 +1,6 @@
1strip: //*[(@id = "ja-search")] 1strip: //*[(@id = "ja-search")]
2body: //*[(@id = "ja-mainbody")] 2body: //*[(@id = "ja-mainbody")]
3body: //*[(@id = "content-mass-bottom")] 3body: //*[(@id = "content-mass-bottom")]
4strip://h3[contains(span,'Related Posts')] 4strip://h3[contains(span,'Related Posts')]
5strip://img 5strip://img
6test_url: http://vitispr.com/blog/coventry-is-a-technology-hotspot \ No newline at end of file 6test_url: http://vitispr.com/blog/coventry-is-a-technology-hotspot \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/vivirmexico.com.txt b/inc/3rdparty/site_config/standard/vivirmexico.com.txt
index e6a72700..e6a72700 100644..100755
--- a/inc/3rdparty/site_config/standard/vivirmexico.com.txt
+++ b/inc/3rdparty/site_config/standard/vivirmexico.com.txt
diff --git a/inc/3rdparty/site_config/standard/vnexpress.net.txt b/inc/3rdparty/site_config/standard/vnexpress.net.txt
index 23c928bf..e5ebc435 100644..100755
--- a/inc/3rdparty/site_config/standard/vnexpress.net.txt
+++ b/inc/3rdparty/site_config/standard/vnexpress.net.txt
@@ -1,8 +1,8 @@
1body: //div[@cpms_content]//h2[@class='Lead'] | //div[@cpms_content]//p[@class='Normal'] | //div[@cpms_content]//table 1body: //div[@cpms_content]//h2[@class='Lead'] | //div[@cpms_content]//p[@class='Normal'] | //div[@cpms_content]//table
2strip://div[@class="box-item"] 2strip://div[@class="box-item"]
3strip://div[@id="ARTICLE_BANNER"] 3strip://div[@id="ARTICLE_BANNER"]
4strip://a 4strip://a
5strip://div[@class="tag-parent"] 5strip://div[@class="tag-parent"]
6strip://div[@class="email-print txtr"] 6strip://div[@class="email-print txtr"]
7 7
8test_url: http://vnexpress.net/gl/xa-hoi/2011/04/tim-thay-nan-nhan-cuoi-cung-vu-sap-mo-da-o-len-co/ \ No newline at end of file 8test_url: http://vnexpress.net/gl/xa-hoi/2011/04/tim-thay-nan-nhan-cuoi-cung-vu-sap-mo-da-o-len-co/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/voices.washingtonpost.com.txt b/inc/3rdparty/site_config/standard/voices.washingtonpost.com.txt
index 6bd0e855..b754aeb8 100644..100755
--- a/inc/3rdparty/site_config/standard/voices.washingtonpost.com.txt
+++ b/inc/3rdparty/site_config/standard/voices.washingtonpost.com.txt
@@ -1,3 +1,3 @@
1title: //h1 1title: //h1
2body: //div[@class='entrytext'] 2body: //div[@class='entrytext']
3test_url: http://voices.washingtonpost.com/ezra-klein/2010/10/why_isnt_monetary_policy_discr.html \ No newline at end of file 3test_url: http://voices.washingtonpost.com/ezra-klein/2010/10/why_isnt_monetary_policy_discr.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/vworker.com.txt b/inc/3rdparty/site_config/standard/vworker.com.txt
index a39c9f4e..cfb9ea1c 100644..100755
--- a/inc/3rdparty/site_config/standard/vworker.com.txt
+++ b/inc/3rdparty/site_config/standard/vworker.com.txt
@@ -1,3 +1,3 @@
1body: //div[contains(@class, 'KonaBody')] 1body: //div[contains(@class, 'KonaBody')]
2 2
3test_url: http://www.vworker.com/RentACoder/misc/BidRequests/ShowBidRequest.asp?lngBidRequestId=1634186 \ No newline at end of file 3test_url: http://www.vworker.com/RentACoder/misc/BidRequests/ShowBidRequest.asp?lngBidRequestId=1634186 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/waffle.wootest.net.txt b/inc/3rdparty/site_config/standard/waffle.wootest.net.txt
index afcba0f3..e92757d7 100644..100755
--- a/inc/3rdparty/site_config/standard/waffle.wootest.net.txt
+++ b/inc/3rdparty/site_config/standard/waffle.wootest.net.txt
@@ -1,4 +1,4 @@
1title: //h2[@class="title"] 1title: //h2[@class="title"]
2body: //div[@class="post"] 2body: //div[@class="post"]
3 3
4test_url: http://waffle.wootest.net/2011/06/22/on-reading-news/ \ No newline at end of file 4test_url: http://waffle.wootest.net/2011/06/22/on-reading-news/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/walrusmagazine.com.txt b/inc/3rdparty/site_config/standard/walrusmagazine.com.txt
index 3ab22172..c53eb0dd 100644..100755
--- a/inc/3rdparty/site_config/standard/walrusmagazine.com.txt
+++ b/inc/3rdparty/site_config/standard/walrusmagazine.com.txt
@@ -1,14 +1,14 @@
1title: //div[@id='pr']/h3 1title: //div[@id='pr']/h3
2author: //div[@class='dateline']//a[contains(@href, '/author/')] 2author: //div[@class='dateline']//a[contains(@href, '/author/')]
3 3
4# print page 4# print page
5body: //div[@id='prbody'] 5body: //div[@id='prbody']
6# standard page 6# standard page
7body: //div[@id='pgbody'] 7body: //div[@id='pgbody']
8 8
9# for multi-page articles 9# for multi-page articles
10single_page_link: //div[@class='tipjar']//a[contains(@href, '/printerFriendly.php?')] 10single_page_link: //div[@class='tipjar']//a[contains(@href, '/printerFriendly.php?')]
11 11
12prune: no 12prune: no
13 13
14test_url: http://www.walrusmagazine.com/articles/2011.12-memoir-kidnapped \ No newline at end of file 14test_url: http://www.walrusmagazine.com/articles/2011.12-memoir-kidnapped \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/warnerbros.fr.txt b/inc/3rdparty/site_config/standard/warnerbros.fr.txt
index a41a3511..21f56352 100644..100755
--- a/inc/3rdparty/site_config/standard/warnerbros.fr.txt
+++ b/inc/3rdparty/site_config/standard/warnerbros.fr.txt
@@ -1,3 +1,3 @@
1title: //h3 1title: //h3
2body: //div[@class="content_wysiwyg"] 2body: //div[@class="content_wysiwyg"]
3test_url: http://www.warnerbros.fr/game-of-thrones-un-junket-vu-de-l-interieur-268.html \ No newline at end of file 3test_url: http://www.warnerbros.fr/game-of-thrones-un-junket-vu-de-l-interieur-268.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/washingtoninstitute.org.txt b/inc/3rdparty/site_config/standard/washingtoninstitute.org.txt
new file mode 100755
index 00000000..17f45677
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/washingtoninstitute.org.txt
@@ -0,0 +1,6 @@
1body: //div[@class='main']//article
2
3prune: no
4
5test_url: http://www.washingtoninstitute.org/policy-analysis/view/striking-syria-lessons-from-the-israeli-experience?goback=.gde_3822158_member_273623672
6test_url: http://www.washingtoninstitute.org/rss/11/10 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/washingtonmonthly.com.txt b/inc/3rdparty/site_config/standard/washingtonmonthly.com.txt
index edf16422..8f8902a5 100644..100755
--- a/inc/3rdparty/site_config/standard/washingtonmonthly.com.txt
+++ b/inc/3rdparty/site_config/standard/washingtonmonthly.com.txt
@@ -1,10 +1,10 @@
1title://a[@class = 'headline-article'] 1title://a[@class = 'headline-article']
2 2
3author: substring-after(//div[@class = 'article']/p[@class = 'author'], 'By ') 3author: substring-after(//div[@class = 'article']/p[@class = 'author'], 'By ')
4date://div[@class = 'article']/span[@class = 'date'] 4date://div[@class = 'article']/span[@class = 'date']
5body://div[@class = 'article'] 5body://div[@class = 'article']
6single_page_link://a[@class = 'print'] 6single_page_link://a[@class = 'print']
7strip://p[@class = 'author'] 7strip://p[@class = 'author']
8strip://a[@class = 'headline-article'] 8strip://a[@class = 'headline-article']
9strip://span[@class = 'date'] 9strip://span[@class = 'date']
10test_url: http://www.washingtonmonthly.com/magazine/julyaugust_2011/features/the_trinity_sisters030380.php \ No newline at end of file 10test_url: http://www.washingtonmonthly.com/magazine/julyaugust_2011/features/the_trinity_sisters030380.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/washingtonpost.com.txt b/inc/3rdparty/site_config/standard/washingtonpost.com.txt
index 2931ca5f..0aa9f1d8 100644..100755
--- a/inc/3rdparty/site_config/standard/washingtonpost.com.txt
+++ b/inc/3rdparty/site_config/standard/washingtonpost.com.txt
@@ -1,21 +1,32 @@
1body: //div[@class="article_body"] 1# Seems to be redirecting to articles.washingtonpost.com for many users
2author://meta[@name='DC.creator']/@content 2
3title://meta[@name='title']/@content 3body: //div[contains(@class, "article_body")]
4date://div[contains(@class,'byline')]//span[contains(@class,'published')]/@title 4# print view
5date://meta[@name="DC.date.issued"]/@content 5body: //div[@id='print_facet']//div[@id='body']
6strip://div[@class="relative primary-slot padding-top img-border gallery-container photo-wrapper"] 6
7strip://div[@id="wp-column six end"] 7author://meta[@name='DC.creator']/@content
8strip://div[contains(@class,'hidden')] 8title://meta[@name='title']/@content
9strip://div[@id='article-side-rail'] 9date://div[contains(@class,'byline')]//span[contains(@class,'published')]/@title
10strip://div[@class="module component todays-paper-module curved"] 10date://meta[@name="DC.date.issued"]/@content
11strip://div[@class="module component live-qa curved img-border"] 11strip://div[@class="relative primary-slot padding-top img-border gallery-container photo-wrapper"]
12strip://div[@class="module component newsletter-signup curved"] 12strip://div[@id="wp-column six end"]
13strip://div[@class="module featured-stories component curved img-border"] 13strip://div[contains(@class,'hidden')]
14 14strip://div[@id='article-side-rail']
15strip_id_or_class: carousel 15strip://div[@class="module component todays-paper-module curved"]
16strip_id_or_class: toolbar 16strip://div[@class="module component live-qa curved img-border"]
17strip_id_or_class: module 17strip://div[@class="module component newsletter-signup curved"]
18 18strip://div[@class="module featured-stories component curved img-border"]
19test_url: http://www.washingtonpost.com/world/europe/in-europe-new-fears-of-german-might/2011/10/19/gIQA3baZ7L_story.html?hpid=z1 19
20test_url: http://www.washingtonpost.com/national/health-science/radical-theory-of-first-americans-places-stone-age-europeans-in-delmarva-20000-years-ago/2012/02/28/gIQA4mriiR_story.html 20strip_id_or_class: carousel
21strip_id_or_class: toolbar
22strip_id_or_class: module
23
24# Change gJQAwdJG4U_story.html to gJQAwdJG4U_print.html
25single_page_link: concat(substring-before(//link[@rel="canonical"]/@href, "_story.html"), "_print.html")
26
27# [OLD] Change gJQAwdJG4U_story.html to gJQAwdJG4U_story_print.html
28#single_page_link: concat(substring-before(//link[@rel="canonical"]/@href, "_story.html"), "_story_print.html")
29
30test_url: http://www.washingtonpost.com/world/europe/in-europe-new-fears-of-german-might/2011/10/19/gIQA3baZ7L_story.html?hpid=z1
31test_url: http://www.washingtonpost.com/national/health-science/radical-theory-of-first-americans-places-stone-age-europeans-in-delmarva-20000-years-ago/2012/02/28/gIQA4mriiR_story.html
21test_url: http://www.washingtonpost.com/lifestyle/magazine/the-sorry-fate-of-a-tech-pioneer-halsey-minor-and-historic-virginia-estate-carters-grove/2012/05/30/gJQAwdJG4U_story.html \ No newline at end of file 32test_url: http://www.washingtonpost.com/lifestyle/magazine/the-sorry-fate-of-a-tech-pioneer-halsey-minor-and-historic-virginia-estate-carters-grove/2012/05/30/gJQAwdJG4U_story.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/web-libre.org.txt b/inc/3rdparty/site_config/standard/web-libre.org.txt
index dfcd0081..9ed43a25 100644..100755
--- a/inc/3rdparty/site_config/standard/web-libre.org.txt
+++ b/inc/3rdparty/site_config/standard/web-libre.org.txt
@@ -1,6 +1,6 @@
1body: //div[@id='template_article'] 1body: //div[@id='template_article']
2 2
3strip_id_or_class: article_more 3strip_id_or_class: article_more
4strip: //hr 4strip: //hr
5 5
6test_url: http://www.web-libre.org/dossiers/jacuzzi-gonflable,8493.html \ No newline at end of file 6test_url: http://www.web-libre.org/dossiers/jacuzzi-gonflable,8493.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/weblog.bignerdranch.com.txt b/inc/3rdparty/site_config/standard/weblog.bignerdranch.com.txt
index 9e75a8a8..578ba523 100644..100755
--- a/inc/3rdparty/site_config/standard/weblog.bignerdranch.com.txt
+++ b/inc/3rdparty/site_config/standard/weblog.bignerdranch.com.txt
@@ -1,5 +1,5 @@
1title://div[@class="post"]/h2 1title://div[@class="post"]/h2
2author://p[@class="postinfo"]/a 2author://p[@class="postinfo"]/a
3date:substring-before(substring-after(//p[@class="postinfo"],' on '),' under ') 3date:substring-before(substring-after(//p[@class="postinfo"],' on '),' under ')
4body://div[@class="contenttext"] 4body://div[@class="contenttext"]
5test_url: http://weblog.bignerdranch.com/?p=304 \ No newline at end of file 5test_url: http://weblog.bignerdranch.com/?p=304 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/weblogs.asp.net.txt b/inc/3rdparty/site_config/standard/weblogs.asp.net.txt
index 3fabda0b..7cfa49d2 100644..100755
--- a/inc/3rdparty/site_config/standard/weblogs.asp.net.txt
+++ b/inc/3rdparty/site_config/standard/weblogs.asp.net.txt
@@ -1,9 +1,9 @@
1title: //h2[@class="pageTitle"] 1title: //h2[@class="pageTitle"]
2strip: //div[@class="postfoot"] 2strip: //div[@class="postfoot"]
3strip: //h2[@class="pageTitle"] 3strip: //h2[@class="pageTitle"]
4strip: //h3[@class="pageTitle"] 4strip: //h3[@class="pageTitle"]
5body: //div[@class="post"] 5body: //div[@class="post"]
6author: substring-before(substring-after(//div[@class="postfoot"], 'by'), 'Filed') 6author: substring-before(substring-after(//div[@class="postfoot"], 'by'), 'Filed')
7date: substring-before(substring-after(//div[@class="postfoot"], 'Published'), 'by') 7date: substring-before(substring-after(//div[@class="postfoot"], 'Published'), 'by')
8 8
9test_url: http://weblogs.asp.net/scottgu/archive/2011/08/31/html-editor-smart-tasks-and-event-handler-generation-asp-net-vnext-series.aspx \ No newline at end of file 9test_url: http://weblogs.asp.net/scottgu/archive/2011/08/31/html-editor-smart-tasks-and-event-handler-generation-asp-net-vnext-series.aspx \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/webpaper.nzz.ch.txt b/inc/3rdparty/site_config/standard/webpaper.nzz.ch.txt
index 8922b02f..cea10147 100644..100755
--- a/inc/3rdparty/site_config/standard/webpaper.nzz.ch.txt
+++ b/inc/3rdparty/site_config/standard/webpaper.nzz.ch.txt
@@ -1,8 +1,8 @@
1tidy: no 1tidy: no
2dissolve: //div[@id="content"]/div/article/header 2dissolve: //div[@id="content"]/div/article/header
3body: //div[@id="content"]/div/article 3body: //div[@id="content"]/div/article
4title: //div[@id="content"]/div/article/h1 4title: //div[@id="content"]/div/article/h1
5date: //div[@id="content"]/div/article/header/div[@id="issueSelectTrigger"] 5date: //div[@id="content"]/div/article/header/div[@id="issueSelectTrigger"]
6strip: //div[@id="content"]/div/article/h1 6strip: //div[@id="content"]/div/article/h1
7 7
8test_url: http://webpaper.nzz.ch/2012/06/23/front/JJKMS/aphrodite-und-die-kommunisten?guest_pass=24a3ca5b6d%3AJJKMS%3Ad30e1be8628c099669671d4da56cdce4187790ba \ No newline at end of file 8test_url: http://webpaper.nzz.ch/2012/06/23/front/JJKMS/aphrodite-und-die-kommunisten?guest_pass=24a3ca5b6d%3AJJKMS%3Ad30e1be8628c099669671d4da56cdce4187790ba \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/webwereld.nl.txt b/inc/3rdparty/site_config/standard/webwereld.nl.txt
new file mode 100755
index 00000000..40a5aa36
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/webwereld.nl.txt
@@ -0,0 +1,8 @@
1strip: //*[@class="paginator"]
2body: //*[@id="articleText"]
3next_page_link: //a[@class="next"]
4
5# No author detection
6# No publishing date detection
7# No author and intro deduplication over multiple pages
8test_url: http://webwereld.nl/analyse/111452/de-code-van-dorifel-nader-bekeken.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/welt.de.txt b/inc/3rdparty/site_config/standard/welt.de.txt
index 6e4f828f..42e65e97 100644..100755
--- a/inc/3rdparty/site_config/standard/welt.de.txt
+++ b/inc/3rdparty/site_config/standard/welt.de.txt
@@ -1,22 +1,22 @@
1# set body 1# set body
2tidy: no 2tidy: no
3body: //div[contains(@class, 'articleContent')] 3body: //div[contains(@class, 'articleContent')]
4 4
5# remove clutter 5# remove clutter
6strip: //div[@class='advertising'] 6strip: //div[@class='advertising']
7strip: //div[@class='themenalarm'] 7strip: //div[@class='themenalarm']
8strip: //div[contains(@class, 'inTextTeaser')] 8strip: //div[contains(@class, 'inTextTeaser')]
9 9
10# remove captions 10# remove captions
11strip: //span[@class='copyRight'] 11strip: //span[@class='copyRight']
12 12
13# remove photo galleries and extras 13# remove photo galleries and extras
14strip: //div[contains(@class, 'textGallery')] 14strip: //div[contains(@class, 'textGallery')]
15strip: //div[contains(@class, 'videoGallery')] 15strip: //div[contains(@class, 'videoGallery')]
16strip: //div[contains(@class, 'imageGallery')] 16strip: //div[contains(@class, 'imageGallery')]
17strip: //div[contains(@class, 'openContent')] 17strip: //div[contains(@class, 'openContent')]
18 18
19# remove comments 19# remove comments
20strip: //div[@id = 'writeComment'] 20strip: //div[@id = 'writeComment']
21 21
22test_url: http://www.welt.de/vermischtes/weltgeschehen/article11050589/27-Bergleute-in-neuseelaendischer-Mine-vermisst.html \ No newline at end of file 22test_url: http://www.welt.de/vermischtes/weltgeschehen/article11050589/27-Bergleute-in-neuseelaendischer-Mine-vermisst.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/westhamtillidie.com.txt b/inc/3rdparty/site_config/standard/westhamtillidie.com.txt
index b9343029..3132e98a 100644..100755
--- a/inc/3rdparty/site_config/standard/westhamtillidie.com.txt
+++ b/inc/3rdparty/site_config/standard/westhamtillidie.com.txt
@@ -1,6 +1,6 @@
1title: substring-before(//title, '') 1title: substring-before(//title, '')
2 2
3body: //div[@class='entry'] 3body: //div[@class='entry']
4strip: //div[@class='sharing_label'] 4strip: //div[@class='sharing_label']
5strip: //div[@class='snap_nopreview sharing robots-nocontent'] 5strip: //div[@class='snap_nopreview sharing robots-nocontent']
6test_url: http://www.westhamtillidie.com/2012/03/11/twelve-things-we-learned-from-the-doncaster-game/ \ No newline at end of file 6test_url: http://www.westhamtillidie.com/2012/03/11/twelve-things-we-learned-from-the-doncaster-game/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/what-if.xkcd.com.txt b/inc/3rdparty/site_config/standard/what-if.xkcd.com.txt
index a88a02c9..a88a02c9 100644..100755
--- a/inc/3rdparty/site_config/standard/what-if.xkcd.com.txt
+++ b/inc/3rdparty/site_config/standard/what-if.xkcd.com.txt
diff --git a/inc/3rdparty/site_config/standard/whatever.scalzi.com.txt b/inc/3rdparty/site_config/standard/whatever.scalzi.com.txt
index 52c5cf1b..100a8c88 100644..100755
--- a/inc/3rdparty/site_config/standard/whatever.scalzi.com.txt
+++ b/inc/3rdparty/site_config/standard/whatever.scalzi.com.txt
@@ -1,7 +1,7 @@
1strip: //div[@class="navigation"] 1strip: //div[@class="navigation"]
2strip: //div[@id="sidebar"] 2strip: //div[@id="sidebar"]
3strip: //div[@id="post-extra-content"] 3strip: //div[@id="post-extra-content"]
4strip: //div[@id="footer"] 4strip: //div[@id="footer"]
5strip: //div[contains(@class, "sharing")] 5strip: //div[contains(@class, "sharing")]
6 6
7test_url: http://whatever.scalzi.com/2011/01/09/quick-giffords-follow-up/ \ No newline at end of file 7test_url: http://whatever.scalzi.com/2011/01/09/quick-giffords-follow-up/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/wheelyric.com.txt b/inc/3rdparty/site_config/standard/wheelyric.com.txt
index aa9783cf..b9eeaa0c 100644..100755
--- a/inc/3rdparty/site_config/standard/wheelyric.com.txt
+++ b/inc/3rdparty/site_config/standard/wheelyric.com.txt
@@ -1,11 +1,11 @@
1body://div[contains(@class,'oAndtLyrics')] 1body://div[contains(@class,'oAndtLyrics')]
2strip://div[contains(@class,'info')] 2strip://div[contains(@class,'info')]
3strip://div[contains(@id,'romanization')] 3strip://div[contains(@id,'romanization')]
4strip://div[contains(@id,'youtube')] 4strip://div[contains(@id,'youtube')]
5strip://div[contains(@id,'romanizationSelector')] 5strip://div[contains(@id,'romanizationSelector')]
6strip://div[contains(@id,'langSelectWrap')] 6strip://div[contains(@id,'langSelectWrap')]
7strip://div[contains(@id,'requestTranslationWrap')] 7strip://div[contains(@id,'requestTranslationWrap')]
8strip://div[contains(@id,'viewMore')] 8strip://div[contains(@id,'viewMore')]
9strip://div[contains(@class,'lyricsListInMainContent')] 9strip://div[contains(@class,'lyricsListInMainContent')]
10strip://div[contains(@class,'descIpNoti')] 10strip://div[contains(@class,'descIpNoti')]
11test_url: http://wheelyric.com/lyrics/121#2 \ No newline at end of file 11test_url: http://wheelyric.com/lyrics/121#2 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/wiki.guildwars.com.txt b/inc/3rdparty/site_config/standard/wiki.guildwars.com.txt
index 1f262a0a..b80fe5d1 100644..100755
--- a/inc/3rdparty/site_config/standard/wiki.guildwars.com.txt
+++ b/inc/3rdparty/site_config/standard/wiki.guildwars.com.txt
@@ -1,8 +1,8 @@
1title: //h1 1title: //h1
2body: //div[@id='content'] 2body: //div[@id='content']
3strip_id_or_class: editsection 3strip_id_or_class: editsection
4strip_id_or_class: toc 4strip_id_or_class: toc
5strip: //div[@id='siteNotice'] 5strip: //div[@id='siteNotice']
6strip: //div[@id='content']//table[last()] 6strip: //div[@id='content']//table[last()]
7prune: no 7prune: no
8test_url: http://wiki.guildwars.com/wiki/Monk \ No newline at end of file 8test_url: http://wiki.guildwars.com/wiki/Monk \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/wiki.guildwars2.com.txt b/inc/3rdparty/site_config/standard/wiki.guildwars2.com.txt
index e176907e..e9233998 100644..100755
--- a/inc/3rdparty/site_config/standard/wiki.guildwars2.com.txt
+++ b/inc/3rdparty/site_config/standard/wiki.guildwars2.com.txt
@@ -1,8 +1,8 @@
1title: //h1 1title: //h1
2body: //div[@id='content'] 2body: //div[@id='content']
3strip_id_or_class: editsection 3strip_id_or_class: editsection
4strip_id_or_class: toc 4strip_id_or_class: toc
5strip: //div[@id='siteNotice'] 5strip: //div[@id='siteNotice']
6strip: //div[@id='content']//table[last()] 6strip: //div[@id='content']//table[last()]
7prune: no 7prune: no
8test_url: http://wiki.guildwars2.com/wiki/Guardian \ No newline at end of file 8test_url: http://wiki.guildwars2.com/wiki/Guardian \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/wikihow.com.txt b/inc/3rdparty/site_config/standard/wikihow.com.txt
new file mode 100755
index 00000000..fe95d3f9
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/wikihow.com.txt
@@ -0,0 +1,15 @@
1# ...&printable=yes
2body: //div[@id='bodycontents']
3prune: no
4tidy: no
5strip_id_or_class: gatEditSection
6strip_id_or_class: relatedwikihows
7#strip: //div[contains(@class, 'step_num')]
8
9replace_string(<script ): <div style="display: none"
10replace_string(</script>): </div>
11
12single_page_link: //a[@id='gatPrintView']
13single_page_link: concat(//link[@rel='canonical']/@href, '?printable=yes')
14
15test_url: http://www.wikihow.com/Start-Your-Own-Country \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/wikitravel.org.txt b/inc/3rdparty/site_config/standard/wikitravel.org.txt
index da5bd0b5..1f32a372 100644..100755
--- a/inc/3rdparty/site_config/standard/wikitravel.org.txt
+++ b/inc/3rdparty/site_config/standard/wikitravel.org.txt
@@ -1,14 +1,14 @@
1# copied from .wikipedia.org.txt 1# copied from .wikipedia.org.txt
2title: //h1[@id='firstHeading' or @class='firstHeading'] 2title: //h1[@id='firstHeading' or @class='firstHeading']
3body: //div[@id = 'bodyContent'] 3body: //div[@id = 'bodyContent']
4strip_id_or_class: editsection 4strip_id_or_class: editsection
5#strip_id_or_class: toc 5#strip_id_or_class: toc
6strip_id_or_class: vertical-navbox 6strip_id_or_class: vertical-navbox
7strip: //table[@id='toc'] | //div[@id='p-toc'] 7strip: //table[@id='toc'] | //div[@id='p-toc']
8strip: //div[@id='catlinks' or @id='contentSub'] 8strip: //div[@id='catlinks' or @id='contentSub']
9strip: //div[@id='jump-to-nav'] 9strip: //div[@id='jump-to-nav']
10strip: //div[@class='thumbcaption']//div[@class='magnify'] 10strip: //div[@class='thumbcaption']//div[@class='magnify']
11strip: //table[@class='navbox'] 11strip: //table[@class='navbox']
12prune: no 12prune: no
13tidy: no 13tidy: no
14test_url: http://wikitravel.org/wiki/en/index.php?title=Bangkok&printable=yes \ No newline at end of file 14test_url: http://wikitravel.org/wiki/en/index.php?title=Bangkok&printable=yes \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/will-self.com.txt b/inc/3rdparty/site_config/standard/will-self.com.txt
index 24467c22..394f9ca4 100644..100755
--- a/inc/3rdparty/site_config/standard/will-self.com.txt
+++ b/inc/3rdparty/site_config/standard/will-self.com.txt
@@ -1,4 +1,4 @@
1strip: //div[@class="widget-area"] 1strip: //div[@class="widget-area"]
2title: //*[@class="entry-title"] 2title: //*[@class="entry-title"]
3date: //time[@class="entry-date"] 3date: //time[@class="entry-date"]
4test_url: http://will-self.com/2012/02/01/real-meals-dominos-pizza/ \ No newline at end of file 4test_url: http://will-self.com/2012/02/01/real-meals-dominos-pizza/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/williampfaff.com.txt b/inc/3rdparty/site_config/standard/williampfaff.com.txt
index fb5f92ed..cefabec0 100644..100755
--- a/inc/3rdparty/site_config/standard/williampfaff.com.txt
+++ b/inc/3rdparty/site_config/standard/williampfaff.com.txt
@@ -1,3 +1,3 @@
1title: substring-after(//span[@class='itemTitle'], ':') 1title: substring-after(//span[@class='itemTitle'], ':')
2body: //div[@id='content'] 2body: //div[@id='content']
3test_url: http://www.williampfaff.com/modules/news/article.php?storyid=491 \ No newline at end of file 3test_url: http://www.williampfaff.com/modules/news/article.php?storyid=491 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/winfuture.de.txt b/inc/3rdparty/site_config/standard/winfuture.de.txt
index bc936370..dddc6f9e 100644..100755
--- a/inc/3rdparty/site_config/standard/winfuture.de.txt
+++ b/inc/3rdparty/site_config/standard/winfuture.de.txt
@@ -1,12 +1,12 @@
1title: //h1/span 1title: //h1/span
2 2
3body: //div[@id="news_content"] 3body: //div[@id="news_content"]
4 4
5author: //div[@class="bookmarks_btm"]/p[1]/a[1]/text() 5author: //div[@class="bookmarks_btm"]/p[1]/a[1]/text()
6 6
7date: //span[@class='date'] 7date: //span[@class='date']
8 8
9# Rubrikenbild entfernen 9# Rubrikenbild entfernen
10strip: //div[@id="news_content"]/a[1] 10strip: //div[@id="news_content"]/a[1]
11 11
12test_url: http://winfuture.de/news,69672.html \ No newline at end of file 12test_url: http://winfuture.de/news,69672.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/winrumors.com.txt b/inc/3rdparty/site_config/standard/winrumors.com.txt
index cedb4390..f25f9c9e 100644..100755
--- a/inc/3rdparty/site_config/standard/winrumors.com.txt
+++ b/inc/3rdparty/site_config/standard/winrumors.com.txt
@@ -1,6 +1,6 @@
1title: //h1[@class='page-heading'] 1title: //h1[@class='page-heading']
2author: //small/strong/a 2author: //small/strong/a
3#their date string is relative, so if you save the page 2 hours after it is posted it may say 'two hours ago, instead of providing a useful date/time' 3#their date string is relative, so if you save the page 2 hours after it is posted it may say 'two hours ago, instead of providing a useful date/time'
4date: substring-before(substring-after(//small,'on'),'with') 4date: substring-before(substring-after(//small,'on'),'with')
5body: //div[@class='entry'] 5body: //div[@class='entry']
6test_url: http://www.winrumors.com/chinese-windows-phone-launch-still-on-track-for-early-2012/ \ No newline at end of file 6test_url: http://www.winrumors.com/chinese-windows-phone-launch-still-on-track-for-early-2012/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/winsupersite.com.txt b/inc/3rdparty/site_config/standard/winsupersite.com.txt
index db6a6fc9..f725b67a 100644..100755
--- a/inc/3rdparty/site_config/standard/winsupersite.com.txt
+++ b/inc/3rdparty/site_config/standard/winsupersite.com.txt
@@ -1,3 +1,3 @@
1date: //*[@class='kicker'] 1date: //*[@class='kicker']
2body: //*[@class='KonaBody'] 2body: //*[@class='KonaBody']
3test_url: http://www.winsupersite.com/article/paul-thurrotts-wininfo/android-malware-surges-separate-studies-141364 \ No newline at end of file 3test_url: http://www.winsupersite.com/article/paul-thurrotts-wininfo/android-malware-surges-separate-studies-141364 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/wired.com.txt b/inc/3rdparty/site_config/standard/wired.com.txt
index 69bbf5b7..f5a72d14 100644..100755
--- a/inc/3rdparty/site_config/standard/wired.com.txt
+++ b/inc/3rdparty/site_config/standard/wired.com.txt
@@ -1,22 +1,25 @@
1title: //meta[@property="og:title"]/@content 1title: //meta[@name='Title']/@content
2title: //h1 2author: //meta[@name='Author']/@content
3title: //*[@class='posttitle'] 3date: //meta[@name='DisplayDate']/@content
4author: //*[@class='entryAuthor']/a[1] 4body: //div[@class='entry']
5author://*[@class='member-title'] 5strip: //p[contains(., 'Pages:') and contains(., 'View All')]
6author://li[@class='author']/a[contains(@href, '/author/')] 6strip: //p[@class='caption']
7date: substring-after(//div[@class='entryAuthor'], '') 7strip: //div[@class='desc' or @class='slide' or @id='slide-info']
8date: substring-before(//*[@class='entryDate'], '|') 8
9body: //div[@class='entry'] 9strip_id_or_class: pullquote
10strip: //span[contains(@class, 'nextprev')] 10strip_id_or_class: left_rail
11#strip_id_or_class: ngg-galleryoverview 11strip_id_or_class: related-container
12# ngg-galleryoverview is the whole content sometimes, e.g. http://www.wired.com/underwire/2011/12/best-mixtapes-of-2011/?pid=5736&viewall=true 12strip_id_or_class: radvert-caption-wrap
13 13
14strip: //p[span[contains(@class, 'contentjump')]] 14# Remove gallery?
15strip: //text()[contains(., 'nextpage')] 15strip_id_or_class: wpgallery
16 16
17prune: no 17#strip: //text()[contains(., 'nextpage')]
18 18
19single_page_link: //a[contains(@href, '/all/1') and contains(@class, 'contentjumpall')] 19prune: no
20 20
21test_url: http://www.wired.com/cloudline/2011/10/meet-arms-cortex-a15-the-future-of-the-ipad-and-possibly-the-macbook-air/ 21single_page_link: //a[.='View All' and contains(@href, '/all/')]
22test_url: http://www.wired.com/threatlevel/2012/05/ff_counterfeiter/all/1 \ No newline at end of file 22
23test_url: http://www.wired.com/cloudline/2011/10/meet-arms-cortex-a15-the-future-of-the-ipad-and-possibly-the-macbook-air/
24test_url: http://www.wired.com/wiredenterprise/2013/09/docker/
25test_url: http://www.wired.com/threatlevel/2012/05/ff_counterfeiter/all/
diff --git a/inc/3rdparty/site_config/standard/wmnf.org.txt b/inc/3rdparty/site_config/standard/wmnf.org.txt
index ffb6b2d1..1d403a91 100644..100755
--- a/inc/3rdparty/site_config/standard/wmnf.org.txt
+++ b/inc/3rdparty/site_config/standard/wmnf.org.txt
@@ -1,13 +1,13 @@
1title: //div[@class="bodyText"]/h1/text() 1title: //div[@class="bodyText"]/h1/text()
2body: //div[@class="bodyText"] 2body: //div[@class="bodyText"]
3 3
4# author and date are separated by only a newline 4# author and date are separated by only a newline
5# can't figure out how to tokenize that yet 5# can't figure out how to tokenize that yet
6author: //div[@class="bodyText"]/span[@class="info"]/text() 6author: //div[@class="bodyText"]/span[@class="info"]/text()
7date: //div[@class="bodyText"]/span[@class="info"]/text() 7date: //div[@class="bodyText"]/span[@class="info"]/text()
8 8
9# strip metdata from body text 9# strip metdata from body text
10strip: //div[@class="bodyText"]/h1/text() 10strip: //div[@class="bodyText"]/h1/text()
11strip: //div[@class="bodyText"]/span[@class="info"] 11strip: //div[@class="bodyText"]/span[@class="info"]
12strip: //div[@class="bodyText"]/span[@class="info"] 12strip: //div[@class="bodyText"]/span[@class="info"]
13test_url: http://www.wmnf.org/news_stories/light-rail-advocates-join-forces-to-combat-opposition-in-pinellas \ No newline at end of file 13test_url: http://www.wmnf.org/news_stories/light-rail-advocates-join-forces-to-combat-opposition-in-pinellas \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/wmpoweruser.com.txt b/inc/3rdparty/site_config/standard/wmpoweruser.com.txt
index d9011d24..70168fbe 100644..100755
--- a/inc/3rdparty/site_config/standard/wmpoweruser.com.txt
+++ b/inc/3rdparty/site_config/standard/wmpoweruser.com.txt
@@ -1,4 +1,4 @@
1date://*[@class="entry-date"] 1date://*[@class="entry-date"]
2author://*[@class="author vcard"] 2author://*[@class="author vcard"]
3strip://*[@style="position:relative;left:72px;top:2px;"]|//*[@id="authorbox"] 3strip://*[@style="position:relative;left:72px;top:2px;"]|//*[@id="authorbox"]
4test_url: http://wmpoweruser.com/breaking-nokia-announces-nfc-support-in-lumia-610-windows-phone-device/ \ No newline at end of file 4test_url: http://wmpoweruser.com/breaking-nokia-announces-nfc-support-in-lumia-610-windows-phone-device/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/worldpoultry.net.txt b/inc/3rdparty/site_config/standard/worldpoultry.net.txt
index 0e42ca5e..b88f9279 100644..100755
--- a/inc/3rdparty/site_config/standard/worldpoultry.net.txt
+++ b/inc/3rdparty/site_config/standard/worldpoultry.net.txt
@@ -1,5 +1,5 @@
1title: //div[@class="content article"]/h1 1title: //div[@class="content article"]/h1
2date: substring-after(//*[@class='date'], '//') 2date: substring-after(//*[@class='date'], '//')
3body: //*[@class='article-content'] 3body: //*[@class='article-content']
4strip: //*[@id='nomodal'] 4strip: //*[@id='nomodal']
5test_url: http://www.worldpoultry.net/news/kyrgyzstan-restricts-poultry-imports-from-russia-and-kazakhstan-9332.html \ No newline at end of file 5test_url: http://www.worldpoultry.net/news/kyrgyzstan-restricts-poultry-imports-from-russia-and-kazakhstan-9332.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/worldwidewords.org.txt b/inc/3rdparty/site_config/standard/worldwidewords.org.txt
index 733d607f..4682e0d3 100644..100755
--- a/inc/3rdparty/site_config/standard/worldwidewords.org.txt
+++ b/inc/3rdparty/site_config/standard/worldwidewords.org.txt
@@ -1,4 +1,4 @@
1title: //p[@id='content'] 1title: //p[@id='content']
2 2
3body: //div[@class='contentblock'] 3body: //div[@class='contentblock']
4test_url: http://www.worldwidewords.org/weirdwords/ww-gro1.htm \ No newline at end of file 4test_url: http://www.worldwidewords.org/weirdwords/ww-gro1.htm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/wow.joystiq.com.txt b/inc/3rdparty/site_config/standard/wow.joystiq.com.txt
index 759fb81f..44add9c9 100644..100755
--- a/inc/3rdparty/site_config/standard/wow.joystiq.com.txt
+++ b/inc/3rdparty/site_config/standard/wow.joystiq.com.txt
@@ -1,6 +1,6 @@
1title: //h2[@class="posttitle"] 1title: //h2[@class="posttitle"]
2body: //div[@class="post"] 2body: //div[@class="post"]
3strip: //h2[@class="posttitle"] 3strip: //h2[@class="posttitle"]
4strip: //p[@class="filed-under"] 4strip: //p[@class="filed-under"]
5convert_double_br_tags: yes 5convert_double_br_tags: yes
6test_url: http://wow.joystiq.com/2011/06/20/the-overachiever-guide-to-midsummer-festival-2011-achievements/ \ No newline at end of file 6test_url: http://wow.joystiq.com/2011/06/20/the-overachiever-guide-to-midsummer-festival-2011-achievements/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/wpmayor.com.txt b/inc/3rdparty/site_config/standard/wpmayor.com.txt
new file mode 100755
index 00000000..bb4fffc7
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/wpmayor.com.txt
@@ -0,0 +1,8 @@
1body: //div[@id='nrelate_flyout_placeholder']
2
3strip_id_or_class: share
4
5prune: no
6
7test_url: http://www.wpmayor.com/themes/wordpress-portfolio-resume-themes/
8test_url: http://www.wpmayor.com/feed/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/wtatennis.com.txt b/inc/3rdparty/site_config/standard/wtatennis.com.txt
new file mode 100755
index 00000000..1000ab26
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/wtatennis.com.txt
@@ -0,0 +1,7 @@
1title: //h1[contains(@class, 'header-2')]
2body: //article//*[contains(@class, 'teaserText') or contains(@class, 'lastUpdated') or contains(@class, 'image') or contains(@class, 'body')]
3strip_id_or_class: articleIndex
4prune: no
5
6test_url: http://www.wtatennis.com/news/article/3190914
7test_url: http://www.wtatennis.com/news/article/3190244 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/www1.folha.uol.com.br.txt b/inc/3rdparty/site_config/standard/www1.folha.uol.com.br.txt
index 0846be2c..97a5c19d 100644..100755
--- a/inc/3rdparty/site_config/standard/www1.folha.uol.com.br.txt
+++ b/inc/3rdparty/site_config/standard/www1.folha.uol.com.br.txt
@@ -1,15 +1,15 @@
1body://div[@id='articleNew'] 1body://div[@id='articleNew']
2strip://div[@id='articleBy'] 2strip://div[@id='articleBy']
3strip://div[@id='articleDate'] 3strip://div[@id='articleDate']
4strip://td[@class='articleGraphicCredit'] 4strip://td[@class='articleGraphicCredit']
5strip://h1 5strip://h1
6strip://div[@id='articleEnd'] 6strip://div[@id='articleEnd']
7strip://p[@class='tagline'] 7strip://p[@class='tagline']
8strip://div[@class='openBox adslibraryArticle'] 8strip://div[@class='openBox adslibraryArticle']
9strip_id_or_class:ad-180x150-1 9strip_id_or_class:ad-180x150-1
10 10
11 11
12title: //div[@id="articleNew"]/h1 12title: //div[@id="articleNew"]/h1
13author: //div[@id="articleBy"]/p/b 13author: //div[@id="articleBy"]/p/b
14date: substring-before(//div[@id="articleDate"], "-") 14date: substring-before(//div[@id="articleDate"], "-")
15test_url: http://www1.folha.uol.com.br/mundo/1115805-ex-ditador-argentino-videla-e-condenado-a-50-anos-de-prisao.shtml \ No newline at end of file 15test_url: http://www1.folha.uol.com.br/mundo/1115805-ex-ditador-argentino-videla-e-condenado-a-50-anos-de-prisao.shtml \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/www3.imperial.ac.uk.txt b/inc/3rdparty/site_config/standard/www3.imperial.ac.uk.txt
index 71306af2..71306af2 100644..100755
--- a/inc/3rdparty/site_config/standard/www3.imperial.ac.uk.txt
+++ b/inc/3rdparty/site_config/standard/www3.imperial.ac.uk.txt
diff --git a/inc/3rdparty/site_config/standard/wyborcza.pl.txt b/inc/3rdparty/site_config/standard/wyborcza.pl.txt
index f99467c2..638583dc 100644..100755
--- a/inc/3rdparty/site_config/standard/wyborcza.pl.txt
+++ b/inc/3rdparty/site_config/standard/wyborcza.pl.txt
@@ -1,11 +1,9 @@
1title:h1 1body: //div[@id='article']
2author: //*[@class = 'author'] 2strip: //div[@class='head']
3date: //*[@class = 'date'] 3
4body: //*[@id = 'art'] 4strip_id_or_class: txt_upl
5next_page_link: //*[@id='Str']/a[contains(text(), 'nastepne')] 5
6strip: //*[@class = 'rel_zdjTOP'] 6single_page_link: //div[@id='gazeta_article_tools']//a[contains(@class, 'print')]
7strip: //*[@id = 'rel'] 7
8strip: //*[@class = 'txt_upl'] 8test_url: http://wyborcza.pl/1,123455,11536088,Gdy_peknie_fejs__obryzga_wszystko.html?as=1&startsz=x
9strip: //*[@id='Str'] 9test_url: http://wyborcza.pl/1,75478,14880255,Biskup_Dydycz_o_pedofilii_i_tajemnicy_spowiedzi__Zamiast.html \ No newline at end of file
10strip: //*[@id='source']
11test_url: http://wyborcza.pl/1,123455,11536088,Gdy_peknie_fejs__obryzga_wszystko.html?as=1&startsz=x \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/wyctim.com.txt b/inc/3rdparty/site_config/standard/wyctim.com.txt
index d8c8713b..bd7ecf2a 100644..100755
--- a/inc/3rdparty/site_config/standard/wyctim.com.txt
+++ b/inc/3rdparty/site_config/standard/wyctim.com.txt
@@ -1,3 +1,3 @@
1body: //div[@class='article-body'] 1body: //div[@class='article-body']
2title: //h1 2title: //h1
3test_url: http://wyctim.com/icloud-sync-regebbi-rendszereken/ \ No newline at end of file 3test_url: http://wyctim.com/icloud-sync-regebbi-rendszereken/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/wz-newsline.de.txt b/inc/3rdparty/site_config/standard/wz-newsline.de.txt
index fbc1d3d2..5b2be744 100644..100755
--- a/inc/3rdparty/site_config/standard/wz-newsline.de.txt
+++ b/inc/3rdparty/site_config/standard/wz-newsline.de.txt
@@ -1,5 +1,5 @@
1title://h1 1title://h1
2 2
3date://p[@class='articleDate'] 3date://p[@class='articleDate']
4body://div[@class='articleBody wzStandardArticle'] 4body://div[@class='articleBody wzStandardArticle']
5test_url: http://www.wz-newsline.de/home/sport/tennis/federer-zum-vierten-mal-sieger-in-indian-wells-1.938050 \ No newline at end of file 5test_url: http://www.wz-newsline.de/home/sport/tennis/federer-zum-vierten-mal-sieger-in-indian-wells-1.938050 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/xfgjls.com.txt b/inc/3rdparty/site_config/standard/xfgjls.com.txt
new file mode 100755
index 00000000..2dc247a0
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/xfgjls.com.txt
@@ -0,0 +1,11 @@
1# This filter is tested on:
2# http://www.xfgjls.com/magazine/html/?131.html
3# http://www.xfgjls.com/magazine/html/?170.html
4
5body://h3/following-sibling::div
6title: //h3
7date: substring-before(//h3/following-sibling::div/p, ' ')
8author: substring-before(substring-after(//h3/following-sibling::div/p, '作者:'), '来源')
9wrap_in(strong)://span[contains(@style, "FONT-WEIGHT: bold")]
10dissolve://span[@style="FONT-FAMILY: '宋体'; FONT-SIZE: 10.5pt; FONT-WEIGHT: bold; mso-spacerun: 'yes'"]
11test_url: http://www.xfgjls.com/magazine/html/?170.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/xoeb.us.txt b/inc/3rdparty/site_config/standard/xoeb.us.txt
index e02960e0..c09fa4df 100644..100755
--- a/inc/3rdparty/site_config/standard/xoeb.us.txt
+++ b/inc/3rdparty/site_config/standard/xoeb.us.txt
@@ -1,4 +1,4 @@
1title: //h1[@class="entry-title"] 1title: //h1[@class="entry-title"]
2author: //span[@class="fn"] 2author: //span[@class="fn"]
3date: //p[@class="meta"] 3date: //p[@class="meta"]
4test_url: http://xoeb.us/blog/2012/03/16/my-mistakes-with-our-first-release/ \ No newline at end of file 4test_url: http://xoeb.us/blog/2012/03/16/my-mistakes-with-our-first-release/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/yated.com.txt b/inc/3rdparty/site_config/standard/yated.com.txt
index 13a3ea64..13a3ea64 100644..100755
--- a/inc/3rdparty/site_config/standard/yated.com.txt
+++ b/inc/3rdparty/site_config/standard/yated.com.txt
diff --git a/inc/3rdparty/site_config/standard/ynet.co.il.txt b/inc/3rdparty/site_config/standard/ynet.co.il.txt
new file mode 100755
index 00000000..aa86566a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/ynet.co.il.txt
@@ -0,0 +1,26 @@
1body: //span[@id='article_content' or @class='text16g']
2
3# ads
4strip: //div[.//div[contains(@id, 'ads.')]]
5# related content heading
6strip: //p[contains(., 'עוד בערוץ החדשות של ynet:')]
7strip: //p[contains(., 'כותרות אחרונות מהעולם בחדשות ynet:')]
8strip: //div[contains(., 'אינציקלופדיית ynet:')]
9# related content links
10strip: //a[@class='bluelink']
11# strip image bullets
12strip_image_src: ynet_manual_bullet.png
13
14prune: no
15tidy: no
16
17# prevent JS issues
18find_string: <script type='text/javascript'>
19replace_string: <div style="display:none;">
20find_string: </script>
21replace_string: </div>
22
23test_url: http://www.ynet.co.il/articles/0,7340,L-4354266,00.html
24test_url: http://www.ynet.co.il/articles/0,7340,L-4354268,00.html
25#feed
26test_url: http://www.ynet.co.il/Integration/StoryRss2.xml \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/yostivanich.com.txt b/inc/3rdparty/site_config/standard/yostivanich.com.txt
index 9e24db3c..2aeb7e05 100644..100755
--- a/inc/3rdparty/site_config/standard/yostivanich.com.txt
+++ b/inc/3rdparty/site_config/standard/yostivanich.com.txt
@@ -1,5 +1,5 @@
1title://div[@class='entry-title'] 1title://div[@class='entry-title']
2body://div[@class='entry-content'] 2body://div[@class='entry-content']
3strip_comments:yes 3strip_comments:yes
4convert_double_br_tags:yes 4convert_double_br_tags:yes
5test_url: http://www.yostivanich.com/2010/07/11/wired-com-with-world-watching-wikileaks-falls-into-disrepair/ \ No newline at end of file 5test_url: http://www.yostivanich.com/2010/07/11/wired-com-with-world-watching-wikileaks-falls-into-disrepair/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/yourerie.com.txt b/inc/3rdparty/site_config/standard/yourerie.com.txt
new file mode 100755
index 00000000..b46b09e8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/yourerie.com.txt
@@ -0,0 +1,2 @@
1body: //div[@class="nxFullTextData"]
2test_url: http://yourerie.com/fulltext?nxd_id=306552
diff --git a/inc/3rdparty/site_config/standard/youtube.com.txt b/inc/3rdparty/site_config/standard/youtube.com.txt
index d52b7356..b0d95f1f 100644..100755
--- a/inc/3rdparty/site_config/standard/youtube.com.txt
+++ b/inc/3rdparty/site_config/standard/youtube.com.txt
@@ -1,15 +1,15 @@
1title: //title 1title: //title
2body: //iframe 2body: //iframe
3 3
4find_string: <html>&lt;iframe 4find_string: <html>&lt;iframe
5replace_string: <iframe id="video" 5replace_string: <iframe id="video"
6 6
7find_string: &gt;&lt;/iframe&gt;</html> 7find_string: &gt;&lt;/iframe&gt;</html>
8replace_string: ></iframe> 8replace_string: ></iframe>
9 9
10single_page_link: //link[@type='text/xml+oembed'] 10single_page_link: //link[@type='text/xml+oembed']
11 11
12prune: no 12prune: no
13tidy: no 13tidy: no
14 14
15test_url: http://www.youtube.com/watch?v=F6gLH0r3iVU \ No newline at end of file 15test_url: http://www.youtube.com/watch?v=F6gLH0r3iVU \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/zcommunications.org.txt b/inc/3rdparty/site_config/standard/zcommunications.org.txt
new file mode 100755
index 00000000..4deb49bf
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/zcommunications.org.txt
@@ -0,0 +1,7 @@
1title: //h1[@id='view_title']
2author: //div[contains(@class, 'content_authors')]//a
3body: //div[@id='view_body']
4
5prune: no
6
7test_url: http://www.zcommunications.org/orwellian-language-update-by-edward-s-herman.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/zdnet.com.txt b/inc/3rdparty/site_config/standard/zdnet.com.txt
index b244b229..939fb0e3 100644..100755
--- a/inc/3rdparty/site_config/standard/zdnet.com.txt
+++ b/inc/3rdparty/site_config/standard/zdnet.com.txt
@@ -1,10 +1,10 @@
1title: //h1[@class="h s-1"] 1title: //h1[@class="h s-1"]
2author: substring-before(substring-after(//p[@class="meta s-10"], 'By'), '|') 2author: substring-before(substring-after(//p[@class="meta s-10"], 'By'), '|')
3author: substring-after(//div[@class="bio"]//h3, 'About ') 3author: substring-after(//div[@class="bio"]//h3, 'About ')
4date: substring-after(//p[@class="meta s-10"], '|') 4date: substring-after(//p[@class="meta s-10"], '|')
5date: substring-after(//p[@class="meta"], '|') 5date: substring-after(//p[@class="meta"], '|')
6body: //div[@class="content-1 entry space-1 clear"] 6body: //div[@class="content-1 entry space-1 clear"]
7body: //div[@class="storyBody"] 7body: //div[@class="storyBody"]
8 8
9test_url: http://www.zdnet.com/blog/microsoft/the-bing-back-end-more-on-cosmos-tiger-and-scope/10920 9test_url: http://www.zdnet.com/blog/microsoft/the-bing-back-end-more-on-cosmos-tiger-and-scope/10920
10test_url: http://www.zdnet.com/researchers-find-web-tracking-up-privacy-down-7000000358/ \ No newline at end of file 10test_url: http://www.zdnet.com/researchers-find-web-tracking-up-privacy-down-7000000358/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/zeit.de.txt b/inc/3rdparty/site_config/standard/zeit.de.txt
index 66a7f1ac..9815d478 100644..100755
--- a/inc/3rdparty/site_config/standard/zeit.de.txt
+++ b/inc/3rdparty/site_config/standard/zeit.de.txt
@@ -1,44 +1,45 @@
1# 2012-12-23 [carlo@...] fixed half-assed headlines in articles, removed inline author profiles, adjusted picture captions 1# 2013.10.30 [rezor92] fixed single_page_link
2# 2012-03-17 [dkless@...] Cut metadata parts in the beginning and the ends of the content block; copyright entries for pictures removed; Author fixed, not sure if old entries still valid (I left them); Weird problems with some pages addressed (see last section for removing hidden section) 2# 2012-12-23 [carlo@...] fixed half-assed headlines in articles, removed inline author profiles, adjusted picture captions
3# 2011-12-09 [carlo@...] Removed "related articles" block 3# 2012-03-17 [dkless@...] Cut metadata parts in the beginning and the ends of the content block; copyright entries for pictures removed; Author fixed, not sure if old entries still valid (I left them); Weird problems with some pages addressed (see last section for removing hidden section)
4# 2011-08-23 [carlo@...] changed single page link to use print version: page works better, less ambiguity. Related cleanups and simplifications. 4# 2011-12-09 [carlo@...] Removed "related articles" block
5# 2011-08-20 [carlo@...] added author, fixed date 5# 2011-08-23 [carlo@...] changed single page link to use print version: page works better, less ambiguity. Related cleanups and simplifications.
6 6# 2011-08-20 [carlo@...] added author, fixed date
7 7
8single_page_link: //a[@title='Druckversion'] 8
9tidy: no 9single_page_link: //a[@title='Auf einer Seite']
10 10tidy: no
11title: //title 11
12date: substring-before( //li[@class="date"], " " ) 12title: //title
13author: //li[@class="author"]/a/text() | //li[@class="author first"]/a/text() 13date: substring-before( //li[@class="date"], " " )
14author: substring-after(//li[@class='source first '], 'Quelle: ') 14author: //li[@class="author"]/a/text() | //li[@class="author first"]/a/text()
15 15author: substring-after(//li[@class='source first '], 'Quelle: ')
16strip_id_or_class: articleheader 16
17strip: //div[@id="comments"] | //div[@class="pagination block"] | //p[@class="ressortbacklink"] | //div[@id="relatedArticles"] | // div[@class="inline portrait"] 17strip_id_or_class: articleheader
18 18strip: //div[@id="comments"] | //div[@class="pagination block"] | //p[@class="ressortbacklink"] | //div[@id="relatedArticles"] | // div[@class="inline portrait"]
19#Removes author and date from the start 19
20strip: //ul[@class="tools"] 20#Removes author and date from the start
21#Removes copyright statement - often disturb as first line of the news 21strip: //ul[@class="tools"]
22strip: //p[@class="copyright"] 22#Removes copyright statement - often disturb as first line of the news
23strip: //div[@class="copyright"] 23strip: //p[@class="copyright"]
24#Removes pagination links at the end 24strip: //div[@class="copyright"]
25strip: //div[@class="pagination"] 25#Removes pagination links at the end
26 26strip: //div[@class="pagination"]
27# Fix picture captions 27
28wrap_in(small): //p[@class="caption"]/text() 28# Fix picture captions
29 29wrap_in(small): //p[@class="caption"]/text()
30# Fix sub-headlines 30
31wrap_in(h2): //p/strong 31# Fix sub-headlines
32dissolve: //h2/strong 32wrap_in(h2): //p/strong
33 33dissolve: //h2/strong
34#Sometimes things are embedded in the print version that are not displayed on the web, but will be displayed in the mobilized versions and lead even to problems. These sections are removed here. 34
35strip_id_or_class:"informatives" 35#Sometimes things are embedded in the print version that are not displayed on the web, but will be displayed in the mobilized versions and lead even to problems. These sections are removed here.
36strip_id_or_class:"bottom" 36strip_id_or_class:"informatives"
37strip_id_or_class:"teasermosaic" 37strip_id_or_class:"bottom"
38strip_id_or_class:"comments" 38strip_id_or_class:"teasermosaic"
39strip_id_or_class:"articlefooter af" 39strip_id_or_class:"comments"
40strip_id_or_class:"relateds" 40strip_id_or_class:"articlefooter af"
41strip_id_or_class:"pagination" 41strip_id_or_class:"relateds"
42 42strip_id_or_class:"pagination"
43footnotes: no 43
44test_url: http://www.zeit.de/kultur/film/2012-12/Kurzfilmtag \ No newline at end of file 44footnotes: no
45test_url: http://www.zeit.de/kultur/film/2012-12/Kurzfilmtag
diff --git a/inc/3rdparty/site_config/standard/zerohedge.com.txt b/inc/3rdparty/site_config/standard/zerohedge.com.txt
new file mode 100755
index 00000000..7e76aee5
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/zerohedge.com.txt
@@ -0,0 +1,10 @@
1author: //span[@class='submitted']/a
2strip: //div[@class='clear-block clr']
3strip: //div[@class='picture']
4strip: //span[@class='submitted']
5strip: //div[@class='breadcrumb']
6strip: //div[@class='fivestar-static-form-item']
7strip: //div[@class='js-links']
8strip: //div[@class='links clear-block clear']
9strip: //div[@class='block block-block']
10test_url: http://www.zerohedge.com/news/bernankes-columbus-voyage-end-monetary-policy-world \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/zerokspot.com.txt b/inc/3rdparty/site_config/standard/zerokspot.com.txt
index ea9132aa..afa964db 100644..100755
--- a/inc/3rdparty/site_config/standard/zerokspot.com.txt
+++ b/inc/3rdparty/site_config/standard/zerokspot.com.txt
@@ -1,3 +1,3 @@
1title: //h1 1title: //h1
2body: //div[@id="primarycontent"] 2body: //div[@id="primarycontent"]
3test_url: http://zerokspot.com/weblog/2011/06/26/europython2011/ \ No newline at end of file 3test_url: http://zerokspot.com/weblog/2011/06/26/europython2011/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/zhihu.com.txt b/inc/3rdparty/site_config/standard/zhihu.com.txt
new file mode 100755
index 00000000..3c9d8c1a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/zhihu.com.txt
@@ -0,0 +1,19 @@
1# This filter is tested on:
2# http://www.zhihu.com/question/19587406
3# http://www.zhihu.com/question/20649035
4# http://www.zhihu.com/question/20637942
5
6author: //h3[@class='zm-item-answer-author-wrap']
7title://h2[@class='zm-item-title']
8date://a[@class='answer-date-link meta-item']
9convert_double_br_tags: yes
10
11wrap_in(blockquote)://div[@class='zm-editable-content']
12wrap_in(blockquote)://sup/text()
13dissolve://sup
14
15strip://div[@class='zh-answers-title']
16strip:///div[@class='zm-item-vote-info ']
17strip://div[@class='zm-item-answer-author-info']
18strip://div[@class='zu-blue-info-board zg-r3px']
19test_url: http://www.zhihu.com/question/20637942 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/zingtrain.com.txt b/inc/3rdparty/site_config/standard/zingtrain.com.txt
index 2a2f58a8..188d4dd6 100644..100755
--- a/inc/3rdparty/site_config/standard/zingtrain.com.txt
+++ b/inc/3rdparty/site_config/standard/zingtrain.com.txt
@@ -1,3 +1,3 @@
1title: substring-after(id, 'post')/h2 1title: substring-after(id, 'post')/h2
2body://div[@class = 'entry'] 2body://div[@class = 'entry']
3test_url: http://www.zingtrain.com/category/ontrack/january-2007/ \ No newline at end of file 3test_url: http://www.zingtrain.com/category/ontrack/january-2007/ \ No newline at end of file
diff --git a/inc/poche/Database.class.php b/inc/poche/Database.class.php
index 11cccb72..2c80b64b 100755
--- a/inc/poche/Database.class.php
+++ b/inc/poche/Database.class.php
@@ -5,17 +5,18 @@
5 * @category wallabag 5 * @category wallabag
6 * @author Nicolas Lœuillet <nicolas@loeuillet.org> 6 * @author Nicolas Lœuillet <nicolas@loeuillet.org>
7 * @copyright 2013 7 * @copyright 2013
8 * @license http://www.wtfpl.net/ see COPYING file 8 * @license http://opensource.org/licenses/MIT see COPYING file
9 */ 9 */
10 10
11class Database { 11class Database {
12
12 var $handle; 13 var $handle;
13 private $order = array( 14 private $order = array (
14 'ia' => 'ORDER BY entries.id', 15 'ia' => 'ORDER BY entries.id',
15 'id' => 'ORDER BY entries.id DESC', 16 'id' => 'ORDER BY entries.id DESC',
16 'ta' => 'ORDER BY lower(entries.title)', 17 'ta' => 'ORDER BY lower(entries.title)',
17 'td' => 'ORDER BY lower(entries.title) DESC', 18 'td' => 'ORDER BY lower(entries.title) DESC',
18 'default' => 'ORDER BY entries.id' 19 'default' => 'ORDER BY entries.id'
19 ); 20 );
20 21
21 function __construct() 22 function __construct()
@@ -38,28 +39,17 @@ class Database {
38 } 39 }
39 40
40 $this->handle->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION); 41 $this->handle->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
42 $this->_checkTags();
41 Tools::logm('storage type ' . STORAGE); 43 Tools::logm('storage type ' . STORAGE);
42 } 44 }
43 45
44 private function getHandle() { 46 private function getHandle()
47 {
45 return $this->handle; 48 return $this->handle;
46 } 49 }
47 50
48 public function isInstalled() { 51 private function _checkTags()
49 $sql = "SELECT username FROM users"; 52 {
50 $query = $this->executeQuery($sql, array());
51 if ($query == false) {
52 die(STORAGE . ' database looks empty. You have to create it (you can find database structure in install folder).');
53 }
54 $hasAdmin = count($query->fetchAll());
55
56 if ($hasAdmin == 0)
57 return false;
58
59 return true;
60 }
61
62 public function checkTags() {
63 53
64 if (STORAGE == 'sqlite') { 54 if (STORAGE == 'sqlite') {
65 $sql = ' 55 $sql = '
@@ -123,7 +113,8 @@ class Database {
123 $query = $this->executeQuery($sql, array()); 113 $query = $this->executeQuery($sql, array());
124 } 114 }
125 115
126 public function install($login, $password) { 116 public function install($login, $password)
117 {
127 $sql = 'INSERT INTO users ( username, password, name, email) VALUES (?, ?, ?, ?)'; 118 $sql = 'INSERT INTO users ( username, password, name, email) VALUES (?, ?, ?, ?)';
128 $params = array($login, $password, $login, ' '); 119 $params = array($login, $password, $login, ' ');
129 $query = $this->executeQuery($sql, $params); 120 $query = $this->executeQuery($sql, $params);
@@ -150,7 +141,8 @@ class Database {
150 return TRUE; 141 return TRUE;
151 } 142 }
152 143
153 public function getConfigUser($id) { 144 public function getConfigUser($id)
145 {
154 $sql = "SELECT * FROM users_config WHERE user_id = ?"; 146 $sql = "SELECT * FROM users_config WHERE user_id = ?";
155 $query = $this->executeQuery($sql, array($id)); 147 $query = $this->executeQuery($sql, array($id));
156 $result = $query->fetchAll(); 148 $result = $query->fetchAll();
@@ -163,7 +155,8 @@ class Database {
163 return $user_config; 155 return $user_config;
164 } 156 }
165 157
166 public function userExists($username) { 158 public function userExists($username)
159 {
167 $sql = "SELECT * FROM users WHERE username=?"; 160 $sql = "SELECT * FROM users WHERE username=?";
168 $query = $this->executeQuery($sql, array($username)); 161 $query = $this->executeQuery($sql, array($username));
169 $login = $query->fetchAll(); 162 $login = $query->fetchAll();
@@ -174,13 +167,14 @@ class Database {
174 } 167 }
175 } 168 }
176 169
177 public function login($username, $password, $isauthenticated=false) { 170 public function login($username, $password, $isauthenticated = FALSE)
171 {
178 if ($isauthenticated) { 172 if ($isauthenticated) {
179 $sql = "SELECT * FROM users WHERE username=?"; 173 $sql = "SELECT * FROM users WHERE username=?";
180 $query = $this->executeQuery($sql, array($username)); 174 $query = $this->executeQuery($sql, array($username));
181 } else { 175 } else {
182 $sql = "SELECT * FROM users WHERE username=? AND password=?"; 176 $sql = "SELECT * FROM users WHERE username=? AND password=?";
183 $query = $this->executeQuery($sql, array($username, $password)); 177 $query = $this->executeQuery($sql, array($username, $password));
184 } 178 }
185 $login = $query->fetchAll(); 179 $login = $query->fetchAll();
186 180
@@ -204,7 +198,8 @@ class Database {
204 $query = $this->executeQuery($sql_update, $params_update); 198 $query = $this->executeQuery($sql_update, $params_update);
205 } 199 }
206 200
207 public function updateUserConfig($userId, $key, $value) { 201 public function updateUserConfig($userId, $key, $value)
202 {
208 $config = $this->getConfigUser($userId); 203 $config = $this->getConfigUser($userId);
209 204
210 if (! isset($config[$key])) { 205 if (! isset($config[$key])) {
@@ -218,7 +213,8 @@ class Database {
218 $query = $this->executeQuery($sql, $params); 213 $query = $this->executeQuery($sql, $params);
219 } 214 }
220 215
221 private function executeQuery($sql, $params) { 216 private function executeQuery($sql, $params)
217 {
222 try 218 try
223 { 219 {
224 $query = $this->getHandle()->prepare($sql); 220 $query = $this->getHandle()->prepare($sql);
@@ -232,28 +228,32 @@ class Database {
232 } 228 }
233 } 229 }
234 230
/**
 * Counts the rows of the users table, optionally restricted to one username.
 *
 * @param string|null $username when truthy, only users with this exact username are counted
 * @return mixed first column of the count(*) row as returned by the driver
 */
public function listUsers($username = NULL)
{
    // Build the SQL text and its bound parameters side by side so they cannot drift apart.
    $sql = 'SELECT count(*) FROM users';
    $params = array();

    if ($username) {
        $sql .= ' WHERE username=?';
        $params = array($username);
    }

    $statement = $this->executeQuery($sql, $params);
    list($count) = $statement->fetch();

    return $count;
}
241 238
/**
 * Reads the stored password value of a user.
 *
 * @param mixed $userID value matched against users.id
 * @return mixed the 'password' column of the first matching row, or null when there is none
 */
public function getUserPassword($userID)
{
    $rows = $this->executeQuery("SELECT * FROM users WHERE id=?", array($userID))->fetchAll();

    if (isset($rows[0]['password'])) {
        return $rows[0]['password'];
    }

    return null;
}
248 246
/**
 * Deletes every users_config row that belongs to the given user.
 *
 * @param mixed $userID value matched against users_config.user_id
 * @return mixed whatever executeQuery() returns for the DELETE statement
 */
public function deleteUserConfig($userID)
{
    return $this->executeQuery(
        'DELETE from users_config WHERE user_id=?',
        array($userID)
    );
}
255 254
256 public function deleteTagsEntriesAndEntries($userID) { 255 public function deleteTagsEntriesAndEntries($userID)
256 {
257 $entries = $this->retrieveAll($userID); 257 $entries = $this->retrieveAll($userID);
258 foreach($entries as $entryid) { 258 foreach($entries as $entryid) {
259 $tags = $this->retrieveTagsByEntry($entryid); 259 $tags = $this->retrieveTagsByEntry($entryid);
@@ -264,20 +264,23 @@ class Database {
264 } 264 }
265 } 265 }
266 266
/**
 * Deletes the users row with the given id. Nothing is returned.
 *
 * @param mixed $userID value matched against users.id
 */
public function deleteUser($userID)
{
    $this->executeQuery('DELETE from users WHERE id=?', array($userID));
}
272 273
/**
 * Overwrites the content and the title of one entry owned by the given user.
 *
 * @param mixed  $id      value matched against entries.id
 * @param string $title   new title
 * @param string $body    new content
 * @param mixed  $user_id value matched against entries.user_id
 * @return mixed whatever executeQuery() returns for the UPDATE statement
 */
public function updateContentAndTitle($id, $title, $body, $user_id)
{
    // Placeholder order: content, title, id, user_id.
    $bindings = array($body, $title, $id, $user_id);

    return $this->executeQuery(
        'UPDATE entries SET content = ?, title = ? WHERE id=? AND user_id=?',
        $bindings
    );
}
279 281
280 public function retrieveUnfetchedEntries($user_id, $limit) { 282 public function retrieveUnfetchedEntries($user_id, $limit)
283 {
281 284
282 $sql_limit = "LIMIT 0,".$limit; 285 $sql_limit = "LIMIT 0,".$limit;
283 if (STORAGE == 'postgres') { 286 if (STORAGE == 'postgres') {
@@ -291,7 +294,8 @@ class Database {
291 return $entries; 294 return $entries;
292 } 295 }
293 296
294 public function retrieveUnfetchedEntriesCount($user_id) { 297 public function retrieveUnfetchedEntriesCount($user_id)
298 {
295 $sql = "SELECT count(*) FROM entries WHERE (content = '' OR content IS NULL) AND title LIKE 'Untitled - Import%' AND user_id=?"; 299 $sql = "SELECT count(*) FROM entries WHERE (content = '' OR content IS NULL) AND title LIKE 'Untitled - Import%' AND user_id=?";
296 $query = $this->executeQuery($sql, array($user_id)); 300 $query = $this->executeQuery($sql, array($user_id));
297 list($count) = $query->fetch(); 301 list($count) = $query->fetch();
@@ -299,7 +303,8 @@ class Database {
299 return $count; 303 return $count;
300 } 304 }
301 305
302 public function retrieveAll($user_id) { 306 public function retrieveAll($user_id)
307 {
303 $sql = "SELECT * FROM entries WHERE user_id=? ORDER BY id"; 308 $sql = "SELECT * FROM entries WHERE user_id=? ORDER BY id";
304 $query = $this->executeQuery($sql, array($user_id)); 309 $query = $this->executeQuery($sql, array($user_id));
305 $entries = $query->fetchAll(); 310 $entries = $query->fetchAll();
@@ -307,7 +312,8 @@ class Database {
307 return $entries; 312 return $entries;
308 } 313 }
309 314
310 public function retrieveOneById($id, $user_id) { 315 public function retrieveOneById($id, $user_id)
316 {
311 $entry = NULL; 317 $entry = NULL;
312 $sql = "SELECT * FROM entries WHERE id=? AND user_id=?"; 318 $sql = "SELECT * FROM entries WHERE id=? AND user_id=?";
313 $params = array(intval($id), $user_id); 319 $params = array(intval($id), $user_id);
@@ -317,7 +323,8 @@ class Database {
317 return isset($entry[0]) ? $entry[0] : null; 323 return isset($entry[0]) ? $entry[0] : null;
318 } 324 }
319 325
320 public function retrieveOneByURL($url, $user_id) { 326 public function retrieveOneByURL($url, $user_id)
327 {
321 $entry = NULL; 328 $entry = NULL;
322 $sql = "SELECT * FROM entries WHERE url=? AND user_id=?"; 329 $sql = "SELECT * FROM entries WHERE url=? AND user_id=?";
323 $params = array($url, $user_id); 330 $params = array($url, $user_id);
@@ -327,13 +334,15 @@ class Database {
327 return isset($entry[0]) ? $entry[0] : null; 334 return isset($entry[0]) ? $entry[0] : null;
328 } 335 }
329 336
/**
 * Moves all tag links from one entry to another. Nothing is returned.
 *
 * @param mixed $old_entry_id entry id currently stored in tags_entries.entry_id
 * @param mixed $new_entry_id entry id that replaces it
 */
public function reassignTags($old_entry_id, $new_entry_id)
{
    // The new id fills the SET placeholder, the old id the WHERE placeholder.
    $this->executeQuery(
        "UPDATE tags_entries SET entry_id=? WHERE entry_id=?",
        array($new_entry_id, $old_entry_id)
    );
}
335 343
336 public function getEntriesByView($view, $user_id, $limit = '', $tag_id = 0) { 344 public function getEntriesByView($view, $user_id, $limit = '', $tag_id = 0)
345 {
337 switch ($view) { 346 switch ($view) {
338 case 'archive': 347 case 'archive':
339 $sql = "SELECT * FROM entries WHERE user_id=? AND is_read=? "; 348 $sql = "SELECT * FROM entries WHERE user_id=? AND is_read=? ";
@@ -361,9 +370,10 @@ class Database {
361 $entries = $query->fetchAll(); 370 $entries = $query->fetchAll();
362 371
363 return $entries; 372 return $entries;
364 } 373 }
365 374
366 public function getEntriesByViewCount($view, $user_id, $tag_id = 0) { 375 public function getEntriesByViewCount($view, $user_id, $tag_id = 0)
376 {
367 switch ($view) { 377 switch ($view) {
368 case 'archive': 378 case 'archive':
369 $sql = "SELECT count(*) FROM entries WHERE user_id=? AND is_read=? "; 379 $sql = "SELECT count(*) FROM entries WHERE user_id=? AND is_read=? ";
@@ -391,7 +401,8 @@ class Database {
391 return $count; 401 return $count;
392 } 402 }
393 403
394 public function updateContent($id, $content, $user_id) { 404 public function updateContent($id, $content, $user_id)
405 {
395 $sql_action = 'UPDATE entries SET content = ? WHERE id=? AND user_id=?'; 406 $sql_action = 'UPDATE entries SET content = ? WHERE id=? AND user_id=?';
396 $params_action = array($content, $id, $user_id); 407 $params_action = array($content, $id, $user_id);
397 $query = $this->executeQuery($sql_action, $params_action); 408 $query = $this->executeQuery($sql_action, $params_action);
@@ -406,7 +417,8 @@ class Database {
406 * @param integer $user_id 417 * @param integer $user_id
407 * @return integer $id of inserted record 418 * @return integer $id of inserted record
408 */ 419 */
409 public function add($url, $title, $content, $user_id, $isFavorite=0, $isRead=0) { 420 public function add($url, $title, $content, $user_id, $isFavorite=0, $isRead=0)
421 {
410 $sql_action = 'INSERT INTO entries ( url, title, content, user_id, is_fav, is_read ) VALUES (?, ?, ?, ?, ?, ?)'; 422 $sql_action = 'INSERT INTO entries ( url, title, content, user_id, is_fav, is_read ) VALUES (?, ?, ?, ?, ?, ?)';
411 $params_action = array($url, $title, $content, $user_id, $isFavorite, $isRead); 423 $params_action = array($url, $title, $content, $user_id, $isFavorite, $isRead);
412 424
@@ -419,36 +431,42 @@ class Database {
419 return $id; 431 return $id;
420 } 432 }
421 433
/**
 * Deletes one entry, but only if it is owned by the given user.
 *
 * @param mixed $id      value matched against entries.id
 * @param mixed $user_id value matched against entries.user_id
 * @return mixed whatever executeQuery() returns for the DELETE statement
 */
public function deleteById($id, $user_id)
{
    return $this->executeQuery(
        "DELETE FROM entries WHERE id=? AND user_id=?",
        array($id, $user_id)
    );
}
428 441
/**
 * Toggles the is_fav flag of one entry owned by the given user. Nothing is returned.
 *
 * @param mixed $id      value matched against entries.id
 * @param mixed $user_id value matched against entries.user_id
 */
public function favoriteById($id, $user_id)
{
    $this->executeQuery(
        "UPDATE entries SET is_fav=NOT is_fav WHERE id=? AND user_id=?",
        array($id, $user_id)
    );
}
434 448
/**
 * Toggles the is_read flag of one entry owned by the given user. Nothing is returned.
 *
 * @param mixed $id      value matched against entries.id
 * @param mixed $user_id value matched against entries.user_id
 */
public function archiveById($id, $user_id)
{
    $this->executeQuery(
        "UPDATE entries SET is_read=NOT is_read WHERE id=? AND user_id=?",
        array($id, $user_id)
    );
}
440 455
/**
 * Marks every unread entry (is_read = 0) of the given user as read (is_read = 1).
 * Nothing is returned.
 *
 * @param mixed $user_id value matched against entries.user_id
 */
public function archiveAll($user_id)
{
    $sql_action = "UPDATE entries SET is_read=? WHERE user_id=? AND is_read=?";

    // Positional placeholders are bound strictly in order:
    //   1st ? -> new is_read value, 2nd ? -> owner, 3rd ? -> current is_read value.
    // The previous order (user id first) wrote the user id into is_read for user 1
    // and only behaved correctly by coincidence when the user id itself was 1.
    $params_action = array(1, $user_id, 0);

    $this->executeQuery($sql_action, $params_action);
}
446 462
/**
 * Asks the database handle for the id of the last inserted row.
 *
 * @param string $column name forwarded unchanged to the handle's lastInsertId()
 * @return mixed the value reported by lastInsertId()
 */
public function getLastId($column = '')
{
    $handle = $this->getHandle();

    return $handle->lastInsertId($column);
}
450 467
451 public function search($term, $user_id, $limit = '') { 468 public function search($term, $user_id, $limit = '')
469 {
452 $search = '%'.$term.'%'; 470 $search = '%'.$term.'%';
453 $sql_action = "SELECT * FROM entries WHERE user_id=? AND (content LIKE ? OR title LIKE ? OR url LIKE ?) "; //searches in content, title and URL 471 $sql_action = "SELECT * FROM entries WHERE user_id=? AND (content LIKE ? OR title LIKE ? OR url LIKE ?) "; //searches in content, title and URL
454 $sql_action .= $this->getEntriesOrder().' ' . $limit; 472 $sql_action .= $this->getEntriesOrder().' ' . $limit;
@@ -457,7 +475,8 @@ class Database {
457 return $query->fetchAll(); 475 return $query->fetchAll();
458 } 476 }
459 477
460 public function retrieveAllTags($user_id, $term = null) { 478 public function retrieveAllTags($user_id, $term = NULL)
479 {
461 $sql = "SELECT DISTINCT tags.*, count(entries.id) AS entriescount FROM tags 480 $sql = "SELECT DISTINCT tags.*, count(entries.id) AS entriescount FROM tags
462 LEFT JOIN tags_entries ON tags_entries.tag_id=tags.id 481 LEFT JOIN tags_entries ON tags_entries.tag_id=tags.id
463 LEFT JOIN entries ON tags_entries.entry_id=entries.id 482 LEFT JOIN entries ON tags_entries.entry_id=entries.id
@@ -471,7 +490,8 @@ class Database {
471 return $tags; 490 return $tags;
472 } 491 }
473 492
474 public function retrieveTag($id, $user_id) { 493 public function retrieveTag($id, $user_id)
494 {
475 $tag = NULL; 495 $tag = NULL;
476 $sql = "SELECT DISTINCT tags.* FROM tags 496 $sql = "SELECT DISTINCT tags.* FROM tags
477 LEFT JOIN tags_entries ON tags_entries.tag_id=tags.id 497 LEFT JOIN tags_entries ON tags_entries.tag_id=tags.id
@@ -481,10 +501,11 @@ class Database {
481 $query = $this->executeQuery($sql, $params); 501 $query = $this->executeQuery($sql, $params);
482 $tag = $query->fetchAll(); 502 $tag = $query->fetchAll();
483 503
484 return isset($tag[0]) ? $tag[0] : null; 504 return isset($tag[0]) ? $tag[0] : NULL;
485 } 505 }
486 506
487 public function retrieveEntriesByTag($tag_id, $user_id) { 507 public function retrieveEntriesByTag($tag_id, $user_id)
508 {
488 $sql = 509 $sql =
489 "SELECT entries.* FROM entries 510 "SELECT entries.* FROM entries
490 LEFT JOIN tags_entries ON tags_entries.entry_id=entries.id 511 LEFT JOIN tags_entries ON tags_entries.entry_id=entries.id
@@ -495,7 +516,8 @@ class Database {
495 return $entries; 516 return $entries;
496 } 517 }
497 518
498 public function retrieveTagsByEntry($entry_id) { 519 public function retrieveTagsByEntry($entry_id)
520 {
499 $sql = 521 $sql =
500 "SELECT tags.* FROM tags 522 "SELECT tags.* FROM tags
501 LEFT JOIN tags_entries ON tags_entries.tag_id=tags.id 523 LEFT JOIN tags_entries ON tags_entries.tag_id=tags.id
@@ -506,14 +528,16 @@ class Database {
506 return $tags; 528 return $tags;
507 } 529 }
508 530
/**
 * Removes the link between one tag and one entry.
 *
 * @param mixed $entry_id value matched against tags_entries.entry_id
 * @param mixed $tag_id   value matched against tags_entries.tag_id
 * @return mixed whatever executeQuery() returns for the DELETE statement
 */
public function removeTagForEntry($entry_id, $tag_id)
{
    // Note the placeholder order: tag id first, entry id second.
    return $this->executeQuery(
        "DELETE FROM tags_entries WHERE tag_id=? AND entry_id=?",
        array($tag_id, $entry_id)
    );
}
515 538
516 public function cleanUnusedTag($tag_id) { 539 public function cleanUnusedTag($tag_id)
540 {
517 $sql_action = "SELECT tags.* FROM tags JOIN tags_entries ON tags_entries.tag_id=tags.id WHERE tags.id=?"; 541 $sql_action = "SELECT tags.* FROM tags JOIN tags_entries ON tags_entries.tag_id=tags.id WHERE tags.id=?";
518 $query = $this->executeQuery($sql_action,array($tag_id)); 542 $query = $this->executeQuery($sql_action,array($tag_id));
519 $tagstokeep = $query->fetchAll(); 543 $tagstokeep = $query->fetchAll();
@@ -532,7 +556,8 @@ class Database {
532 556
533 } 557 }
534 558
535 public function retrieveTagByValue($value) { 559 public function retrieveTagByValue($value)
560 {
536 $tag = NULL; 561 $tag = NULL;
537 $sql = "SELECT * FROM tags WHERE value=?"; 562 $sql = "SELECT * FROM tags WHERE value=?";
538 $params = array($value); 563 $params = array($value);
@@ -542,27 +567,29 @@ class Database {
542 return isset($tag[0]) ? $tag[0] : null; 567 return isset($tag[0]) ? $tag[0] : null;
543 } 568 }
544 569
/**
 * Inserts a new row into the tags table.
 *
 * @param string $value text stored in tags.value
 * @return mixed whatever executeQuery() returns for the INSERT statement
 */
public function createTag($value)
{
    return $this->executeQuery(
        'INSERT INTO tags ( value ) VALUES (?)',
        array($value)
    );
}
551 577
/**
 * Links a tag to an entry by inserting a tags_entries row.
 *
 * @param mixed $tag_id   stored in tags_entries.tag_id
 * @param mixed $entry_id stored in tags_entries.entry_id
 * @return mixed whatever executeQuery() returns for the INSERT statement
 */
public function setTagToEntry($tag_id, $entry_id)
{
    return $this->executeQuery(
        'INSERT INTO tags_entries ( tag_id, entry_id ) VALUES (?, ?)',
        array($tag_id, $entry_id)
    );
}
558 585
/**
 * Picks the ordering fragment for entry listings.
 *
 * Uses the entry of $this->order selected by $_SESSION['sort'] when that key is
 * set and known; otherwise falls back to $this->order['default'].
 *
 * @return mixed the matching value of $this->order
 */
private function getEntriesOrder()
{
    // Guard clause: no sort preference in the session, or one we do not know about.
    if (!isset($_SESSION['sort']) || !array_key_exists($_SESSION['sort'], $this->order)) {
        return $this->order['default'];
    }

    return $this->order[$_SESSION['sort']];
}
568} 595}
diff --git a/inc/poche/Language.class.php b/inc/poche/Language.class.php
new file mode 100644
index 00000000..8d3912f5
--- /dev/null
+++ b/inc/poche/Language.class.php
@@ -0,0 +1,113 @@
1<?php
2/**
3 * wallabag, self hostable application allowing you to not miss any content anymore
4 *
5 * @category wallabag
6 * @author Nicolas Lœuillet <nicolas@loeuillet.org>
7 * @copyright 2013
8 * @license http://opensource.org/licenses/MIT see COPYING file
9 */
10
/**
 * Handles localisation: selects the active locale for gettext at construction
 * time, lists the locales found on disk and updates the user's preference.
 */
class Language
{
    /**
     * @var Poche application object; its messages, store and user members are used here
     */
    protected $wallabag;

    /**
     * @var string locale identifier selected in the constructor
     */
    private $currentLanguage;

    /**
     * @var array display names keyed by locale directory name
     */
    private $languageNames = array(
        'cs_CZ.utf8' => 'čeština',
        'de_DE.utf8' => 'German',
        'en_EN.utf8' => 'English',
        'es_ES.utf8' => 'Español',
        'fa_IR.utf8' => 'فارسی',
        'fr_FR.utf8' => 'Français',
        'it_IT.utf8' => 'Italiano',
        'pl_PL.utf8' => 'Polski',
        'pt_BR.utf8' => 'Português (Brasil)',
        'ru_RU.utf8' => 'Pусский',
        'sl_SI.utf8' => 'Slovenščina',
        'uk_UA.utf8' => 'Українська',
    );

    /**
     * Selects the locale (user preference from the session, else the LANG
     * constant) and binds the gettext domain of the same name to LOCALE.
     *
     * @param Poche $wallabag application object
     */
    public function __construct(Poche $wallabag)
    {
        $this->wallabag = $wallabag;
        $pocheUser = Session::getParam('poche_user');

        // Only call into the session value when it really is an object; anything
        // else (null, empty array, ...) means "no logged-in user" and uses LANG.
        $language = is_object($pocheUser) ? $pocheUser->getConfigValue('language') : LANG;

        // NOTE(review): getConfigValue() presumably returns false when the key is
        // missing - confirm. An empty locale is useless either way, so fall back.
        if (empty($language)) {
            $language = LANG;
        }

        // putenv() may be disabled on some hosts; this call is deliberately best-effort.
        @putenv('LC_ALL=' . $language);
        setlocale(LC_ALL, $language);
        bindtextdomain($language, LOCALE);
        textdomain($language);

        $this->currentLanguage = $language;
    }

    /**
     * @return string locale identifier selected in the constructor
     */
    public function getLanguage() {
        return $this->currentLanguage;
    }

    /**
     * Lists the languages found as sub-directories of LOCALE.
     *
     * @return array list of arrays with the keys 'name' (display name, or the
     *               directory name when unknown), 'value' (directory name) and
     *               'current' (true for the active language); empty when LOCALE
     *               cannot be opened
     */
    public function getInstalledLanguages() {
        $languages = array();
        $handle = opendir(LOCALE);

        // An unreadable or missing locale directory simply means "nothing installed".
        if ($handle === false) {
            return $languages;
        }

        while (($language = readdir($handle)) !== false) {
            # Languages are stored in a directory, so all directory names are languages
            # @todo move language installation data to database
            if (in_array($language, array('..', '.', 'tools'), true) || ! is_dir(LOCALE . '/' . $language)) {
                continue;
            }

            $languages[] = array(
                'name'    => (isset($this->languageNames[$language]) ? $this->languageNames[$language] : $language),
                'value'   => $language,
                'current' => ($language === $this->getLanguage()),
            );
        }

        // Release the directory handle; it was previously left open.
        closedir($handle);

        return $languages;
    }


    /**
     * Update language for current user
     *
     * Adds a message and redirects to the config view in every case: a warning
     * when the language is unchanged, an error when it is not installed, and a
     * success message after the preference has been stored.
     *
     * @param $newLanguage locale directory name requested by the user
     */
    public function updateLanguage($newLanguage)
    {
        # we are not going to change it to the current language
        if ($newLanguage == $this->getLanguage()) {
            // Keep the msgid constant and insert the value afterwards; a msgid
            // built by concatenation can never match a translation catalogue.
            $this->wallabag->messages->add('w', sprintf(_('still using the "%s" language!'), $this->getLanguage()));
            // NOTE(review): presumably Tools::redirect() ends the request - confirm.
            Tools::redirect('?view=config');
        }

        $languages = $this->getInstalledLanguages();
        $actualLanguage = false;

        foreach ($languages as $language) {
            if ($language['value'] == $newLanguage) {
                $actualLanguage = true;
                break;
            }
        }

        if (!$actualLanguage) {
            $this->wallabag->messages->add('e', _('that language does not seem to be installed'));
            Tools::redirect('?view=config');
        }

        $this->wallabag->store->updateUserConfig($this->wallabag->user->getId(), 'language', $newLanguage);
        $this->wallabag->messages->add('s', _('you have changed your language preferences'));

        // Mirror the stored preference into the session copy of the user.
        $currentConfig = $_SESSION['poche_user']->config;
        $currentConfig['language'] = $newLanguage;

        $_SESSION['poche_user']->setConfig($currentConfig);

        Tools::emptyCache();
        Tools::redirect('?view=config');
    }
}
diff --git a/inc/poche/Poche.class.php b/inc/poche/Poche.class.php
index 2b0c3bf8..3e9c2991 100755
--- a/inc/poche/Poche.class.php
+++ b/inc/poche/Poche.class.php
@@ -5,402 +5,126 @@
5 * @category wallabag 5 * @category wallabag
6 * @author Nicolas Lœuillet <nicolas@loeuillet.org> 6 * @author Nicolas Lœuillet <nicolas@loeuillet.org>
7 * @copyright 2013 7 * @copyright 2013
8 * @license http://www.wtfpl.net/ see COPYING file 8 * @license http://opensource.org/licenses/MIT see COPYING file
9 */ 9 */
10 10
11class Poche 11class Poche
12{ 12{
13 public static $canRenderTemplates = true; 13 /**
14 public static $configFileAvailable = true; 14 * @var User
15 15 */
16 public $user; 16 public $user;
17 /**
18 * @var Database
19 */
17 public $store; 20 public $store;
21 /**
22 * @var Template
23 */
18 public $tpl; 24 public $tpl;
25 /**
26 * @var Language
27 */
28 public $language;
29 /**
30 * @var Routing
31 */
32 public $routing;
33 /**
34 * @var Messages
35 */
19 public $messages; 36 public $messages;
37 /**
38 * @var Paginator
39 */
20 public $pagination; 40 public $pagination;
21 41
22 private $currentTheme = '';
23 private $currentLanguage = '';
24 private $notInstalledMessage = array();
25
26 private $language_names = array(
27 'cs_CZ.utf8' => 'čeština',
28 'de_DE.utf8' => 'German',
29 'en_EN.utf8' => 'English',
30 'es_ES.utf8' => 'Español',
31 'fa_IR.utf8' => 'فارسی',
32 'fr_FR.utf8' => 'Français',
33 'it_IT.utf8' => 'Italiano',
34 'pl_PL.utf8' => 'Polski',
35 'pt_BR.utf8' => 'Português (Brasil)',
36 'ru_RU.utf8' => 'Pусский',
37 'sl_SI.utf8' => 'Slovenščina',
38 'uk_UA.utf8' => 'Українська',
39 );
40 public function __construct() 42 public function __construct()
41 { 43 {
42 if ($this->configFileIsAvailable()) { 44 $this->init();
43 $this->init();
44 }
45
46 if ($this->themeIsInstalled()) {
47 $this->initTpl();
48 }
49
50 if ($this->systemIsInstalled()) {
51 $this->store = new Database();
52 $this->messages = new Messages();
53 # installation
54 if (! $this->store->isInstalled()) {
55 $this->install();
56 }
57 $this->store->checkTags();
58 }
59 } 45 }
60 46
61 private function init() 47 private function init()
62 { 48 {
63 Tools::initPhp(); 49 Tools::initPhp();
64 50
65 if (isset($_SESSION['poche_user']) && $_SESSION['poche_user'] != array()) { 51 $pocheUser = Session::getParam('poche_user');
66 $this->user = $_SESSION['poche_user']; 52
53 if ($pocheUser && $pocheUser != array()) {
54 $this->user = $pocheUser;
67 } else { 55 } else {
68 # fake user, just for install & login screens 56 // fake user, just for install & login screens
69 $this->user = new User(); 57 $this->user = new User();
70 $this->user->setConfig($this->getDefaultConfig()); 58 $this->user->setConfig($this->getDefaultConfig());
71 } 59 }
72 60
73 # l10n 61 $this->pagination = new Paginator($this->user->getConfigValue('pager'), 'p');
74 $language = $this->user->getConfigValue('language'); 62 $this->language = new Language($this);
75 @putenv('LC_ALL=' . $language); 63 $this->tpl = new Template($this);
76 setlocale(LC_ALL, $language); 64 $this->store = new Database();
77 bindtextdomain($language, LOCALE); 65 $this->messages = new Messages();
78 textdomain($language); 66 $this->routing = new Routing($this);
79
80 # Pagination
81 $this->pagination = new Paginator($this->user->getConfigValue('pager'), 'p');
82
83 # Set up theme
84 $themeDirectory = $this->user->getConfigValue('theme');
85
86 if ($themeDirectory === false) {
87 $themeDirectory = DEFAULT_THEME;
88 }
89
90 $this->currentTheme = $themeDirectory;
91
92 # Set up language
93 $languageDirectory = $this->user->getConfigValue('language');
94
95 if ($languageDirectory === false) {
96 $languageDirectory = DEFAULT_THEME;
97 }
98
99 $this->currentLanguage = $languageDirectory;
100 }
101
102 public function configFileIsAvailable() {
103 if (! self::$configFileAvailable) {
104 $this->notInstalledMessage[] = 'You have to copy (don\'t just rename!) inc/poche/config.inc.default.php to inc/poche/config.inc.php.';
105
106 return false;
107 }
108
109 return true;
110 } 67 }
111 68
112 public function themeIsInstalled() { 69 public function run()
113 $passTheme = TRUE; 70 {
114 # Twig is an absolute requirement for Poche to function. Abort immediately if the Composer installer hasn't been run yet 71 $this->routing->run();
115 if (! self::$canRenderTemplates) {
116 $this->notInstalledMessage[] = 'Twig does not seem to be installed. Please initialize the Composer installation to automatically fetch dependencies. You can also download <a href="http://wllbg.org/vendor">vendor.zip</a> and extract it in your wallabag folder.';
117 $passTheme = FALSE;
118 }
119
120 if (! is_writable(CACHE)) {
121 $this->notInstalledMessage[] = 'You don\'t have write access on cache directory.';
122
123 self::$canRenderTemplates = false;
124
125 $passTheme = FALSE;
126 }
127
128 # Check if the selected theme and its requirements are present
129 $theme = $this->getTheme();
130
131 if ($theme != '' && ! is_dir(THEME . '/' . $theme)) {
132 $this->notInstalledMessage[] = 'The currently selected theme (' . $theme . ') does not seem to be properly installed (Missing directory: ' . THEME . '/' . $theme . ')';
133
134 self::$canRenderTemplates = false;
135
136 $passTheme = FALSE;
137 }
138
139 $themeInfo = $this->getThemeInfo($theme);
140 if (isset($themeInfo['requirements']) && is_array($themeInfo['requirements'])) {
141 foreach ($themeInfo['requirements'] as $requiredTheme) {
142 if (! is_dir(THEME . '/' . $requiredTheme)) {
143 $this->notInstalledMessage[] = 'The required "' . $requiredTheme . '" theme is missing for the current theme (' . $theme . ')';
144
145 self::$canRenderTemplates = false;
146
147 $passTheme = FALSE;
148 }
149 }
150 }
151
152 if (!$passTheme) {
153 return FALSE;
154 }
155
156
157 return true;
158 } 72 }
159 73
160 /** 74 /**
161 * all checks before installation. 75 * Creates a new user
162 * @todo move HTML to template
163 * @return boolean
164 */ 76 */
165 public function systemIsInstalled() 77 public function createNewUser($username, $password)
166 {
167 $msg = TRUE;
168
169 $configSalt = defined('SALT') ? constant('SALT') : '';
170
171 if (empty($configSalt)) {
172 $this->notInstalledMessage[] = 'You have not yet filled in the SALT value in the config.inc.php file.';
173 $msg = FALSE;
174 }
175 if (STORAGE == 'sqlite' && ! file_exists(STORAGE_SQLITE)) {
176 Tools::logm('sqlite file doesn\'t exist');
177 $this->notInstalledMessage[] = 'sqlite file doesn\'t exist, you can find it in install folder. Copy it in /db folder.';
178 $msg = FALSE;
179 }
180 if (is_dir(ROOT . '/install') && ! DEBUG_POCHE) {
181 $this->notInstalledMessage[] = 'you have to delete the /install folder before using poche.';
182 $msg = FALSE;
183 }
184 if (STORAGE == 'sqlite' && ! is_writable(STORAGE_SQLITE)) {
185 Tools::logm('you don\'t have write access on sqlite file');
186 $this->notInstalledMessage[] = 'You don\'t have write access on sqlite file.';
187 $msg = FALSE;
188 }
189
190 if (! $msg) {
191 return false;
192 }
193
194 return true;
195 }
196
197 public function getNotInstalledMessage() {
198 return $this->notInstalledMessage;
199 }
200
201 private function initTpl()
202 { 78 {
203 $loaderChain = new Twig_Loader_Chain(); 79 if (!empty($username) && !empty($password)){
204 $theme = $this->getTheme(); 80 $newUsername = filter_var($username, FILTER_SANITIZE_STRING);
205 81 if (!$this->store->userExists($newUsername)){
206 # add the current theme as first to the loader chain so Twig will look there first for overridden template files 82 if ($this->store->install($newUsername, Tools::encodeString($password . $newUsername))) {
207 try { 83 Tools::logm('The new user ' . $newUsername . ' has been installed');
208 $loaderChain->addLoader(new Twig_Loader_Filesystem(THEME . '/' . $theme)); 84 $this->messages->add('s', sprintf(_('The new user %s has been installed. Do you want to <a href="?logout">logout ?</a>'), $newUsername));
209 } catch (Twig_Error_Loader $e) {
210 # @todo isInstalled() should catch this, inject Twig later
211 die('The currently selected theme (' . $theme . ') does not seem to be properly installed (' . THEME . '/' . $theme .' is missing)');
212 }
213
214 # add all required themes to the loader chain
215 $themeInfo = $this->getThemeInfo($theme);
216 if (isset($themeInfo['requirements']) && is_array($themeInfo['requirements'])) {
217 foreach ($themeInfo['requirements'] as $requiredTheme) {
218 try {
219 $loaderChain->addLoader(new Twig_Loader_Filesystem(THEME . '/' . $requiredTheme));
220 } catch (Twig_Error_Loader $e) {
221 # @todo isInstalled() should catch this, inject Twig later
222 die('The required "' . $requiredTheme . '" theme is missing for the current theme (' . $theme . ')');
223 }
224 }
225 }
226
227 if (DEBUG_POCHE) {
228 $twigParams = array();
229 } else {
230 $twigParams = array('cache' => CACHE);
231 }
232
233 $this->tpl = new Twig_Environment($loaderChain, $twigParams);
234 $this->tpl->addExtension(new Twig_Extensions_Extension_I18n());
235
236 # filter to display domain name of an url
237 $filter = new Twig_SimpleFilter('getDomain', 'Tools::getDomain');
238 $this->tpl->addFilter($filter);
239
240 # filter for reading time
241 $filter = new Twig_SimpleFilter('getReadingTime', 'Tools::getReadingTime');
242 $this->tpl->addFilter($filter);
243 }
244
245 public function createNewUser() {
246 if (isset($_GET['newuser'])){
247 if ($_POST['newusername'] != "" && $_POST['password4newuser'] != ""){
248 $newusername = filter_var($_POST['newusername'], FILTER_SANITIZE_STRING);
249 if (!$this->store->userExists($newusername)){
250 if ($this->store->install($newusername, Tools::encodeString($_POST['password4newuser'] . $newusername))) {
251 Tools::logm('The new user '.$newusername.' has been installed');
252 $this->messages->add('s', sprintf(_('The new user %s has been installed. Do you want to <a href="?logout">logout ?</a>'),$newusername));
253 Tools::redirect();
254 }
255 else {
256 Tools::logm('error during adding new user');
257 Tools::redirect();
258 }
259 }
260 else {
261 $this->messages->add('e', sprintf(_('Error : An user with the name %s already exists !'),$newusername));
262 Tools::logm('An user with the name '.$newusername.' already exists !');
263 Tools::redirect(); 85 Tools::redirect();
264 } 86 }
265 }
266 }
267 }
268
269 public function deleteUser(){
270 if (isset($_GET['deluser'])){
271 if ($this->store->listUsers() > 1) {
272 if (Tools::encodeString($_POST['password4deletinguser'].$this->user->getUsername()) == $this->store->getUserPassword($this->user->getId())) {
273 $username = $this->user->getUsername();
274 $this->store->deleteUserConfig($this->user->getId());
275 Tools::logm('The configuration for user '. $username .' has been deleted !');
276 $this->store->deleteTagsEntriesAndEntries($this->user->getId());
277 Tools::logm('The entries for user '. $username .' has been deleted !');
278 $this->store->deleteUser($this->user->getId());
279 Tools::logm('User '. $username .' has been completely deleted !');
280 Session::logout();
281 Tools::logm('logout');
282 Tools::redirect();
283 $this->messages->add('s', sprintf(_('User %s has been successfully deleted !'),$newusername));
284 }
285 else { 87 else {
286 Tools::logm('Bad password !'); 88 Tools::logm('error during adding new user');
287 $this->messages->add('e', _('Error : The password is wrong !'));
288 }
289 }
290 else {
291 Tools::logm('Only user !');
292 $this->messages->add('e', _('Error : You are the only user, you cannot delete your account !'));
293 }
294 }
295 }
296
297 private function install()
298 {
299 Tools::logm('poche still not installed');
300 echo $this->tpl->render('install.twig', array(
301 'token' => Session::getToken(),
302 'theme' => $this->getTheme(),
303 'poche_url' => Tools::getPocheUrl()
304 ));
305 if (isset($_GET['install'])) {
306 if (($_POST['password'] == $_POST['password_repeat'])
307 && $_POST['password'] != "" && $_POST['login'] != "") {
308 # let's rock, install poche baby !
309 if ($this->store->install($_POST['login'], Tools::encodeString($_POST['password'] . $_POST['login'])))
310 {
311 Session::logout();
312 Tools::logm('poche is now installed');
313 Tools::redirect(); 89 Tools::redirect();
314 } 90 }
315 } 91 }
316 else { 92 else {
317 Tools::logm('error during installation'); 93 $this->messages->add('e', sprintf(_('Error : An user with the name %s already exists !'), $newUsername));
94 Tools::logm('An user with the name ' . $newUsername . ' already exists !');
318 Tools::redirect(); 95 Tools::redirect();
319 } 96 }
320 } 97 }
321 exit();
322 }
323
324 public function getTheme() {
325 return $this->currentTheme;
326 } 98 }
327 99
328 /** 100 /**
329 * Provides theme information by parsing theme.ini file if present in the theme's root directory. 101 * Delete an existing user
330 * In all cases, the following data will be returned:
331 * - name: theme's name, or key if the theme is unnamed,
332 * - current: boolean informing if the theme is the current user theme.
333 *
334 * @param string $theme Theme key (directory name)
335 * @return array|boolean Theme information, or false if the theme doesn't exist.
336 */ 102 */
337 public function getThemeInfo($theme) { 103 public function deleteUser($password)
338 if (!is_dir(THEME . '/' . $theme)) { 104 {
339 return false; 105 if ($this->store->listUsers() > 1) {
340 } 106 if (Tools::encodeString($password . $this->user->getUsername()) == $this->store->getUserPassword($this->user->getId())) {
341 107 $username = $this->user->getUsername();
342 $themeIniFile = THEME . '/' . $theme . '/theme.ini'; 108 $this->store->deleteUserConfig($this->user->getId());
343 $themeInfo = array(); 109 Tools::logm('The configuration for user '. $username .' has been deleted !');
344 110 $this->store->deleteTagsEntriesAndEntries($this->user->getId());
345 if (is_file($themeIniFile) && is_readable($themeIniFile)) { 111 Tools::logm('The entries for user '. $username .' has been deleted !');
346 $themeInfo = parse_ini_file($themeIniFile); 112 $this->store->deleteUser($this->user->getId());
347 } 113 Tools::logm('User '. $username .' has been completely deleted !');
348 114 Session::logout();
349 if ($themeInfo === false) { 115 Tools::logm('logout');
350 $themeInfo = array(); 116 Tools::redirect();
351 } 117 $this->messages->add('s', sprintf(_('User %s has been successfully deleted !'), $username));
352 if (!isset($themeInfo['name'])) {
353 $themeInfo['name'] = $theme;
354 }
355 $themeInfo['current'] = ($theme === $this->getTheme());
356
357 return $themeInfo;
358 }
359
360 public function getInstalledThemes() {
361 $handle = opendir(THEME);
362 $themes = array();
363
364 while (($theme = readdir($handle)) !== false) {
365 # Themes are stored in a directory, so all directory names are themes
366 # @todo move theme installation data to database
367 if (!is_dir(THEME . '/' . $theme) || in_array($theme, array('.', '..'))) {
368 continue;
369 }
370
371 $themes[$theme] = $this->getThemeInfo($theme);
372 }
373
374 ksort($themes);
375
376 return $themes;
377 }
378
379 public function getLanguage() {
380 return $this->currentLanguage;
381 }
382
383 public function getInstalledLanguages() {
384 $handle = opendir(LOCALE);
385 $languages = array();
386
387 while (($language = readdir($handle)) !== false) {
388 # Languages are stored in a directory, so all directory names are languages
389 # @todo move language installation data to database
390 if (! is_dir(LOCALE . '/' . $language) || in_array($language, array('..', '.', 'tools'))) {
391 continue;
392 } 118 }
393 119 else {
394 $current = false; 120 Tools::logm('Bad password !');
395 121 $this->messages->add('e', _('Error : The password is wrong !'));
396 if ($language === $this->getLanguage()) {
397 $current = true;
398 } 122 }
399
400 $languages[] = array('name' => (isset($this->language_names[$language]) ? $this->language_names[$language] : $language), 'value' => $language, 'current' => $current);
401 } 123 }
402 124 else {
403 return $languages; 125 Tools::logm('Only user !');
126 $this->messages->add('e', _('Error : You are the only user, you cannot delete your account !'));
127 }
404 } 128 }
405 129
406 public function getDefaultConfig() 130 public function getDefaultConfig()
@@ -425,7 +149,7 @@ class Poche
425 $body = $content['rss']['channel']['item']['description']; 149 $body = $content['rss']['channel']['item']['description'];
426 150
427 // clean content from prevent xss attack 151 // clean content from prevent xss attack
428 $purifier = $this->getPurifier(); 152 $purifier = $this->_getPurifier();
429 $title = $purifier->purify($title); 153 $title = $purifier->purify($title);
430 $body = $purifier->purify($body); 154 $body = $purifier->purify($body);
431 155
@@ -437,7 +161,7 @@ class Poche
437 if ( $last_id ) { 161 if ( $last_id ) {
438 Tools::logm('add link ' . $url->getUrl()); 162 Tools::logm('add link ' . $url->getUrl());
439 if (DOWNLOAD_PICTURES) { 163 if (DOWNLOAD_PICTURES) {
440 $content = filtre_picture($body, $url->getUrl(), $last_id); 164 $content = Picture::filterPicture($body, $url->getUrl(), $last_id);
441 Tools::logm('updating content article'); 165 Tools::logm('updating content article');
442 $this->store->updateContent($last_id, $content, $this->user->getId()); 166 $this->store->updateContent($last_id, $content, $this->user->getId());
443 } 167 }
@@ -472,7 +196,7 @@ class Poche
472 $msg = 'delete link #' . $id; 196 $msg = 'delete link #' . $id;
473 if ($this->store->deleteById($id, $this->user->getId())) { 197 if ($this->store->deleteById($id, $this->user->getId())) {
474 if (DOWNLOAD_PICTURES) { 198 if (DOWNLOAD_PICTURES) {
475 remove_directory(ABS_PATH . $id); 199 Picture::removeDirectory(ABS_PATH . $id);
476 } 200 }
477 $this->messages->add('s', _('the link has been deleted successfully')); 201 $this->messages->add('s', _('the link has been deleted successfully'));
478 } 202 }
@@ -590,16 +314,16 @@ class Poche
590 switch ($view) 314 switch ($view)
591 { 315 {
592 case 'config': 316 case 'config':
593 $dev_infos = $this->getPocheVersion('dev'); 317 $dev_infos = $this->_getPocheVersion('dev');
594 $dev = trim($dev_infos[0]); 318 $dev = trim($dev_infos[0]);
595 $check_time_dev = date('d-M-Y H:i', $dev_infos[1]); 319 $check_time_dev = date('d-M-Y H:i', $dev_infos[1]);
596 $prod_infos = $this->getPocheVersion('prod'); 320 $prod_infos = $this->_getPocheVersion('prod');
597 $prod = trim($prod_infos[0]); 321 $prod = trim($prod_infos[0]);
598 $check_time_prod = date('d-M-Y H:i', $prod_infos[1]); 322 $check_time_prod = date('d-M-Y H:i', $prod_infos[1]);
599 $compare_dev = version_compare(POCHE, $dev); 323 $compare_dev = version_compare(POCHE, $dev);
600 $compare_prod = version_compare(POCHE, $prod); 324 $compare_prod = version_compare(POCHE, $prod);
601 $themes = $this->getInstalledThemes(); 325 $themes = $this->tpl->getInstalledThemes();
602 $languages = $this->getInstalledLanguages(); 326 $languages = $this->language->getInstalledLanguages();
603 $token = $this->user->getConfigValue('token'); 327 $token = $this->user->getConfigValue('token');
604 $http_auth = (isset($_SERVER['PHP_AUTH_USER']) || isset($_SERVER['REMOTE_USER'])) ? true : false; 328 $http_auth = (isset($_SERVER['PHP_AUTH_USER']) || isset($_SERVER['REMOTE_USER'])) ? true : false;
605 $only_user = ($this->store->listUsers() > 1) ? false : true; 329 $only_user = ($this->store->listUsers() > 1) ? false : true;
@@ -703,7 +427,7 @@ class Poche
703 'listmode' => (isset($_COOKIE['listmode']) ? true : false), 427 'listmode' => (isset($_COOKIE['listmode']) ? true : false),
704 ); 428 );
705 429
706 //if id is given - we retrive entries by tag: id is tag id 430 //if id is given - we retrieve entries by tag: id is tag id
707 if ($id) { 431 if ($id) {
708 $tpl_vars['tag'] = $this->store->retrieveTag($id, $this->user->getId()); 432 $tpl_vars['tag'] = $this->store->retrieveTag($id, $this->user->getId());
709 $tpl_vars['id'] = intval($id); 433 $tpl_vars['id'] = intval($id);
@@ -733,7 +457,7 @@ class Poche
733 * @todo set the new password in function header like this updatePassword($newPassword) 457 * @todo set the new password in function header like this updatePassword($newPassword)
734 * @return boolean 458 * @return boolean
735 */ 459 */
736 public function updatePassword() 460 public function updatePassword($password, $confirmPassword)
737 { 461 {
738 if (MODE_DEMO) { 462 if (MODE_DEMO) {
739 $this->messages->add('i', _('in demo mode, you can\'t update your password')); 463 $this->messages->add('i', _('in demo mode, you can\'t update your password'));
@@ -741,10 +465,10 @@ class Poche
741 Tools::redirect('?view=config'); 465 Tools::redirect('?view=config');
742 } 466 }
743 else { 467 else {
744 if (isset($_POST['password']) && isset($_POST['password_repeat'])) { 468 if (isset($password) && isset($confirmPassword)) {
745 if ($_POST['password'] == $_POST['password_repeat'] && $_POST['password'] != "") { 469 if ($password == $confirmPassword && !empty($password)) {
746 $this->messages->add('s', _('your password has been updated')); 470 $this->messages->add('s', _('your password has been updated'));
747 $this->store->updatePassword($this->user->getId(), Tools::encodeString($_POST['password'] . $this->user->getUsername())); 471 $this->store->updatePassword($this->user->getId(), Tools::encodeString($password . $this->user->getUsername()));
748 Session::logout(); 472 Session::logout();
749 Tools::logm('password updated'); 473 Tools::logm('password updated');
750 Tools::redirect(); 474 Tools::redirect();
@@ -757,102 +481,25 @@ class Poche
757 } 481 }
758 } 482 }
759 483
760 public function updateTheme()
761 {
762 # no data
763 if (empty($_POST['theme'])) {
764 }
765
766 # we are not going to change it to the current theme...
767 if ($_POST['theme'] == $this->getTheme()) {
768 $this->messages->add('w', _('still using the "' . $this->getTheme() . '" theme!'));
769 Tools::redirect('?view=config');
770 }
771
772 $themes = $this->getInstalledThemes();
773 $actualTheme = false;
774
775 foreach (array_keys($themes) as $theme) {
776 if ($theme == $_POST['theme']) {
777 $actualTheme = true;
778 break;
779 }
780 }
781
782 if (! $actualTheme) {
783 $this->messages->add('e', _('that theme does not seem to be installed'));
784 Tools::redirect('?view=config');
785 }
786
787 $this->store->updateUserConfig($this->user->getId(), 'theme', $_POST['theme']);
788 $this->messages->add('s', _('you have changed your theme preferences'));
789
790 $currentConfig = $_SESSION['poche_user']->config;
791 $currentConfig['theme'] = $_POST['theme'];
792
793 $_SESSION['poche_user']->setConfig($currentConfig);
794
795 $this->emptyCache();
796
797 Tools::redirect('?view=config');
798 }
799
800 public function updateLanguage()
801 {
802 # no data
803 if (empty($_POST['language'])) {
804 }
805
806 # we are not going to change it to the current language...
807 if ($_POST['language'] == $this->getLanguage()) {
808 $this->messages->add('w', _('still using the "' . $this->getLanguage() . '" language!'));
809 Tools::redirect('?view=config');
810 }
811
812 $languages = $this->getInstalledLanguages();
813 $actualLanguage = false;
814
815 foreach ($languages as $language) {
816 if ($language['value'] == $_POST['language']) {
817 $actualLanguage = true;
818 break;
819 }
820 }
821
822 if (! $actualLanguage) {
823 $this->messages->add('e', _('that language does not seem to be installed'));
824 Tools::redirect('?view=config');
825 }
826
827 $this->store->updateUserConfig($this->user->getId(), 'language', $_POST['language']);
828 $this->messages->add('s', _('you have changed your language preferences'));
829
830 $currentConfig = $_SESSION['poche_user']->config;
831 $currentConfig['language'] = $_POST['language'];
832
833 $_SESSION['poche_user']->setConfig($currentConfig);
834
835 $this->emptyCache();
836
837 Tools::redirect('?view=config');
838 }
839 /** 484 /**
840 * get credentials from differents sources 485 * Get credentials from differents sources
841 * it redirects the user to the $referer link 486 * It redirects the user to the $referer link
487 *
842 * @return array 488 * @return array
843 */ 489 */
844 private function credentials() { 490 private function credentials()
845 if(isset($_SERVER['PHP_AUTH_USER'])) { 491 {
846 return array($_SERVER['PHP_AUTH_USER'],'php_auth',true); 492 if (isset($_SERVER['PHP_AUTH_USER'])) {
493 return array($_SERVER['PHP_AUTH_USER'], 'php_auth', true);
847 } 494 }
848 if(!empty($_POST['login']) && !empty($_POST['password'])) { 495 if (!empty($_POST['login']) && !empty($_POST['password'])) {
849 return array($_POST['login'],$_POST['password'],false); 496 return array($_POST['login'], $_POST['password'], false);
850 } 497 }
851 if(isset($_SERVER['REMOTE_USER'])) { 498 if (isset($_SERVER['REMOTE_USER'])) {
852 return array($_SERVER['REMOTE_USER'],'http_auth',true); 499 return array($_SERVER['REMOTE_USER'], 'http_auth', true);
853 } 500 }
854 501
855 return array(false,false,false); 502 return array(false, false, false);
856 } 503 }
857 504
858 /** 505 /**
@@ -909,129 +556,148 @@ class Poche
909 } 556 }
910 557
911 /** 558 /**
912 * import datas into your poche 559 * import datas into your wallabag
913 * @return boolean 560 * @return boolean
914 */ 561 */
915 public function import() { 562 public function import()
916 563 {
917 if ( isset($_FILES['file']) ) { 564 if (isset($_FILES['file'])) {
918 Tools::logm('Import stated: parsing file'); 565 Tools::logm('Import stated: parsing file');
919 566
920 // assume, that file is in json format 567 // assume, that file is in json format
921 $str_data = file_get_contents($_FILES['file']['tmp_name']); 568
922 $data = json_decode($str_data, true); 569 $str_data = file_get_contents($_FILES['file']['tmp_name']);
923 570 $data = json_decode($str_data, true);
924 if ( $data === null ) { 571 if ($data === null) {
925 //not json - assume html 572
926 $html = new simple_html_dom(); 573 // not json - assume html
927 $html->load_file($_FILES['file']['tmp_name']); 574
928 $data = array(); 575 $html = new simple_html_dom();
929 $read = 0; 576 $html->load_file($_FILES['file']['tmp_name']);
930 foreach (array('ol','ul') as $list) { 577 $data = array();
931 foreach ($html->find($list) as $ul) { 578 $read = 0;
932 foreach ($ul->find('li') as $li) { 579 foreach(array('ol','ul') as $list) {
933 $tmpEntry = array(); 580 foreach($html->find($list) as $ul) {
934 $a = $li->find('a'); 581 foreach($ul->find('li') as $li) {
935 $tmpEntry['url'] = $a[0]->href; 582 $tmpEntry = array();
936 $tmpEntry['tags'] = $a[0]->tags; 583 $a = $li->find('a');
937 $tmpEntry['is_read'] = $read; 584 $tmpEntry['url'] = $a[0]->href;
938 if ($tmpEntry['url']) { 585 $tmpEntry['tags'] = $a[0]->tags;
939 $data[] = $tmpEntry; 586 $tmpEntry['is_read'] = $read;
940 } 587 if ($tmpEntry['url']) {
941 } 588 $data[] = $tmpEntry;
942 # the second <ol/ul> is for read links 589 }
943 $read = ((sizeof($data) && $read)?0:1); 590 }
591
592 // the second <ol/ul> is for read links
593
594 $read = ((sizeof($data) && $read) ? 0 : 1);
595 }
596 }
944 } 597 }
945 }
946 }
947 598
948 //for readability structure 599 // for readability structure
949 foreach ($data as $record) { 600
950 if (is_array($record)) { 601 foreach($data as $record) {
951 $data[] = $record; 602 if (is_array($record)) {
952 foreach ($record as $record2) { 603 $data[] = $record;
953 if (is_array($record2)) { 604 foreach($record as $record2) {
954 $data[] = $record2; 605 if (is_array($record2)) {
955 } 606 $data[] = $record2;
607 }
608 }
609 }
610 }
611
612 $urlsInserted = array(); //urls of articles inserted
613 foreach($data as $record) {
614 $url = trim(isset($record['article__url']) ? $record['article__url'] : (isset($record['url']) ? $record['url'] : ''));
615 if ($url and !in_array($url, $urlsInserted)) {
616 $title = (isset($record['title']) ? $record['title'] : _('Untitled - Import - ') . '</a> <a href="./?import">' . _('click to finish import') . '</a><a>');
617 $body = (isset($record['content']) ? $record['content'] : '');
618 $isRead = (isset($record['is_read']) ? intval($record['is_read']) : (isset($record['archive']) ? intval($record['archive']) : 0));
619 $isFavorite = (isset($record['is_fav']) ? intval($record['is_fav']) : (isset($record['favorite']) ? intval($record['favorite']) : 0));
620
621 // insert new record
622
623 $id = $this->store->add($url, $title, $body, $this->user->getId() , $isFavorite, $isRead);
624 if ($id) {
625 $urlsInserted[] = $url; //add
626 if (isset($record['tags']) && trim($record['tags'])) {
627
628 // @TODO: set tags
629
630 }
631 }
632 }
956 } 633 }
957 }
958 }
959 634
960 $urlsInserted = array(); //urls of articles inserted 635 $i = sizeof($urlsInserted);
961 foreach ($data as $record) { 636 if ($i > 0) {
962 $url = trim( isset($record['article__url']) ? $record['article__url'] : (isset($record['url']) ? $record['url'] : '') ); 637 $this->messages->add('s', _('Articles inserted: ') . $i . _('. Please note, that some may be marked as "read".'));
963 if ( $url and !in_array($url, $urlsInserted) ) {
964 $title = (isset($record['title']) ? $record['title'] : _('Untitled - Import - ').'</a> <a href="./?import">'._('click to finish import').'</a><a>');
965 $body = (isset($record['content']) ? $record['content'] : '');
966 $isRead = (isset($record['is_read']) ? intval($record['is_read']) : (isset($record['archive'])?intval($record['archive']):0));
967 $isFavorite = (isset($record['is_fav']) ? intval($record['is_fav']) : (isset($record['favorite'])?intval($record['favorite']):0) );
968 //insert new record
969 $id = $this->store->add($url, $title, $body, $this->user->getId(), $isFavorite, $isRead);
970 if ( $id ) {
971 $urlsInserted[] = $url; //add
972
973 if ( isset($record['tags']) && trim($record['tags']) ) {
974 //@TODO: set tags
975
976 }
977 } 638 }
978 }
979 }
980 639
981 $i = sizeof($urlsInserted); 640 Tools::logm('Import of articles finished: ' . $i . ' articles added (w/o content if not provided).');
982 if ( $i > 0 ) {
983 $this->messages->add('s', _('Articles inserted: ').$i._('. Please note, that some may be marked as "read".'));
984 } 641 }
985 Tools::logm('Import of articles finished: '.$i.' articles added (w/o content if not provided).');
986 }
987 //file parsing finished here
988 642
989 //now download article contents if any 643 // file parsing finished here
644 // now download article contents if any
645 // check if we need to download any content
990 646
991 //check if we need to download any content 647 $recordsDownloadRequired = $this->store->retrieveUnfetchedEntriesCount($this->user->getId());
992 $recordsDownloadRequired = $this->store->retrieveUnfetchedEntriesCount($this->user->getId());
993 if ( $recordsDownloadRequired == 0 ) {
994 //nothing to download
995 $this->messages->add('s', _('Import finished.'));
996 Tools::logm('Import finished completely');
997 Tools::redirect();
998 }
999 else {
1000 //if just inserted - don't download anything, download will start in next reload
1001 if ( !isset($_FILES['file']) ) {
1002 //download next batch
1003 Tools::logm('Fetching next batch of articles...');
1004 $items = $this->store->retrieveUnfetchedEntries($this->user->getId(), IMPORT_LIMIT);
1005 648
1006 $purifier = $this->getPurifier(); 649 if ($recordsDownloadRequired == 0) {
1007 650
1008 foreach ($items as $item) { 651 // nothing to download
1009 $url = new Url(base64_encode($item['url']));
1010 Tools::logm('Fetching article '.$item['id']);
1011 $content = Tools::getPageContent($url);
1012 652
1013 $title = (($content['rss']['channel']['item']['title'] != '') ? $content['rss']['channel']['item']['title'] : _('Untitled')); 653 $this->messages->add('s', _('Import finished.'));
1014 $body = (($content['rss']['channel']['item']['description'] != '') ? $content['rss']['channel']['item']['description'] : _('Undefined')); 654 Tools::logm('Import finished completely');
655 Tools::redirect();
656 }
657 else {
658
659 // if just inserted - don't download anything, download will start in next reload
660
661 if (!isset($_FILES['file'])) {
1015 662
1016 //clean content to prevent xss attack 663 // download next batch
1017 $title = $purifier->purify($title);
1018 $body = $purifier->purify($body);
1019 664
1020 $this->store->updateContentAndTitle($item['id'], $title, $body, $this->user->getId()); 665 Tools::logm('Fetching next batch of articles...');
1021 Tools::logm('Article '.$item['id'].' updated.'); 666 $items = $this->store->retrieveUnfetchedEntries($this->user->getId() , IMPORT_LIMIT);
1022 } 667 $purifier = $this->_getPurifier();
668 foreach($items as $item) {
669 $url = new Url(base64_encode($item['url']));
670 Tools::logm('Fetching article ' . $item['id']);
671 $content = Tools::getPageContent($url);
672 $title = (($content['rss']['channel']['item']['title'] != '') ? $content['rss']['channel']['item']['title'] : _('Untitled'));
673 $body = (($content['rss']['channel']['item']['description'] != '') ? $content['rss']['channel']['item']['description'] : _('Undefined'));
1023 674
675 // clean content to prevent xss attack
676
677 $title = $purifier->purify($title);
678 $body = $purifier->purify($body);
679 $this->store->updateContentAndTitle($item['id'], $title, $body, $this->user->getId());
680 Tools::logm('Article ' . $item['id'] . ' updated.');
681 }
682 }
1024 } 683 }
1025 }
1026 684
1027 return array('includeImport'=>true, 'import'=>array('recordsDownloadRequired'=>$recordsDownloadRequired, 'recordsUnderDownload'=> IMPORT_LIMIT, 'delay'=> IMPORT_DELAY * 1000) ); 685 return array(
686 'includeImport' => true,
687 'import' => array(
688 'recordsDownloadRequired' => $recordsDownloadRequired,
689 'recordsUnderDownload' => IMPORT_LIMIT,
690 'delay' => IMPORT_DELAY * 1000
691 )
692 );
1028 } 693 }
1029 694
1030 /** 695 /**
1031 * export poche entries in json 696 * export poche entries in json
1032 * @return json all poche entries 697 * @return json all poche entries
1033 */ 698 */
1034 public function export() { 699 public function export()
700 {
1035 $filename = "wallabag-export-".$this->user->getId()."-".date("Y-m-d").".json"; 701 $filename = "wallabag-export-".$this->user->getId()."-".date("Y-m-d").".json";
1036 header('Content-Disposition: attachment; filename='.$filename); 702 header('Content-Disposition: attachment; filename='.$filename);
1037 703
@@ -1047,7 +713,7 @@ class Poche
1047 * @param string $which 'prod' or 'dev' 713 * @param string $which 'prod' or 'dev'
1048 * @return string latest $which version 714 * @return string latest $which version
1049 */ 715 */
1050 private function getPocheVersion($which = 'prod') { 716 private function _getPocheVersion($which = 'prod') {
1051 $cache_file = CACHE . '/' . $which; 717 $cache_file = CACHE . '/' . $which;
1052 $check_time = time(); 718 $check_time = time();
1053 719
@@ -1062,29 +728,27 @@ class Poche
1062 return array($version, $check_time); 728 return array($version, $check_time);
1063 } 729 }
1064 730
1065 public function generateToken() 731 /**
732 * Update token for current user
733 */
734 public function updateToken()
1066 { 735 {
1067 if (ini_get('open_basedir') === '') { 736 $token = Tools::generateToken();
1068 if (strtoupper(substr(PHP_OS, 0, 3)) === 'WIN') { 737 $this->store->updateUserConfig($this->user->getId(), 'token', $token);
1069 echo 'This is a server using Windows!'; 738 $currentConfig = $_SESSION['poche_user']->config;
1070 // alternative to /dev/urandom for Windows 739 $currentConfig['token'] = $token;
1071 $token = substr(base64_encode(uniqid(mt_rand(), true)), 0, 20); 740 $_SESSION['poche_user']->setConfig($currentConfig);
1072 } else { 741 Tools::redirect();
1073 $token = substr(base64_encode(file_get_contents('/dev/urandom', false, null, 0, 20)), 0, 15);
1074 }
1075 }
1076 else {
1077 $token = substr(base64_encode(uniqid(mt_rand(), true)), 0, 20);
1078 }
1079
1080 $token = str_replace('+', '', $token);
1081 $this->store->updateUserConfig($this->user->getId(), 'token', $token);
1082 $currentConfig = $_SESSION['poche_user']->config;
1083 $currentConfig['token'] = $token;
1084 $_SESSION['poche_user']->setConfig($currentConfig);
1085 Tools::redirect();
1086 } 742 }
1087 743
744 /**
745 * Generate RSS feeds for current user
746 *
747 * @param $token
748 * @param $user_id
749 * @param $tag_id
750 * @param string $type
751 */
1088 public function generateFeeds($token, $user_id, $tag_id, $type = 'home') 752 public function generateFeeds($token, $user_id, $tag_id, $type = 'home')
1089 { 753 {
1090 $allowed_types = array('home', 'fav', 'archive', 'tag'); 754 $allowed_types = array('home', 'fav', 'archive', 'tag');
@@ -1094,10 +758,9 @@ class Poche
1094 die(sprintf(_('User with this id (%d) does not exist.'), $user_id)); 758 die(sprintf(_('User with this id (%d) does not exist.'), $user_id));
1095 } 759 }
1096 760
1097 if (!in_array($type, $allowed_types) || $token != $config['token']) { 761 if (!in_array($type, $allowed_types) || !isset($config['token']) || $token != $config['token']) {
1098 die(_('Uh, there is a problem while generating feeds.')); 762 die(_('Uh, there is a problem while generating feed. Wrong token used?'));
1099 } 763 }
1100 // Check the token
1101 764
1102 $feed = new FeedWriter(RSS2); 765 $feed = new FeedWriter(RSS2);
1103 $feed->setTitle('wallabag — ' . $type . ' feed'); 766 $feed->setTitle('wallabag — ' . $type . ' feed');
@@ -1129,147 +792,22 @@ class Poche
1129 exit; 792 exit;
1130 } 793 }
1131 794
1132 public function emptyCache() {
1133 $files = new RecursiveIteratorIterator(
1134 new RecursiveDirectoryIterator(CACHE, RecursiveDirectoryIterator::SKIP_DOTS),
1135 RecursiveIteratorIterator::CHILD_FIRST
1136 );
1137
1138 foreach ($files as $fileinfo) {
1139 $todo = ($fileinfo->isDir() ? 'rmdir' : 'unlink');
1140 $todo($fileinfo->getRealPath());
1141 }
1142 795
1143 Tools::logm('empty cache');
1144 $this->messages->add('s', _('Cache deleted.'));
1145 Tools::redirect();
1146 }
1147 796
1148 /** 797 /**
1149 * return new purifier object with actual config 798 * Returns new purifier object with actual config
1150 */ 799 */
1151 protected function getPurifier() { 800 private function _getPurifier()
1152 $config = HTMLPurifier_Config::createDefault(); 801 {
1153 $config->set('Cache.SerializerPath', CACHE); 802 $config = HTMLPurifier_Config::createDefault();
1154 $config->set('HTML.SafeIframe', true); 803 $config->set('Cache.SerializerPath', CACHE);
804 $config->set('HTML.SafeIframe', true);
1155 805
1156 //allow YouTube, Vimeo and dailymotion videos 806 //allow YouTube, Vimeo and dailymotion videos
1157 $config->set('URI.SafeIframeRegexp', '%^(https?:)?//(www\.youtube(?:-nocookie)?\.com/embed/|player\.vimeo\.com/video/|www\.dailymotion\.com/embed/video/)%'); 807 $config->set('URI.SafeIframeRegexp', '%^(https?:)?//(www\.youtube(?:-nocookie)?\.com/embed/|player\.vimeo\.com/video/|www\.dailymotion\.com/embed/video/)%');
1158 808
1159 return new HTMLPurifier($config); 809 return new HTMLPurifier($config);
1160 } 810 }
1161 811
1162 /**
1163 * handle epub
1164 */
1165 public function createEpub() {
1166
1167 switch ($_GET['method']) {
1168 case 'id':
1169 $entryID = filter_var($_GET['id'],FILTER_SANITIZE_NUMBER_INT);
1170 $entry = $this->store->retrieveOneById($entryID, $this->user->getId());
1171 $entries = array($entry);
1172 $bookTitle = $entry['title'];
1173 $bookFileName = substr($bookTitle, 0, 200);
1174 break;
1175 case 'all':
1176 $entries = $this->store->retrieveAll($this->user->getId());
1177 $bookTitle = sprintf(_('All my articles on '), date(_('d.m.y'))); #translatable because each country has it's own date format system
1178 $bookFileName = _('Allarticles') . date(_('dmY'));
1179 break;
1180 case 'tag':
1181 $tag = filter_var($_GET['tag'],FILTER_SANITIZE_STRING);
1182 $tags_id = $this->store->retrieveAllTags($this->user->getId(),$tag);
1183 $tag_id = $tags_id[0]["id"]; // we take the first result, which is supposed to match perfectly. There must be a workaround.
1184 $entries = $this->store->retrieveEntriesByTag($tag_id,$this->user->getId());
1185 $bookTitle = sprintf(_('Articles tagged %s'),$tag);
1186 $bookFileName = substr(sprintf(_('Tag %s'),$tag), 0, 200);
1187 break;
1188 case 'category':
1189 $category = filter_var($_GET['category'],FILTER_SANITIZE_STRING);
1190 $entries = $this->store->getEntriesByView($category,$this->user->getId());
1191 $bookTitle = sprintf(_('All articles in category %s'), $category);
1192 $bookFileName = substr(sprintf(_('Category %s'),$category), 0, 200);
1193 break;
1194 case 'search':
1195 $search = filter_var($_GET['search'],FILTER_SANITIZE_STRING);
1196 $entries = $this->store->search($search,$this->user->getId());
1197 $bookTitle = sprintf(_('All articles for search %s'), $search);
1198 $bookFileName = substr(sprintf(_('Search %s'), $search), 0, 200);
1199 break;
1200 case 'default':
1201 die(_('Uh, there is a problem while generating epub.'));
1202
1203 }
1204
1205 $content_start =
1206 "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
1207 . "<html xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:epub=\"http://www.idpf.org/2007/ops\">\n"
1208 . "<head>"
1209 . "<meta http-equiv=\"Default-Style\" content=\"text/html; charset=utf-8\" />\n"
1210 . "<title>wallabag articles book</title>\n"
1211 . "</head>\n"
1212 . "<body>\n";
1213
1214 $bookEnd = "</body>\n</html>\n";
1215 812
1216 $log = new Logger("wallabag", TRUE);
1217 $fileDir = CACHE;
1218
1219 $book = new EPub(EPub::BOOK_VERSION_EPUB3, DEBUG_POCHE);
1220 $log->logLine("new EPub()");
1221 $log->logLine("EPub class version: " . EPub::VERSION);
1222 $log->logLine("EPub Req. Zip version: " . EPub::REQ_ZIP_VERSION);
1223 $log->logLine("Zip version: " . Zip::VERSION);
1224 $log->logLine("getCurrentServerURL: " . $book->getCurrentServerURL());
1225 $log->logLine("getCurrentPageURL..: " . $book->getCurrentPageURL());
1226
1227 $book->setTitle(_('wallabag\'s articles'));
1228 $book->setIdentifier("http://$_SERVER[HTTP_HOST]", EPub::IDENTIFIER_URI); // Could also be the ISBN number, prefered for published books, or a UUID.
1229 //$book->setLanguage("en"); // Not needed, but included for the example, Language is mandatory, but EPub defaults to "en". Use RFC3066 Language codes, such as "en", "da", "fr" etc.
1230 $book->setDescription(_("Some articles saved on my wallabag"));
1231 $book->setAuthor("wallabag","wallabag");
1232 $book->setPublisher("wallabag","wallabag"); // I hope this is a non existant address :)
1233 $book->setDate(time()); // Strictly not needed as the book date defaults to time().
1234 //$book->setRights("Copyright and licence information specific for the book."); // As this is generated, this _could_ contain the name or licence information of the user who purchased the book, if needed. If this is used that way, the identifier must also be made unique for the book.
1235 $book->setSourceURL("http://$_SERVER[HTTP_HOST]");
1236
1237 $book->addDublinCoreMetadata(DublinCore::CONTRIBUTOR, "PHP");
1238 $book->addDublinCoreMetadata(DublinCore::CONTRIBUTOR, "wallabag");
1239
1240 $cssData = "body {\n margin-left: .5em;\n margin-right: .5em;\n text-align: justify;\n}\n\np {\n font-family: serif;\n font-size: 10pt;\n text-align: justify;\n text-indent: 1em;\n margin-top: 0px;\n margin-bottom: 1ex;\n}\n\nh1, h2 {\n font-family: sans-serif;\n font-style: italic;\n text-align: center;\n background-color: #6b879c;\n color: white;\n width: 100%;\n}\n\nh1 {\n margin-bottom: 2px;\n}\n\nh2 {\n margin-top: -2px;\n margin-bottom: 2px;\n}\n";
1241
1242 $log->logLine("Add Cover");
1243
1244 $fullTitle = "<h1> " . $bookTitle . "</h1>\n";
1245
1246 $book->setCoverImage("Cover.png", file_get_contents("themes/baggy/img/apple-touch-icon-152.png"), "image/png", $fullTitle);
1247
1248 $cover = $content_start . '<div style="text-align:center;"><p>' . _('Produced by wallabag with PHPePub') . '</p><p>'. _('Please open <a href="https://github.com/wallabag/wallabag/issues" >an issue</a> if you have trouble with the display of this E-Book on your device.') . '</p></div>' . $bookEnd;
1249
1250 //$book->addChapter("Table of Contents", "TOC.xhtml", NULL, false, EPub::EXTERNAL_REF_IGNORE);
1251 $book->addChapter("Notices", "Cover2.html", $cover);
1252
1253 $book->buildTOC();
1254
1255 foreach ($entries as $entry) { //set tags as subjects
1256 $tags = $this->store->retrieveTagsByEntry($entry['id']);
1257 foreach ($tags as $tag) {
1258 $book->setSubject($tag['value']);
1259 }
1260
1261 $log->logLine("Set up parameters");
1262
1263 $chapter = $content_start . $entry['content'] . $bookEnd;
1264 $book->addChapter($entry['title'], htmlspecialchars($entry['title']) . ".html", $chapter, true, EPub::EXTERNAL_REF_ADD);
1265 $log->logLine("Added chapter " . $entry['title']);
1266 }
1267
1268 if (DEBUG_POCHE) {
1269 $epuplog = $book->getLog();
1270 $book->addChapter("Log", "Log.html", $content_start . $log->getLog() . "\n</pre>" . $bookEnd); // log generation
1271 }
1272 $book->finalize();
1273 $zipData = $book->sendBook($bookFileName);
1274 }
1275} 813}
diff --git a/inc/poche/Routing.class.php b/inc/poche/Routing.class.php
new file mode 100755
index 00000000..004bd45a
--- /dev/null
+++ b/inc/poche/Routing.class.php
@@ -0,0 +1,153 @@
1<?php
2/**
3 * wallabag, self hostable application allowing you to not miss any content anymore
4 *
5 * @category wallabag
6 * @author Nicolas Lœuillet <nicolas@loeuillet.org>
7 * @copyright 2013
8 * @license http://opensource.org/licenses/MIT see COPYING file
9 */
10
class Routing
{
    /** @var Poche application object (gives access to store, tpl, messages, language) */
    protected $wallabag;
    /** @var string HTTP referer of the current request, '' when absent */
    protected $referer;
    /** @var string requested view, as returned by Tools::checkVar() */
    protected $view;
    /** @var string requested action, as returned by Tools::checkVar() */
    protected $action;
    /** @var string requested entry id, as returned by Tools::checkVar() */
    protected $id;
    /** @var Url URL wrapper built from $_GET['url'] */
    protected $url;
    /** @var mixed template file to render (set by _defineTplInformation()) */
    protected $file;
    /** @var array variables sent to every template */
    protected $defaultVars = array();
    /** @var array variables sent to the template selected for this request */
    protected $vars = array();

    /**
     * @param Poche $wallabag application object
     */
    public function __construct(Poche $wallabag)
    {
        $this->wallabag = $wallabag;
        $this->_init();
    }

    /**
     * Reads the request (GET/REQUEST vars and referer) into the routing state.
     */
    private function _init()
    {
        # Parse GET & REFERER vars
        $this->referer = empty($_SERVER['HTTP_REFERER']) ? '' : $_SERVER['HTTP_REFERER'];
        $this->view = Tools::checkVar('view', 'home');
        $this->action = Tools::checkVar('action');
        $this->id = Tools::checkVar('id');
        $_SESSION['sort'] = Tools::checkVar('sort', 'id');
        $this->url = new Url(isset($_GET['url']) ? $_GET['url'] : '');
    }

    /**
     * Handles the current request: launches the requested action, selects
     * the template and its variables, then renders it.
     */
    public function run()
    {
        # vars to _always_ send to templates
        $this->defaultVars = array(
            'referer' => $this->referer,
            'view' => $this->view,
            'poche_url' => Tools::getPocheUrl(),
            'title' => _('wallabag, a read it later open source system'),
            'token' => \Session::getToken(),
            'theme' => $this->wallabag->tpl->getTheme()
        );

        $this->_launchAction();
        $this->_defineTplInformation();

        # because messages can be added in $poche->action(), we have to add this entry now (we cannot add it before)
        $this->vars = array_merge($this->vars, array('messages' => $this->wallabag->messages->display('all', FALSE)));

        $this->_render($this->file, $this->vars);
    }

    /**
     * Chooses the template file and template variables depending on the
     * authentication state, and stores them in $this->file / $this->vars.
     */
    private function _defineTplInformation()
    {
        $tplFile = array();
        $tplVars = array();

        if (\Session::isLogged()) {
            $this->wallabag->action($this->action, $this->url, $this->id);
            $tplFile = Tools::getTplFile($this->view);
            $tplVars = array_merge($this->vars, $this->wallabag->displayView($this->view, $this->id));
        } elseif (isset($_SERVER['PHP_AUTH_USER']) || isset($_SERVER['REMOTE_USER'])) {
            # HTTP authentication: PHP_AUTH_USER takes precedence over REMOTE_USER
            $httpUser = isset($_SERVER['PHP_AUTH_USER']) ? $_SERVER['PHP_AUTH_USER'] : $_SERVER['REMOTE_USER'];

            if ($this->wallabag->store->userExists($httpUser)) {
                $this->wallabag->login($this->referer);
            } else {
                $this->wallabag->messages->add('e', _('login failed: user doesn\'t exist'));
                Tools::logm('user doesn\'t exist');
                $tplFile = Tools::getTplFile('login');
                $tplVars['http_auth'] = 1;
            }
        } else {
            $tplFile = Tools::getTplFile('login');
            $tplVars['http_auth'] = 0;
            \Session::logout();
        }

        $this->file = $tplFile;
        $this->vars = array_merge($this->defaultVars, $tplVars);
    }

    /**
     * Returns a request parameter, or null when it was not sent.
     *
     * Missing parameters are passed on as null exactly as before, but without
     * raising an "undefined index" notice.
     *
     * @param array  $source usually $_GET or $_POST
     * @param string $key    parameter name
     * @return mixed|null
     */
    private static function _param(array $source, $key)
    {
        return isset($source[$key]) ? $source[$key] : null;
    }

    /**
     * Dispatches the action requested through the query string.
     *
     * Login and feed generation are available to everybody; every other
     * action is only launched for a logged in user.
     */
    private function _launchAction()
    {
        if (isset($_GET['login'])) {
            // hello to you
            $this->wallabag->login($this->referer);
        } elseif (isset($_GET['feed']) && isset($_GET['user_id'])) {
            $tag_id = (isset($_GET['tag_id']) ? intval($_GET['tag_id']) : 0);
            $this->wallabag->generateFeeds(
                self::_param($_GET, 'token'),
                filter_var($_GET['user_id'], FILTER_SANITIZE_NUMBER_INT),
                $tag_id,
                self::_param($_GET, 'type')
            );
        }

        //allowed ONLY to logged in user
        if (\Session::isLogged() !== true) {
            return;
        }

        if (isset($_GET['logout'])) {
            // see you soon !
            $this->wallabag->logout();
        } elseif (isset($_GET['config'])) {
            // update password
            $this->wallabag->updatePassword(self::_param($_POST, 'password'), self::_param($_POST, 'password_repeat'));
        } elseif (isset($_GET['newuser'])) {
            $this->wallabag->createNewUser(self::_param($_POST, 'newusername'), self::_param($_POST, 'password4newuser'));
        } elseif (isset($_GET['deluser'])) {
            $this->wallabag->deleteUser(self::_param($_POST, 'password4deletinguser'));
        } elseif (isset($_GET['epub'])) {
            $epub = new WallabagEpub(
                $this->wallabag,
                self::_param($_GET, 'method'),
                self::_param($_GET, 'id'),
                self::_param($_GET, 'value')
            );
            $epub->run();
        } elseif (isset($_GET['import'])) {
            $import = $this->wallabag->import();
            // Keep the import result in $this->vars so that _defineTplInformation()
            // merges it into the template variables (it used to be stored in an
            // unused local variable and was therefore lost).
            if (is_array($import)) {
                $this->vars = array_merge($this->vars, $import);
            }
        } elseif (isset($_GET['download'])) {
            Tools::downloadDb();
        } elseif (isset($_GET['empty-cache'])) {
            Tools::emptyCache();
        } elseif (isset($_GET['export'])) {
            $this->wallabag->export();
        } elseif (isset($_GET['updatetheme'])) {
            $this->wallabag->tpl->updateTheme(self::_param($_POST, 'theme'));
        } elseif (isset($_GET['updatelanguage'])) {
            $this->wallabag->language->updateLanguage(self::_param($_POST, 'language'));
        } elseif (isset($_GET['uploadfile'])) {
            $this->wallabag->uploadFile();
        } elseif (isset($_GET['feed']) && isset($_GET['action']) && $_GET['action'] == 'generate') {
            $this->wallabag->updateToken();
        } elseif (isset($_GET['plainurl']) && !empty($_GET['plainurl'])) {
            // Url is built from the base64 encoded form of the address
            $plainUrl = new Url(base64_encode($_GET['plainurl']));
            $this->wallabag->action('add', $plainUrl);
        }
    }

    /**
     * Renders a template and sends the result to the output.
     *
     * @param mixed $file template file
     * @param array $vars template variables
     */
    public function _render($file, $vars)
    {
        echo $this->wallabag->tpl->render($file, $vars);
    }
}
diff --git a/inc/poche/Template.class.php b/inc/poche/Template.class.php
new file mode 100644
index 00000000..b686f2ec
--- /dev/null
+++ b/inc/poche/Template.class.php
@@ -0,0 +1,235 @@
1<?php
2/**
3 * wallabag, self hostable application allowing you to not miss any content anymore
4 *
5 * @category wallabag
6 * @author Nicolas Lœuillet <nicolas@loeuillet.org>
7 * @copyright 2013
8 * @license http://opensource.org/licenses/MIT see COPYING file
9 */
10
class Template extends Twig_Environment
{
    /** @var Poche application object */
    protected $wallabag;

    /** @var bool set to FALSE as soon as the theme or one of its requirements is missing */
    private $canRenderTemplates = TRUE;
    /** @var string directory name of the theme in use */
    private $currentTheme = '';

    /**
     * Selects the theme of the current user (DEFAULT_THEME when there is no
     * user in session or no theme configured) and, when the theme is
     * correctly installed, initializes the Twig environment.
     *
     * @param Poche $wallabag application object
     */
    public function __construct(Poche $wallabag)
    {
        $this->wallabag = $wallabag;

        // Set up theme
        $pocheUser = Session::getParam('poche_user');

        $themeDirectory = (is_null($pocheUser) ? DEFAULT_THEME : $pocheUser->getConfigValue('theme'));

        if ($themeDirectory === false) {
            $themeDirectory = DEFAULT_THEME;
        }

        $this->currentTheme = $themeDirectory;

        // Twig is only set up when no installation error was found
        if ($this->_themeIsInstalled() === array()) {
            $this->_init();
        }
    }

    /**
     * Checks that the selected theme and the themes it requires are installed.
     *
     * The errors found are also prepended to the 'errors' session parameter.
     *
     * @return array list of error messages, empty when everything is installed
     */
    private function _themeIsInstalled()
    {
        $errors = array();

        // Twig is an absolute requirement for wallabag to function.
        // Abort immediately if the Composer installer hasn't been run yet
        if (!$this->canRenderTemplates) {
            $errors[] = 'Twig does not seem to be installed. Please initialize the Composer installation to automatically fetch dependencies. You can also download <a href="http://wllbg.org/vendor">vendor.zip</a> and extract it in your wallabag folder.';
        }

        // Check if the selected theme and its requirements are present
        $theme = $this->getTheme();
        if ($theme != '' && !is_dir(THEME . '/' . $theme)) {
            $errors[] = 'The currently selected theme (' . $theme . ') does not seem to be properly installed (Missing directory: ' . THEME . '/' . $theme . ')';
            $this->canRenderTemplates = FALSE;
        }

        // getThemeInfo() returns false for a missing theme; isset() copes with that
        $themeInfo = $this->getThemeInfo($theme);
        if (isset($themeInfo['requirements']) && is_array($themeInfo['requirements'])) {
            foreach ($themeInfo['requirements'] as $requiredTheme) {
                if (! is_dir(THEME . '/' . $requiredTheme)) {
                    $errors[] = 'The required "' . $requiredTheme . '" theme is missing for the current theme (' . $theme . ')';
                    $this->canRenderTemplates = FALSE;
                }
            }
        }

        $sessionErrors = Session::getParam('errors');
        $currentErrors = (is_null($sessionErrors) ? array() : $sessionErrors);
        Session::setParam('errors', array_merge($errors, $currentErrors));

        return $errors;
    }

    /**
     * Initialization for templates: builds the loader chain (current theme
     * first, then its required themes), constructs the Twig environment and
     * registers the i18n extension and the custom filters.
     */
    private function _init()
    {
        $loaderChain = new Twig_Loader_Chain();
        $theme = $this->getTheme();

        // add the current theme as first to the loader chain
        // so Twig will look there first for overridden template files
        try {
            $loaderChain->addLoader(new Twig_Loader_Filesystem(THEME . '/' . $theme));
        } catch (Twig_Error_Loader $e) {
            # @todo isInstalled() should catch this, inject Twig later
            die('The currently selected theme (' . $theme . ') does not seem to be properly installed (' . THEME . '/' . $theme .' is missing)');
        }

        // add all required themes to the loader chain
        $themeInfo = $this->getThemeInfo($theme);
        if (isset($themeInfo['requirements']) && is_array($themeInfo['requirements'])) {
            foreach ($themeInfo['requirements'] as $requiredTheme) {
                try {
                    $loaderChain->addLoader(new Twig_Loader_Filesystem(THEME . '/' . $requiredTheme));
                } catch (Twig_Error_Loader $e) {
                    # @todo isInstalled() should catch this, inject Twig later
                    die('The required "' . $requiredTheme . '" theme is missing for the current theme (' . $theme . ')');
                }
            }
        }

        // the template cache is only enabled outside of debug mode
        $twigParams = DEBUG_POCHE ? array() : array('cache' => CACHE);

        parent::__construct($loaderChain, $twigParams);

        $this->addExtension(new Twig_Extensions_Extension_I18n());

        # filter to display domain name of an url
        $this->addFilter(new Twig_SimpleFilter('getDomain', 'Tools::getDomain'));

        # filter for reading time
        $this->addFilter(new Twig_SimpleFilter('getReadingTime', 'Tools::getReadingTime'));
    }

    /**
     * Returns current theme
     *
     * @return string
     */
    public function getTheme()
    {
        return $this->currentTheme;
    }

    /**
     * Provides theme information by parsing theme.ini file if present in the theme's root directory.
     * In all cases, the following data will be returned:
     * - name: theme's name, or key if the theme is unnamed,
     * - current: boolean informing if the theme is the current user theme.
     *
     * @param string $theme Theme key (directory name)
     * @return array|boolean Theme information, or false if the theme doesn't exist.
     */
    public function getThemeInfo($theme)
    {
        if (!is_dir(THEME . '/' . $theme)) {
            return false;
        }

        $themeIniFile = THEME . '/' . $theme . '/theme.ini';
        $themeInfo = array();

        if (is_file($themeIniFile) && is_readable($themeIniFile)) {
            $themeInfo = parse_ini_file($themeIniFile);
        }

        // parse_ini_file() returns false on a malformed file
        if ($themeInfo === false) {
            $themeInfo = array();
        }

        if (!isset($themeInfo['name'])) {
            $themeInfo['name'] = $theme;
        }

        $themeInfo['current'] = ($theme === $this->getTheme());

        return $themeInfo;
    }

    /**
     * Returns an array with installed themes, indexed and sorted by theme key
     * (directory name).
     *
     * @return array empty when the themes directory cannot be opened
     */
    public function getInstalledThemes()
    {
        $themes = array();
        $handle = opendir(THEME);

        // opendir() returns false on failure; readdir(false) must not be called
        if ($handle === false) {
            return $themes;
        }

        while (($theme = readdir($handle)) !== false) {
            # Themes are stored in a directory, so all directory names are themes
            # @todo move theme installation data to database
            if (!is_dir(THEME . '/' . $theme) || in_array($theme, array('.', '..'))) {
                continue;
            }

            $themes[$theme] = $this->getThemeInfo($theme);
        }

        // release the directory handle, it used to be leaked
        closedir($handle);

        ksort($themes);

        return $themes;
    }

    /**
     * Update theme for the current user
     *
     * Redirects to the configuration page in every case.
     *
     * @param $newTheme theme key (directory name) chosen by the user
     */
    public function updateTheme($newTheme)
    {
        # we are not going to change it to the current theme...
        if ($newTheme === $this->getTheme()) {
            $this->wallabag->messages->add('w', _('still using the "' . $this->getTheme() . '" theme!'));
            Tools::redirect('?view=config');
        }

        // Exact key lookup: a loose comparison would accept numeric look-alikes
        // ("010" == "10") and store a theme name that is not an installed directory.
        $themes = $this->getInstalledThemes();
        $actualTheme = is_string($newTheme) && array_key_exists($newTheme, $themes);

        if (!$actualTheme) {
            $this->wallabag->messages->add('e', _('that theme does not seem to be installed'));
            Tools::redirect('?view=config');
        }

        $this->wallabag->store->updateUserConfig($this->wallabag->user->getId(), 'theme', $newTheme);
        $this->wallabag->messages->add('s', _('you have changed your theme preferences'));

        // keep the user object stored in session in sync with the database
        $currentConfig = $_SESSION['poche_user']->config;
        $currentConfig['theme'] = $newTheme;

        $_SESSION['poche_user']->setConfig($currentConfig);

        Tools::emptyCache();
        Tools::redirect('?view=config');
    }
}
diff --git a/inc/poche/Tools.class.php b/inc/poche/Tools.class.php
index cc01f403..63137d76 100755
--- a/inc/poche/Tools.class.php
+++ b/inc/poche/Tools.class.php
@@ -5,19 +5,18 @@
5 * @category wallabag 5 * @category wallabag
6 * @author Nicolas Lœuillet <nicolas@loeuillet.org> 6 * @author Nicolas Lœuillet <nicolas@loeuillet.org>
7 * @copyright 2013 7 * @copyright 2013
8 * @license http://www.wtfpl.net/ see COPYING file 8 * @license http://opensource.org/licenses/MIT see COPYING file
9 */ 9 */
10 10
11class Tools 11final class Tools
12{ 12{
13 /**
14 * Initialize PHP environment
15 */
13 public static function initPhp() 16 public static function initPhp()
14 { 17 {
15 define('START_TIME', microtime(true)); 18 define('START_TIME', microtime(true));
16 19
17 if (phpversion() < 5) {
18 die(_('Oops, it seems you don\'t have PHP 5.'));
19 }
20
21 function stripslashesDeep($value) { 20 function stripslashesDeep($value) {
22 return is_array($value) 21 return is_array($value)
23 ? array_map('stripslashesDeep', $value) 22 ? array_map('stripslashesDeep', $value)
@@ -34,6 +33,11 @@ class Tools
34 register_shutdown_function('ob_end_flush'); 33 register_shutdown_function('ob_end_flush');
35 } 34 }
36 35
36 /**
37 * Get wallabag instance URL
38 *
39 * @return string
40 */
37 public static function getPocheUrl() 41 public static function getPocheUrl()
38 { 42 {
39 $https = (!empty($_SERVER['HTTPS']) 43 $https = (!empty($_SERVER['HTTPS'])
@@ -67,6 +71,11 @@ class Tools
67 . $host . $serverport . $scriptname; 71 . $host . $serverport . $scriptname;
68 } 72 }
69 73
74 /**
75 * Redirects to a URL
76 *
77 * @param string $url
78 */
70 public static function redirect($url = '') 79 public static function redirect($url = '')
71 { 80 {
72 if ($url === '') { 81 if ($url === '') {
@@ -87,11 +96,18 @@ class Tools
87 $url = $ref; 96 $url = $ref;
88 } 97 }
89 } 98 }
99
90 self::logm('redirect to ' . $url); 100 self::logm('redirect to ' . $url);
91 header('Location: '.$url); 101 header('Location: '.$url);
92 exit(); 102 exit();
93 } 103 }
94 104
105 /**
106 * Returns name of the template file to display
107 *
108 * @param $view
109 * @return string
110 */
95 public static function getTplFile($view) 111 public static function getTplFile($view)
96 { 112 {
97 $views = array( 113 $views = array(
@@ -99,13 +115,15 @@ class Tools
99 'edit-tags', 'view', 'login', 'error' 115 'edit-tags', 'view', 'login', 'error'
100 ); 116 );
101 117
102 if (in_array($view, $views)) { 118 return (in_array($view, $views) ? $view . '.twig' : 'home.twig');
103 return $view . '.twig';
104 }
105
106 return 'home.twig';
107 } 119 }
108 120
121 /**
122 * Download a file (typically, for downloading pictures on web server)
123 *
124 * @param $url
125 * @return bool|mixed|string
126 */
109 public static function getFile($url) 127 public static function getFile($url)
110 { 128 {
111 $timeout = 15; 129 $timeout = 15;
@@ -186,6 +204,11 @@ class Tools
186 } 204 }
187 } 205 }
188 206
207 /**
208 * Headers for JSON export
209 *
210 * @param $data
211 */
189 public static function renderJson($data) 212 public static function renderJson($data)
190 { 213 {
191 header('Cache-Control: no-cache, must-revalidate'); 214 header('Cache-Control: no-cache, must-revalidate');
@@ -195,6 +218,11 @@ class Tools
195 exit(); 218 exit();
196 } 219 }
197 220
221 /**
222 * Create new line in log file
223 *
224 * @param $message
225 */
198 public static function logm($message) 226 public static function logm($message)
199 { 227 {
200 if (DEBUG_POCHE && php_sapi_name() != 'cli') { 228 if (DEBUG_POCHE && php_sapi_name() != 'cli') {
@@ -204,36 +232,57 @@ class Tools
204 } 232 }
205 } 233 }
206 234
235 /**
236 * Encode a URL by using a salt
237 *
238 * @param $string
239 * @return string
240 */
207 public static function encodeString($string) 241 public static function encodeString($string)
208 { 242 {
209 return sha1($string . SALT); 243 return sha1($string . SALT);
210 } 244 }
211 245
246 /**
247 * Cleans a variable
248 *
249 * @param $var
250 * @param string $default
251 * @return string
252 */
212 public static function checkVar($var, $default = '') 253 public static function checkVar($var, $default = '')
213 { 254 {
214 return ((isset ($_REQUEST["$var"])) ? htmlentities($_REQUEST["$var"]) : $default); 255 return ((isset($_REQUEST["$var"])) ? htmlentities($_REQUEST["$var"]) : $default);
215 } 256 }
216 257
258 /**
259 * Returns the domain name for a URL
260 *
261 * @param $url
262 * @return string
263 */
217 public static function getDomain($url) 264 public static function getDomain($url)
218 { 265 {
219 return parse_url($url, PHP_URL_HOST); 266 return parse_url($url, PHP_URL_HOST);
220 } 267 }
221 268
222 public static function getReadingTime($text) { 269 /**
223 $word = str_word_count(strip_tags($text)); 270 * For a given text, we calculate reading time for an article
224 $minutes = floor($word / 200); 271 *
225 $seconds = floor($word % 200 / (200 / 60)); 272 * @param $text
226 $time = array('minutes' => $minutes, 'seconds' => $seconds); 273 * @return float
227 274 */
228 return $minutes; 275 public static function getReadingTime($text)
229 } 276 {
230 277 return floor(str_word_count(strip_tags($text)) / 200);
231 public static function getDocLanguage($userlanguage) {
232 $lang = explode('.', $userlanguage);
233 return str_replace('_', '-', $lang[0]);
234 } 278 }
235 279
236 public static function status($status_code) 280 /**
281 * Returns the correct header for a status code
282 *
283 * @param $status_code
284 */
285 private static function _status($status_code)
237 { 286 {
238 if (strpos(php_sapi_name(), 'apache') !== false) { 287 if (strpos(php_sapi_name(), 'apache') !== false) {
239 288
@@ -245,9 +294,13 @@ class Tools
245 } 294 }
246 } 295 }
247 296
248 public static function download_db() { 297 /**
298 * Download the sqlite database
299 */
300 public static function downloadDb()
301 {
249 header('Content-Disposition: attachment; filename="poche.sqlite.gz"'); 302 header('Content-Disposition: attachment; filename="poche.sqlite.gz"');
250 self::status(200); 303 self::_status(200);
251 304
252 header('Content-Transfer-Encoding: binary'); 305 header('Content-Transfer-Encoding: binary');
253 header('Content-Type: application/octet-stream'); 306 header('Content-Type: application/octet-stream');
@@ -256,18 +309,24 @@ class Tools
256 exit; 309 exit;
257 } 310 }
258 311
312 /**
313 * Get the content for a given URL (by a call to FullTextFeed)
314 *
315 * @param Url $url
316 * @return mixed
317 */
259 public static function getPageContent(Url $url) 318 public static function getPageContent(Url $url)
260 { 319 {
261 // Saving and clearing context 320 // Saving and clearing context
262 $REAL = array(); 321 $REAL = array();
263 foreach( $GLOBALS as $key => $value ) { 322 foreach( $GLOBALS as $key => $value ) {
264 if( $key != 'GLOBALS' && $key != '_SESSION' && $key != 'HTTP_SESSION_VARS' ) { 323 if( $key != 'GLOBALS' && $key != '_SESSION' && $key != 'HTTP_SESSION_VARS' ) {
265 $GLOBALS[$key] = array(); 324 $GLOBALS[$key] = array();
266 $REAL[$key] = $value; 325 $REAL[$key] = $value;
267 } 326 }
268 } 327 }
269 // Saving and clearing session 328 // Saving and clearing session
270 if ( isset($_SESSION) ) { 329 if (isset($_SESSION)) {
271 $REAL_SESSION = array(); 330 $REAL_SESSION = array();
272 foreach( $_SESSION as $key => $value ) { 331 foreach( $_SESSION as $key => $value ) {
273 $REAL_SESSION[$key] = $value; 332 $REAL_SESSION[$key] = $value;
@@ -279,12 +338,12 @@ class Tools
279 $scope = function() { 338 $scope = function() {
280 extract( func_get_arg(1) ); 339 extract( func_get_arg(1) );
281 $_GET = $_REQUEST = array( 340 $_GET = $_REQUEST = array(
282 "url" => $url->getUrl(), 341 "url" => $url->getUrl(),
283 "max" => 5, 342 "max" => 5,
284 "links" => "preserve", 343 "links" => "preserve",
285 "exc" => "", 344 "exc" => "",
286 "format" => "json", 345 "format" => "json",
287 "submit" => "Create Feed" 346 "submit" => "Create Feed"
288 ); 347 );
289 ob_start(); 348 ob_start();
290 require func_get_arg(0); 349 require func_get_arg(0);
@@ -292,23 +351,26 @@ class Tools
292 ob_end_clean(); 351 ob_end_clean();
293 return $json; 352 return $json;
294 }; 353 };
295 $json = $scope( "inc/3rdparty/makefulltextfeed.php", array("url" => $url) ); 354
355 $json = $scope("inc/3rdparty/makefulltextfeed.php", array("url" => $url));
296 356
297 // Clearing and restoring context 357 // Clearing and restoring context
298 foreach( $GLOBALS as $key => $value ) { 358 foreach ($GLOBALS as $key => $value) {
299 if( $key != "GLOBALS" && $key != "_SESSION" ) { 359 if($key != "GLOBALS" && $key != "_SESSION" ) {
300 unset($GLOBALS[$key]); 360 unset($GLOBALS[$key]);
301 } 361 }
302 } 362 }
303 foreach( $REAL as $key => $value ) { 363 foreach ($REAL as $key => $value) {
304 $GLOBALS[$key] = $value; 364 $GLOBALS[$key] = $value;
305 } 365 }
366
306 // Clearing and restoring session 367 // Clearing and restoring session
307 if ( isset($REAL_SESSION) ) { 368 if (isset($REAL_SESSION)) {
308 foreach( $_SESSION as $key => $value ) { 369 foreach($_SESSION as $key => $value) {
309 unset($_SESSION[$key]); 370 unset($_SESSION[$key]);
310 } 371 }
311 foreach( $REAL_SESSION as $key => $value ) { 372
373 foreach($REAL_SESSION as $key => $value) {
312 $_SESSION[$key] = $value; 374 $_SESSION[$key] = $value;
313 } 375 }
314 } 376 }
@@ -318,11 +380,48 @@ class Tools
318 380
319 /** 381 /**
320 * Returns whether we handle an AJAX (XMLHttpRequest) request. 382 * Returns whether we handle an AJAX (XMLHttpRequest) request.
383 *
321 * @return boolean whether we handle an AJAX (XMLHttpRequest) request. 384 * @return boolean whether we handle an AJAX (XMLHttpRequest) request.
322 */ 385 */
323 public static function isAjaxRequest() 386 public static function isAjaxRequest()
324 { 387 {
325 return isset($_SERVER['HTTP_X_REQUESTED_WITH']) && $_SERVER['HTTP_X_REQUESTED_WITH']==='XMLHttpRequest'; 388 return isset($_SERVER['HTTP_X_REQUESTED_WITH']) && $_SERVER['HTTP_X_REQUESTED_WITH']==='XMLHttpRequest';
389 }
390
/**
 * Empty cache folder
 *
 * Removes every file and sub-directory found in CACHE (children first),
 * then logs the operation and redirects.
 */
public static function emptyCache()
{
    // RecursiveDirectoryIterator throws an UnexpectedValueException when the
    // directory cannot be opened, so only iterate over an existing folder
    if (is_dir(CACHE)) {
        $files = new RecursiveIteratorIterator(
            new RecursiveDirectoryIterator(CACHE, RecursiveDirectoryIterator::SKIP_DOTS),
            RecursiveIteratorIterator::CHILD_FIRST
        );

        foreach ($files as $fileInfo) {
            $todo = ($fileInfo->isDir() ? 'rmdir' : 'unlink');
            $todo($fileInfo->getRealPath());
        }
    }

    self::logm('empty cache');
    self::redirect();
}
409
/**
 * Generates a random token
 *
 * Random bytes are taken from random_bytes() when the function exists,
 * otherwise from /dev/urandom when it can be read (no open_basedir
 * restriction, not on Windows). When no such source is usable, the token is
 * derived from uniqid()/mt_rand() as before.
 *
 * @return string base64 characters without '+', at most 20 characters long
 */
public static function generateToken()
{
    $bytes = false;

    if (function_exists('random_bytes')) {
        try {
            $bytes = random_bytes(20);
        } catch (Exception $e) {
            // no suitable randomness source: try the next one
            $bytes = false;
        }
    }

    if ($bytes === false
        && ini_get('open_basedir') === ''
        && strtoupper(substr(PHP_OS, 0, 3)) !== 'WIN'
        && is_readable('/dev/urandom')
    ) {
        $bytes = file_get_contents('/dev/urandom', false, null, 0, 20);
    }

    if (is_string($bytes) && strlen($bytes) === 20) {
        $token = substr(base64_encode($bytes), 0, 15);
    } else {
        // A failed read used to be base64 encoded as-is, which silently
        // produced an empty token; fall back to the uniqid() variant instead
        $token = substr(base64_encode(uniqid(mt_rand(), true)), 0, 20);
    }

    return str_replace('+', '', $token);
}
328} 427}
diff --git a/inc/poche/Url.class.php b/inc/poche/Url.class.php
index aba236fa..d9172b7d 100644
--- a/inc/poche/Url.class.php
+++ b/inc/poche/Url.class.php
@@ -5,7 +5,7 @@
5 * @category wallabag 5 * @category wallabag
6 * @author Nicolas Lœuillet <nicolas@loeuillet.org> 6 * @author Nicolas Lœuillet <nicolas@loeuillet.org>
7 * @copyright 2013 7 * @copyright 2013
8 * @license http://www.wtfpl.net/ see COPYING file 8 * @license http://opensource.org/licenses/MIT see COPYING file
9 */ 9 */
10 10
11class Url 11class Url
diff --git a/inc/poche/User.class.php b/inc/poche/User.class.php
index cc8bec65..eaadd3e5 100644
--- a/inc/poche/User.class.php
+++ b/inc/poche/User.class.php
@@ -5,7 +5,7 @@
5 * @category wallabag 5 * @category wallabag
6 * @author Nicolas Lœuillet <nicolas@loeuillet.org> 6 * @author Nicolas Lœuillet <nicolas@loeuillet.org>
7 * @copyright 2013 7 * @copyright 2013
8 * @license http://www.wtfpl.net/ see COPYING file 8 * @license http://opensource.org/licenses/MIT see COPYING file
9 */ 9 */
10 10
11class User 11class User
@@ -44,7 +44,14 @@ class User
44 $this->config = $config; 44 $this->config = $config;
45 } 45 }
46 46
47 public function getConfigValue($name) { 47 /**
48 * Returns configuration entry for a user
49 *
50 * @param $name
51 * @return bool
52 */
53 public function getConfigValue($name)
54 {
48 return (isset($this->config[$name])) ? $this->config[$name] : FALSE; 55 return (isset($this->config[$name])) ? $this->config[$name] : FALSE;
49 } 56 }
50} \ No newline at end of file 57} \ No newline at end of file
diff --git a/inc/poche/WallabagEpub.class.php b/inc/poche/WallabagEpub.class.php
new file mode 100644
index 00000000..b81d9bfd
--- /dev/null
+++ b/inc/poche/WallabagEpub.class.php
@@ -0,0 +1,137 @@
1<?php
2/**
3 * wallabag, self hostable application allowing you to not miss any content anymore
4 *
5 * @category wallabag
6 * @author Nicolas Lœuillet <nicolas@loeuillet.org>
7 * @copyright 2013
8 * @license http://opensource.org/licenses/MIT see COPYING file
9 */
10
class WallabagEpub
{
    /** @var Poche application object (gives access to store and user) */
    protected $wallabag;
    /** @var string selection method: id, all, tag, category or search */
    protected $method;
    /** @var mixed entry id, used by the 'id' method */
    protected $id;
    /** @var mixed tag, category or search term, depending on the method */
    protected $value;

    /**
     * @param Poche $wallabag application object
     * @param       $method   selection method (id, all, tag, category, search)
     * @param       $id       entry id for the 'id' method
     * @param       $value    tag, category or search term for the other methods
     */
    public function __construct(Poche $wallabag, $method, $id, $value)
    {
        $this->wallabag = $wallabag;
        $this->method = $method;
        $this->id = $id;
        $this->value = $value;
    }

    /**
     * handle ePub
     *
     * Collects the entries matching the selection method, builds an ePub
     * with one chapter per entry and sends it to the browser. Stops with an
     * error message when the selection method is unknown.
     */
    public function run()
    {
        $userId = $this->wallabag->user->getId();

        switch ($this->method) {
            case 'id':
                $entryID = filter_var($this->id, FILTER_SANITIZE_NUMBER_INT);
                $entry = $this->wallabag->store->retrieveOneById($entryID, $userId);
                $entries = array($entry);
                $bookTitle = $entry['title'];
                $bookFileName = substr($bookTitle, 0, 200);
                break;
            case 'all':
                $entries = $this->wallabag->store->retrieveAll($userId);
                // The translated text has no placeholder, so the date is appended:
                // sprintf() silently dropped it.
                $bookTitle = _('All my articles on ') . date(_('d.m.y')); #translatable because each country has it's own date format system
                $bookFileName = _('Allarticles') . date(_('dmY'));
                break;
            case 'tag':
                $tag = filter_var($this->value, FILTER_SANITIZE_STRING);
                $tags_id = $this->wallabag->store->retrieveAllTags($userId, $tag);
                $tag_id = $tags_id[0]["id"]; // we take the first result, which is supposed to match perfectly. There must be a workaround.
                $entries = $this->wallabag->store->retrieveEntriesByTag($tag_id, $userId);
                $bookTitle = sprintf(_('Articles tagged %s'), $tag);
                $bookFileName = substr(sprintf(_('Tag %s'), $tag), 0, 200);
                break;
            case 'category':
                $category = filter_var($this->value, FILTER_SANITIZE_STRING);
                $entries = $this->wallabag->store->getEntriesByView($category, $userId);
                $bookTitle = sprintf(_('All articles in category %s'), $category);
                $bookFileName = substr(sprintf(_('Category %s'), $category), 0, 200);
                break;
            case 'search':
                $search = filter_var($this->value, FILTER_SANITIZE_STRING);
                // the store belongs to the Poche object, this class has no store of its own
                $entries = $this->wallabag->store->search($search, $userId);
                $bookTitle = sprintf(_('All articles for search %s'), $search);
                $bookFileName = substr(sprintf(_('Search %s'), $search), 0, 200);
                break;
            default:
                // unknown method: nothing to export
                // (was "case 'default':", which let unknown methods fall through)
                die(_('Uh, there is a problem while generating epub.'));
        }

        $content_start =
            "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
            . "<html xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:epub=\"http://www.idpf.org/2007/ops\">\n"
            . "<head>"
            . "<meta http-equiv=\"Default-Style\" content=\"text/html; charset=utf-8\" />\n"
            . "<title>wallabag articles book</title>\n"
            . "</head>\n"
            . "<body>\n";

        $bookEnd = "</body>\n</html>\n";

        $log = new Logger("wallabag", TRUE);

        $book = new EPub(EPub::BOOK_VERSION_EPUB3, DEBUG_POCHE);
        $log->logLine("new EPub()");
        $log->logLine("EPub class version: " . EPub::VERSION);
        $log->logLine("EPub Req. Zip version: " . EPub::REQ_ZIP_VERSION);
        $log->logLine("Zip version: " . Zip::VERSION);
        $log->logLine("getCurrentServerURL: " . $book->getCurrentServerURL());
        $log->logLine("getCurrentPageURL..: " . $book->getCurrentPageURL());

        // Book metadata
        $book->setTitle(_('wallabag\'s articles'));
        $book->setIdentifier("http://$_SERVER[HTTP_HOST]", EPub::IDENTIFIER_URI); // Could also be the ISBN number, prefered for published books, or a UUID.
        $book->setDescription(_("Some articles saved on my wallabag"));
        $book->setAuthor("wallabag", "wallabag");
        $book->setPublisher("wallabag", "wallabag"); // I hope this is a non existant address :)
        $book->setDate(time()); // Strictly not needed as the book date defaults to time().
        $book->setSourceURL("http://$_SERVER[HTTP_HOST]");

        $book->addDublinCoreMetadata(DublinCore::CONTRIBUTOR, "PHP");
        $book->addDublinCoreMetadata(DublinCore::CONTRIBUTOR, "wallabag");

        $log->logLine("Add Cover");

        $fullTitle = "<h1> " . $bookTitle . "</h1>\n";

        $book->setCoverImage("Cover.png", file_get_contents("themes/baggy/img/apple-touch-icon-152.png"), "image/png", $fullTitle);

        $cover = $content_start . '<div style="text-align:center;"><p>' . _('Produced by wallabag with PHPePub') . '</p><p>'. _('Please open <a href="https://github.com/wallabag/wallabag/issues" >an issue</a> if you have trouble with the display of this E-Book on your device.') . '</p></div>' . $bookEnd;

        $book->addChapter("Notices", "Cover2.html", $cover);

        $book->buildTOC();

        // One chapter per entry
        foreach ($entries as $entry) { //set tags as subjects
            $tags = $this->wallabag->store->retrieveTagsByEntry($entry['id']);
            foreach ($tags as $tag) {
                $book->setSubject($tag['value']);
            }

            $log->logLine("Set up parameters");

            $chapter = $content_start . $entry['content'] . $bookEnd;
            $book->addChapter($entry['title'], htmlspecialchars($entry['title']) . ".html", $chapter, true, EPub::EXTERNAL_REF_ADD);
            $log->logLine("Added chapter " . $entry['title']);
        }

        if (DEBUG_POCHE) {
            $book->addChapter("Log", "Log.html", $content_start . $log->getLog() . "\n</pre>" . $bookEnd); // log generation
        }
        $book->finalize();
        $book->sendBook($bookFileName);
    }
}
diff --git a/inc/poche/config.inc.default.php b/inc/poche/config.inc.default.php
index 95f727c6..2a458544 100755
--- a/inc/poche/config.inc.default.php
+++ b/inc/poche/config.inc.default.php
@@ -5,7 +5,7 @@
5 * @category wallabag 5 * @category wallabag
6 * @author Nicolas Lœuillet <nicolas@loeuillet.org> 6 * @author Nicolas Lœuillet <nicolas@loeuillet.org>
7 * @copyright 2013 7 * @copyright 2013
8 * @license http://www.wtfpl.net/ see COPYING file 8 * @license http://opensource.org/licenses/MIT see COPYING file
9 */ 9 */
10 10
11@define ('SALT', ''); # put a strong string here 11@define ('SALT', ''); # put a strong string here
@@ -59,7 +59,7 @@
59@define ('LOCALE', ROOT . '/locale'); 59@define ('LOCALE', ROOT . '/locale');
60@define ('CACHE', ROOT . '/cache'); 60@define ('CACHE', ROOT . '/cache');
61 61
62@define ('PAGINATION', '10'); 62@define ('PAGINATION', '12');
63 63
64//limit for download of articles during import 64//limit for download of articles during import
65@define ('IMPORT_LIMIT', 5); 65@define ('IMPORT_LIMIT', 5);
diff --git a/inc/poche/global.inc.php b/inc/poche/global.inc.php
index 8cf86d03..b8c487e3 100755
--- a/inc/poche/global.inc.php
+++ b/inc/poche/global.inc.php
@@ -5,7 +5,7 @@
5 * @category wallabag 5 * @category wallabag
6 * @author Nicolas Lœuillet <nicolas@loeuillet.org> 6 * @author Nicolas Lœuillet <nicolas@loeuillet.org>
7 * @copyright 2013 7 * @copyright 2013
8 * @license http://www.wtfpl.net/ see COPYING file 8 * @license http://opensource.org/licenses/MIT see COPYING file
9 */ 9 */
10 10
11# the poche system root directory (/inc) 11# the poche system root directory (/inc)
@@ -18,6 +18,11 @@ require_once INCLUDES . '/poche/Tools.class.php';
18require_once INCLUDES . '/poche/User.class.php'; 18require_once INCLUDES . '/poche/User.class.php';
19require_once INCLUDES . '/poche/Url.class.php'; 19require_once INCLUDES . '/poche/Url.class.php';
20require_once INCLUDES . '/3rdparty/class.messages.php'; 20require_once INCLUDES . '/3rdparty/class.messages.php';
21require_once ROOT . '/vendor/autoload.php';
22require_once INCLUDES . '/poche/Template.class.php';
23require_once INCLUDES . '/poche/Language.class.php';
24require_once INCLUDES . '/poche/Routing.class.php';
25require_once INCLUDES . '/poche/WallabagEpub.class.php';
21require_once INCLUDES . '/poche/Poche.class.php'; 26require_once INCLUDES . '/poche/Poche.class.php';
22 27
23require_once INCLUDES . '/poche/Database.class.php'; 28require_once INCLUDES . '/poche/Database.class.php';
@@ -36,25 +41,18 @@ require_once INCLUDES . '/3rdparty/libraries/PHPePub/Logger.php';
36require_once INCLUDES . '/3rdparty/libraries/PHPePub/EPub.php'; 41require_once INCLUDES . '/3rdparty/libraries/PHPePub/EPub.php';
37require_once INCLUDES . '/3rdparty/libraries/PHPePub/EPubChapterSplitter.php'; 42require_once INCLUDES . '/3rdparty/libraries/PHPePub/EPubChapterSplitter.php';
38 43
39# Composer its autoloader for automatically loading Twig
40if (! file_exists(ROOT . '/vendor/autoload.php')) {
41 Poche::$canRenderTemplates = false;
42} else {
43 require_once ROOT . '/vendor/autoload.php';
44}
45
46# system configuration; database credentials et caetera 44# system configuration; database credentials et caetera
47if (! file_exists(INCLUDES . '/poche/config.inc.php')) { 45require_once INCLUDES . '/poche/config.inc.php';
48 Poche::$configFileAvailable = false; 46require_once INCLUDES . '/poche/config.inc.default.php';
49} else {
50 require_once INCLUDES . '/poche/config.inc.php';
51 require_once INCLUDES . '/poche/config.inc.default.php';
52}
53 47
54if (Poche::$configFileAvailable && DOWNLOAD_PICTURES) { 48if (DOWNLOAD_PICTURES) {
55 require_once INCLUDES . '/poche/pochePictures.php'; 49 require_once INCLUDES . '/poche/pochePictures.php';
56} 50}
57 51
58if (!ini_get('date.timezone') || !@date_default_timezone_set(ini_get('date.timezone'))) { 52if (!ini_get('date.timezone') || !@date_default_timezone_set(ini_get('date.timezone'))) {
59 date_default_timezone_set('UTC'); 53 date_default_timezone_set('UTC');
60} \ No newline at end of file 54}
55
56if (defined('ERROR_REPORTING')) {
57 error_reporting(ERROR_REPORTING);
58}
diff --git a/inc/poche/pochePictures.php b/inc/poche/pochePictures.php
index 7c319a85..7a914f90 100644
--- a/inc/poche/pochePictures.php
+++ b/inc/poche/pochePictures.php
@@ -5,154 +5,164 @@
5 * @category wallabag 5 * @category wallabag
6 * @author Nicolas Lœuillet <nicolas@loeuillet.org> 6 * @author Nicolas Lœuillet <nicolas@loeuillet.org>
7 * @copyright 2013 7 * @copyright 2013
8 * @license http://www.wtfpl.net/ see COPYING file 8 * @license http://opensource.org/licenses/MIT see COPYING file
9 */ 9 */
10 10
11/** 11
12 * On modifie les URLS des images dans le corps de l'article 12final class Picture
13 */
14function filtre_picture($content, $url, $id)
15{ 13{
16 $matches = array(); 14 /**
17 $processing_pictures = array(); // list of processing image to avoid processing the same pictures twice 15 * Changing pictures URL in article content
18 preg_match_all('#<\s*(img)[^>]+src="([^"]*)"[^>]*>#Si', $content, $matches, PREG_SET_ORDER); 16 */
19 foreach($matches as $i => $link) { 17 public static function filterPicture($content, $url, $id)
20 $link[1] = trim($link[1]); 18 {
21 if (!preg_match('#^(([a-z]+://)|(\#))#', $link[1])) { 19 $matches = array();
22 $absolute_path = get_absolute_link($link[2],$url); 20 $processing_pictures = array(); // list of processing image to avoid processing the same pictures twice
23 $filename = basename(parse_url($absolute_path, PHP_URL_PATH)); 21 preg_match_all('#<\s*(img)[^>]+src="([^"]*)"[^>]*>#Si', $content, $matches, PREG_SET_ORDER);
24 $directory = create_assets_directory($id); 22 foreach($matches as $i => $link) {
25 $fullpath = $directory . '/' . $filename; 23 $link[1] = trim($link[1]);
26 24 if (!preg_match('#^(([a-z]+://)|(\#))#', $link[1])) {
27 if (in_array($absolute_path, $processing_pictures) === true) { 25 $absolute_path = self::_getAbsoluteLink($link[2], $url);
28 // replace picture's URL only if processing is OK : already processing -> go to next picture 26 $filename = basename(parse_url($absolute_path, PHP_URL_PATH));
29 continue; 27 $directory = self::_createAssetsDirectory($id);
28 $fullpath = $directory . '/' . $filename;
29
30 if (in_array($absolute_path, $processing_pictures) === true) {
31 // replace picture's URL only if processing is OK : already processing -> go to next picture
32 continue;
33 }
34
35 if (self::_downloadPictures($absolute_path, $fullpath) === true) {
36 $content = str_replace($matches[$i][2], $fullpath, $content);
37 }
38
39 $processing_pictures[] = $absolute_path;
30 } 40 }
31
32 if (download_pictures($absolute_path, $fullpath) === true) {
33 $content = str_replace($matches[$i][2], $fullpath, $content);
34 }
35
36 $processing_pictures[] = $absolute_path;
37 } 41 }
38 42
43 return $content;
39 } 44 }
40 45
41 return $content; 46 /**
42} 47 * Get absolute URL
48 */
49 private static function _getAbsoluteLink($relativeLink, $url)
50 {
51 /* return if already absolute URL */
52 if (parse_url($relativeLink, PHP_URL_SCHEME) != '') return $relativeLink;
43 53
44/** 54 /* queries and anchors */
45 * Retourne le lien absolu 55 if ($relativeLink[0]=='#' || $relativeLink[0]=='?') return $url . $relativeLink;
46 */
47function get_absolute_link($relative_link, $url) {
48 /* return if already absolute URL */
49 if (parse_url($relative_link, PHP_URL_SCHEME) != '') return $relative_link;
50 56
51 /* queries and anchors */ 57 /* parse base URL and convert to local variables:
52 if ($relative_link[0]=='#' || $relative_link[0]=='?') return $url . $relative_link; 58 $scheme, $host, $path */
59 extract(parse_url($url));
53 60
54 /* parse base URL and convert to local variables: 61 /* remove non-directory element from path */
55 $scheme, $host, $path */ 62 $path = preg_replace('#/[^/]*$#', '', $path);
56 extract(parse_url($url));
57 63
58 /* remove non-directory element from path */ 64 /* destroy path if relative url points to root */
59 $path = preg_replace('#/[^/]*$#', '', $path); 65 if ($relativeLink[0] == '/') $path = '';
60 66
61 /* destroy path if relative url points to root */ 67 /* dirty absolute URL */
62 if ($relative_link[0] == '/') $path = ''; 68 $abs = $host . $path . '/' . $relativeLink;
63 69
64 /* dirty absolute URL */ 70 /* replace '//' or '/./' or '/foo/../' with '/' */
65 $abs = $host . $path . '/' . $relative_link; 71 $re = array('#(/\.?/)#', '#/(?!\.\.)[^/]+/\.\./#');
72 for($n=1; $n>0; $abs=preg_replace($re, '/', $abs, -1, $n)) {}
66 73
67 /* replace '//' or '/./' or '/foo/../' with '/' */ 74 /* absolute URL is ready! */
68 $re = array('#(/\.?/)#', '#/(?!\.\.)[^/]+/\.\./#'); 75 return $scheme.'://'.$abs;
69 for($n=1; $n>0; $abs=preg_replace($re, '/', $abs, -1, $n)) {} 76 }
70 77
71 /* absolute URL is ready! */ 78 /**
72 return $scheme.'://'.$abs; 79 * Downloading pictures
73} 80 *
81 * @return bool true if the download and processing is OK, false else
82 */
83 private static function _downloadPictures($absolute_path, $fullpath)
84 {
85 $rawdata = Tools::getFile($absolute_path);
86 $fullpath = urldecode($fullpath);
87
88 if(file_exists($fullpath)) {
89 unlink($fullpath);
90 }
74 91
75/** 92 // check extension
76 * Téléchargement des images 93 $file_ext = strrchr($fullpath, '.');
77 * 94 $whitelist = array(".jpg",".jpeg",".gif",".png");
78 * @return bool true if the download and processing is OK, false else 95 if (!(in_array($file_ext, $whitelist))) {
79 */ 96 Tools::logm('processed image with not allowed extension. Skipping ' . $fullpath);
80function download_pictures($absolute_path, $fullpath) 97 return false;
81{ 98 }
82 $rawdata = Tools::getFile($absolute_path);
83 $fullpath = urldecode($fullpath);
84 99
85 if(file_exists($fullpath)) { 100 // check headers
86 unlink($fullpath); 101 $imageinfo = getimagesize($absolute_path);
87 } 102 if ($imageinfo['mime'] != 'image/gif' && $imageinfo['mime'] != 'image/jpeg'&& $imageinfo['mime'] != 'image/jpg'&& $imageinfo['mime'] != 'image/png') {
88 103 Tools::logm('processed image with bad header. Skipping ' . $fullpath);
89 // check extension 104 return false;
90 $file_ext = strrchr($fullpath, '.'); 105 }
91 $whitelist = array(".jpg",".jpeg",".gif",".png");
92 if (!(in_array($file_ext, $whitelist))) {
93 Tools::logm('processed image with not allowed extension. Skipping ' . $fullpath);
94 return false;
95 }
96
97 // check headers
98 $imageinfo = getimagesize($absolute_path);
99 if ($imageinfo['mime'] != 'image/gif' && $imageinfo['mime'] != 'image/jpeg'&& $imageinfo['mime'] != 'image/jpg'&& $imageinfo['mime'] != 'image/png') {
100 Tools::logm('processed image with bad header. Skipping ' . $fullpath);
101 return false;
102 }
103
104 // regenerate image
105 $im = imagecreatefromstring($rawdata);
106 if ($im === false) {
107 Tools::logm('error while regenerating image ' . $fullpath);
108 return false;
109 }
110
111 switch ($imageinfo['mime']) {
112 case 'image/gif':
113 $result = imagegif($im, $fullpath);
114 break;
115 case 'image/jpeg':
116 case 'image/jpg':
117 $result = imagejpeg($im, $fullpath, REGENERATE_PICTURES_QUALITY);
118 break;
119 case 'image/png':
120 $result = imagepng($im, $fullpath, ceil(REGENERATE_PICTURES_QUALITY / 100 * 9));
121 break;
122 }
123 imagedestroy($im);
124
125 return $result;
126}
127 106
128/** 107 // regenerate image
129 * Crée un répertoire de médias pour l'article 108 $im = imagecreatefromstring($rawdata);
130 */ 109 if ($im === false) {
131function create_assets_directory($id) 110 Tools::logm('error while regenerating image ' . $fullpath);
132{ 111 return false;
133 $assets_path = ABS_PATH; 112 }
134 if(!is_dir($assets_path)) { 113
135 mkdir($assets_path, 0715); 114 switch ($imageinfo['mime']) {
136 } 115 case 'image/gif':
116 $result = imagegif($im, $fullpath);
117 break;
118 case 'image/jpeg':
119 case 'image/jpg':
120 $result = imagejpeg($im, $fullpath, REGENERATE_PICTURES_QUALITY);
121 break;
122 case 'image/png':
123 $result = imagepng($im, $fullpath, ceil(REGENERATE_PICTURES_QUALITY / 100 * 9));
124 break;
125 }
126 imagedestroy($im);
137 127
138 $article_directory = $assets_path . $id; 128 return $result;
139 if(!is_dir($article_directory)) {
140 mkdir($article_directory, 0715);
141 } 129 }
142 130
143 return $article_directory; 131 /**
144} 132 * Create a directory for an article
133 *
134 * @param $id ID of the article
135 * @return string
136 */
137 private static function _createAssetsDirectory($id)
138 {
139 $assets_path = ABS_PATH;
140 if (!is_dir($assets_path)) {
141 mkdir($assets_path, 0715);
142 }
145 143
146/** 144 $article_directory = $assets_path . $id;
147 * Suppression du répertoire d'images 145 if (!is_dir($article_directory)) {
148 */ 146 mkdir($article_directory, 0715);
149function remove_directory($directory) 147 }
150{ 148
151 if(is_dir($directory)) { 149 return $article_directory;
152 $files = array_diff(scandir($directory), array('.','..')); 150 }
153 foreach ($files as $file) { 151
154 (is_dir("$directory/$file")) ? remove_directory("$directory/$file") : unlink("$directory/$file"); 152 /**
153 * Remove the directory
154 *
155 * @param $directory
156 * @return bool
157 */
158 public static function removeDirectory($directory)
159 {
160 if (is_dir($directory)) {
161 $files = array_diff(scandir($directory), array('.','..'));
162 foreach ($files as $file) {
163 (is_dir("$directory/$file")) ? self::removeDirectory("$directory/$file") : unlink("$directory/$file");
164 }
165 return rmdir($directory);
155 } 166 }
156 return rmdir($directory);
157 } 167 }
158} 168} \ No newline at end of file