aboutsummaryrefslogtreecommitdiffhomepage
path: root/inc/3rdparty/site_config
diff options
context:
space:
mode:
Diffstat (limited to 'inc/3rdparty/site_config')
-rw-r--r--inc/3rdparty/site_config/README.txt (renamed from inc/3rdparty/site_config/README.md)10
-rw-r--r--inc/3rdparty/site_config/custom/dilbert.com.txt4
-rw-r--r--inc/3rdparty/site_config/custom/interviewmagazine.com.txt4
-rw-r--r--inc/3rdparty/site_config/custom/inthepoche.com.txt7
-rwxr-xr-xinc/3rdparty/site_config/custom/stackexchange.com.txt4
-rwxr-xr-xinc/3rdparty/site_config/custom/stackoverflow.com.txt4
-rw-r--r--inc/3rdparty/site_config/index.php4
-rw-r--r--inc/3rdparty/site_config/standard/24ways.org.txt6
-rw-r--r--inc/3rdparty/site_config/standard/37signals.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/3quarksdaily.com.txt9
-rw-r--r--inc/3rdparty/site_config/standard/3voor12.vpro.nl.txt11
-rw-r--r--inc/3rdparty/site_config/standard/43folders.com.txt4
-rw-r--r--inc/3rdparty/site_config/standard/500px.com.txt27
-rw-r--r--inc/3rdparty/site_config/standard/512pixels.net.txt2
-rw-r--r--inc/3rdparty/site_config/standard/5by5.tv.txt9
-rw-r--r--inc/3rdparty/site_config/standard/944.com.txt9
-rw-r--r--inc/3rdparty/site_config/standard/aachener-nachrichten.de.txt10
-rw-r--r--inc/3rdparty/site_config/standard/aachener-zeitung.de.txt10
-rw-r--r--inc/3rdparty/site_config/standard/abc.es.txt7
-rw-r--r--inc/3rdparty/site_config/standard/abc.net.au.txt10
-rw-r--r--inc/3rdparty/site_config/standard/abcnews.go.com.txt27
-rw-r--r--inc/3rdparty/site_config/standard/accesstoinsight.org.txt9
-rw-r--r--inc/3rdparty/site_config/standard/acidcow.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/acquia.com.txt9
-rw-r--r--inc/3rdparty/site_config/standard/acroswing.fr.txt5
-rw-r--r--inc/3rdparty/site_config/standard/aht.seriouseats.com.txt15
-rw-r--r--inc/3rdparty/site_config/standard/alex.mullr.net.txt2
-rw-r--r--inc/3rdparty/site_config/standard/alistapart.com.txt12
-rw-r--r--inc/3rdparty/site_config/standard/aljazeera.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/allrecipes.com.txt14
-rw-r--r--inc/3rdparty/site_config/standard/allthingsd.com.txt10
-rw-r--r--inc/3rdparty/site_config/standard/allyou.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/alphabeta.argaam.com.txt11
-rw-r--r--inc/3rdparty/site_config/standard/alriyadh.com.txt9
-rw-r--r--inc/3rdparty/site_config/standard/alseraj.net.txt2
-rw-r--r--inc/3rdparty/site_config/standard/alt1040.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/altfoto.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/alumni.stanford.edu.txt10
-rw-r--r--inc/3rdparty/site_config/standard/amazon.com.txt19
-rw-r--r--inc/3rdparty/site_config/standard/americandrink.net.txt6
-rw-r--r--inc/3rdparty/site_config/standard/americascup.com.txt10
-rw-r--r--inc/3rdparty/site_config/standard/americastestkitchenfeed.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/anandtech.com.txt11
-rw-r--r--inc/3rdparty/site_config/standard/andyrutledge.com.txt9
-rw-r--r--inc/3rdparty/site_config/standard/annatravelling.wordpress.com.txt9
-rw-r--r--inc/3rdparty/site_config/standard/applature.com.txt18
-rw-r--r--inc/3rdparty/site_config/standard/apple.com.txt7
-rw-r--r--inc/3rdparty/site_config/standard/appleinsider.com.txt11
-rw-r--r--inc/3rdparty/site_config/standard/appleweblog.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/archdaily.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/archiveofourown.org.txt18
-rw-r--r--inc/3rdparty/site_config/standard/arstechnica.com.txt16
-rw-r--r--inc/3rdparty/site_config/standard/articles.boston.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/articles.courant.com.txt11
-rw-r--r--inc/3rdparty/site_config/standard/asahi.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/ascarter.net.txt5
-rw-r--r--inc/3rdparty/site_config/standard/astronews.com.txt7
-rw-r--r--inc/3rdparty/site_config/standard/asymco.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/autoblog.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/avclub.com.txt4
-rw-r--r--inc/3rdparty/site_config/standard/baltimoresun.com.txt12
-rw-r--r--inc/3rdparty/site_config/standard/basicthinking.de.txt7
-rw-r--r--inc/3rdparty/site_config/standard/bb.is.txt13
-rw-r--r--inc/3rdparty/site_config/standard/bbc.co.uk.txt32
-rw-r--r--inc/3rdparty/site_config/standard/benoitmaison.org.txt16
-rw-r--r--inc/3rdparty/site_config/standard/berlingske.dk.txt3
-rw-r--r--inc/3rdparty/site_config/standard/betabeat.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/betanews.com.txt7
-rw-r--r--inc/3rdparty/site_config/standard/biography.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/bitelia.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/bjango.com.txt7
-rw-r--r--inc/3rdparty/site_config/standard/blog.arsln.org.txt8
-rw-r--r--inc/3rdparty/site_config/standard/blog.asmartbear.com.txt7
-rw-r--r--inc/3rdparty/site_config/standard/blog.cloudflare.com.txt9
-rw-r--r--inc/3rdparty/site_config/standard/blog.fefe.de.txt5
-rw-r--r--inc/3rdparty/site_config/standard/blog.instagram.com.txt11
-rw-r--r--inc/3rdparty/site_config/standard/blog.jaysalvat.com.txt4
-rw-r--r--inc/3rdparty/site_config/standard/blog.kaelig.fr.txt5
-rw-r--r--inc/3rdparty/site_config/standard/blog.naver.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/blog.pchome.net.txt12
-rw-r--r--inc/3rdparty/site_config/standard/blog.pinboard.in.txt6
-rw-r--r--inc/3rdparty/site_config/standard/blog.sina.com.cn.txt26
-rw-r--r--inc/3rdparty/site_config/standard/blog.spu.edu.txt2
-rw-r--r--inc/3rdparty/site_config/standard/blog.wells.ee.txt6
-rw-r--r--inc/3rdparty/site_config/standard/blogs.aljazeera.net.txt8
-rw-r--r--inc/3rdparty/site_config/standard/blogs.forbes.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/blogs.hbr.org.txt4
-rw-r--r--inc/3rdparty/site_config/standard/blogs.msdn.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/blogs.reuters.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/blogs.scientificamerican.com.txt16
-rw-r--r--inc/3rdparty/site_config/standard/blogs.smithsonianmag.com.txt15
-rw-r--r--inc/3rdparty/site_config/standard/blogs.technet.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/bluetouff.com.txt4
-rw-r--r--inc/3rdparty/site_config/standard/boagworld.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/boingboing.net.txt11
-rw-r--r--inc/3rdparty/site_config/standard/boldizsar.palotas.eu.txt3
-rw-r--r--inc/3rdparty/site_config/standard/book.douban.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/bookforum.com.txt19
-rw-r--r--inc/3rdparty/site_config/standard/borderhouseblog.com.txt7
-rw-r--r--inc/3rdparty/site_config/standard/bostonglobe.com.txt16
-rw-r--r--inc/3rdparty/site_config/standard/bostonreview.net.txt15
-rw-r--r--inc/3rdparty/site_config/standard/boundlessline.org.txt5
-rw-r--r--inc/3rdparty/site_config/standard/brainfacts.org.txt10
-rw-r--r--inc/3rdparty/site_config/standard/brandeins.de.txt7
-rw-r--r--inc/3rdparty/site_config/standard/brandingstrategyinsider.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/brettterpstra.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/brisbanetimes.com.au.txt2
-rw-r--r--inc/3rdparty/site_config/standard/brookings.edu.txt13
-rw-r--r--inc/3rdparty/site_config/standard/brooksreview.net.txt6
-rw-r--r--inc/3rdparty/site_config/standard/buquad.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/businessinsider.com.txt12
-rw-r--r--inc/3rdparty/site_config/standard/businessnews.com.tn.txt12
-rw-r--r--inc/3rdparty/site_config/standard/businessweek.com.txt30
-rw-r--r--inc/3rdparty/site_config/standard/buzzfeed.com.txt15
-rw-r--r--inc/3rdparty/site_config/standard/bygonebureau.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/cardboardconnection.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/carpeaqua.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/catb.org.txt7
-rw-r--r--inc/3rdparty/site_config/standard/cbc.ca.txt5
-rw-r--r--inc/3rdparty/site_config/standard/cbsnews.com.txt14
-rw-r--r--inc/3rdparty/site_config/standard/chareidi.org.txt2
-rw-r--r--inc/3rdparty/site_config/standard/chinamining.org.txt10
-rw-r--r--inc/3rdparty/site_config/standard/chomsky.info.txt5
-rw-r--r--inc/3rdparty/site_config/standard/christianitytoday.com.txt13
-rw-r--r--inc/3rdparty/site_config/standard/christianpf.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/christies.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/chrome.google.com.txt9
-rw-r--r--inc/3rdparty/site_config/standard/chronicle.com.txt17
-rw-r--r--inc/3rdparty/site_config/standard/cicero.de.txt33
-rw-r--r--inc/3rdparty/site_config/standard/ciperchile.cl.txt4
-rw-r--r--inc/3rdparty/site_config/standard/cjr.org.txt6
-rw-r--r--inc/3rdparty/site_config/standard/clientk.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/clubic.com.txt11
-rw-r--r--inc/3rdparty/site_config/standard/cmswire.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/cnet.com.txt16
-rw-r--r--inc/3rdparty/site_config/standard/cnn.com.txt19
-rw-r--r--inc/3rdparty/site_config/standard/cnnsi.com.txt26
-rw-r--r--inc/3rdparty/site_config/standard/code.activestate.com.txt10
-rw-r--r--inc/3rdparty/site_config/standard/code.google.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/codinghorror.com.txt15
-rw-r--r--inc/3rdparty/site_config/standard/collegehumor.com.txt14
-rw-r--r--inc/3rdparty/site_config/standard/communities-dominate.blogs.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/community.service-now.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/computer.org.txt5
-rw-r--r--inc/3rdparty/site_config/standard/computerbase.de.txt18
-rw-r--r--inc/3rdparty/site_config/standard/computerworld.com.txt22
-rw-r--r--inc/3rdparty/site_config/standard/computerworld.dk.txt5
-rw-r--r--inc/3rdparty/site_config/standard/contemporist.com.txt9
-rw-r--r--inc/3rdparty/site_config/standard/conversaciones.nokia.com.txt7
-rw-r--r--inc/3rdparty/site_config/standard/core77.com.txt7
-rw-r--r--inc/3rdparty/site_config/standard/counterpunch.org.txt6
-rw-r--r--inc/3rdparty/site_config/standard/crazybutable.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/crimemagazine.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/crimethinc.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/crn.de.txt3
-rw-r--r--inc/3rdparty/site_config/standard/csmonitor.com.txt18
-rw-r--r--inc/3rdparty/site_config/standard/csnbayarea.com.txt7
-rw-r--r--inc/3rdparty/site_config/standard/csnphilly.com.txt22
-rw-r--r--inc/3rdparty/site_config/standard/cucharasonica.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/da.feedsportal.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/dailydot.com.txt4
-rw-r--r--inc/3rdparty/site_config/standard/dailykos.com.txt10
-rw-r--r--inc/3rdparty/site_config/standard/dailymail.co.uk.txt12
-rw-r--r--inc/3rdparty/site_config/standard/dansdata.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/daringfireball.net.txt7
-rw-r--r--inc/3rdparty/site_config/standard/datanami.com.txt4
-rw-r--r--inc/3rdparty/site_config/standard/dcurt.is.txt8
-rw-r--r--inc/3rdparty/site_config/standard/delong.typepad.com.txt4
-rw-r--r--inc/3rdparty/site_config/standard/derstandard.at.txt13
-rw-r--r--inc/3rdparty/site_config/standard/designtagebuch.de.txt11
-rw-r--r--inc/3rdparty/site_config/standard/desitvforum.net.txt5
-rw-r--r--inc/3rdparty/site_config/standard/details.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/developers.facebook.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/devlinsangle.blogspot.co.at.txt6
-rw-r--r--inc/3rdparty/site_config/standard/dictionary.reference.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/diepresse.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/digiphoto.techbang.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/digital-photography-school.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/digitalspy.co.uk.txt5
-rw-r--r--inc/3rdparty/site_config/standard/dilbert.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/dinamalar.com.txt19
-rw-r--r--inc/3rdparty/site_config/standard/dn.se.txt26
-rw-r--r--inc/3rdparty/site_config/standard/doctac.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/domusweb.it.txt21
-rw-r--r--inc/3rdparty/site_config/standard/dou.ua.txt8
-rw-r--r--inc/3rdparty/site_config/standard/douban.com.txt21
-rw-r--r--inc/3rdparty/site_config/standard/dpreview.com.txt9
-rw-r--r--inc/3rdparty/site_config/standard/dr.dk.txt9
-rw-r--r--inc/3rdparty/site_config/standard/dramasonline.com.txt10
-rw-r--r--inc/3rdparty/site_config/standard/drdobbs.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/drive2.ru.txt12
-rw-r--r--inc/3rdparty/site_config/standard/drupal.org.txt8
-rw-r--r--inc/3rdparty/site_config/standard/dukebasketballreport.com.txt11
-rw-r--r--inc/3rdparty/site_config/standard/dvice.com.txt9
-rw-r--r--inc/3rdparty/site_config/standard/eamesinerudition.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/eandt.theiet.org.txt8
-rw-r--r--inc/3rdparty/site_config/standard/eastoftheweb.com.txt18
-rw-r--r--inc/3rdparty/site_config/standard/ebay.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/ecetia.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/econlog.econlib.org.txt6
-rw-r--r--inc/3rdparty/site_config/standard/economia.estadao.com.br.txt7
-rw-r--r--inc/3rdparty/site_config/standard/economist.com.txt10
-rw-r--r--inc/3rdparty/site_config/standard/edge-online.com.txt13
-rw-r--r--inc/3rdparty/site_config/standard/edge.org.txt5
-rw-r--r--inc/3rdparty/site_config/standard/edition.cnn.com.txt9
-rw-r--r--inc/3rdparty/site_config/standard/ekultura.hu.txt11
-rw-r--r--inc/3rdparty/site_config/standard/elance.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/elektroniknet.de.txt27
-rw-r--r--inc/3rdparty/site_config/standard/elmalpensante.com.txt4
-rw-r--r--inc/3rdparty/site_config/standard/elpais.com.txt22
-rw-r--r--inc/3rdparty/site_config/standard/en.espnf1.com.txt10
-rw-r--r--inc/3rdparty/site_config/standard/engadget.com.txt7
-rw-r--r--inc/3rdparty/site_config/standard/engineering.tumblr.com.txt7
-rw-r--r--inc/3rdparty/site_config/standard/english.aljazeera.net.txt7
-rw-r--r--inc/3rdparty/site_config/standard/enikos.gr.txt9
-rw-r--r--inc/3rdparty/site_config/standard/entertainment.timesonline.co.uk.txt10
-rw-r--r--inc/3rdparty/site_config/standard/es.hu.txt11
-rw-r--r--inc/3rdparty/site_config/standard/escapistmagazine.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/espn.go.com.txt12
-rw-r--r--inc/3rdparty/site_config/standard/esquire.com.txt10
-rw-r--r--inc/3rdparty/site_config/standard/essentialpublicradio.org.txt6
-rw-r--r--inc/3rdparty/site_config/standard/etc.se.txt6
-rw-r--r--inc/3rdparty/site_config/standard/eternabuenosaires.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/eurogamer.net.txt8
-rw-r--r--inc/3rdparty/site_config/standard/evo.co.uk.txt11
-rw-r--r--inc/3rdparty/site_config/standard/expressen.se.txt9
-rw-r--r--inc/3rdparty/site_config/standard/extracine.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/f1actual.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/facta.co.jp.txt3
-rw-r--r--inc/3rdparty/site_config/standard/falter.at.txt18
-rw-r--r--inc/3rdparty/site_config/standard/fanfiction.net.txt6
-rw-r--r--inc/3rdparty/site_config/standard/fastcompany.com.txt16
-rw-r--r--inc/3rdparty/site_config/standard/faz.net.txt30
-rw-r--r--inc/3rdparty/site_config/standard/fictionpress.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/ficwad.com.txt12
-rw-r--r--inc/3rdparty/site_config/standard/finance.yahoo.com.txt12
-rw-r--r--inc/3rdparty/site_config/standard/findtheswagger.tumblr.com.txt10
-rw-r--r--inc/3rdparty/site_config/standard/firstthings.com.txt7
-rw-r--r--inc/3rdparty/site_config/standard/fivechapters.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/fivefilters.org.txt1
-rw-r--r--inc/3rdparty/site_config/standard/fivethirtyeight.com.txt7
-rw-r--r--inc/3rdparty/site_config/standard/fm4.orf.at.txt7
-rw-r--r--inc/3rdparty/site_config/standard/fnal.gov.txt15
-rw-r--r--inc/3rdparty/site_config/standard/focus.de.txt19
-rw-r--r--inc/3rdparty/site_config/standard/fool.com.txt11
-rw-r--r--inc/3rdparty/site_config/standard/forbes.com.txt16
-rw-r--r--inc/3rdparty/site_config/standard/foreignpolicy.com.txt11
-rw-r--r--inc/3rdparty/site_config/standard/forsvaret.no.txt9
-rw-r--r--inc/3rdparty/site_config/standard/foxnews.com.txt9
-rw-r--r--inc/3rdparty/site_config/standard/freelancer.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/freytag-film.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/friendskorner.com.txt11
-rw-r--r--inc/3rdparty/site_config/standard/ft.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/ftd.de.txt5
-rw-r--r--inc/3rdparty/site_config/standard/fubiz.net.txt3
-rw-r--r--inc/3rdparty/site_config/standard/futurezone.at.txt11
-rw-r--r--inc/3rdparty/site_config/standard/gamasutra.com.txt20
-rw-r--r--inc/3rdparty/site_config/standard/gameblog.fr.txt10
-rw-r--r--inc/3rdparty/site_config/standard/garythink.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/gasteroprod.com.txt4
-rw-r--r--inc/3rdparty/site_config/standard/gatopardo.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/gawker.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/geeksofdoom.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/geenstijl.nl.txt3
-rw-r--r--inc/3rdparty/site_config/standard/getnews.jp.txt3
-rw-r--r--inc/3rdparty/site_config/standard/giantbomb.com.txt11
-rw-r--r--inc/3rdparty/site_config/standard/giga.de.txt20
-rw-r--r--inc/3rdparty/site_config/standard/gigaom.com.txt17
-rw-r--r--inc/3rdparty/site_config/standard/gihyo.jp.txt3
-rw-r--r--inc/3rdparty/site_config/standard/gist.github.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/givemesomethingtoread.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/gizmodo.co.uk.txt7
-rw-r--r--inc/3rdparty/site_config/standard/gizmodo.com.txt7
-rw-r--r--inc/3rdparty/site_config/standard/gizmologia.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/gizmovil.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/global.txt4
-rw-r--r--inc/3rdparty/site_config/standard/globalissues.org.txt15
-rw-r--r--inc/3rdparty/site_config/standard/goal.com.txt16
-rw-r--r--inc/3rdparty/site_config/standard/golem.de.txt25
-rw-r--r--inc/3rdparty/site_config/standard/good.is.txt4
-rw-r--r--inc/3rdparty/site_config/standard/gossip-tv.gr.txt14
-rw-r--r--inc/3rdparty/site_config/standard/gothamist.com.txt7
-rw-r--r--inc/3rdparty/site_config/standard/gotomanager.com.txt21
-rw-r--r--inc/3rdparty/site_config/standard/gq.com.txt9
-rw-r--r--inc/3rdparty/site_config/standard/grantland.com.txt20
-rw-r--r--inc/3rdparty/site_config/standard/greatergreaterwashington.org.txt11
-rw-r--r--inc/3rdparty/site_config/standard/groups.drupal.org.txt5
-rw-r--r--inc/3rdparty/site_config/standard/guardian.co.uk.txt7
-rw-r--r--inc/3rdparty/site_config/standard/gulfnews.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/guokr.com.txt22
-rw-r--r--inc/3rdparty/site_config/standard/haberler.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/halo.bungie.org.txt5
-rw-r--r--inc/3rdparty/site_config/standard/hammers.theoffside.com.txt7
-rw-r--r--inc/3rdparty/site_config/standard/hanselman.com.txt4
-rw-r--r--inc/3rdparty/site_config/standard/hardware.fr.txt6
-rw-r--r--inc/3rdparty/site_config/standard/hbr.org.txt6
-rw-r--r--inc/3rdparty/site_config/standard/healthland.time.com.txt10
-rw-r--r--inc/3rdparty/site_config/standard/heise-online.mobi.txt3
-rw-r--r--inc/3rdparty/site_config/standard/heise.de.txt7
-rw-r--r--inc/3rdparty/site_config/standard/hespress.com.txt7
-rw-r--r--inc/3rdparty/site_config/standard/highscalability.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/hiperpop.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/hiphopleeft.nl.txt4
-rw-r--r--inc/3rdparty/site_config/standard/historytoday.com.txt10
-rw-r--r--inc/3rdparty/site_config/standard/hmercer.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/hometheaterreview.com.txt4
-rw-r--r--inc/3rdparty/site_config/standard/hosted.ap.org.txt5
-rw-r--r--inc/3rdparty/site_config/standard/hs.fi.txt3
-rw-r--r--inc/3rdparty/site_config/standard/ht.ly.txt3
-rw-r--r--inc/3rdparty/site_config/standard/huffingtonpost.com.txt16
-rw-r--r--inc/3rdparty/site_config/standard/humantransit.org.txt5
-rw-r--r--inc/3rdparty/site_config/standard/hurriyet.com.tr.txt7
-rw-r--r--inc/3rdparty/site_config/standard/hvg.hu.txt9
-rw-r--r--inc/3rdparty/site_config/standard/hypebeast.com.txt10
-rw-r--r--inc/3rdparty/site_config/standard/idlewords.com.txt7
-rw-r--r--inc/3rdparty/site_config/standard/igeneration.fr.txt5
-rw-r--r--inc/3rdparty/site_config/standard/ignoredbydinosaurs.com.txt7
-rw-r--r--inc/3rdparty/site_config/standard/ilounge.com.txt13
-rw-r--r--inc/3rdparty/site_config/standard/ilyabirman.ru.txt5
-rw-r--r--inc/3rdparty/site_config/standard/inc.com.txt21
-rw-r--r--inc/3rdparty/site_config/standard/independent.co.uk.txt9
-rw-r--r--inc/3rdparty/site_config/standard/index.php4
-rw-r--r--inc/3rdparty/site_config/standard/indiatimes.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/inessential.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/info.abril.com.br.txt4
-rw-r--r--inc/3rdparty/site_config/standard/infoq.com.txt14
-rw-r--r--inc/3rdparty/site_config/standard/informador.com.mx.txt9
-rw-r--r--inc/3rdparty/site_config/standard/information.dk.txt7
-rw-r--r--inc/3rdparty/site_config/standard/informationarchitects.net.txt10
-rw-r--r--inc/3rdparty/site_config/standard/informationclearinghouse.info.txt6
-rw-r--r--inc/3rdparty/site_config/standard/informit.com.txt7
-rw-r--r--inc/3rdparty/site_config/standard/infoworld.com.txt12
-rw-r--r--inc/3rdparty/site_config/standard/infzm.com.txt9
-rw-r--r--inc/3rdparty/site_config/standard/inhabitat.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/instagr.am.txt6
-rw-r--r--inc/3rdparty/site_config/standard/interest.co.nz.txt2
-rw-r--r--inc/3rdparty/site_config/standard/iolanguage.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/ipadclub.nl.txt7
-rw-r--r--inc/3rdparty/site_config/standard/ipadplanet.nl.txt7
-rw-r--r--inc/3rdparty/site_config/standard/iphoneclub.nl.txt7
-rw-r--r--inc/3rdparty/site_config/standard/iphonehacks.com.txt9
-rw-r--r--inc/3rdparty/site_config/standard/iplaysoft.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/isource.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/itavisen.no.txt6
-rw-r--r--inc/3rdparty/site_config/standard/itstactical.com.txt12
-rw-r--r--inc/3rdparty/site_config/standard/itworld.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/izismile.com.txt4
-rw-r--r--inc/3rdparty/site_config/standard/jalopnik.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/jandan.net.txt6
-rw-r--r--inc/3rdparty/site_config/standard/jetzt.sueddeutsche.de.txt22
-rw-r--r--inc/3rdparty/site_config/standard/jjahnke.net.txt4
-rw-r--r--inc/3rdparty/site_config/standard/jobbank.gc.ca.txt5
-rw-r--r--inc/3rdparty/site_config/standard/joelonsoftware.com.txt21
-rw-r--r--inc/3rdparty/site_config/standard/jouire.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/joystiq.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/juedische-allgemeine.de.txt19
-rw-r--r--inc/3rdparty/site_config/standard/juppy.org.txt8
-rw-r--r--inc/3rdparty/site_config/standard/kachestvo.ru.txt3
-rw-r--r--inc/3rdparty/site_config/standard/kenrockwell.com.txt7
-rw-r--r--inc/3rdparty/site_config/standard/kicker.de.txt21
-rw-r--r--inc/3rdparty/site_config/standard/kickstarter.com.txt7
-rw-r--r--inc/3rdparty/site_config/standard/kingarthurflour.com.txt4
-rw-r--r--inc/3rdparty/site_config/standard/kotaku.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/kottke.org.txt6
-rw-r--r--inc/3rdparty/site_config/standard/kumailplus.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/kumb.com.txt10
-rw-r--r--inc/3rdparty/site_config/standard/kwerfeldein.de.txt9
-rw-r--r--inc/3rdparty/site_config/standard/laphamsquarterly.org.txt13
-rw-r--r--inc/3rdparty/site_config/standard/laprensagrafica.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/laquadrature.net.txt10
-rw-r--r--inc/3rdparty/site_config/standard/lareviewofbooks.org.txt12
-rw-r--r--inc/3rdparty/site_config/standard/latimes.com.txt11
-rw-r--r--inc/3rdparty/site_config/standard/laughingsquid.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/leancrew.com.txt9
-rw-r--r--inc/3rdparty/site_config/standard/lefigaro.fr.txt8
-rw-r--r--inc/3rdparty/site_config/standard/lemonde.fr.txt13
-rw-r--r--inc/3rdparty/site_config/standard/lesnumeriques.com.txt9
-rw-r--r--inc/3rdparty/site_config/standard/letemps.ch.txt3
-rw-r--r--inc/3rdparty/site_config/standard/lifeandculture.fr.txt3
-rw-r--r--inc/3rdparty/site_config/standard/lifehacker.com.txt42
-rw-r--r--inc/3rdparty/site_config/standard/linkedin.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/longform.org.txt3
-rw-r--r--inc/3rdparty/site_config/standard/loopinsight.com.txt9
-rw-r--r--inc/3rdparty/site_config/standard/lostgarden.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/lrb.co.uk.txt8
-rw-r--r--inc/3rdparty/site_config/standard/luminous-landscape.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/m.bbc.co.uk.txt8
-rw-r--r--inc/3rdparty/site_config/standard/m.guardian.co.uk.txt12
-rw-r--r--inc/3rdparty/site_config/standard/mac4ever.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/macdrifter.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/macformat.techradar.com.txt9
-rw-r--r--inc/3rdparty/site_config/standard/macgeneration.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/macmagazine.com.br.txt21
-rw-r--r--inc/3rdparty/site_config/standard/macrumors.com.txt10
-rw-r--r--inc/3rdparty/site_config/standard/macstories.net.txt8
-rw-r--r--inc/3rdparty/site_config/standard/mactalk.com.au.txt4
-rw-r--r--inc/3rdparty/site_config/standard/mactechnews.de.txt3
-rw-r--r--inc/3rdparty/site_config/standard/macworld.com.txt24
-rw-r--r--inc/3rdparty/site_config/standard/mainichi.jp.txt3
-rw-r--r--inc/3rdparty/site_config/standard/mainpost.de.txt28
-rw-r--r--inc/3rdparty/site_config/standard/makeuseof.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/marco.org.txt8
-rw-r--r--inc/3rdparty/site_config/standard/marksdailyapple.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/martinfowler.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/mashable.com.txt4
-rw-r--r--inc/3rdparty/site_config/standard/mattcutts.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/mbl.is.txt2
-rw-r--r--inc/3rdparty/site_config/standard/medialens.org.txt2
-rw-r--r--inc/3rdparty/site_config/standard/menshealth.com.txt16
-rw-r--r--inc/3rdparty/site_config/standard/mikeash.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/mikeindustries.com.txt9
-rw-r--r--inc/3rdparty/site_config/standard/minnesota.publicradio.org.txt10
-rw-r--r--inc/3rdparty/site_config/standard/minnpost.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/mirrorfootball.co.uk.txt3
-rw-r--r--inc/3rdparty/site_config/standard/mises.org.txt5
-rw-r--r--inc/3rdparty/site_config/standard/mlb.mlb.com.txt14
-rw-r--r--inc/3rdparty/site_config/standard/mlb.sbnation.com.txt14
-rw-r--r--inc/3rdparty/site_config/standard/mlssoccer.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/mmo-champion.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/mnn.com.txt11
-rw-r--r--inc/3rdparty/site_config/standard/mno.hu.txt14
-rw-r--r--inc/3rdparty/site_config/standard/mobile.slate.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/mobileopportunity.blogspot.com.txt11
-rw-r--r--inc/3rdparty/site_config/standard/modernghana.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/money.cnn.com.txt24
-rw-r--r--inc/3rdparty/site_config/standard/monkeyzen.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/moonsault.de.txt13
-rw-r--r--inc/3rdparty/site_config/standard/moreintelligentlife.com.txt7
-rw-r--r--inc/3rdparty/site_config/standard/motherboard.vice.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/mothering.com.txt7
-rw-r--r--inc/3rdparty/site_config/standard/motherjones.com.txt15
-rw-r--r--inc/3rdparty/site_config/standard/motorfull.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/msdn.microsoft.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/msnbc.msn.com.txt21
-rw-r--r--inc/3rdparty/site_config/standard/myfoxboston.com.txt4
-rw-r--r--inc/3rdparty/site_config/standard/myrecipes.com.txt12
-rw-r--r--inc/3rdparty/site_config/standard/narenji.ir.txt2
-rw-r--r--inc/3rdparty/site_config/standard/nasa.gov.txt8
-rw-r--r--inc/3rdparty/site_config/standard/nbweekly.com.txt10
-rw-r--r--inc/3rdparty/site_config/standard/neh.gov.txt17
-rw-r--r--inc/3rdparty/site_config/standard/neomoney.co.txt3
-rw-r--r--inc/3rdparty/site_config/standard/net-security.org.txt7
-rw-r--r--inc/3rdparty/site_config/standard/netmagazine.com.txt16
-rw-r--r--inc/3rdparty/site_config/standard/netzpolitik.org.txt6
-rw-r--r--inc/3rdparty/site_config/standard/newmatilda.com.txt9
-rw-r--r--inc/3rdparty/site_config/standard/news-gazette.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/news.cnet.com.txt12
-rw-r--r--inc/3rdparty/site_config/standard/news.detik.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/news.kanaloco.jp.txt9
-rw-r--r--inc/3rdparty/site_config/standard/news.mynavi.jp.txt11
-rw-r--r--inc/3rdparty/site_config/standard/news.orf.at.txt11
-rw-r--r--inc/3rdparty/site_config/standard/news.rambler.ru.txt9
-rw-r--r--inc/3rdparty/site_config/standard/news.techmeme.com.txt4
-rw-r--r--inc/3rdparty/site_config/standard/news.yahoo.com.txt12
-rw-r--r--inc/3rdparty/site_config/standard/news.ycombinator.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/newsbomb.gr.txt9
-rw-r--r--inc/3rdparty/site_config/standard/newsle.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/newsmill.se.txt12
-rw-r--r--inc/3rdparty/site_config/standard/newsunspun.org.txt10
-rw-r--r--inc/3rdparty/site_config/standard/newyorker.com.txt10
-rw-r--r--inc/3rdparty/site_config/standard/next-gen.biz.txt16
-rw-r--r--inc/3rdparty/site_config/standard/nfl.com.txt11
-rw-r--r--inc/3rdparty/site_config/standard/ngm.nationalgeographic.com.txt7
-rw-r--r--inc/3rdparty/site_config/standard/nhk.or.jp.txt2
-rw-r--r--inc/3rdparty/site_config/standard/nintendoworldreport.com.txt13
-rw-r--r--inc/3rdparty/site_config/standard/nojesguiden.se.txt5
-rw-r--r--inc/3rdparty/site_config/standard/northumberlandview.ca.txt11
-rw-r--r--inc/3rdparty/site_config/standard/nplusonemag.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/npr.org.txt32
-rw-r--r--inc/3rdparty/site_config/standard/nybooks.com.txt13
-rw-r--r--inc/3rdparty/site_config/standard/nymag.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/nyteknik.se.txt8
-rw-r--r--inc/3rdparty/site_config/standard/nytimes.com.txt36
-rw-r--r--inc/3rdparty/site_config/standard/nzz.ch.txt12
-rw-r--r--inc/3rdparty/site_config/standard/observer.com.txt7
-rw-r--r--inc/3rdparty/site_config/standard/off.net.mk.txt7
-rw-r--r--inc/3rdparty/site_config/standard/omaha.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/omiliya.org.txt9
-rw-r--r--inc/3rdparty/site_config/standard/on.net.mk.txt5
-rw-r--r--inc/3rdparty/site_config/standard/online.wsj.com.txt23
-rw-r--r--inc/3rdparty/site_config/standard/onlinewelten.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/onstartups.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/opensource.org.txt2
-rw-r--r--inc/3rdparty/site_config/standard/openthemagazine.com.txt4
-rw-r--r--inc/3rdparty/site_config/standard/openwebx.org.txt4
-rw-r--r--inc/3rdparty/site_config/standard/orf.at.txt11
-rw-r--r--inc/3rdparty/site_config/standard/origo.hu.txt18
-rw-r--r--inc/3rdparty/site_config/standard/pakistantvdekho.com.txt11
-rw-r--r--inc/3rdparty/site_config/standard/pandagon.net.txt5
-rw-r--r--inc/3rdparty/site_config/standard/pandodaily.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/panic.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/parislemon.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/parliament.uk.txt3
-rw-r--r--inc/3rdparty/site_config/standard/pastebin.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/pastepad.fivefilters.org.txt5
-rw-r--r--inc/3rdparty/site_config/standard/pathawks.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/pcast.me.txt2
-rw-r--r--inc/3rdparty/site_config/standard/pcmag.com.txt10
-rw-r--r--inc/3rdparty/site_config/standard/pcworld.com.txt19
-rw-r--r--inc/3rdparty/site_config/standard/penny-arcade.com.txt23
-rw-r--r--inc/3rdparty/site_config/standard/pentaxforums.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/philadelphiaeagles.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/philly.com.txt10
-rw-r--r--inc/3rdparty/site_config/standard/photo.tutsplus.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/php.net.txt6
-rw-r--r--inc/3rdparty/site_config/standard/physicstoday.org.txt7
-rw-r--r--inc/3rdparty/site_config/standard/pitchfork.com.txt16
-rw-r--r--inc/3rdparty/site_config/standard/pittnews.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/pittsburgh.pirates.mlb.com.txt15
-rw-r--r--inc/3rdparty/site_config/standard/pittsburghlive.com.txt7
-rw-r--r--inc/3rdparty/site_config/standard/pittsburghmagazine.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/pittsburghpanthers.com.txt4
-rw-r--r--inc/3rdparty/site_config/standard/pittscriptblog.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/playboy.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/plus.google.com.txt17
-rw-r--r--inc/3rdparty/site_config/standard/plzkthxbai.com.txt4
-rw-r--r--inc/3rdparty/site_config/standard/pogue.blogs.nytimes.com.txt4
-rw-r--r--inc/3rdparty/site_config/standard/politico.com.txt13
-rw-r--r--inc/3rdparty/site_config/standard/politifact.com.txt4
-rw-r--r--inc/3rdparty/site_config/standard/politiken.dk.txt13
-rw-r--r--inc/3rdparty/site_config/standard/popularmechanics.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/positioningmag.com.txt19
-rw-r--r--inc/3rdparty/site_config/standard/post-gazette.com.txt26
-rw-r--r--inc/3rdparty/site_config/standard/posta.com.tr.txt15
-rw-r--r--inc/3rdparty/site_config/standard/prb.org.txt8
-rw-r--r--inc/3rdparty/site_config/standard/prog21.dadgum.com.txt9
-rw-r--r--inc/3rdparty/site_config/standard/prolost.com.txt4
-rw-r--r--inc/3rdparty/site_config/standard/propublica.org.txt11
-rw-r--r--inc/3rdparty/site_config/standard/prosa.dk.txt4
-rw-r--r--inc/3rdparty/site_config/standard/prospectmagazine.co.uk.txt26
-rw-r--r--inc/3rdparty/site_config/standard/psychologytoday.com.txt9
-rw-r--r--inc/3rdparty/site_config/standard/publications.parliament.uk.txt4
-rw-r--r--inc/3rdparty/site_config/standard/purpleplanetmedia.com.txt4
-rw-r--r--inc/3rdparty/site_config/standard/quantumdiaries.org.txt14
-rw-r--r--inc/3rdparty/site_config/standard/queerty.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/quepasa.cl.txt6
-rw-r--r--inc/3rdparty/site_config/standard/quora.com.txt17
-rw-r--r--inc/3rdparty/site_config/standard/radar.oreilly.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/radionz.co.nz.txt3
-rw-r--r--inc/3rdparty/site_config/standard/randsinrepose.com.txt11
-rw-r--r--inc/3rdparty/site_config/standard/readability.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/readwriteweb.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/real.gr.txt3
-rw-r--r--inc/3rdparty/site_config/standard/recipe.com.txt10
-rw-r--r--inc/3rdparty/site_config/standard/red-hot-girls.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/reddit.com.txt16
-rw-r--r--inc/3rdparty/site_config/standard/redmondpie.com.txt13
-rw-r--r--inc/3rdparty/site_config/standard/redtape.msnbc.msn.com.txt20
-rw-r--r--inc/3rdparty/site_config/standard/reflets.info.txt5
-rw-r--r--inc/3rdparty/site_config/standard/renenekuda.cz.txt3
-rw-r--r--inc/3rdparty/site_config/standard/retrieverweekly.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/reuters.com.txt10
-rw-r--r--inc/3rdparty/site_config/standard/revistapiaui.estadao.com.br.txt10
-rw-r--r--inc/3rdparty/site_config/standard/richardmuscat.wordpress.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/ritemail.blogspot.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/rockpapershotgun.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/rodrigo.sharpcube.com.txt7
-rw-r--r--inc/3rdparty/site_config/standard/rogerebert.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/rolfinjapan.nl.txt6
-rw-r--r--inc/3rdparty/site_config/standard/rollingstone.com.txt10
-rw-r--r--inc/3rdparty/site_config/standard/rottentomatoes.com.txt11
-rw-r--r--inc/3rdparty/site_config/standard/roughtype.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/roy.gbiv.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/rpgsite.net.txt4
-rw-r--r--inc/3rdparty/site_config/standard/rubysfera.pl.txt9
-rw-r--r--inc/3rdparty/site_config/standard/ruhlman.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/ruttloff.org.txt3
-rw-r--r--inc/3rdparty/site_config/standard/salon.com.txt11
-rw-r--r--inc/3rdparty/site_config/standard/salzburg.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/saveyourself.ca.txt25
-rw-r--r--inc/3rdparty/site_config/standard/sbnation.com.txt28
-rw-r--r--inc/3rdparty/site_config/standard/schneier.com.txt25
-rw-r--r--inc/3rdparty/site_config/standard/science.orf.at.txt11
-rw-r--r--inc/3rdparty/site_config/standard/scienceblogs.de.txt12
-rw-r--r--inc/3rdparty/site_config/standard/scienceticker.info.txt11
-rw-r--r--inc/3rdparty/site_config/standard/scientificamerican.com.txt25
-rw-r--r--inc/3rdparty/site_config/standard/scotusblog.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/scraplab.net.txt3
-rw-r--r--inc/3rdparty/site_config/standard/scripting.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/sct.temple.edu.txt5
-rw-r--r--inc/3rdparty/site_config/standard/searchengineland.com.txt20
-rw-r--r--inc/3rdparty/site_config/standard/seattletransitblog.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/sebbo.net.txt4
-rw-r--r--inc/3rdparty/site_config/standard/seriouseats.com.txt15
-rw-r--r--inc/3rdparty/site_config/standard/sf.curbed.com.txt7
-rw-r--r--inc/3rdparty/site_config/standard/sf.eater.com.txt7
-rw-r--r--inc/3rdparty/site_config/standard/sfgate.com.txt12
-rw-r--r--inc/3rdparty/site_config/standard/sfweekly.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/shabayek.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/shawnblanc.net.txt11
-rw-r--r--inc/3rdparty/site_config/standard/shifteleven.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/siasat.pk.txt11
-rw-r--r--inc/3rdparty/site_config/standard/simonwillison.net.txt5
-rw-r--r--inc/3rdparty/site_config/standard/singaporeanstocksinvestor.blogspot.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/singularityhub.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/sintagoulis.gr.txt6
-rw-r--r--inc/3rdparty/site_config/standard/slashfilm.com.txt15
-rw-r--r--inc/3rdparty/site_config/standard/slate.com.txt19
-rw-r--r--inc/3rdparty/site_config/standard/slice.seriouseats.com.txt15
-rw-r--r--inc/3rdparty/site_config/standard/slog.thestranger.com.txt4
-rw-r--r--inc/3rdparty/site_config/standard/smartinvestor.de.txt5
-rw-r--r--inc/3rdparty/site_config/standard/sme.sk.txt3
-rw-r--r--inc/3rdparty/site_config/standard/smithsonianmag.com.txt20
-rw-r--r--inc/3rdparty/site_config/standard/smokingapples.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/sourcebooks.com.txt4
-rw-r--r--inc/3rdparty/site_config/standard/spectator.co.uk.txt7
-rw-r--r--inc/3rdparty/site_config/standard/spectrum.ieee.org.txt3
-rw-r--r--inc/3rdparty/site_config/standard/speirs.org.txt2
-rw-r--r--inc/3rdparty/site_config/standard/spiegel.de.txt75
-rw-r--r--inc/3rdparty/site_config/standard/spin.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/splatf.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/splitsider.com.txt4
-rw-r--r--inc/3rdparty/site_config/standard/sport.detik.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/sport.orf.at.txt11
-rw-r--r--inc/3rdparty/site_config/standard/sports.espn.go.com.txt12
-rw-r--r--inc/3rdparty/site_config/standard/sports.yahoo.com.txt9
-rw-r--r--inc/3rdparty/site_config/standard/sportschau.de.txt22
-rw-r--r--inc/3rdparty/site_config/standard/sportsillustrated.cnn.com.txt26
-rw-r--r--inc/3rdparty/site_config/standard/sprengsatz.de.txt5
-rw-r--r--inc/3rdparty/site_config/standard/sqlite.org.txt7
-rw-r--r--inc/3rdparty/site_config/standard/squashed.tumblr.com.txt4
-rw-r--r--inc/3rdparty/site_config/standard/stackoverflow.com.txt14
-rw-r--r--inc/3rdparty/site_config/standard/stalbansreview.co.uk.txt14
-rw-r--r--inc/3rdparty/site_config/standard/standard.co.uk.txt16
-rw-r--r--inc/3rdparty/site_config/standard/staradvertiser.com.txt11
-rw-r--r--inc/3rdparty/site_config/standard/stephenfry.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/stlbeacon.org.txt5
-rw-r--r--inc/3rdparty/site_config/standard/stockholm.etc.se.txt5
-rw-r--r--inc/3rdparty/site_config/standard/streetsblog.net.txt7
-rw-r--r--inc/3rdparty/site_config/standard/stuff.co.nz.txt22
-rw-r--r--inc/3rdparty/site_config/standard/stumbleupon.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/subtraction.com.txt17
-rw-r--r--inc/3rdparty/site_config/standard/sueddeutsche.de.txt18
-rw-r--r--inc/3rdparty/site_config/standard/summify.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/suntimes.com.txt14
-rw-r--r--inc/3rdparty/site_config/standard/svd.se.txt4
-rw-r--r--inc/3rdparty/site_config/standard/sydsvenskan.se.txt11
-rw-r--r--inc/3rdparty/site_config/standard/symmetrymagazine.org.txt12
-rw-r--r--inc/3rdparty/site_config/standard/sz-magazin.sueddeutsche.de.txt15
-rw-r--r--inc/3rdparty/site_config/standard/tagesschau.de.txt23
-rw-r--r--inc/3rdparty/site_config/standard/tampabay.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/taptaptap.com.txt4
-rw-r--r--inc/3rdparty/site_config/standard/tasteofhome.com.txt15
-rw-r--r--inc/3rdparty/site_config/standard/taz.de.txt8
-rw-r--r--inc/3rdparty/site_config/standard/tbray.org.txt5
-rw-r--r--inc/3rdparty/site_config/standard/tcng.org.txt4
-rw-r--r--inc/3rdparty/site_config/standard/tech.fortune.cnn.com.txt4
-rw-r--r--inc/3rdparty/site_config/standard/tech.sina.com.cn.txt11
-rw-r--r--inc/3rdparty/site_config/standard/techcrunch.com.txt18
-rw-r--r--inc/3rdparty/site_config/standard/techdirt.com.txt12
-rw-r--r--inc/3rdparty/site_config/standard/techmeme.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/technicallyjordan.tumblr.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/technologyreview.com.txt16
-rw-r--r--inc/3rdparty/site_config/standard/techpinions.com.txt7
-rw-r--r--inc/3rdparty/site_config/standard/techradar.com.txt12
-rw-r--r--inc/3rdparty/site_config/standard/telegraaf.nl.txt9
-rw-r--r--inc/3rdparty/site_config/standard/telegraph.co.uk.txt10
-rw-r--r--inc/3rdparty/site_config/standard/theappleblog.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/theatlantic.com.txt18
-rw-r--r--inc/3rdparty/site_config/standard/thebostonchannel.com.txt7
-rw-r--r--inc/3rdparty/site_config/standard/thebrowser.com.txt10
-rw-r--r--inc/3rdparty/site_config/standard/thecarton.net.txt10
-rw-r--r--inc/3rdparty/site_config/standard/thedaily.com.txt24
-rw-r--r--inc/3rdparty/site_config/standard/thedailybeast.com.txt7
-rw-r--r--inc/3rdparty/site_config/standard/thedailymash.co.uk.txt14
-rw-r--r--inc/3rdparty/site_config/standard/thefilmexperience.net.txt2
-rw-r--r--inc/3rdparty/site_config/standard/theglobalmail.org.txt41
-rw-r--r--inc/3rdparty/site_config/standard/theglobeandmail.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/theindychannel.com.txt13
-rw-r--r--inc/3rdparty/site_config/standard/themillions.com.txt10
-rw-r--r--inc/3rdparty/site_config/standard/themuseumofinnocence.com.txt7
-rw-r--r--inc/3rdparty/site_config/standard/thenation.com.txt11
-rw-r--r--inc/3rdparty/site_config/standard/thenetworkgarden.blogs.com.txt4
-rw-r--r--inc/3rdparty/site_config/standard/thenextweb.com.txt12
-rw-r--r--inc/3rdparty/site_config/standard/theoaklandpress.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/theonion.com.txt11
-rw-r--r--inc/3rdparty/site_config/standard/thepioneerwoman.com.txt11
-rw-r--r--inc/3rdparty/site_config/standard/theregister.co.uk.txt5
-rw-r--r--inc/3rdparty/site_config/standard/theroot.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/therumpus.net.txt4
-rw-r--r--inc/3rdparty/site_config/standard/thesiasat.com.txt11
-rw-r--r--inc/3rdparty/site_config/standard/thesimpledollar.com.txt4
-rw-r--r--inc/3rdparty/site_config/standard/thespoiler.co.uk.txt3
-rw-r--r--inc/3rdparty/site_config/standard/thespoof.com.txt9
-rw-r--r--inc/3rdparty/site_config/standard/thestranger.com.txt12
-rw-r--r--inc/3rdparty/site_config/standard/thestreet.com.txt25
-rw-r--r--inc/3rdparty/site_config/standard/thethaovanhoa.vn.txt2
-rw-r--r--inc/3rdparty/site_config/standard/theverge.com.txt31
-rw-r--r--inc/3rdparty/site_config/standard/theweek.com.txt4
-rw-r--r--inc/3rdparty/site_config/standard/thinkprogress.org.txt4
-rw-r--r--inc/3rdparty/site_config/standard/thisdaylive.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/thisismynext.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/tidbits.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/time.com.txt14
-rw-r--r--inc/3rdparty/site_config/standard/timeshighereducation.co.uk.txt6
-rw-r--r--inc/3rdparty/site_config/standard/tipb.com.txt9
-rw-r--r--inc/3rdparty/site_config/standard/tnr.com.txt17
-rw-r--r--inc/3rdparty/site_config/standard/tomdispatch.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/tomshardware.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/tomshardware.de.txt12
-rw-r--r--inc/3rdparty/site_config/standard/toolsandtoys.net.txt6
-rw-r--r--inc/3rdparty/site_config/standard/trailer.web-view.net.txt2
-rw-r--r--inc/3rdparty/site_config/standard/traningslara.se.txt8
-rw-r--r--inc/3rdparty/site_config/standard/triblive.com.txt13
-rw-r--r--inc/3rdparty/site_config/standard/truthdig.com.txt10
-rw-r--r--inc/3rdparty/site_config/standard/tthfanfic.org.txt4
-rw-r--r--inc/3rdparty/site_config/standard/tthor.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/tuaw.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/tuckreview.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/tvtropes.org.txt20
-rw-r--r--inc/3rdparty/site_config/standard/twitter.com.txt9
-rw-r--r--inc/3rdparty/site_config/standard/uefa.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/uk.xbox360.ign.com.txt23
-rw-r--r--inc/3rdparty/site_config/standard/uni-watch.com.txt17
-rw-r--r--inc/3rdparty/site_config/standard/urbandictionary.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/usccb.org.txt6
-rw-r--r--inc/3rdparty/site_config/standard/useit.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/ux.artu.tv.txt7
-rw-r--r--inc/3rdparty/site_config/standard/uzivatelsketestovani.cz.txt2
-rw-r--r--inc/3rdparty/site_config/standard/vanityfair.com.txt30
-rw-r--r--inc/3rdparty/site_config/standard/varingen.no.txt5
-rw-r--r--inc/3rdparty/site_config/standard/varsity.co.uk.txt4
-rw-r--r--inc/3rdparty/site_config/standard/vedomosti.ru.txt3
-rw-r--r--inc/3rdparty/site_config/standard/veggbilder.no.txt5
-rw-r--r--inc/3rdparty/site_config/standard/vemedio.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/venturebeat.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/version.php3
-rw-r--r--inc/3rdparty/site_config/standard/version.txt1
-rw-r--r--inc/3rdparty/site_config/standard/version2.dk.txt12
-rw-r--r--inc/3rdparty/site_config/standard/verybestbaking.com.txt7
-rw-r--r--inc/3rdparty/site_config/standard/vg.no.txt3
-rw-r--r--inc/3rdparty/site_config/standard/video.forbes.com.txt9
-rw-r--r--inc/3rdparty/site_config/standard/videogum.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/villagevoice.com.txt9
-rw-r--r--inc/3rdparty/site_config/standard/vimeo.com.txt17
-rw-r--r--inc/3rdparty/site_config/standard/visir.is.txt14
-rw-r--r--inc/3rdparty/site_config/standard/vitispr.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/vivirmexico.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/vnexpress.net.txt8
-rw-r--r--inc/3rdparty/site_config/standard/voices.washingtonpost.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/vworker.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/waffle.wootest.net.txt4
-rw-r--r--inc/3rdparty/site_config/standard/walrusmagazine.com.txt14
-rw-r--r--inc/3rdparty/site_config/standard/warnerbros.fr.txt3
-rw-r--r--inc/3rdparty/site_config/standard/washingtonmonthly.com.txt10
-rw-r--r--inc/3rdparty/site_config/standard/washingtonpost.com.txt21
-rw-r--r--inc/3rdparty/site_config/standard/web-libre.org.txt6
-rw-r--r--inc/3rdparty/site_config/standard/weblog.bignerdranch.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/weblogs.asp.net.txt9
-rw-r--r--inc/3rdparty/site_config/standard/webpaper.nzz.ch.txt8
-rw-r--r--inc/3rdparty/site_config/standard/welt.de.txt22
-rw-r--r--inc/3rdparty/site_config/standard/westhamtillidie.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/what-if.xkcd.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/whatever.scalzi.com.txt7
-rw-r--r--inc/3rdparty/site_config/standard/wheelyric.com.txt11
-rw-r--r--inc/3rdparty/site_config/standard/wiki.guildwars.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/wiki.guildwars2.com.txt8
-rw-r--r--inc/3rdparty/site_config/standard/wikitravel.org.txt14
-rw-r--r--inc/3rdparty/site_config/standard/will-self.com.txt4
-rw-r--r--inc/3rdparty/site_config/standard/williampfaff.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/winfuture.de.txt12
-rw-r--r--inc/3rdparty/site_config/standard/winrumors.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/winsupersite.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/wired.com.txt22
-rw-r--r--inc/3rdparty/site_config/standard/wmnf.org.txt13
-rw-r--r--inc/3rdparty/site_config/standard/wmpoweruser.com.txt4
-rw-r--r--inc/3rdparty/site_config/standard/worldpoultry.net.txt5
-rw-r--r--inc/3rdparty/site_config/standard/worldwidewords.org.txt4
-rw-r--r--inc/3rdparty/site_config/standard/wow.joystiq.com.txt6
-rw-r--r--inc/3rdparty/site_config/standard/www1.folha.uol.com.br.txt15
-rw-r--r--inc/3rdparty/site_config/standard/www3.imperial.ac.uk.txt2
-rw-r--r--inc/3rdparty/site_config/standard/wyborcza.pl.txt11
-rw-r--r--inc/3rdparty/site_config/standard/wyctim.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/wz-newsline.de.txt5
-rw-r--r--inc/3rdparty/site_config/standard/xoeb.us.txt4
-rw-r--r--inc/3rdparty/site_config/standard/yated.com.txt2
-rw-r--r--inc/3rdparty/site_config/standard/yostivanich.com.txt5
-rw-r--r--inc/3rdparty/site_config/standard/youtube.com.txt15
-rw-r--r--inc/3rdparty/site_config/standard/zdnet.com.txt10
-rw-r--r--inc/3rdparty/site_config/standard/zeit.de.txt44
-rw-r--r--inc/3rdparty/site_config/standard/zerodistraction.com.txt4
-rw-r--r--inc/3rdparty/site_config/standard/zerokspot.com.txt3
-rw-r--r--inc/3rdparty/site_config/standard/zingtrain.com.txt3
783 files changed, 7014 insertions, 18 deletions
diff --git a/inc/3rdparty/site_config/README.md b/inc/3rdparty/site_config/README.txt
index 0aff456b..e966ee74 100644
--- a/inc/3rdparty/site_config/README.md
+++ b/inc/3rdparty/site_config/README.txt
@@ -1,6 +1,6 @@
1Full-Text RSS Site Patterns 1Full-Text RSS Site Patterns
2--------------------------- 2---------------------------
3 3
4Site patterns allow you to specify what should be extracted from specific sites. 4Site patterns allow you to specify what should be extracted from specific sites.
5 5
6Please see http://help.fivefilters.org/customer/portal/articles/223153-site-patterns for more information. \ No newline at end of file 6Please see http://help.fivefilters.org/customer/portal/articles/223153-site-patterns for more information. \ No newline at end of file
diff --git a/inc/3rdparty/site_config/custom/dilbert.com.txt b/inc/3rdparty/site_config/custom/dilbert.com.txt
new file mode 100644
index 00000000..6c8d95a2
--- /dev/null
+++ b/inc/3rdparty/site_config/custom/dilbert.com.txt
@@ -0,0 +1,4 @@
1title: //div[contains(@class, 'SB_Title')]//a
2body: //div[contains(@class, 'STR_Content')]
3
4test_url: http://dilbert.com/strips/comic/2013-10-22 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/custom/interviewmagazine.com.txt b/inc/3rdparty/site_config/custom/interviewmagazine.com.txt
new file mode 100644
index 00000000..a9d4f8ca
--- /dev/null
+++ b/inc/3rdparty/site_config/custom/interviewmagazine.com.txt
@@ -0,0 +1,4 @@
1title: //title
2body: //div[contains(@class, 'block')]
3
4test_url: http://www.interviewmagazine.com/film/spike-jonze \ No newline at end of file
diff --git a/inc/3rdparty/site_config/custom/inthepoche.com.txt b/inc/3rdparty/site_config/custom/inthepoche.com.txt
deleted file mode 100644
index ede74b97..00000000
--- a/inc/3rdparty/site_config/custom/inthepoche.com.txt
+++ /dev/null
@@ -1,7 +0,0 @@
1title: //title
2body: //div[@class='post-content']
3
4prune: no
5tidy: no
6
7test_url: http://www.inthepoche.com/?post/poche-hosting \ No newline at end of file
diff --git a/inc/3rdparty/site_config/custom/stackexchange.com.txt b/inc/3rdparty/site_config/custom/stackexchange.com.txt
new file mode 100755
index 00000000..c9d44b1d
--- /dev/null
+++ b/inc/3rdparty/site_config/custom/stackexchange.com.txt
@@ -0,0 +1,4 @@
1title: //title
2body: //div[@id='question']//div[contains(@class,'post-text')] | //div[@id='answers-header']//h2 | //div[contains(@class,'accepted-answer')]//div[contains(@class,'post-text')]
3
4test_url: http://cstheory.stackexchange.com/questions/14811/what-is-the-enlightenment-im-supposed-to-attain-after-studying-finite-automata/14818#14818
diff --git a/inc/3rdparty/site_config/custom/stackoverflow.com.txt b/inc/3rdparty/site_config/custom/stackoverflow.com.txt
new file mode 100755
index 00000000..d2eb984d
--- /dev/null
+++ b/inc/3rdparty/site_config/custom/stackoverflow.com.txt
@@ -0,0 +1,4 @@
1title: //title
2body: //div[@id='question']//div[contains(@class,'post-text')] | //div[@id='answers-header']//h2 | //div[contains(@class,'accepted-answer')]//div[contains(@class,'post-text')]
3
4test_url: http://stackoverflow.com/questions/20302422/calling-a-function-from-a-javascript-object
diff --git a/inc/3rdparty/site_config/index.php b/inc/3rdparty/site_config/index.php
index a3d5f739..a1b767fd 100644
--- a/inc/3rdparty/site_config/index.php
+++ b/inc/3rdparty/site_config/index.php
@@ -1,3 +1,3 @@
1<?php 1<?php
2// this is here to prevent directory listing over the web 2// this is here to prevent directory listing over the web
3?> \ No newline at end of file 3?> \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/24ways.org.txt b/inc/3rdparty/site_config/standard/24ways.org.txt
new file mode 100644
index 00000000..03bd1950
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/24ways.org.txt
@@ -0,0 +1,6 @@
1title: //div[@class='meta']/h2/a
2author: //div[@class='meta']/h2/following-sibling::p/a/text()
3date://div[@class='meta']/h2/strong
4body: //div[@id='article']
5strip: //div[@class='domore']
6test_url: http://24ways.org/2011/composing-the-new-canon \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/37signals.com.txt b/inc/3rdparty/site_config/standard/37signals.com.txt
new file mode 100644
index 00000000..43a10ae5
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/37signals.com.txt
@@ -0,0 +1,6 @@
1title: //div[@class='post_header']//h2/a
2author: //span[@class='author']
3date: //span[@class='date']
4body: //div[@id='Content']
5
6test_url: http://37signals.com/svn/posts/2785-the-end-of-the-it-department \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/3quarksdaily.com.txt b/inc/3rdparty/site_config/standard/3quarksdaily.com.txt
new file mode 100644
index 00000000..c4e7940f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/3quarksdaily.com.txt
@@ -0,0 +1,9 @@
1body: //div[@class='content']
2date: //div[@class='content']/h2
3strip: //div[@class='content']/h2
4title: //div[@class='content']/h3
5
6strip: //div[@id='postmenu']
7strip: //div[@class='trackback']
8tidy: no
9test_url: http://www.3quarksdaily.com/3quarksdaily/2012/01/martin-luther-king-i-have-a-dream.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/3voor12.vpro.nl.txt b/inc/3rdparty/site_config/standard/3voor12.vpro.nl.txt
new file mode 100644
index 00000000..b846b050
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/3voor12.vpro.nl.txt
@@ -0,0 +1,11 @@
1body: //div[@id='main']
2title: //div[@class='intro']/h1
3author: //ul[@class='text-data']/li[@class='author']
4date: //ul[@class='text-data']/li[@class='date']
5convert_double_br_tags: yes
6tidy: no
7
8strip: //div[@class='share']
9strip: //*[@class='zoom']
10strip: //div[@id='disqus_thread']
11test_url: http://3voor12.vpro.nl/nieuws/2012/januari/Ook-website-GroenLinks-woensdag-op-zwart-i-v-m--SOPA.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/43folders.com.txt b/inc/3rdparty/site_config/standard/43folders.com.txt
new file mode 100644
index 00000000..e8073f6f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/43folders.com.txt
@@ -0,0 +1,4 @@
1body: //*[@class = 'content']
2author: //*[@class = 'submitted']/a
3date: substring-after(//*[@class = 'submitted']/text(), '|')
4test_url: http://www.43folders.com/2011/04/22/cranking \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/500px.com.txt b/inc/3rdparty/site_config/standard/500px.com.txt
new file mode 100644
index 00000000..68e6b2d0
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/500px.com.txt
@@ -0,0 +1,27 @@
1# very loose setup for both 500px.com/photo/* and 500px.com/blog/*
2# photo page example: http://500px.com/photo/4181666
3# blog page example: http://500px.com/blog/110
4
5# avoid "no text" error
6tidy:no
7prune:no
8
9# reorganize photo page elements
10#body://div[contains(@class,'container')]
11move_into(body)://div[contains(@id,'thephoto')]
12move_into(body)://div[contains(@id,'description')]
13move_into(body)://div[contains(@id,'tags')]
14move_into(body)://div[contains(@id,'photo-info')]
15
16# clean photo page info
17strip://span[contains(@id,'copyright')]
18strip://*[contains(@id,'store')]
19strip://*[contains(@id,'user-info')]
20strip://*[contains(@id,'photo-stats')]
21strip://*[contains(@id,'voting_controls_container')]
22strip://*[contains(@id,'more-photos')]
23strip://*[contains(@id,'embed-photo')]
24
25# clean blog page side bar
26strip://*[contains(@class,'col d3 clearafter')]
27test_url: http://500px.com/photo/3641041?from=editors \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/512pixels.net.txt b/inc/3rdparty/site_config/standard/512pixels.net.txt
new file mode 100644
index 00000000..e458980f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/512pixels.net.txt
@@ -0,0 +1,2 @@
1title: substring-before(//title, '&mdash;')
2test_url: http://512pixels.net/more-on-linked-lists/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/5by5.tv.txt b/inc/3rdparty/site_config/standard/5by5.tv.txt
new file mode 100644
index 00000000..dce0df4e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/5by5.tv.txt
@@ -0,0 +1,9 @@
1body: //*[@id="episode"]
2prune: no
3tidy: no
4
5autodetect_next_page: no
6strip_id_or_class: player
7
8strip://*[@id="header"]
9test_url: http://5by5.tv/buildanalyze/60 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/944.com.txt b/inc/3rdparty/site_config/standard/944.com.txt
new file mode 100644
index 00000000..84380e79
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/944.com.txt
@@ -0,0 +1,9 @@
1title: //h2[@class='border']
2body: //div[@class='padding']
3
4convert_double_br_tags: yes
5
6strip: //div[@id='social_sharing']
7strip: //div[@class='socialLinks']
8
9test_url: http://www.944.com/articles/mild-obsessions-frock-la-get-to-know-victoria-tik-s-haute-sustainable-fashion-line/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/aachener-nachrichten.de.txt b/inc/3rdparty/site_config/standard/aachener-nachrichten.de.txt
new file mode 100644
index 00000000..379592e0
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/aachener-nachrichten.de.txt
@@ -0,0 +1,10 @@
1title: //meta[@property='og:title']/@content
2body: //*[@class='fliesstext_detail' or @class='detail_fliesstext'] | //img[@itemprop="image" and starts-with(@src, "/sixcms/media.php/")]
3
4strip_id_or_class: socialshareprivacy1
5strip_id_or_class: zvaFacebookButton
6
7tidy: no
8prune: no
9
10test_url: http://www.aachener-nachrichten.de/lokales/aachen-detail-an/2517757 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/aachener-zeitung.de.txt b/inc/3rdparty/site_config/standard/aachener-zeitung.de.txt
new file mode 100644
index 00000000..4d76fac7
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/aachener-zeitung.de.txt
@@ -0,0 +1,10 @@
1title: //meta[@property='og:title']/@content
2body: //*[@class='fliesstext_detail' or @class='detail_fliesstext'] | //img[@itemprop="image" and starts-with(@src, "/sixcms/media.php/")]
3
4strip_id_or_class: socialshareprivacy1
5strip_id_or_class: zvaFacebookButton
6
7tidy: no
8prune: no
9
10test_url: http://www.aachener-zeitung.de/sixcms/detail.php?template=az_detail&id=2552718 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/abc.es.txt b/inc/3rdparty/site_config/standard/abc.es.txt
new file mode 100644
index 00000000..a99833de
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/abc.es.txt
@@ -0,0 +1,7 @@
1title: //meta[@property='og:title']/@content
2body: //div[@class='datosi' or @class='date' or @class='photo-alt1' or @class='text']
3strip_id_or_class: colB
4
5prune: no
6
7test_url: http://www.abc.es/20120209/tv-series/abci-house-ultima-temporada-201202090936.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/abc.net.au.txt b/inc/3rdparty/site_config/standard/abc.net.au.txt
new file mode 100644
index 00000000..5e6269cb
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/abc.net.au.txt
@@ -0,0 +1,10 @@
1title: //h1
2author: //div[@class="byline"]/a
3date: //span[@class="timestamp"]
4
5strip: //p[@class="topics"]
6strip: //h1
7strip: //div[@class="byline"]
8strip: //p[@class="published"]
9strip: //div[contains(@class,"featured-scroller")]
10test_url: http://www.abc.net.au/news/2011-11-08/crabb-carbon-legislation-abbott-demolition/3652544 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/abcnews.go.com.txt b/inc/3rdparty/site_config/standard/abcnews.go.com.txt
new file mode 100644
index 00000000..c515d3e4
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/abcnews.go.com.txt
@@ -0,0 +1,27 @@
1title: //h1[@class='headline']
2body: //div[@id='storyText']
3# for video entries
4body: //img[@id='ff-img'] | //div[@id='meta']//div[contains(@class, 'overview')]
5author: //div[@class='byline']
6date: //div[@class='date']
7strip: //*[@id='date_partner']
8
9strip: //div[@class='breadcrumb']
10strip: //div[contains(@class,'show_tools')]
11strip: //div[@id='sponsoredByAd']
12strip: //div[contains(@class,'rel_container')]
13strip: //p[a[starts-with(@href, 'http://www.twitter.com')]]
14strip: //p[a[starts-with(@href, 'http://www.facebook.com')]]
15strip: //p[contains(., 'Click here to return to')]
16#strip_id_or_class: media
17strip_id_or_class: mediaplayer
18
19replace_string(<link rel="image_src" href="http): <img id="ff-img" src="http
20
21prune: no
22
23single_page_link: concat(//li[@class='pager']//a/@href, '&singlePage=true')
24
25test_url: http://abcnews.go.com/Politics/newt-gingrich-rocky-rollout-presidential-campaign-recover/story?id=13632744
26# multi-page
27test_url: http://abcnews.go.com/Blotter/family-freed-american-hostage-somalia-seals-obama/story?id=15439544 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/accesstoinsight.org.txt b/inc/3rdparty/site_config/standard/accesstoinsight.org.txt
new file mode 100644
index 00000000..b5d85079
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/accesstoinsight.org.txt
@@ -0,0 +1,9 @@
1title: //div[@id='H_docTitle']
2
3body: //div[@id='H_meta' or @id='H_content' or @id='F_footer']
4
5strip_id_or_class: F_toenail
6
7prune: no
8
9test_url: http://www.accesstoinsight.org/lib/authors/nyanaponika/wheel026.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/acidcow.com.txt b/inc/3rdparty/site_config/standard/acidcow.com.txt
new file mode 100644
index 00000000..60ede6a6
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/acidcow.com.txt
@@ -0,0 +1,3 @@
1body: //div[starts-with(@id, 'news-id-')]
2
3test_url: http://acidcow.com/fun/20933-acid-picdump-83-pics.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/acquia.com.txt b/inc/3rdparty/site_config/standard/acquia.com.txt
new file mode 100644
index 00000000..5ddf542e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/acquia.com.txt
@@ -0,0 +1,9 @@
1title://h1[@class="title"]
2author://div[@class="submitted"]/span/a
3date://div[@class="submitted"]/span
4body://div[@class="content-wrapper"]
5
6strip://div[@id="skip-link"]
7strip://div[@id="region-content-3-3"]
8strip://div[@id="section-footer"]
9test_url: https://www.acquia.com/blog/drupals-long-warmth-toward-third-party-code \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/acroswing.fr.txt b/inc/3rdparty/site_config/standard/acroswing.fr.txt
new file mode 100644
index 00000000..57d86d2f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/acroswing.fr.txt
@@ -0,0 +1,5 @@
1tidy:no
2date: //time[@class='updated']
3dissolve: //ul[@class='video-gallery']/li
4dissolve: //ul[@class='video-gallery']
5test_url: http://www.acroswing.fr/actualites/competition_rock/selectif_bellegarde_sur_valserine__2012-02-26.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/aht.seriouseats.com.txt b/inc/3rdparty/site_config/standard/aht.seriouseats.com.txt
new file mode 100644
index 00000000..408e9099
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/aht.seriouseats.com.txt
@@ -0,0 +1,15 @@
1body: //div[@id='content']
2
3# clean up recipe pages
4strip: //h2[@class='fn'] | //h2[@class='double-lined'] | //h3 | //div[@id='threeColumn2'] | //div[@id='threeColumn3']
5
6#recipe pages
7strip_id_or_class: "recipe-feedback"
8strip_id_or_class: "comments"
9strip_id_or_class: "procedure-number"
10strip_id_or_class: "more-with-author"
11
12#slice
13strip_id_or_class: "inner"
14
15test_url: http://aht.seriouseats.com/archives/2009/12/the-burger-lab-salting-ground-beef.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/alex.mullr.net.txt b/inc/3rdparty/site_config/standard/alex.mullr.net.txt
new file mode 100644
index 00000000..c5f15370
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/alex.mullr.net.txt
@@ -0,0 +1,2 @@
1body: //div[@class="entry"]
2test_url: http://alex.mullr.net/blog/2011/05/on-spotify/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/alistapart.com.txt b/inc/3rdparty/site_config/standard/alistapart.com.txt
new file mode 100644
index 00000000..090f7eb1
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/alistapart.com.txt
@@ -0,0 +1,12 @@
1title: //h1[@class='title']
2author: //h3[@class='byline']/a
3date: //div[@class='ishinfo']
4
5body: //*[@id='articletext']
6strip_id_or_class: 'ishinfo'
7strip_id_or_class: 'metastuff'
8strip_id_or_class: 'learnmore'
9strip_id_or_class: 'discuss'
10
11prune: no
12test_url: http://www.alistapart.com/articles/organizing-mobile/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/aljazeera.com.txt b/inc/3rdparty/site_config/standard/aljazeera.com.txt
new file mode 100644
index 00000000..4f0148f4
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/aljazeera.com.txt
@@ -0,0 +1,8 @@
1title: //span[@id='DetailedTitle']
2body: //td[@id='tdTextContent']
3strip_id_or_class: Skyscrapper_Body
4date: //span[@id='ctl00_cphBody_lblDate']
5author: //div[@id="dvAuthorInfo"]//a/text()
6strip: //table[ tbody/tr/td/object ]
7prune: no
8test_url: http://www.aljazeera.com/indepth/opinion/2012/01/2012114121925380575.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/allrecipes.com.txt b/inc/3rdparty/site_config/standard/allrecipes.com.txt
new file mode 100644
index 00000000..e9767bda
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/allrecipes.com.txt
@@ -0,0 +1,14 @@
1title: //h1[@id='itemTitle']
2body: //img[@id="ctl00_CenterColumnPlaceHolder_recipe_photoStuff_imgPhoto"] | //div[@id='ctl00_CenterColumnPlaceHolder_recipe_divSubmitter'] | //div[contains(@class, 'recipe-details-content')]
3strip: //div[@class='top-left' or @class='top-right' or @class='bot-left' or @class='bot-right']
4strip: //div[contains(@class, 'rightcoltoolsdiv')]
5strip: //div[contains(@class, 'servings-form')]
6strip: //p[@class='nutritional-information']
7strip: //a[contains(@class, 'nutritional-information') or contains(@class, 'nutritionanchor')]
8strip: //div[@id='nutri-info']/div[contains(@class, 'title')]
9strip: //img[@id='ctl00_CenterColumnPlaceHolder_recipe_imgSubmitter']
10strip_id_or_class: eshaAttribute
11strip_id_or_class: eshaParagraph
12prune: no
13
14test_url: http://allrecipes.com/Recipe/Taco-Pie/Detail.aspx?src=rotd \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/allthingsd.com.txt b/inc/3rdparty/site_config/standard/allthingsd.com.txt
new file mode 100644
index 00000000..cd52498f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/allthingsd.com.txt
@@ -0,0 +1,10 @@
1title://div[@class="article-title"]/h1[@class="title"]
2date: //p[@class="article-date"]
3body://*[@class="article-body article-text"]
4# Trim out related posts at bottom of article
5strip://blockquote[@class="memo"]
6
7# Yup, no idea why author won't work...
8author://div[@class="page-header article-header clearfix"]/p[@class="title"]
9# [Marco:] Author won't work here because the page defines the "home" link under the author's name as rel="author", which always gets priority if the page has defined it.
10test_url: http://allthingsd.com/20120513/exclusive-yahoos-thompson-out-levinsohn-in-board-settlement-with-loeb-nears-completion/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/allyou.com.txt b/inc/3rdparty/site_config/standard/allyou.com.txt
new file mode 100644
index 00000000..3c26c682
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/allyou.com.txt
@@ -0,0 +1,8 @@
1title: //div[@id='pageHdr']//h1
2body: //div[@id='pageHdr']/*[@class='dek'] | //div[@id='printArticle' or @id='slideShowPrint']
3strip: //div[contains(@class, 'infoBox') or @id='infoBox']
4single_page_link: //li[@id='print']/a
5
6prune: no
7
8test_url: http://www.allyou.com/budget-home/money-shopping/freebies-online-00400000066392/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/alphabeta.argaam.com.txt b/inc/3rdparty/site_config/standard/alphabeta.argaam.com.txt
new file mode 100644
index 00000000..f5865f89
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/alphabeta.argaam.com.txt
@@ -0,0 +1,11 @@
1body: //div[@class = 'entry']
2date: substring-after(//p[@class="date"],'بتاريخ ')
3strip_id_or_class: date
4strip_id_or_class: follow-single
5strip_id_or_class: ratingblock
6strip_id_or_class: newRatingHolder
7strip_id_or_class: postmetadata
8strip_id_or_class: addthis_toolbox
9strip_id_or_class: addthis_default_style
10strip_id_or_class: size-full
11test_url: http://alphabeta.argaam.com/?p=35657 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/alriyadh.com.txt b/inc/3rdparty/site_config/standard/alriyadh.com.txt
new file mode 100644
index 00000000..d0060000
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/alriyadh.com.txt
@@ -0,0 +1,9 @@
1body: //div[@id = "article-view"]
2body: //div[contains(@class, 'article')]//div[contains(@class, 'photo_bg')]
3author: //p[@class = "author"]
4strip: //h1
5strip: //h2
6strip_id_or_class: author
7prune: no
8test_url: http://www.alriyadh.com/2011/10/10/article674357.html
9test_url: http://www.alriyadh.com/net/article/780935 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/alseraj.net.txt b/inc/3rdparty/site_config/standard/alseraj.net.txt
new file mode 100644
index 00000000..107d82d6
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/alseraj.net.txt
@@ -0,0 +1,2 @@
1title: //*[@id='normalfontyellow']
2test_url: http://www.alseraj.net/cgi-bin/pros/av/LeqaTextDisplay.cgi?display&2 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/alt1040.com.txt b/inc/3rdparty/site_config/standard/alt1040.com.txt
new file mode 100644
index 00000000..4fd45719
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/alt1040.com.txt
@@ -0,0 +1,2 @@
1body: //*[(@class = "historia")]
2test_url: http://alt1040.com/2011/09/banda-ancha-en-america-latina-insignificante \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/altfoto.com.txt b/inc/3rdparty/site_config/standard/altfoto.com.txt
new file mode 100644
index 00000000..d974cf4a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/altfoto.com.txt
@@ -0,0 +1,2 @@
1body: //*[(@class = "historia")]
2test_url: http://altfoto.com/2011/09/nikon-presenta-su-nuevo-sistema-nikon-1-y-dos-nuevas-camaras \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/alumni.stanford.edu.txt b/inc/3rdparty/site_config/standard/alumni.stanford.edu.txt
new file mode 100644
index 00000000..7fd47193
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/alumni.stanford.edu.txt
@@ -0,0 +1,10 @@
1title: //h1
2
3author: substring-after(//div[@class="enableBullets"]/preceding-sibling::p[1], "By ")
4
5date: //div/a[contains (@href, "issue")]
6
7move_into(//div[@class="enableBullets"]/p): (//div[@id="content"]//img)[1]
8
9body: //div[@class="enableBullets"]
10test_url: http://alumni.stanford.edu/get/page/magazine/article/?article_id=54819 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/amazon.com.txt b/inc/3rdparty/site_config/standard/amazon.com.txt
new file mode 100644
index 00000000..1a23c4b7
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/amazon.com.txt
@@ -0,0 +1,19 @@
1title: //span[@id = 'btAsinTitle']
2body: (//*[@id='prodImageCell']//a)[1] | //div[@id = 'ps-content'] | //span[@id='actualPriceValue'] | //h2[.='Product Details']/following-sibling::div | //div[@class='h2' and .='Product Description']/following-sibling::div
3#strip_id_or_class: quantityDropdownDiv
4#strip_id_or_class: addToCartSpan
5#strip_id_or_class: oneClickDiv
6strip_id_or_class: nocontent
7strip_id_or_class: masDynamicConten
8strip_id_or_class: dynamic-content
9prune: no
10
11find_string: <span id="actualPriceValue">
12replace_string: <span id="actualPriceValue"><br />Price:
13
14strip_id_or_class: collapsePS
15strip_id_or_class: expandPS
16strip_id_or_class: psPlaceHolde
17strip: //li[contains(., 'update product info') or contains(., 'give feedback on images')]
18
19test_url: http://www.amazon.com/Common-Sense-Forestry-Living-Mother/dp/1931498210/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/americandrink.net.txt b/inc/3rdparty/site_config/standard/americandrink.net.txt
new file mode 100644
index 00000000..dee0e868
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/americandrink.net.txt
@@ -0,0 +1,6 @@
1title: //div[@class='head']/h2/a
2author: //div[@class='head']/a
3date: //div[@class='head']/p[@class='date']/a
4body: //div[@class='copy']
5strip: //p[@class='meta']
6test_url: http://americandrink.net/post/10567188712/free-the-hooch \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/americascup.com.txt b/inc/3rdparty/site_config/standard/americascup.com.txt
new file mode 100644
index 00000000..b1673b6a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/americascup.com.txt
@@ -0,0 +1,10 @@
1title: //div[@class="editorial-content"]/h3
2body: //div[@class="hero-image" or @class="editorial-content"]
3
4strip: //ul[@class="hero-caption"]
5strip_id_or_class: footer
6
7prune: no
8tidy: no
9
10test_url: http://www.americascup.com/en/Latest/News/2012/3/Coutts-and-Peyron-tell-transformative-tale-at-Global-Sports-Forum/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/americastestkitchenfeed.com.txt b/inc/3rdparty/site_config/standard/americastestkitchenfeed.com.txt
new file mode 100644
index 00000000..8bf31ec2
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/americastestkitchenfeed.com.txt
@@ -0,0 +1,5 @@
1title: //h1[@class="post-title"]
2author: //span[@class="author"]/a
3date: //span[@class="date"]
4body: //div[@class="post-content main"]
5test_url: http://www.americastestkitchenfeed.com/gadgets-and-gear/2012/07/chill-out-with-tovolos-king-cube-silicone-ice-cube-tray/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/anandtech.com.txt b/inc/3rdparty/site_config/standard/anandtech.com.txt
new file mode 100644
index 00000000..8067e03c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/anandtech.com.txt
@@ -0,0 +1,11 @@
1author: //a[@class='b'][1]
2date: substring-after(substring-before(//div, 'Posted in'), ' on ')
3strip_image_src: /content/images/globals/
4strip: //h2[. = 'Page 1']/preceding::p
5strip: //h2
6
7prune: no
8
9single_page_link: concat('http://www.anandtech.com/print/', substring-after(//meta[@property='og:url']/@content, '/show/'))
10
11test_url: http://www.anandtech.com/show/5812/eurocom-monster-10-clevos-little-monster/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/andyrutledge.com.txt b/inc/3rdparty/site_config/standard/andyrutledge.com.txt
new file mode 100644
index 00000000..f9ffd3c3
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/andyrutledge.com.txt
@@ -0,0 +1,9 @@
1title: //h2
2author: string('Andy Rutledge')
3date: //div[@class='articledate']
4body: //div[@class='copybody']
5
6strip: //*[@class='space']
7strip: //*[@class='articleFoot']
8
9test_url: http://www.andyrutledge.com/hungry-for-a-better-menu.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/annatravelling.wordpress.com.txt b/inc/3rdparty/site_config/standard/annatravelling.wordpress.com.txt
new file mode 100644
index 00000000..a5c7c08a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/annatravelling.wordpress.com.txt
@@ -0,0 +1,9 @@
1title: //h1[@class="title"]
2
3author: ("Anna Manasova")
4# The author rule above is ignored, unfortunately
5
6date: //p[@class="date"]
7
8body: //div[@class="entry"]
9test_url: http://annatravelling.wordpress.com/2011/11/07/a-day-of-cooking-thai/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/applature.com.txt b/inc/3rdparty/site_config/standard/applature.com.txt
new file mode 100644
index 00000000..a78a6150
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/applature.com.txt
@@ -0,0 +1,18 @@
1title: //h1[contains(@class, 'title')]
2body: //div[@id='mainContent']//div[contains(@class, 'section_content')] | //ul[@class='section_footer']
3date: //div[@class='date']
4
5strip_id_or_class: sharethis
6strip_id_or_class: stats
7strip_id_or_class: apply_form
8strip_id_or_class: job_map
9strip_id_or_class: respond
10strip: //h1//span[@class='type']
11strip: //li[@class='print' or @class='map']
12
13replace_string(<ul class="section_footer" style="display): <ul class="section_footer" style="display-bla
14
15prune: no
16tidy: no
17
18test_url: http://applature.com/mining-jobs/jobs/nickel-west-leinster-analytical-laboratory-technician/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/apple.com.txt b/inc/3rdparty/site_config/standard/apple.com.txt
new file mode 100644
index 00000000..4c483955
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/apple.com.txt
@@ -0,0 +1,7 @@
1strip: //p[@class='sosumi']
2# Aren't they witty?
3
4# I can't work out what causes the stray character before the title.
5title: //h1[@class='title']
6strip: //h1[@class='title']
7test_url: http://www.apple.com/pr/library/2011/02/15appstore.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/appleinsider.com.txt b/inc/3rdparty/site_config/standard/appleinsider.com.txt
new file mode 100644
index 00000000..279fbce1
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/appleinsider.com.txt
@@ -0,0 +1,11 @@
1title: //p[@class='title']
2
3author: //p[text() = 'By ']/a/text()
4strip: //p[text() = 'By ']
5
6body: //td[@class='bod']
7strip_id_or_class: title
8strip_id_or_class: minor
9
10strip_id_or_class: multipagefooter
11test_url: http://www.appleinsider.com/articles/12/02/29/inside_os_x_108_mountain_lion_safari_52_gets_a_simplified_user_interface_with_new_sharing_features.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/appleweblog.com.txt b/inc/3rdparty/site_config/standard/appleweblog.com.txt
new file mode 100644
index 00000000..023c9ccb
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/appleweblog.com.txt
@@ -0,0 +1,2 @@
1body: //*[(@class = "historia")]
2test_url: http://appleweblog.com/2011/09/encontrada-vulnerabilidad-grave-en-skype-para-ios \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/archdaily.com.txt b/inc/3rdparty/site_config/standard/archdaily.com.txt
new file mode 100644
index 00000000..9476cf56
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/archdaily.com.txt
@@ -0,0 +1,5 @@
1date: //div[@class='post_date']
2
3body: //div[@class='post_content']
4
5test_url: http://www.archdaily.com/185325/p10-mixed-use-building-studio-up \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/archiveofourown.org.txt b/inc/3rdparty/site_config/standard/archiveofourown.org.txt
new file mode 100644
index 00000000..50ff632d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/archiveofourown.org.txt
@@ -0,0 +1,18 @@
1# Description: Fix XPaths to include ALL chapters on 'view_full_work' pages.
2# Include: work meta, summary, chapter information, and notes which Instapaper strips out by default.
3# Exclude: header, footer, navigation, comments.
4# Notes: User is a newbie with XPaths.
5
6title: //h2[@class='title']
7author: //h3[@class='byline']
8author: //a[@class='login author']
9
10strip_id_or_class:header
11strip_id_or_class:navigation
12strip_id_or_class:feedback
13strip_id_or_class:kudos
14strip_id_or_class:add_comment_placeholder
15strip_id_or_class:add_comment
16strip_id_or_class:globalize
17strip_id_or_class:footer
18test_url: http://archiveofourown.org/works/229402?view_full_work=true \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/arstechnica.com.txt b/inc/3rdparty/site_config/standard/arstechnica.com.txt
new file mode 100644
index 00000000..49bb3dbc
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/arstechnica.com.txt
@@ -0,0 +1,16 @@
1author: //p[@class='byline']/a
2body: //div[contains(@class,'article-content')]
3strip: //h2[@class='title']
4strip_id_or_class: byline
5prune: no
6
7date: //div[@class='byline']/span[@class='posted']//abbr/@original-title
8date: //div[@class='byline']/span[@class='posted']//abbr
9
10title: //div[@id='story']//h2[@class='title']
11
12strip: //div[@class='pager']
13next_page_link: //nav//a[span/@class='next']/@href
14
15test_url: http://arstechnica.com/tech-policy/news/2012/02/gigabit-internet-for-80-the-unlikely-success-of-californias-sonicnet.ars
16test_url: http://arstechnica.com/apple/2005/04/macosx-10-4/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/articles.boston.com.txt b/inc/3rdparty/site_config/standard/articles.boston.com.txt
new file mode 100644
index 00000000..e54423be
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/articles.boston.com.txt
@@ -0,0 +1,6 @@
1title: //div[@class="mod-bostonarticleheader mod-articleheader"]/h1
2author: substring-after(//div[@class="mod-bostonarticlebyline mod-articlebyline"]/span[3],"By ")
3date: //div[@class="mod-bostonarticlebyline mod-articlebyline"]/span[@class="pubdate"]
4
5strip_id_or_class: mod-pagination
6test_url: http://articles.boston.com/2011-10-23/news/30313691_1_bigfoot-free-speech-monadnock-state-park \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/articles.courant.com.txt b/inc/3rdparty/site_config/standard/articles.courant.com.txt
new file mode 100644
index 00000000..a08f2041
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/articles.courant.com.txt
@@ -0,0 +1,11 @@
1title: //div[@class="mod-courantarticleheader mod-articleheader"]/h1
2date: //div[@class="mod-courantarticlebyline mod-articlebyline"]/span[@class="pubdate"]
3author: //div[@class="mod-courantarticlebyline mod-articlebyline"]/span[3]
4
5strip_id_or_class: mod-article-byline
6strip_id_or_class: mod-article-header
7strip_id_or_class: mod-article-subtitle
8#This leaves some crud after the article, but it's better than nothing.
9#It would be ideal if we could set the body to every element matching //div[contains(@class, "mod-articletext")]/p, but it seems like body only takes the first matching element.
10
11test_url: http://articles.courant.com/2011-10-22/news/hc-green-drugsearch--1022-20111022_1_drugs-in-student-lockers-police-dogs-lockdown \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/asahi.com.txt b/inc/3rdparty/site_config/standard/asahi.com.txt
new file mode 100644
index 00000000..2562edb9
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/asahi.com.txt
@@ -0,0 +1,3 @@
1body: //div[@id='HeadLine']
2strip: //div[@id='utility_right']
3test_url: http://www.asahi.com/culture/update/0520/TKY201105200321.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ascarter.net.txt b/inc/3rdparty/site_config/standard/ascarter.net.txt
new file mode 100644
index 00000000..5236d09e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/ascarter.net.txt
@@ -0,0 +1,5 @@
1title: //h1[@class='article_title']
2author: //span[@class='author']
3date: //h2[@class='dateline']
4body: //div[@class='article_body']
5test_url: http://ascarter.net/2012/02/20/enough-is-enough.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/astronews.com.txt b/inc/3rdparty/site_config/standard/astronews.com.txt
new file mode 100644
index 00000000..33e8153d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/astronews.com.txt
@@ -0,0 +1,7 @@
1title: //span[@class='titel']
2author: //span[@class='metadaten_C']/a//span[@class='metadaten_C']
3date: substring-after(//span[@class='metadaten_C'],'astronews.com')
4strip: //span[@class='bu']
5strip_image_src: '/_images/'
6
7test_url: http://www.astronews.com/news/artikel/2011/10/1110-021.shtml \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/asymco.com.txt b/inc/3rdparty/site_config/standard/asymco.com.txt
new file mode 100644
index 00000000..adad5f18
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/asymco.com.txt
@@ -0,0 +1,8 @@
1# Johannes Stühler
2
3title://h2
4author://span[@class='meta-content']
5date://abbr[@class='date published']/@title
6body://div[@class='entry-content']
7
8test_url: http://www.asymco.com/2011/01/14/is-android-more-efficient-than-ios-at-generating-search-revenue/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/autoblog.com.txt b/inc/3rdparty/site_config/standard/autoblog.com.txt
new file mode 100644
index 00000000..58681bf9
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/autoblog.com.txt
@@ -0,0 +1,6 @@
1prune: no
2body: //div[@class='post-body']
3author: //p[@class='byline']//a
4date: substring-after(//div[@class='about']/p[2], 'Posted')
5strip: //div[@class='body']/div[@class='meta']
6test_url: http://www.autoblog.com/2012/01/17/next-gen-bmw-x5-caught-again/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/avclub.com.txt b/inc/3rdparty/site_config/standard/avclub.com.txt
new file mode 100644
index 00000000..776ee108
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/avclub.com.txt
@@ -0,0 +1,4 @@
1author: //*[@id="article_wrapper"]/div[1]/a[1]
2body: //*[@id="article_wrapper"]/div[2]
3date: //*[@id="article_wrapper"]/div[1]/text()[2]
4test_url: http://www.avclub.com/articles/forgetmenot,70904 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/baltimoresun.com.txt b/inc/3rdparty/site_config/standard/baltimoresun.com.txt
new file mode 100644
index 00000000..32adff8d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/baltimoresun.com.txt
@@ -0,0 +1,12 @@
1single_page_link: //div[@class='toppaginate']//a[@rel='nofollow']
2convert_double_br_tags: yes
3
4title: //div[@class="story"]/h1
5body: //div[@id="story-body-text"]
6author: //span[@class="byline"]
7date: //p[@class="date"]
8
9strip: //*[@class='all']
10strip: //*[@class='articlerail']
11
12test_url: http://www.baltimoresun.com/news/maryland/bs-md-omalley-budget-2-20120116,0,5340585.story \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/basicthinking.de.txt b/inc/3rdparty/site_config/standard/basicthinking.de.txt
new file mode 100644
index 00000000..ab583145
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/basicthinking.de.txt
@@ -0,0 +1,7 @@
1title: //h2
2date: //span[@class='date']
3body: //div[@class='entry']
4
5strip: //div[@class='zusatz']
6
7test_url: http://www.basicthinking.de/blog/2011/12/13/sagt-social-networks-adieu-begrust-private-networks/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bb.is.txt b/inc/3rdparty/site_config/standard/bb.is.txt
new file mode 100644
index 00000000..eaafaf18
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/bb.is.txt
@@ -0,0 +1,13 @@
1author: substring(//h3[@class='headlines']/span[@class='dates'],0,string-length(//h3[@class='headlines']/span[@class='dates'])-20)
2
3
4date: substring((//h3[@class='headlines']/span[@class='dates']),string-length(//h3[@class='headlines']/span[@class='dates'])-18,12)
5
6
7body: //div[@class='first-article-big']
8strip: //table[@class='newsimagecontainer']
9strip: //h3[@class='headlines']
10strip: //iframe[@class='headlines']
11strip: //a[@class='newslink']
12convert_double_br_tags: yes
13test_url: http://bb.is/Pages/82?NewsID=174119 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bbc.co.uk.txt b/inc/3rdparty/site_config/standard/bbc.co.uk.txt
new file mode 100644
index 00000000..9c5c3419
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/bbc.co.uk.txt
@@ -0,0 +1,32 @@
1body: //div[@class="story-body"]
2title: //h1[@class="story-header"]
3date: //span[@class="story-date"]/span[@class='date']
4
5# recipes, e.g. http://www.bbc.co.uk/food/recipes/mymincepies_71055
6body: //div[contains(@class, 'hrecipe')]//div[@id='subcolumn-1']
7
8#strip: //div[@class="story-feature narrow"]
9#strip: //div[@class="story-feature wide"]
10#strip: //div[@class="story-feature dslideshow-enclosure"]
11strip: //div[contains(@class, "story-feature")]
12strip: //span[@class="story-date"]
13#strip: //div[@class="caption body-narrow-width"]
14strip: //div[@class="warning"]//p
15strip: //div[@id='page-bookmark-links-head']
16strip: //object
17strip: //div[contains(@class, "bbccom_advert_placeholder")]
18strip: //div[contains(@class, "embedded-hyper")]
19strip: //div[contains(@class, 'market-data')]
20strip: //a[contains(@class, 'hidden')]
21strip: //div[contains(@class, 'hypertabs')]
22strip: //div[contains(@class, 'related')]
23strip: //form[@id='comment-form']
24strip: //div[contains(@class, 'comment-introduction')]
25
26replace_string(<noscript>): <div>
27replace_string(</noscript>): </div>
28
29prune: no
30
31dissolve: //h2
32test_url: http://www.bbc.co.uk/news/business-15060862 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/benoitmaison.org.txt b/inc/3rdparty/site_config/standard/benoitmaison.org.txt
new file mode 100644
index 00000000..f341d593
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/benoitmaison.org.txt
@@ -0,0 +1,16 @@
1body: //div[@class="entry-content"]
2
3# Remove text &lsquo;Tweet&rsquo;
4strip: //div[@class="entry-content"]/div[last()]
5
6title: //h1[@class="entry-title"]
7
8# If the Instapaper text parser worked with HTML5 tags, we would use:
9date: //time[@class="entry-date"]
10
11# But since it does not, use this more complicated rule:
12date: //div[@class="entry-meta"]/a[@rel="bookmark"]
13
14# Unfortunately, the following rule is overridden by the automatically found author.
15author: ("Benoit Maison")
16test_url: http://www.benoitmaison.org/2011/12/06/why-siri-had-to-start-in-beta/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/berlingske.dk.txt b/inc/3rdparty/site_config/standard/berlingske.dk.txt
new file mode 100644
index 00000000..607c998d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/berlingske.dk.txt
@@ -0,0 +1,3 @@
1title: //h1[@class='headline']
2body: //div[contains(@class, 'article-wrapper')]
3test_url: http://www.berlingske.dk/danmark/festen-er-flyttet-nordpaa \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/betabeat.com.txt b/inc/3rdparty/site_config/standard/betabeat.com.txt
new file mode 100644
index 00000000..7815cf26
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/betabeat.com.txt
@@ -0,0 +1,2 @@
1body: //div[@class="entry-content"]
2test_url: http://www.betabeat.com/2011/07/04/sheryl-sandberg-breaks-through-silicon-valleys-boys-club-sort-of/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/betanews.com.txt b/inc/3rdparty/site_config/standard/betanews.com.txt
new file mode 100644
index 00000000..0eaf085e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/betanews.com.txt
@@ -0,0 +1,7 @@
1# For some articles on this site, like this one, normal
2# processing doesn't seem to pick up the article
3# body; other articles come through fine
4# http://www.betanews.com/joewilcox/article
5# /Google-is-a-marketing-sensation/1309708375
6body: //*[@id="article"]
7test_url: http://www.betanews.com/joewilcox/article/Google-is-a-marketing-sensation/1309708375 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/biography.com.txt b/inc/3rdparty/site_config/standard/biography.com.txt
new file mode 100644
index 00000000..dc071299
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/biography.com.txt
@@ -0,0 +1,8 @@
1title: //div[contains(@class, 'main-content')]//h1
2body: //div[@class='summary-column'] | //div[contains(@class, 'main-content')]
3
4prune: no
5
6single_page_link: //div[@id='biography-action-links']//a[contains(@href, '/print/')]
7
8test_url: http://www.biography.com/print/profile/martin-luther-9389283 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bitelia.com.txt b/inc/3rdparty/site_config/standard/bitelia.com.txt
new file mode 100644
index 00000000..7bffae93
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/bitelia.com.txt
@@ -0,0 +1,2 @@
1body: //*[(@class = "historia")]
2test_url: http://bitelia.com/2011/09/klout-midiendo-influencia \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bjango.com.txt b/inc/3rdparty/site_config/standard/bjango.com.txt
new file mode 100644
index 00000000..6cb04631
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/bjango.com.txt
@@ -0,0 +1,7 @@
1title: //h1[@class='articlehead']
2body: //div[@class='column']
3strip: //h1
4strip: //div[@class='help']
5
6#no author or date/time provided in current layout
7test_url: http://bjango.com/articles/actions/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.arsln.org.txt b/inc/3rdparty/site_config/standard/blog.arsln.org.txt
new file mode 100644
index 00000000..1f43f490
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blog.arsln.org.txt
@@ -0,0 +1,8 @@
1tidy: no
2prune: no
3date: //article/header/h6/time
4title: //article/header/h3
5author: //meta[@name='author']/@content
6body: //article//post
7
8test_url: http://blog.arsln.org/aska-ayip-oluyor/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.asmartbear.com.txt b/inc/3rdparty/site_config/standard/blog.asmartbear.com.txt
new file mode 100644
index 00000000..81c3bda6
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blog.asmartbear.com.txt
@@ -0,0 +1,7 @@
1title: //title
2author: //span[@class='author vcard']/a
3date: //p[@class='headline_meta']/abbr[@class='published']
4body: //div[@class='format_text entry-content']
5
6strip: //div[@id='dd_ajax_float']
7test_url: http://blog.asmartbear.com/how-to-get-quality-freelance-graphics-design-work-on-a-budget.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.cloudflare.com.txt b/inc/3rdparty/site_config/standard/blog.cloudflare.com.txt
new file mode 100644
index 00000000..a4c5aaea
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blog.cloudflare.com.txt
@@ -0,0 +1,9 @@
1# Instapaper gets this back to front and only gets the blog title instead of the article title.
2title: substring-before(//title, '-')
3
4author: //a[ contains(@href, '/people') ]
5
6body: //div[ @class='post' ]
7
8# Date is impossible to retrieve since they use those stupid "fuzzy" dates, inserted through javascript, at posterous.
9test_url: http://blog.cloudflare.com/understanding-analytics-when-is-a-page-view-n \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.fefe.de.txt b/inc/3rdparty/site_config/standard/blog.fefe.de.txt
new file mode 100644
index 00000000..92272b70
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blog.fefe.de.txt
@@ -0,0 +1,5 @@
1title: //h2
2date: //h3
3body: //ul
4
5test_url: http://blog.fefe.de/?ts=b063bf55 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.instagram.com.txt b/inc/3rdparty/site_config/standard/blog.instagram.com.txt
new file mode 100644
index 00000000..3065dd80
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blog.instagram.com.txt
@@ -0,0 +1,11 @@
1# clean Instagram blog a little bit
2
3tidy:no
4prune:no
5
6body://div[contains(@id,'content')]
7
8strip_id_or_class:meta
9strip_id_or_class:notes
10strip_id_or_class:pagination
11test_url: http://blog.instagram.com/post/8757832007/fromwhereistand \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.jaysalvat.com.txt b/inc/3rdparty/site_config/standard/blog.jaysalvat.com.txt
new file mode 100644
index 00000000..4e467fe9
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blog.jaysalvat.com.txt
@@ -0,0 +1,4 @@
1date: //span[contains(@class, 'date-links')]
2author: //span[contains(@class, 'author-links')]
3body: //div[contains(@class, 'entry-content')]
4test_url: http://blog.jaysalvat.com/article/celui-qui-avait-refait-son-site-web \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.kaelig.fr.txt b/inc/3rdparty/site_config/standard/blog.kaelig.fr.txt
new file mode 100644
index 00000000..ac18ad15
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blog.kaelig.fr.txt
@@ -0,0 +1,5 @@
1body: //*[contains(@class, 'post_content')]
2author: string('Kaelig Deloumeau-Prigent')
3title: //h1[@class='title']
4date: //span[@class='date']
5test_url: http://blog.kaelig.fr/post/24877648508/preprocesseurs-css-renoncer-par-choix-ou-par \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.naver.com.txt b/inc/3rdparty/site_config/standard/blog.naver.com.txt
new file mode 100644
index 00000000..702789ad
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blog.naver.com.txt
@@ -0,0 +1,6 @@
1title: //span[@class='pcol1 itemSubjectBoldfont']
2body: //div[@id='postListBody']
3date: //p[@class='date fil5 pcol2']
4single_page_link: /html/frameset/frame[1]/attribute::src
5strip: //div[@class='post-btn']
6test_url: http://blog.naver.com/how2invest/110135068757 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.pchome.net.txt b/inc/3rdparty/site_config/standard/blog.pchome.net.txt
new file mode 100644
index 00000000..3089001e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blog.pchome.net.txt
@@ -0,0 +1,12 @@
1# PCHOME blog, a popular Chinese blog host
2# Oct 15, 2011
3#
4
5title://*[contains(@class,'imp')]/h2
6
7date://*[contains(@class,'imp')]/span
8body://div[contains(@id,'blog_content')]
9
10
11
12test_url: http://blog.pchome.net/article/462502.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.pinboard.in.txt b/inc/3rdparty/site_config/standard/blog.pinboard.in.txt
new file mode 100644
index 00000000..b7afe455
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blog.pinboard.in.txt
@@ -0,0 +1,6 @@
1title: //a[@class="blog_title"]
2date: //p[@class="when"]/a
3body: //div[@class="blog_entry"]
4strip_id_or_class:blog_title
5strip_id_or_class:when
6test_url: http://blog.pinboard.in/2011/11/the_social_graph_is_neither/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.sina.com.cn.txt b/inc/3rdparty/site_config/standard/blog.sina.com.cn.txt
new file mode 100644
index 00000000..acb9ce81
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blog.sina.com.cn.txt
@@ -0,0 +1,26 @@
1# Sina blog, the most popular blog host in China.
2# Its source code is horrible.
3#
4# Issue:
5# Only the first image in the article is displayed.
6# The remaining images are replaced with a 1x1 transparent gif by the sina blog host.
7#
8
9title://*[contains(@class,'titName SG_txta')]
10author://*[contains(@id,'ownernick')]
11date://*[contains(@class,'time SG_txtc')]
12body://div[contains(@class,'articalContent')]
13
14# Remove redundant span elements whose class starts with "MASS"
15# Example <span class="MASSf21674ffeef7"></span>
16strip://span[contains(@class,'MASS')]
17
18# Remove comment
19strip://div[contains(@class,'allComm')]
20
21# Remove hidden text and links
22strip://ins
23
24tidy:no
25convert_double_br_tags:yes
26test_url: http://blog.sina.com.cn/s/blog_5054769e0102dtja.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.spu.edu.txt b/inc/3rdparty/site_config/standard/blog.spu.edu.txt
new file mode 100644
index 00000000..68bd4e39
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blog.spu.edu.txt
@@ -0,0 +1,2 @@
1body://div[@class='post']
2test_url: http://blog.spu.edu/lectio/from-the-frying-pan-into-the-fire/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.wells.ee.txt b/inc/3rdparty/site_config/standard/blog.wells.ee.txt
new file mode 100644
index 00000000..8c8b3838
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blog.wells.ee.txt
@@ -0,0 +1,6 @@
1title: //h2/a[@class="no-link title"]
2author: //h2[@id="blog_owner"]
3date: //time
4strip: //h2/a[@class="no-link title"]
5test_url: http://blog.wells.ee/retina
6test_url: http://blog.wells.ee/skeuomorphism \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blogs.aljazeera.net.txt b/inc/3rdparty/site_config/standard/blogs.aljazeera.net.txt
new file mode 100644
index 00000000..f630127b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blogs.aljazeera.net.txt
@@ -0,0 +1,8 @@
1# 2011-08-23 [carlo@...] Initial version.
2
3author: //div[@id="blogauthordatebox-node"]//a[@title="View user profile."]/text()
4
5# why yes, I do feel a bit dirty
6date: substring-before( substring-after( substring-after( //div[@id="blogauthordatebox-node"]//td[3], "on " ), ", "), " " )
7
8test_url: http://blogs.aljazeera.net/asia/2011/08/22/peoples-hero \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blogs.forbes.com.txt b/inc/3rdparty/site_config/standard/blogs.forbes.com.txt
new file mode 100644
index 00000000..86580d21
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blogs.forbes.com.txt
@@ -0,0 +1,2 @@
1body: //div[@class='entry']
2test_url: http://blogs.forbes.com/adamhartung/2011/04/08/apple-is-better-managed-than-microsoft/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blogs.hbr.org.txt b/inc/3rdparty/site_config/standard/blogs.hbr.org.txt
new file mode 100644
index 00000000..3664d16c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blogs.hbr.org.txt
@@ -0,0 +1,4 @@
1title: //div[@id='pageFeature']/h1
2body: //div[@id='articleBody']
3strip: //div[@class='module wide']
4test_url: http://blogs.hbr.org/bregman/2011/04/the-1-killer-of-meetings-and-w.html?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+harvardbusiness+%28HBR.org%29 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blogs.msdn.com.txt b/inc/3rdparty/site_config/standard/blogs.msdn.com.txt
new file mode 100644
index 00000000..3d3ec020
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blogs.msdn.com.txt
@@ -0,0 +1,6 @@
1title: //h3[@class="post-name"]
2author: //span[@class="user-name"]
3date: //div[@class="post-date"]
4body: //div[@class="post-content user-defined-markup"]
5footnotes: no
6test_url: http://blogs.msdn.com/b/b8/archive/2011/10/04/designing-the-start-screen.aspx \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blogs.reuters.com.txt b/inc/3rdparty/site_config/standard/blogs.reuters.com.txt
new file mode 100644
index 00000000..6907bcb2
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blogs.reuters.com.txt
@@ -0,0 +1,3 @@
1title: //div[@id='single']/h1
2body: //div[@id='postcontent']
3test_url: http://blogs.reuters.com/felix-salmon/2010/07/16/the-value-of-a-strong-brand-apple-edition/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blogs.scientificamerican.com.txt b/inc/3rdparty/site_config/standard/blogs.scientificamerican.com.txt
new file mode 100644
index 00000000..a7d15081
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blogs.scientificamerican.com.txt
@@ -0,0 +1,16 @@
1# meta data
2title://h1[@class = 'postTitle']
3author:substring-before(substring-after(//span[@class = 'byline'],'By '),'|')
4date://span[@class = 'datestamp']
5
6#body content
7body://div[@id = 'singleBlogPost']
8
9#reclaim author info
10move_into(//div[@id = 'singleBlogPost'])://div[@id = 'aboutAuthorDiv']
11strip://p[@class = 'moreLink mobileHide']
12
13#cleanup comments, there might be some open <div> sections
14strip://div[@id = 'comments2']
15strip://h3[a[@href = '#add-comment']]
16test_url: http://blogs.scientificamerican.com/a-blog-around-the-clock/2012/07/10/science-blogs-definition-and-a-history/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blogs.smithsonianmag.com.txt b/inc/3rdparty/site_config/standard/blogs.smithsonianmag.com.txt
new file mode 100644
index 00000000..ba8bc6e7
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blogs.smithsonianmag.com.txt
@@ -0,0 +1,15 @@
1# metadata
2author://div[@class = 'post']/div[@class='meta']/a[1]
3date://div[@id = 'rap']/h2[1]
4body://div[@class = 'post']
5
6# wrapping caption and image
7wrap_in(fieldset)://div[contains(@class, 'wp-caption')]
8
9
10# clean up
11strip://div[@class = 'post']/h3[@class = 'storytitle']
12strip://div[@class = 'post']/div[@class = 'social']
13strip://img[@style = 'display:none;']
14strip://img[@height='0' and @width='0']
15test_url: http://blogs.smithsonianmag.com/adventure/2011/10/tips-for-women-traveling-in-turkey/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blogs.technet.com.txt b/inc/3rdparty/site_config/standard/blogs.technet.com.txt
new file mode 100644
index 00000000..a2909fd1
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blogs.technet.com.txt
@@ -0,0 +1,6 @@
1title: //h3[@class="post-name"]
2author: //span[@class="user-name"]
3date: //div[@class="post-date"]
4body: //div[@class="post-content user-defined-markup"]
5footnotes: no
6test_url: http://blogs.technet.com/b/dlemson/archive/2004/03/03/83304.aspx \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bluetouff.com.txt b/inc/3rdparty/site_config/standard/bluetouff.com.txt
new file mode 100644
index 00000000..fbe7a5c6
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/bluetouff.com.txt
@@ -0,0 +1,4 @@
1body://div[@class='entry']
2date://div[@class='meta']
3strip://a[@class='FlattrButton']
4test_url: http://bluetouff.com/2012/03/02/polemique-google-vie-privee/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/boagworld.com.txt b/inc/3rdparty/site_config/standard/boagworld.com.txt
new file mode 100644
index 00000000..91e48fdb
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/boagworld.com.txt
@@ -0,0 +1,8 @@
1title: //h1[@class="entry-title"][2]
2author: string("Paul Boag")
3date: substring(//span[@class="meta"], 11)
4body: //article
5strip: //h2
6strip: //h1
7strip: //div[@id="callsToAction"]
8test_url: http://boagworld.com/working-in-web-design/dealing-with-the-dickheads/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/boingboing.net.txt b/inc/3rdparty/site_config/standard/boingboing.net.txt
new file mode 100644
index 00000000..9169e8fb
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/boingboing.net.txt
@@ -0,0 +1,11 @@
1# This is far from perfect, but so is BoingBoing's markup
2title: //h2[@class="headline"]
3single_page_link: //h2[@class="headline"]/a
4#date: //p[@class="byline"]
5body: //div[@class="post"]
6
7strip_id_or_class: shareMe
8strip_id_or_class: authorbox
9strip_id_or_class: byline
10
11test_url: http://boingboing.net/2011/10/23/understanding-the-hyperrich-through-the-lens-of-tomorrows-history.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/boldizsar.palotas.eu.txt b/inc/3rdparty/site_config/standard/boldizsar.palotas.eu.txt
new file mode 100644
index 00000000..4cc49043
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/boldizsar.palotas.eu.txt
@@ -0,0 +1,3 @@
1title: //h2[@class='entry-title']
2body: //div[@class='entry-content']
3test_url: http://boldizsar.palotas.eu/blog/?p=1394 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/book.douban.com.txt b/inc/3rdparty/site_config/standard/book.douban.com.txt
new file mode 100644
index 00000000..8b958562
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/book.douban.com.txt
@@ -0,0 +1,6 @@
1body: //span[@property='v:description']
2date: //span[@property='v:dtreviewed']
3author: //span[@property='v:reviewer']
4prune: no
5
6test_url: http://book.douban.com/review/2422662/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bookforum.com.txt b/inc/3rdparty/site_config/standard/bookforum.com.txt
new file mode 100644
index 00000000..331f415e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/bookforum.com.txt
@@ -0,0 +1,19 @@
1#metadata
2title://div[@class = 'Topper']/h1
3author://div[@class = 'Topper']/h3
4date://div[@class = 'Topper']/h6
5body://div[@class = 'Core']
6
7
8
9# clean up
10strip://div[@class = 'Topper']/h1
11strip://div[@class = 'Topper']/h3
12strip://div[@class = 'Topper']/h4
13strip://div[@class = 'Topper']/h5
14strip://div[@class = 'Topper']/h6
15strip://br[@clear = 'all']
16strip://div[@class = 'adCore']
17strip://div[@class = 'BookR']
18strip://div[@class = 'InfoBox']
19test_url: http://bookforum.com/inprint/018_04/8595 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/borderhouseblog.com.txt b/inc/3rdparty/site_config/standard/borderhouseblog.com.txt
new file mode 100644
index 00000000..190738d5
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/borderhouseblog.com.txt
@@ -0,0 +1,7 @@
1title://h1
2author://div[@class="meta"]/span/a
3date://div[@class="date"]
4body://div[@class="content article"]
5strip://div[@class="content article"]/h1
6
7test_url: http://borderhouseblog.com/?p=7832 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bostonglobe.com.txt b/inc/3rdparty/site_config/standard/bostonglobe.com.txt
new file mode 100644
index 00000000..d3e6f43f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/bostonglobe.com.txt
@@ -0,0 +1,16 @@
1# NOTE: If testing this configuration yields bad results, including junk text like "Try BostonGlobe.com today" and "THIS STORY APPEARED IN", please replace the Test URL with a current-day headline link from bostonglobe.com.
2
3title: //div[@class="header"]/h1
4author: substring-after(//div[@class="byline"]/h2[@class="author"],"By ")
5date: //div[@class="byline"]/p[last()]
6body: //div[@class="article-body"]
7
8strip_id_or_class: aside
9strip_id_or_class: promo
10strip_id_or_class: skip-nav
11strip_id_or_class: article-more
12strip_id_or_class: article-bar
13
14# This removes image captions. If the parser starts saving images from bostonglobe.com (currently, it does not), then this directive should be removed.
15strip_id_or_class: figure
16test_url: http://bostonglobe.com/news/nation/2012/03/17/illinois-primary-could-pivotal/PsDzFZqvhEYyXbOcF9FOkO/story.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bostonreview.net.txt b/inc/3rdparty/site_config/standard/bostonreview.net.txt
new file mode 100644
index 00000000..68567012
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/bostonreview.net.txt
@@ -0,0 +1,15 @@
1#basics
2title://h3[@class = 'article_title']
3date://span[@class = 'article_date']
4body://div[@id = 'center_column_article']
5#correct, but author not being picked up in preview
6author://span[@class = 'article_author']
7
8#strips basics from article
9strip_id_or_class:article_title
10strip_id_or_class:article_date
11strip_id_or_class:article_author
12
13#strips pull quotes
14strip_id_or_class:pull_quote
15test_url: http://www.bostonreview.net/BR36.4/megan_pugh_agnes_de_mille_dance.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/boundlessline.org.txt b/inc/3rdparty/site_config/standard/boundlessline.org.txt
new file mode 100644
index 00000000..bfc3f3d1
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/boundlessline.org.txt
@@ -0,0 +1,5 @@
1title: substring-before(//title, '|')
2body: //div[@class="entry"]
3# Remove the author's picture
4strip: //div[@class="entry"]/a[1]
5test_url: http://www.boundlessline.org/2011/06/the-nyts-on-gender-over-the-weekend.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/brainfacts.org.txt b/inc/3rdparty/site_config/standard/brainfacts.org.txt
new file mode 100644
index 00000000..94b0f56d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/brainfacts.org.txt
@@ -0,0 +1,10 @@
1title: //div[@class="standard"]/h1
2author: string("BrainFacts.org")
3date: //div[@class="meta"]/strong
4
5strip: //p[@class="skip"]
6strip: //div[@class="meta"]
7strip: //div[@class="standard"]/h1
8strip: //div[@class="modal"]
9strip: //div[@class="columnRight"]
10test_url: http://brainfacts.org/diseases-disorders/childhood-disorders/articles/2011/autism-the-pervasive-developmental-disorder/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/brandeins.de.txt b/inc/3rdparty/site_config/standard/brandeins.de.txt
new file mode 100644
index 00000000..3753ce67
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/brandeins.de.txt
@@ -0,0 +1,7 @@
1# set body
2body: //div[@id='theContent']
3
4# set title
5title: //div[@id='theContent']/h3
6strip: //div[@id='theContent']/h3
7test_url: http://www.brandeins.de/archiv/magazin/gegessen-wird-immer/artikel/hunger.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/brandingstrategyinsider.com.txt b/inc/3rdparty/site_config/standard/brandingstrategyinsider.com.txt
new file mode 100644
index 00000000..19504844
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/brandingstrategyinsider.com.txt
@@ -0,0 +1,3 @@
1date://h2[@class="date-header"]
2body://div[@class="entry-content"]
3test_url: http://www.brandingstrategyinsider.com/2011/12/top-twelve-branding-keys-for-2012.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/brettterpstra.com.txt b/inc/3rdparty/site_config/standard/brettterpstra.com.txt
new file mode 100644
index 00000000..f6f73778
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/brettterpstra.com.txt
@@ -0,0 +1,5 @@
1body: //div[@class='post full']
2title: //h1
3author: substring-after(//title, '- ')
4date: //span[@class='date']
5test_url: http://brettterpstra.com/byword-for-ios/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/brisbanetimes.com.au.txt b/inc/3rdparty/site_config/standard/brisbanetimes.com.au.txt
new file mode 100644
index 00000000..27e6b70c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/brisbanetimes.com.au.txt
@@ -0,0 +1,2 @@
1body: //div[@class='articleBody']
2test_url: http://www.brisbanetimes.com.au/opinion/blogs/blunt-instrument/losing-our-minds--for-24-hours-20120118-1q682.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/brookings.edu.txt b/inc/3rdparty/site_config/standard/brookings.edu.txt
new file mode 100644
index 00000000..9f4fc4e3
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/brookings.edu.txt
@@ -0,0 +1,13 @@
1title: //div[@id='contentheader']/h1
2author: //p[@class='attribution']/span[@class='author']/*
3# Is there a way to pull multiple authors? My XPath here is just grabbing the first
4
5date: /html/head/meta[@name="date"]/@content
6body: //div[@class='main-content']
7
8strip: //p[@class='byline']
9strip: //div[@class='img-gallery']
10strip: //div[@class='callout']
11strip: //div[@class='add-your-view']
12convert_double_br_tags: yes
13test_url: http://www.brookings.edu/opinions/2011/1018_cyberattack_libya_goldsmith.aspx \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/brooksreview.net.txt b/inc/3rdparty/site_config/standard/brooksreview.net.txt
new file mode 100644
index 00000000..71cafcdb
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/brooksreview.net.txt
@@ -0,0 +1,6 @@
1title: //h1
2body: //div[@class='article']
3body: //div[@class='post']
4date: //*[@id='single']/span
5prune: no
6test_url: http://brooksreview.net/2011/11/readability-agency/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/buquad.com.txt b/inc/3rdparty/site_config/standard/buquad.com.txt
new file mode 100644
index 00000000..a75fa046
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/buquad.com.txt
@@ -0,0 +1,8 @@
1title: //h1
2author: //h2/a
3date: substring-after(//h2, '|')
4strip_id_or_class: 'attachment'
5strip: //h3
6
7body: //div[@class='entry']
8test_url: http://buquad.com/2012/04/09/paul-ryan/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/businessinsider.com.txt b/inc/3rdparty/site_config/standard/businessinsider.com.txt
new file mode 100644
index 00000000..c773db8b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/businessinsider.com.txt
@@ -0,0 +1,12 @@
1title://div[@class="sl-layout-post"]/h1
2body: //div[contains(@class, 'post-content') or contains(@class, 'KonaBody')]
3strip: //div[contains(@class, "post-sidebar")]
4strip: //div[@id='related-links']
5author://div[@class="byline"]/a
6date://div[@class="byline"]/span[@class="date"]
7prune: no
8
9strip://*[contains(@class,'sponsored-text')]
10strip: //div[@id='post_footer']
11
12test_url: http://www.businessinsider.com/microsoft-just-put-one-of-its-hardcore-technical-geniuses-on-xbox-2012-1 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/businessnews.com.tn.txt b/inc/3rdparty/site_config/standard/businessnews.com.tn.txt
new file mode 100644
index 00000000..714cfc90
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/businessnews.com.tn.txt
@@ -0,0 +1,12 @@
1body: //div[@id='article_detail']
2title: //meta[@property='og:title']/@content
3date: //div[@id='date_com_art']//a[@class='date']
4author: //div[@id='article_detail']//font[@class='auteur']
5
6strip_id_or_class: porte_titre_theme
7strip_id_or_class: cont_param
8strip_id_or_class: date_com_art
9
10prune: no
11
12test_url: http://www.businessnews.com.tn/details_article.php?a=31073&t=522&lang=fr&temp=1 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/businessweek.com.txt b/inc/3rdparty/site_config/standard/businessweek.com.txt
new file mode 100644
index 00000000..7b3d063b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/businessweek.com.txt
@@ -0,0 +1,30 @@
1# story has several pages, should be detected
2body: //div[@id='storyBody']
3body: //div[@id='article_body']
4body: //div[@id='story_body']
5
6title://h1[@id='article_headline']
7
8# article author
9author: //p[@class='author']/a
10# story author(s)
11author: substring-after(//p[@class='byline'], 'By ')
12
13# article date
14date: //span[@class='published_date']
15# story date
16date: //span[@class='date']
17
18date: substring-after(//div[contains(@class,'attributor')],'on')
19strip_id_or_class: inset
20strip: //p/span[@class='photoCredit']
21strip: //h1
22
23strip_id_or_class: page_count
24strip_id_or_class: tools
25strip_id_or_class: pagination
26
27single_page_link: //li[@id='stPrint']/a
28
29test_url: http://www.businessweek.com/magazine/buyback-insurance-a-good-deal-for-retailers-07282011.html
30test_url: http://www.businessweek.com/articles/2012-06-06/american-pain-the-largest-u-dot-s-dot-pill-mills-rise-and-fall \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/buzzfeed.com.txt b/inc/3rdparty/site_config/standard/buzzfeed.com.txt
new file mode 100644
index 00000000..6df8bc47
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/buzzfeed.com.txt
@@ -0,0 +1,15 @@
1# Creator: Greg Leuch <greg@...>
2
3# It can be messy.
4tidy:no
5
6# The basic template.
7title: //h1[@data-print='title']
8author: //a[@data-print='author']
9date: //time[@data-print='date']
10body: //div[@data-print='body']
11body: //section[@data-print='body']
12
13# For various things...
14strip: *[@data-print="ignore"]
15test_url: http://www.buzzfeed.com/hgrant/35-reasons-why-dogs-hate-the-holidays \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bygonebureau.com.txt b/inc/3rdparty/site_config/standard/bygonebureau.com.txt
new file mode 100644
index 00000000..0abb6436
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/bygonebureau.com.txt
@@ -0,0 +1,6 @@
1title: //h1
2author: //a[contains(@href, '/author/')]
3date: //*[@class='post-date']
4strip: //*[@class='post-date']
5strip: //h1
6test_url: http://bygonebureau.com/2011/06/20/an-existential-psychoanalysis/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cardboardconnection.com.txt b/inc/3rdparty/site_config/standard/cardboardconnection.com.txt
new file mode 100644
index 00000000..3adc7a35
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/cardboardconnection.com.txt
@@ -0,0 +1,8 @@
1title: //h1[@class='producttabbed-title']
2body: //div[@class='postTabs_divs postTabs_curr_div']
3strip: //div[@class='ratingblock2']
4strip: //p[@id='breadcrumbs']
5strip: //div[@style='display: none']
6
7
8test_url: http://www.cardboardconnection.com/2012-topps-archives-baseball-cards \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/carpeaqua.com.txt b/inc/3rdparty/site_config/standard/carpeaqua.com.txt
new file mode 100644
index 00000000..7ba1ed78
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/carpeaqua.com.txt
@@ -0,0 +1,6 @@
1title: //h2
2body: //div[@class='entry']
3
4prune: no
5# otherwise the footnotes are removed
6test_url: http://carpeaqua.com/2011/03/27/the-intersection-of-power-and-portability/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/catb.org.txt b/inc/3rdparty/site_config/standard/catb.org.txt
new file mode 100644
index 00000000..8908292c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/catb.org.txt
@@ -0,0 +1,7 @@
1body: //div[@class='article']
2strip: //div[@class='revhistory']
3strip: //div[@class='toc']
4tidy: no
5prune: no
6
7test_url: http://catb.org/~esr/faqs/smart-questions.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cbc.ca.txt b/inc/3rdparty/site_config/standard/cbc.ca.txt
new file mode 100644
index 00000000..25305109
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/cbc.ca.txt
@@ -0,0 +1,5 @@
1title: //div[contains(@class, 'headline')]/h1
2author: //h5[contains(@class, 'byline')]
3date: substring-after(//h4[contains(@class, 'posted')], 'Posted: ')
4body: //div[@id="storyboard"]
5test_url: http://www.cbc.ca/news/world/story/2012/01/16/cruise-ship-monday.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cbsnews.com.txt b/inc/3rdparty/site_config/standard/cbsnews.com.txt
new file mode 100644
index 00000000..4ba3da19
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/cbsnews.com.txt
@@ -0,0 +1,14 @@
1date: //meta[@name="published"]/@content
2date: //div[@class="timeLine"]
3title: //div[@id='contentBody']//h1
4author: //dl[@class="storyBlogByline"]/dd/a
5body: //div[@id='storyMediaBox'] | //div[contains(@class, 'storyText')]
6
7# Content Pruning
8strip: //div[@class="scrollingArrows"]
9strip: //div[@class="timeLine"]
10strip: //dl[@class="storyBlogByline"]
11
12prune: no
13
14test_url: http://www.cbsnews.com/8301-201_162-57366361/rescued-americans-dad-proud-of-the-u.s/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/chareidi.org.txt b/inc/3rdparty/site_config/standard/chareidi.org.txt
new file mode 100644
index 00000000..de34a7d8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/chareidi.org.txt
@@ -0,0 +1,2 @@
1title: //h1
2test_url: http://www.chareidi.org/archives5772/tetzaveh/TZV72adraft.htm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/chinamining.org.txt b/inc/3rdparty/site_config/standard/chinamining.org.txt
new file mode 100644
index 00000000..ea0df2a3
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/chinamining.org.txt
@@ -0,0 +1,10 @@
1title: //*[@id='Content']/span[1]
2author: substring-after(substring-before(//*[@id='Content']/span[2], ')'), '(')
3date: substring-before(substring-after(//*[@id='Content']/span[2], 'Updated: '), 'Counter')
4
5strip: //*[@id='Content']/span[1]
6strip: //*[@id='Content']/span[2]
7
8body: //*[@id='Content']
9
10test_url: http://www.chinamining.org/News/2011-07-22/1311319069d48087.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/chomsky.info.txt b/inc/3rdparty/site_config/standard/chomsky.info.txt
new file mode 100644
index 00000000..1d294109
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/chomsky.info.txt
@@ -0,0 +1,5 @@
1title: //div[@class='title']
2author: //div[@class='author']
3prune: no
4
5test_url: http://www.chomsky.info/onchomsky/2002----.htm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/christianitytoday.com.txt b/inc/3rdparty/site_config/standard/christianitytoday.com.txt
new file mode 100644
index 00000000..44288a46
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/christianitytoday.com.txt
@@ -0,0 +1,13 @@
1title://div[@class='title']
2author://div[@class='byline']/b
3date:substring-after(//div[@class='byline'], 'posted')
4body://div[@id='body']
5wrap_in(h2)://span[@class='subhead']
6wrap_in(i)://p[@class='bio']
7wrap_in(i)://p[@class='copyright']
8strip://div[@class='title']
9strip://div[@class='deck']
10strip://div[@class='byline']
11strip://div[@class='copyright']
12strip://br
13test_url: http://www.christianitytoday.com/ct/2012/aprilweb-only/my-god-forsaken-me.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/christianpf.com.txt b/inc/3rdparty/site_config/standard/christianpf.com.txt
new file mode 100644
index 00000000..7f089c55
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/christianpf.com.txt
@@ -0,0 +1,5 @@
1title: //h1[@class="entry-title"]
2author: //*[@class="author vcard fn"]
3date: //*[@class="published"]
4body: //div[(@class = "dd_content_wrap")]
5test_url: http://christianpf.com/do-ibuys-lead-to-more-buying/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/christies.com.txt b/inc/3rdparty/site_config/standard/christies.com.txt
new file mode 100644
index 00000000..5c5889a2
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/christies.com.txt
@@ -0,0 +1,6 @@
1tidy: no
2prune: no
3date: //article//time[@pubdate]
4title: //article/header/h2
5body: //article
6test_url: http://www.christies.com/LotFinder/custom/lot_details_MultiLanguage.aspx?from=salesummary&intObjectID=5556662&sid=e536ed1a-b763-41c4-afcf-c94815ec6eee&LID=3 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/chrome.google.com.txt b/inc/3rdparty/site_config/standard/chrome.google.com.txt
new file mode 100644
index 00000000..d4cc8581
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/chrome.google.com.txt
@@ -0,0 +1,9 @@
1body: //pre[@id='cx-desc-text']
2body: //div[contains(@class, 'overview-tab-right-bar-info')]
3title: //h1[contains(@class, 'detail-dialog-title')]
4tidy: no
5prune: no
6replace_string(<noscript>): <div>
7replace_string(</noscript>): </div>
8
9test_url: https://chrome.google.com/webstore/detail/pnaiinchjaonopoejhknmgjingcnaloc \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/chronicle.com.txt b/inc/3rdparty/site_config/standard/chronicle.com.txt
new file mode 100644
index 00000000..0c6c11ed
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/chronicle.com.txt
@@ -0,0 +1,17 @@
1title: //h1[contains(@class, "entry-title")]
2author: //p[contains(@class, "byline")]
3
4# blog articles (chronicle.com/blogs/*)
5body: //div[contains(@class, "abstract")]
6date: //p[contains(@class, "time")]
7
8# all (?) other articles
9body: //div[@id="article-body"]
10date: //p[contains(@class, "dateline")]
11
12# remove sidebars containing images (I assume this is desired for Instapaper)
13strip: //div[@id="related"]
14strip: //div[contains(@class, "image")]
15
16# note that if you're not a Chronicle subscriber (personally or institutionally), you'll only see the first couple of paragraphs of the article, and Instapaper will display that with some crap above and below. thank goodness for that bookmarklet
17test_url: http://chronicle.com/article/In-a-Land-of-Second-Chances/128375/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cicero.de.txt b/inc/3rdparty/site_config/standard/cicero.de.txt
new file mode 100644
index 00000000..b9f9a12b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/cicero.de.txt
@@ -0,0 +1,33 @@
1# fforst@...
2
3# Use link to print article for single page view
4single_page_link: //a[@class="print"]
5
6# set body
7tidy: no
8body: //div[@class='artikel-content']
9
10# strip title and subtitle since we got it already
11strip: //div[@class='issue']
12strip: //div[@class='artikel-content']/h2
13
14# some authors are known and have a link, others don't
15author: //a[contains(@href, 'autor?')]
16
17#date
18date: //span[@class='article-date']
19
20# Strip author since we got him
21strip_id_or_class: author
22
23#strip captions
24strip_id_or_class: field-name-field-image-credit
25strip_id_or_class: field-name-field-article-image-subtitle
26
27# remove community functions
28strip: //div[@class='meta']
29strip: //div[@id='comments']
30
31# remove "continue on the next page" text
32strip: //p[text()="[SEITE]"]
33test_url: http://www.cicero.de/weltbuehne/ihre-wut-ist-global-krise-jugend-revolten-aufstaende-zelte/43049 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ciperchile.cl.txt b/inc/3rdparty/site_config/standard/ciperchile.cl.txt
new file mode 100644
index 00000000..4d3ac804
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/ciperchile.cl.txt
@@ -0,0 +1,4 @@
1body: //*[(@id = "articlebody")]
2strip_id_or_class: rotulo
3
4test_url: http://ciperchile.cl/2011/04/18/las-operaciones-secretas-que-ordenaba-karadima-para-aniquilar-a-su-competencia/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cjr.org.txt b/inc/3rdparty/site_config/standard/cjr.org.txt
new file mode 100644
index 00000000..a0c3ea5d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/cjr.org.txt
@@ -0,0 +1,6 @@
1body: //p[@class='subhead' or @class='attribution'] | //div[@class='article-body']
2prune: no
3
4single_page_link: //li[@class='print']/a
5
6test_url: http://www.cjr.org/behind_the_news/from_breaking_news_to_baseless.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/clientk.com.txt b/inc/3rdparty/site_config/standard/clientk.com.txt
new file mode 100644
index 00000000..369e88ad
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/clientk.com.txt
@@ -0,0 +1,6 @@
1title://div[@class="entrytitle"]/a
2author:substring-after(substring-before(//div[@class="entrytime"], "|"), "By ")
3date:substring-before(substring-after(//div[@class="entrytime"], "|"), "- Posted")
4body://div[@class="entrybody"]
5strip://div[@class="entrybody"]//p[@class="singleinfo"]
6test_url: http://clientk.com/2011/12/19/the-impact-of-more/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/clubic.com.txt b/inc/3rdparty/site_config/standard/clubic.com.txt
new file mode 100644
index 00000000..b356bbdf
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/clubic.com.txt
@@ -0,0 +1,11 @@
1title: //h1
2author: //a[@class='auteur']
3body: //div[@class='editorial']
4next_page_link: //a[contains(text(),'Page suivante')]
5strip: //a[contains(text(),'Page suivante')]
6strip: //a[contains(text(),'Page précédente')]
7strip_id_or_class: slideshow
8
9prune: no
10
11test_url: http://www.clubic.com/carte-graphique/carte-graphique-amd/radeon-hd-7770/article-478936-1-radeon-hd-7750-7770.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cmswire.com.txt b/inc/3rdparty/site_config/standard/cmswire.com.txt
new file mode 100644
index 00000000..2bc96d2e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/cmswire.com.txt
@@ -0,0 +1,6 @@
1body: //div[contains(@id,'article-body')]
2strip://div[contains(@id,'disqus_count_block')]
3strip://div[contains(@id,'col-left')]
4strip://div[contains(@id,'col-right')]
5
6test_url: http://www.cmswire.com/cms/customer-experience/for-apps-and-appstores-the-singularity-is-approaching-014888.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cnet.com.txt b/inc/3rdparty/site_config/standard/cnet.com.txt
new file mode 100644
index 00000000..74f46ba9
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/cnet.com.txt
@@ -0,0 +1,16 @@
1title: //meta[@property="og:title"]/@content
2body: //div[contains(@class, 'postBody')]
3date: //div[@id='nameAndTime']/time
4author: //div[@id='nameAndTime']/span[@class='author']
5
6strip_id_or_class: image-credit
7strip_id_or_class: noAutolink
8strip_id_or_class: related
9
10prune: no
11tidy: no
12
13# early end
14replace_string(Download today's podcast</a>): Download today's podcast</a></div></body></html>
15
16test_url: http://www.cnet.com/8301-13952_1-57367607-81/the-404-981-where-the-world-is-a-vampire-podcast/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cnn.com.txt b/inc/3rdparty/site_config/standard/cnn.com.txt
new file mode 100644
index 00000000..995e2c79
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/cnn.com.txt
@@ -0,0 +1,19 @@
1title: //div[@class="cnn_storyarea"]/h1
2author: //div[@class="cnnByline"]/strong
3date: substring-after(//div[@class="cnn_strytmstmp"], 'Sun')
4date: substring-after(//div[@class="cnn_strytmstmp"], 'Mon')
5date: substring-after(//div[@class="cnn_strytmstmp"], 'Tue')
6date: substring-after(//div[@class="cnn_strytmstmp"], 'Wed')
7date: substring-after(//div[@class="cnn_strytmstmp"], 'Thu')
8date: substring-after(//div[@class="cnn_strytmstmp"], 'Fri')
9date: substring-after(//div[@class="cnn_strytmstmp"], 'Sat')
10strip: //div[@class="cnn_storyarea"]/h1
11strip_id_or_class: cnnByline
12strip_id_or_class: cnn_strytmstmp
13strip_id_or_class: cnn_strycaptiontxt
14strip_id_or_class: cnn_strybtntoolsbttm
15strip_id_or_class: cnn_strybtntools
16strip_id_or_class: cnn_strybtmcntnt
17strip_id_or_class: cnn_containerwht
18strip_id_or_class: cnn_stryathrtmp
19test_url: http://www.cnn.com/2012/05/13/us/new-york-police-policy/index.html?eref=rss_topstories \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cnnsi.com.txt b/inc/3rdparty/site_config/standard/cnnsi.com.txt
new file mode 100644
index 00000000..6a2c2b80
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/cnnsi.com.txt
@@ -0,0 +1,26 @@
1# main sportsillustrated.com articles
2
3body: //div[@id="cnnStoryContent"]
4title: //div[@id="cnnStoryHeadline"]//h1
5author: //div[@id="cnnSubBanner"]//strong
6date: substring-after(//div[@id="cnnTimeStamp"], "Updated: ")
7date: substring-after(//div[@id="cnnTimeStamp"], "Posted: ")
8
9# kill ugly font buttons
10strip: //div[@id="cnnSCFontButtons"]
11
12# kill misc filler videos & etc
13strip: //div[@class="cnnDivideContent"]
14strip: //*[@class="cnnTMbox"]
15
16# si vault articles
17# -------------
18body: //div[@class="siv_artPara"]
19title: //div[@class="siv_artHeader"]//h1
20author: //div[@class="byline"]
21date: //div[@class="date"]
22
23next_page_link: //div[@id='cnnStoryContinue']/a
24strip_id_or_class: cnnstorypagination
25
26test_url: http://cnnsi.com/2012/writers/peter_king/01/08/wild.card.round/index.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/code.activestate.com.txt b/inc/3rdparty/site_config/standard/code.activestate.com.txt
new file mode 100644
index 00000000..6cf72e23
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/code.activestate.com.txt
@@ -0,0 +1,10 @@
1body: //div[@id='content']
2title: //div[@id='page_header']/h1
3
4strip_id_or_class: 'lineno'
5strip_id_or_class: 'block-toolbar-button'
6strip_id_or_class: 'recipe_score'
7strip: //div[@id='recipe_tools']
8strip: //div[@id='addcomment']
9
10test_url: http://code.activestate.com/recipes/500261-named-tuples/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/code.google.com.txt b/inc/3rdparty/site_config/standard/code.google.com.txt
new file mode 100644
index 00000000..40a16209
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/code.google.com.txt
@@ -0,0 +1,5 @@
1body: //div[@id="gc-pagecontent"]
2strip: //a[@class="backtotop"]
3prune: no
4
5test_url: http://code.google.com/apis/analytics/docs/tracking/gaTrackingEcommerce.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/codinghorror.com.txt b/inc/3rdparty/site_config/standard/codinghorror.com.txt
new file mode 100644
index 00000000..9c95f107
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/codinghorror.com.txt
@@ -0,0 +1,15 @@
1body: //div[@class='blogbody']
2strip: //h3[@class='title']
3date: //h2[@class='date']
4#Should Atwood just be a literal?
5author: substring-before( substring-after(//div[@class='posted'], 'y'), 'V')
6
7# tim.kingman@... 2011-07-26
8# Prune:no to retain all-link ULs that are part of the body content like
9# http://www.codinghorror.com/blog/2011/07/building-a-pc-part-vii-rebooting.html
10# Then explicitly strip the "Posted By" and prev/next links that Prune:yes would have removed.
11
12prune: no
13strip: //div[@class='posted']/following-sibling::*
14strip: //div[@class='posted']
15test_url: http://www.codinghorror.com/blog/2011/07/building-a-pc-part-vii-rebooting.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/collegehumor.com.txt b/inc/3rdparty/site_config/standard/collegehumor.com.txt
new file mode 100644
index 00000000..9d75d641
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/collegehumor.com.txt
@@ -0,0 +1,14 @@
1title: //h1[@class='title']
2author: //p[@class='byline']/a[1]
3date: //*[@class='date']
4
5body: //div[@class='article_body']
6strip: //p[@class='ca_intro']
7strip: //div[@id='action_bar']
8strip: //div[@class='below_content']
9strip: //div[@id='announcement']
10strip: //div[@id='leftovers']
11strip: //div[@class='form']
12strip: //div[@id='email_overlay']
13strip: //a[@class='close']
14test_url: http://www.collegehumor.com/article/6599562/how-it-happened-the-necktie \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/communities-dominate.blogs.com.txt b/inc/3rdparty/site_config/standard/communities-dominate.blogs.com.txt
new file mode 100644
index 00000000..800a907d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/communities-dominate.blogs.com.txt
@@ -0,0 +1,2 @@
1body: //div[@class="entry-body"]
2test_url: http://communities-dominate.blogs.com/brands/2012/03/brutal-truth-about-lumia-cannot-sustain-even-1-to-1-replacement-of-symbian-windows-phone-strategy-do.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/community.service-now.com.txt b/inc/3rdparty/site_config/standard/community.service-now.com.txt
new file mode 100644
index 00000000..10fd2516
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/community.service-now.com.txt
@@ -0,0 +1,8 @@
1body: //div[@id="center"]//div[@class="node"]
2title: //div[@id="center"]//h2
3author: substring-after(//div[@id="center"]//div[@class="node"]//span[@class="submitted"], "—")
4date: substring-before(//div[@id="center"]//div[@class="node"]//span[@class="submitted"], "—")
5strip: //div[@id="center"]//h2[1]
6strip: //span[@class="submitted"][1]
7move_into(//div[@class="node"])://div[@class="breadcrumb"]
8test_url: http://community.service-now.com/blog/lawrenceeng/seasons-greetings-servicenow-team \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/computer.org.txt b/inc/3rdparty/site_config/standard/computer.org.txt
new file mode 100644
index 00000000..00e6fddf
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/computer.org.txt
@@ -0,0 +1,5 @@
1strip_id_or_class:column-3
2strip_id_or_class:portlet-boundary
3strip_id_or_class:banner
4
5test_url: http://www.computer.org/portal/web/buildyourcareer/careerwatch/jt19 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/computerbase.de.txt b/inc/3rdparty/site_config/standard/computerbase.de.txt
new file mode 100644
index 00000000..29199242
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/computerbase.de.txt
@@ -0,0 +1,18 @@
1title://h1
2
3author://div[@id="news-meta"]/a
4
5body://*[@id="main"]/div[1]
6
7strip://*[@id="main"]/div[2]
8strip://*[@id="main"]/div[3]
9strip://*[@id="page"]//footer
10
11#date: didn't manage to parse it
12
13#Images have to be stripped because the page does it with overlay
14strip://img
15
16#figures are not displayed in instapaper...
17strip://figure | //figcaption
18test_url: http://www.computerbase.de/news/2012-06/verbraucherzentrale-mahnt-blizzard-fuer-diablo-3-ab/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/computerworld.com.txt b/inc/3rdparty/site_config/standard/computerworld.com.txt
new file mode 100644
index 00000000..8e1f3e11
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/computerworld.com.txt
@@ -0,0 +1,22 @@
1title: //meta[@name='headline']/@content
2date: //meta[@name='date']/@content
3author: //meta[@name='author']/@content
4body: //div[contains(@class, 'article')]
5body://div[@id="article_body"]
6
7strip_id_or_class: banner
8strip: //noscript
9strip: //div[@style='width:1px;height:130px;float:right;']
10strip: //div[@class='storyby']
11strip_image_src: twitter_icon
12strip_image_src: rss_bug
13
14tidy: no
15prune: no
16
17next_page_link://div[@id="next_page"]/a
18
19single_page_link: concat('http://www.computerworld.com/s/article/print/', substring-after(//link[@rel='canonical']/@href, '/s/article/'))
20
21test_url: http://www.computerworld.com/s/article/9224348/Apple_s_new_OS_X_tightens_screws_on_some_malware
22test_url: http://www.computerworld.com/s/article/9227679/Windows_8_Release_Preview_Updated_but_still_uneasy \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/computerworld.dk.txt b/inc/3rdparty/site_config/standard/computerworld.dk.txt
new file mode 100644
index 00000000..a83f366f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/computerworld.dk.txt
@@ -0,0 +1,5 @@
1strip: //div[contains(@class, 'articleAdtechAd')]
2title: //div[@id='article']/h1
3title: //div[contains(@class, 'article')]/h1
4body: //div[@id='articleText']
5test_url: http://www.computerworld.dk/art/56748/test-din-viden-med-computerworlds-store-sommerquiz?a=fp_1&i=0 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/contemporist.com.txt b/inc/3rdparty/site_config/standard/contemporist.com.txt
new file mode 100644
index 00000000..d2b289a3
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/contemporist.com.txt
@@ -0,0 +1,9 @@
1# get author from string like "Posted by <author> on <date>"
2author: substring-before(substring-after(//div[@class='post']/p[@class='post-meta'], 'by'), 'on')
3
4# get date from string like "Posted by <author> on <date>"
5date: substring-after(//div[@class='post']/p[@class='post-meta'], 'on')
6
7# this keeps thumbnail images
8prune: no
9test_url: http://www.contemporist.com/2011/11/02/landing-200-lamp-by-kim-hyunjoo \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/conversaciones.nokia.com.txt b/inc/3rdparty/site_config/standard/conversaciones.nokia.com.txt
new file mode 100644
index 00000000..9bad2c84
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/conversaciones.nokia.com.txt
@@ -0,0 +1,7 @@
1title: //div[@class='article_header']/h1
2body: //div[@class='article_header']/p | //div[@class='article_body']
3strip_id_or_class: share_this
4strip_id_or_class: sociable
5prune: no
6
7test_url: http://conversaciones.nokia.com/2011/10/07/cinco-atajos-en-el-nokia-n8/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/core77.com.txt b/inc/3rdparty/site_config/standard/core77.com.txt
new file mode 100644
index 00000000..a24374d8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/core77.com.txt
@@ -0,0 +1,7 @@
1body: //div[@id="permalink"]/div[@class="post"]
2
3strip: //div[@id='backArrow']
4strip: //div[@id='fwdArrow']
5strip: //div[@class="post-title"]
6strip: //div[@class="sharing"]
7test_url: http://www.core77.com/blog/columns/why_design_education_must_change_17993.asp \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/counterpunch.org.txt b/inc/3rdparty/site_config/standard/counterpunch.org.txt
new file mode 100644
index 00000000..c9e92287
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/counterpunch.org.txt
@@ -0,0 +1,6 @@
1title: //div[@class='main']//h1[contains(@class, 'article-title')]
2author: //div[@class='mainauthorstyle']
3body: //div[@class='main']//div[@class='main-text']
4strip: //td[@width='140']
5
6test_url: http://www.counterpunch.org/johnstone05172011.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/crazybutable.com.txt b/inc/3rdparty/site_config/standard/crazybutable.com.txt
new file mode 100644
index 00000000..d25cd05d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/crazybutable.com.txt
@@ -0,0 +1,3 @@
1title://h2
2body://div[contains(@class, 'entrytext')]
3test_url: http://www.crazybutable.com/weblog/archives/2010/07/01/house-ideas-that-worked/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/crimemagazine.com.txt b/inc/3rdparty/site_config/standard/crimemagazine.com.txt
new file mode 100644
index 00000000..9cf0bccc
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/crimemagazine.com.txt
@@ -0,0 +1,2 @@
1autodetect_next_page: no
2test_url: http://www.crimemagazine.com/son-sam \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/crimethinc.com.txt b/inc/3rdparty/site_config/standard/crimethinc.com.txt
new file mode 100644
index 00000000..74bc6db9
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/crimethinc.com.txt
@@ -0,0 +1,3 @@
1body: //div[@class="readingtext"]
2title: substring-after(substring-after(//title, ':'), ':')
3test_url: http://www.crimethinc.com/texts/recentfeatures/nightmares.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/crn.de.txt b/inc/3rdparty/site_config/standard/crn.de.txt
new file mode 100644
index 00000000..7fa950af
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/crn.de.txt
@@ -0,0 +1,3 @@
1author: //p[contains(@class,'author')]/a
2date: //div[contains(@class,'date')]
3test_url: http://www.crn.de/netzwerke-tk/artikel-93103.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/csmonitor.com.txt b/inc/3rdparty/site_config/standard/csmonitor.com.txt
new file mode 100644
index 00000000..d4dbc5c8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/csmonitor.com.txt
@@ -0,0 +1,18 @@
1title: //h1[contains(@class, 'head')]
2
3# standard page
4body: //div[@id='mainColumn']//div[contains(@class, 'list-article-full')]
5# print page
6body: //div[@id='mainColumn']
7
8author: //a[contains(@class, 'ui-author')]
9
10single_page_link: //div[@class='storyToolbar']//a[contains(@href, '/print/')]
11
12strip_id_or_class: storyToolbar
13strip_id_or_class: promotion-tag
14
15tidy: no
16prune: no
17
18test_url: http://www.csmonitor.com/World/Middle-East/2011/1108/Imminent-Iran-nuclear-threat-A-timeline-of-warnings-since-1979/Earliest-warnings-1979-84 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/csnbayarea.com.txt b/inc/3rdparty/site_config/standard/csnbayarea.com.txt
new file mode 100644
index 00000000..131a923b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/csnbayarea.com.txt
@@ -0,0 +1,7 @@
1title: //div[@id='csn_blogST_headline']/h1
2
3body: //div[@id='csn_blogST_main']
4strip_id_or_class: ipfootnotes
5strip: //div[@id='csn_blogST_main']/p[1]/img
6strip: //div[@id='csn_blogST_sidebar']
7test_url: http://www.csnbayarea.com/blog/giants-talk/post/-?blog%2Fgiants-talk%2Fpost%2F-=&blockID=578902&feedID=5987 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/csnphilly.com.txt b/inc/3rdparty/site_config/standard/csnphilly.com.txt
new file mode 100644
index 00000000..0df72c32
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/csnphilly.com.txt
@@ -0,0 +1,22 @@
1# author's name is not isolated as a tag.... ugh
2convert_double_br_tags: yes
3body: //div[@id='csn_blogST_main']
4
5#junk above and around the article
6strip: /html/body/div[4]/div[3]/div/div/div/section/div/div/div/div/div/div
7strip: /html/body/div[4]/header
8strip_id_or_class: article-right-sidebar
9strip_id_or_class: rsn-gigya-sharebar-container
10strip_id_or_class: article-bottom
11strip_id_or_class: hider
12strip_id_or_class: footer
13strip_id_or_class: masthead
14strip_id_or_class: block-menu-menu-rsn-login-or-register
15strip_id_or_class: block-menu-menu-header-links
16strip_id_or_class: block-rsn-follow-bar-follow-bar
17strip_id_or_class: block-rsn-weather-rsn-weather-scoreboard
18strip_id_or_class: logo
19strip_id_or_class: element-invisible
20strip_id_or_class: site-name
21strip: //div[contains(@style, 'none')]
22test_url: http://www.csnphilly.com/eagles/can-stoutland-save-danny-watkins-career \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cucharasonica.com.txt b/inc/3rdparty/site_config/standard/cucharasonica.com.txt
new file mode 100644
index 00000000..e691fe83
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/cucharasonica.com.txt
@@ -0,0 +1,2 @@
1body: //*[(@class = "historia")]
2test_url: http://cucharasonica.com/2011/09/queen-busca-candidatos-para-su-propia-banda-tributo \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/da.feedsportal.com.txt b/inc/3rdparty/site_config/standard/da.feedsportal.com.txt
new file mode 100644
index 00000000..4a00ef44
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/da.feedsportal.com.txt
@@ -0,0 +1,5 @@
1single_page_link: //a
2tidy: no
3prune: no
4
5test_url: http://da.feedsportal.com/c/585/f/413794/s/17037b5a/l/0L0Stelegraaf0Bnl0Cbinnenland0C10A2757860C0I0IKlacht0Itegen0Idr0B0IFrank0Iniet0I0Eontvankelijk0I0I0Bhtml0Dcid0Frss/ia1.htm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dailydot.com.txt b/inc/3rdparty/site_config/standard/dailydot.com.txt
new file mode 100644
index 00000000..61013993
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/dailydot.com.txt
@@ -0,0 +1,4 @@
1tidy: no
2body: //article
3
4test_url: http://www.dailydot.com/entertainment/tumblr-christopher-price-topherchris/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dailykos.com.txt b/inc/3rdparty/site_config/standard/dailykos.com.txt
new file mode 100644
index 00000000..124675cb
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/dailykos.com.txt
@@ -0,0 +1,10 @@
1body: //div[@id='article-1']//div[contains(@class, 'article-body')]
2title: //div[@class='meta']//a[@id='titleHref']
3date: //div[@class='meta']//p[@class='date']
4
5strip_id_or_class: invisible
6strip_id_or_class: divider-doodle
7
8prune: no
9
10test_url: http://www.dailykos.com/story/2012/01/26/1058790/-Newt-Gingrichs-campaign-admits-he-lied-during-debate-about-ABC-News-interview-with-his-ex-wife \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dailymail.co.uk.txt b/inc/3rdparty/site_config/standard/dailymail.co.uk.txt
new file mode 100644
index 00000000..c83dbdb0
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/dailymail.co.uk.txt
@@ -0,0 +1,12 @@
1body: //div[@id='js-article-text']
2strip: //div[@class='explore-links']
3strip: //div[@id='js-article-text']/br[position()=1]
4strip_id_or_class: print-or-mail-links
5strip_id_or_class: shareArticles
6strip_id_or_class: googleAds
7strip_id_or_class: digg-button
8strip_id_or_class: article-icon-links-container
9strip_id_or_class: clickToEnlarge
10tidy: no
11
12test_url: http://www.dailymail.co.uk/news/article-1375423/Royal-wedding-Texan-billionaire-Joe-Albritton-invited-Prince-Charles.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dansdata.com.txt b/inc/3rdparty/site_config/standard/dansdata.com.txt
new file mode 100644
index 00000000..96a2bc41
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/dansdata.com.txt
@@ -0,0 +1,5 @@
1autodetect_next_page: no
2tidy: no
3prune: no
4body: //div[@class='NoOverflow']
5test_url: http://www.dansdata.com/gz129.htm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/daringfireball.net.txt b/inc/3rdparty/site_config/standard/daringfireball.net.txt
new file mode 100644
index 00000000..dca8ade7
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/daringfireball.net.txt
@@ -0,0 +1,7 @@
1title: //div[@class="article"]/h1
2author: //div[@id="Sidebar"]/p/strong
3date: //h6[@class="dateline"]
4body: //div[@class="article"]
5strip: //h6[@class="dateline"]
6strip: //div[@class="article"]/h1
7test_url: http://daringfireball.net/2011/10/apps_are_the_new_channels \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/datanami.com.txt b/inc/3rdparty/site_config/standard/datanami.com.txt
new file mode 100644
index 00000000..3534002a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/datanami.com.txt
@@ -0,0 +1,4 @@
1body: //div[@id="article"]
2date: //p[@class="date"]
3author: //p[@class="byline"]
4test_url: http://www.datanami.com/datanami/2011-12-07/new_path_for_sap:_in_memory_computing,_predictive_analysis_converge.html?featured=top \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dcurt.is.txt b/inc/3rdparty/site_config/standard/dcurt.is.txt
new file mode 100644
index 00000000..7d11c6e1
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/dcurt.is.txt
@@ -0,0 +1,8 @@
1title: (//article//h2)[1]
2body: //article[contains(@class, 'post')]
3date: //time[@id='top_time']/@datetime
4
5prune: no
6tidy: no
7
8test_url: http://dcurt.is/predictions-txt \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/delong.typepad.com.txt b/inc/3rdparty/site_config/standard/delong.typepad.com.txt
new file mode 100644
index 00000000..84fd4f79
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/delong.typepad.com.txt
@@ -0,0 +1,4 @@
1strip_id_or_class: banner
2strip_id_or_class: gamma
3strip_id_or_class: module-list
4test_url: http://delong.typepad.com/sdj/2011/02/in-which-suresh-naidu-visits-the-new-jerusalem.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/derstandard.at.txt b/inc/3rdparty/site_config/standard/derstandard.at.txt
new file mode 100644
index 00000000..48722ebd
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/derstandard.at.txt
@@ -0,0 +1,13 @@
1title: //div[@id='artikelHeader']/h1
2author: //span[@class='author']
3date: //span[@class='date']
4body: //div[@class='copytext']
5strip: //ul[@class='lookupLinksArtikel']
6
7strip: //div[@id='pageTop']
8strip: //div[@id='toolbar']
9strip: //div[@id='articleTools']
10strip: //div[@id='weiterlesen']
11strip: //div[@id='communityCanvas']
12
13test_url: http://derstandard.at/1318726018343/Breitband-LTE-Was-bringt-die-neue-Mobilfunk-Generation \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/designtagebuch.de.txt b/inc/3rdparty/site_config/standard/designtagebuch.de.txt
new file mode 100644
index 00000000..6096db0b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/designtagebuch.de.txt
@@ -0,0 +1,11 @@
1tidy: no
2body: //div[@class='main']
3
4author: substring-before(substring-after(//div[@class='meta-single'], 'erstellt von '), ' am')
5date: substring-before(substring-after(//div[@class='meta-single'], ' am '), ' | ')
6
7strip_id_or_class: pagelink
8strip_id_or_class: wp-polls
9
10next_page_link: //div[@class='post-page-next']/a
11test_url: http://www.designtagebuch.de/die-gefuehlte-lesbarkeit/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/desitvforum.net.txt b/inc/3rdparty/site_config/standard/desitvforum.net.txt
new file mode 100644
index 00000000..a6dac5fd
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/desitvforum.net.txt
@@ -0,0 +1,5 @@
1body: (//blockquote[contains(@class, 'postcontent')])[1]
2body: (//div[starts-with(@id, 'post_message')])[1]
3
4prune: no
5tidy: no \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/details.com.txt b/inc/3rdparty/site_config/standard/details.com.txt
new file mode 100644
index 00000000..548cabad
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/details.com.txt
@@ -0,0 +1,8 @@
1title: //h1[@class="content-headline"]
2body: //div[@class="headers-container"] | //div[@class="content-container"]
3prune: no
4tidy: no
5
6single_page_link: //li[@class='utility-print']/a
7
8test_url: http://www.details.com/culture-trends/critical-eye/201108/best-new-designers-innovations \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/developers.facebook.com.txt b/inc/3rdparty/site_config/standard/developers.facebook.com.txt
new file mode 100644
index 00000000..43a8f0a0
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/developers.facebook.com.txt
@@ -0,0 +1,3 @@
1title: //div[@class="bodyText"]/h1
2author: //div[@class="picture"]/a/img/@alt
3test_url: https://developers.facebook.com/blog/post/2012/03/22/developer-spotlight--foodspotting/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/devlinsangle.blogspot.co.at.txt b/inc/3rdparty/site_config/standard/devlinsangle.blogspot.co.at.txt
new file mode 100644
index 00000000..b960b37e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/devlinsangle.blogspot.co.at.txt
@@ -0,0 +1,6 @@
1date: //h2[@class='date-header']
2body: //div[@class='post hentry']
3title: //h3
4strip: //div[@class='post-footer']
5
6test_url: http://devlinsangle.blogspot.co.at/2012/03/difference-between-teaching-and_01.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dictionary.reference.com.txt b/inc/3rdparty/site_config/standard/dictionary.reference.com.txt
new file mode 100644
index 00000000..a1172024
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/dictionary.reference.com.txt
@@ -0,0 +1,8 @@
1title: //h1[@id='query_h1']
2body: //div[contains(@class, 'lunatext results_content')]
3strip_id_or_class: spl_unshd
4#replace_string(<div class="dicTl">): <div class="dicTl">------------------<br />
5
6prune: no
7
8test_url: http://dictionary.reference.com/browse/example \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/diepresse.com.txt b/inc/3rdparty/site_config/standard/diepresse.com.txt
new file mode 100644
index 00000000..7e825a91
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/diepresse.com.txt
@@ -0,0 +1,6 @@
1title: //div[@class='article']/h1
2date: substring-before(//p[@class='articletime'],'|')
3body: //div[@id='articletext']
4strip: //div[@class='inlineDiashow']
5
6test_url: http://diepresse.com/home/politik/aussenpolitik/701905/TibeterProteste_Nonne-verbrennt-sich-selbst?_vl_backlink=/home/politik/index.do \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/digiphoto.techbang.com.txt b/inc/3rdparty/site_config/standard/digiphoto.techbang.com.txt
new file mode 100644
index 00000000..2d2ae2c2
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/digiphoto.techbang.com.txt
@@ -0,0 +1,8 @@
1# default parser works great
2# only add "author" and "next page link" reference
3# 2012-04-13
4
5next_page_link: //div[@class = 'pagination']/a[@class = 'next_page']
6
7author: //*[@class = 'author metadata']/a
8test_url: http://digiphoto.techbang.com/posts/2433--commercial-photography-communication-is-the-key-to-a-good-work \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/digital-photography-school.com.txt b/inc/3rdparty/site_config/standard/digital-photography-school.com.txt
new file mode 100644
index 00000000..37192ac0
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/digital-photography-school.com.txt
@@ -0,0 +1,6 @@
1title: //div[@class='post-title']/h1
2author: //a[@href='#author']
3body: //div[@class='post-content']
4strip: //div[@class='post-meta']
5
6test_url: http://www.digital-photography-school.com/10-ways-to-develop-yourself-photographically \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/digitalspy.co.uk.txt b/inc/3rdparty/site_config/standard/digitalspy.co.uk.txt
new file mode 100644
index 00000000..b21431d7
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/digitalspy.co.uk.txt
@@ -0,0 +1,5 @@
1title: //div[@class="article_header"]/h1
2date: //div[@class="article_pub"]/span[@class="time"]
3author: //div[@class="article_pub"]/span[@class="editors"]/a/text()
4body: //div[@class="article_body clear_left"]
5test_url: http://www.digitalspy.co.uk/movies/at-the-movies/a364066/top-5-super-bowl-movie-trailers-the-avengers-battleship-more.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dilbert.com.txt b/inc/3rdparty/site_config/standard/dilbert.com.txt
new file mode 100644
index 00000000..413e5506
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/dilbert.com.txt
@@ -0,0 +1,8 @@
1convert_double_br_tags: yes
2
3title: substring(substring-after(//title, ':'), 1, string-length(substring-after(//title, ':')) - 10)
4body: //*[contains(@class, 'SB_Content')]
5author: string('Scott Adams')
6date: //*[contains(@class, 'SB_Detail')]/text()[1]
7
8test_url: http://dilbert.com/blog/entry/death_by_hypnosis_or_not/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dinamalar.com.txt b/inc/3rdparty/site_config/standard/dinamalar.com.txt
new file mode 100644
index 00000000..9ef198c9
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/dinamalar.com.txt
@@ -0,0 +1,19 @@
1title: //div[@class='newsdetbd']
2body: //div[@id='innerleft']
3#//p[@class = 'plnht']
4strip_image_src: /albums/
5strip: //div[@class='mrrt']
6prune: yes
7strip_id_or_class: 'fdpd'
8strip_id_or_class: 'epapt'
9strip_id_or_class: 'newsrtwd'
10strip_id_or_class: 'padtp'
11strip_id_or_class: 'newdt'
12strip_id_or_class: 'newdlt'
13strip: //div[@id='selNotes']
14strip_id_or_class: 'clsNotes'
15strip_id_or_class: 'clear'
16strip_id_or_class: 'cmtwrap'
17strip_id_or_class: 'sess'
18strip_id_or_class: 'parents'
19test_url: http://www.dinamalar.com/News_Detail.asp?Id=295725 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dn.se.txt b/inc/3rdparty/site_config/standard/dn.se.txt
new file mode 100644
index 00000000..86bb3b8d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/dn.se.txt
@@ -0,0 +1,26 @@
1# Since this element has class="clear", the Instapaper stylesheets (at least in this text parser preview) will render it unreadable, with a 1px font size and line height.
2
3body: //div[@id="article-content"]
4
5
6# Ads
7strip_id_or_class: advert-space
8
9# Read more, recommend, comments etc
10strip_id_or_class: fbc-recommend
11strip_id_or_class: recommend
12strip_id_or_class: article-readers
13strip_id_or_class: article-addons
14strip_id_or_class: hook
15strip_id_or_class: right
16strip_id_or_class: footer
17
18# Other news
19strip: //div[@id="mirrors"]
20
21# Author
22author: //div[@id="byline"]/div/p/strong
23
24# Date
25date: substring(substring-after(//p[@class="published"], 'Publicerad '), 0, 11)
26test_url: http://www.dn.se/nyheter/varlden/landade-flygplan-mitt-i-villaomrade \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/doctac.com.txt b/inc/3rdparty/site_config/standard/doctac.com.txt
new file mode 100644
index 00000000..9f65ea9b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/doctac.com.txt
@@ -0,0 +1,8 @@
1strip: //*[(@id = "featured")]
2
3author:substring-after( //div[@class='posttitle']/h2[@class='author'],'by ')
4
5date: concat(//div[@class='month'],' ',//div[@class='day'])
6
7#doctac doesn't provide a year, but month/day is better than nothing
8test_url: http://www.doctac.com/mac/iphone/instapaper-update-app/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/domusweb.it.txt b/inc/3rdparty/site_config/standard/domusweb.it.txt
new file mode 100644
index 00000000..81683f02
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/domusweb.it.txt
@@ -0,0 +1,21 @@
1# TODO: clean up the extra junk at the end of articles
2
3# general text formatting
4prune: no
5convert_double_br_tags:yes
6
7# where to find the basic metadata
8author://a[@class='articleauthor']
9date://a[starts-with(@href,'/en/search/published/')]
10title:substring-before(//h2[@class='title'],'&mdash;')
11body://div[@id='maincontainer']
12
13dissolve://div[starts-with(@id,'commentableblock')]
14
15# clean up the crap
16strip://div[contains(@class,'domusnetwork')]
17strip://div[contains(@class,'relative_wrapper')]
18
19strip://div[contains(@class,'captionsubimage')]/img[contains(@class,'arrow')]
20wrap_in(em): //div[contains(@class,'captionsubimage')]/span
21test_url: http://www.domusweb.it/en/design/in-praise-of-lost-time/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dou.ua.txt b/inc/3rdparty/site_config/standard/dou.ua.txt
new file mode 100644
index 00000000..22907c22
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/dou.ua.txt
@@ -0,0 +1,8 @@
1title: //h1[@itemprop="name"]
2
3author: //div[contains(@class, 'author')]//div[contains(@class, 'name')]/a
4
5date: //div[contains(@class, 'b-info')]//span[contains(@class, 'date')]
6
7body: //div[contains(@class, 'b-typo')]
8test_url: http://dou.ua/lenta/interviews/andrej-havryuchenko/?from=sb_mostcomm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/douban.com.txt b/inc/3rdparty/site_config/standard/douban.com.txt
new file mode 100644
index 00000000..99d7e5dc
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/douban.com.txt
@@ -0,0 +1,21 @@
1# This filter is tested on:
2# http://www.douban.com/note/215003067/
3# http://www.douban.com/note/213540049/
4# http://www.douban.com/group/topic/31140104/
5
6title: //div[@class='note-header']/h1
7title: //div[@id='content']/h1
8
9author: //div[@class='info']/ul/li/a
10author: //h3/span/a
11
12date://div[@class='note-header']/div/span
13date://h3/span[contains(@class, 'color-green')]
14
15body://div[contains(@class, 'note')]
16body://div[contains(@class, 'topic-content')]
17
18strip://h3
19
20convert_double_br_tags: yes
21test_url: http://www.douban.com/group/topic/31140104/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dpreview.com.txt b/inc/3rdparty/site_config/standard/dpreview.com.txt
new file mode 100644
index 00000000..30179a3b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/dpreview.com.txt
@@ -0,0 +1,9 @@
1# next_page_link for product review
2# example: http://www.dpreview.com/reviews/lytro/
3next_page_link: //img[@alt = 'Next page']/../@href
4
5# next_page_link for other articles
6# example: http://www.dpreview.com/articles/6126592906/first-impressions-using-the-fujifilm-x-pro1
7next_page_link: //*[@class = 'pages']/*/td[@class = 'next enabled']/a
8single_page_link: //a[contains(.,'Print view')]
9test_url: http://www.dpreview.com/articles/6126592906/first-impressions-using-the-fujifilm-x-pro1 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dr.dk.txt b/inc/3rdparty/site_config/standard/dr.dk.txt
new file mode 100644
index 00000000..7e46b0d6
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/dr.dk.txt
@@ -0,0 +1,9 @@
1title: //meta[@property='og:title']/@content
2author: //div[@class='articleFunctions']//a
3date: //meta[@name='pubdate']/@content
4
5# Can you strip elements from the body only? It is required here (`//div[@class='articleContent']/p` breaks for some reason)
6body: //div[@class='articleContent']
7
8tidy: no
9test_url: http://www.dr.dk/Nyheder/Udland/2011/10/24/150115.htm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dramasonline.com.txt b/inc/3rdparty/site_config/standard/dramasonline.com.txt
new file mode 100644
index 00000000..659d0443
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/dramasonline.com.txt
@@ -0,0 +1,10 @@
1body: //div[@class='postext']
2
3strip_id_or_class: ratingblock
4strip_id_or_class: hreview-aggregate
5strip: //div[contains(@style, 'display: none;')]
6
7tidy: no
8prune: no
9
10test_url: http://www.dramasonline.com/jago-pakistan-jago-7th-december-2012-ali-gul-pir/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/drdobbs.com.txt b/inc/3rdparty/site_config/standard/drdobbs.com.txt
new file mode 100644
index 00000000..b1a9db6f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/drdobbs.com.txt
@@ -0,0 +1,2 @@
1single_page_link: //a[contains(@href, '/article/print')]
2test_url: http://www.drdobbs.com/architecture-and-design/240001128 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/drive2.ru.txt b/inc/3rdparty/site_config/standard/drive2.ru.txt
new file mode 100644
index 00000000..6125ce79
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/drive2.ru.txt
@@ -0,0 +1,12 @@
1body: //div[@class = "description"]
2body: //div[@id = "post"]
3
4strip_id_or_class: vcard
5strip_id_or_class: journallist
6strip_id_or_class: infobox
7strip_id_or_class: terms
8strip_id_or_class: replieslist
9strip_id_or_class: communityside
10
11
12test_url: http://www.drive2.ru/cars/audi/a6/a6_c5/elysey/journal/288230376151836654/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/drupal.org.txt b/inc/3rdparty/site_config/standard/drupal.org.txt
new file mode 100644
index 00000000..ffb77e4d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/drupal.org.txt
@@ -0,0 +1,8 @@
1title://h1
2author://div[@class="submitted"]/a
3date:substring-after(//div[@class="meta"],'modified: ')
4date:substring-after(//div[@class="submitted"],'on ')
5body://div[@class="node-content"]
6strip://div[@class="meta"]
7strip_id_or_class:book-navigation
8test_url: http://drupal.org/node/1327354 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dukebasketballreport.com.txt b/inc/3rdparty/site_config/standard/dukebasketballreport.com.txt
new file mode 100644
index 00000000..418c9f62
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/dukebasketballreport.com.txt
@@ -0,0 +1,11 @@
1title: //h2/a
2author: substring-before(substring-after(//span[@class='byline'], 'by'), ',')
3date: substring-before(substring-after(//span[@class='byline'], ','), '|')
4body: //div[@class='entry']
5
6
7# strip out auction stuff at the end of posts
8# tidy kills the center tag, so disable it
9tidy: no
10strip: //center//table
11test_url: http://www.dukebasketballreport.com/articles/?p=42660 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dvice.com.txt b/inc/3rdparty/site_config/standard/dvice.com.txt
new file mode 100644
index 00000000..c8163680
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/dvice.com.txt
@@ -0,0 +1,9 @@
1strip://*[@id = 'blog_top_stories']
2strip://*[@id = 'takeover_off']
3strip://*[@id = 'right_gray_box']
4strip://*[@class = 'blog_topics']
5strip://*[@class = 'section_titles']
6
7author://div[@class = 'post_author_info']/a
8date://div[@class = 'post_date_info']
9test_url: http://dvice.com/archives/2012/05/is-nfc-and-smar.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/eamesinerudition.com.txt b/inc/3rdparty/site_config/standard/eamesinerudition.com.txt
new file mode 100644
index 00000000..908a1b51
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/eamesinerudition.com.txt
@@ -0,0 +1,8 @@
1title: //div [@class="post contain"]/h1
2strip: //div [@class="post contain"]/h1
3body: //div [@class="post contain"]
4author: substring-before(//title, ':')
5author: substring-before(//title, ' ')
6
7
8test_url: http://eamesinerudition.com/2012/03/hospital-numbers-are-bad-for-you \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/eandt.theiet.org.txt b/inc/3rdparty/site_config/standard/eandt.theiet.org.txt
new file mode 100644
index 00000000..c4c38f25
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/eandt.theiet.org.txt
@@ -0,0 +1,8 @@
1title: //h1
2date: //div[@class="et_dateUnderTitle"]
3author: substring-after(//div[@class="et_authorUnderTitle"], 'By ')
4body: //div[@id="et_leftCol640split"]
5
6strip: //div[@id="et_leftCol640splitRight"]
7strip: //div[@class="et_light_greybgboxlower"]
8test_url: http://eandt.theiet.org/magazine/2011/12/this-festive-waste.cfm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/eastoftheweb.com.txt b/inc/3rdparty/site_config/standard/eastoftheweb.com.txt
new file mode 100644
index 00000000..d762091c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/eastoftheweb.com.txt
@@ -0,0 +1,18 @@
1title: //div[@class='title_text']
2
3author: //div[@class='author_text']
4
5body: //div[@class='story_text']/..
6
7strip: //b
8
9strip_id_or_class: back_to_top
10strip_id_or_class: author_text
11strip_id_or_class: title_text
12
13wrap_in(center): //a
14
15dissolve: //a
16
17footnotes: no
18test_url: http://www.eastoftheweb.com/short-stories/UBooks/Horl.shtml \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ebay.com.txt b/inc/3rdparty/site_config/standard/ebay.com.txt
new file mode 100644
index 00000000..5fa18ff3
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/ebay.com.txt
@@ -0,0 +1,5 @@
1body: //h1[@class='it-ttl'] | //div[@id='mainImgHldr'] | //span[@id='prcIsum']
2
3strip_image_src: imgLoading_30x30.gif
4
5test_url: http://www.ebay.com/itm/BRAND-NEW-FM-Transmitter-Ca-r-Charger-iPhone-4S-4-4G-3GS-3G-2G-iPod-Touch-/190657497204 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ecetia.com.txt b/inc/3rdparty/site_config/standard/ecetia.com.txt
new file mode 100644
index 00000000..d67e9103
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/ecetia.com.txt
@@ -0,0 +1,2 @@
1body: //*[(@class = "historia")]
2test_url: http://ecetia.com/2011/09/vida-de-jugon-vii-las-tres-es \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/econlog.econlib.org.txt b/inc/3rdparty/site_config/standard/econlog.econlib.org.txt
new file mode 100644
index 00000000..ebafc197
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/econlog.econlib.org.txt
@@ -0,0 +1,6 @@
1title: //h1[@class="title"]
2author: //div[@class="hosted"]/a
3date: substring-after(//div[@class="dateline"]/text(), '|')
4
5strip: //a[@class="top" and @href="#"]
6test_url: http://econlog.econlib.org/archives/2012/04/blinder_on_heal.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/economia.estadao.com.br.txt b/inc/3rdparty/site_config/standard/economia.estadao.com.br.txt
new file mode 100644
index 00000000..b59f554e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/economia.estadao.com.br.txt
@@ -0,0 +1,7 @@
1date: //div[@class="bb-md-noticia-fecha"]
2body: //div[@class="corpo"]
3dissolve: //div[@class="bb-md-noticia-extras"]
4strip: //strong
5strip_id_or_class: bb-md-noticia-foto-autor
6strip_id_or_class: bb-md-noticia-foto-bajada
7test_url: http://economia.estadao.com.br/noticias/economia,cmn-aprova-r-67-bi-em-credito-para-20-setores-da-economia,118501,0.htm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/economist.com.txt b/inc/3rdparty/site_config/standard/economist.com.txt
new file mode 100644
index 00000000..71dd62f5
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/economist.com.txt
@@ -0,0 +1,10 @@
1title: //div[@class='ec-blog-headline']
2body: //div[@class='ec-blog-body']
3body: //div[@class='ec-article-content clear']
4strip: //div[@class='related-items']
5date: substring-before(//p[@class='ec-article-info'], '|')
6prune: no
7
8autodetect_next_page: no
9
10test_url: http://www.economist.com/node/21528429 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/edge-online.com.txt b/inc/3rdparty/site_config/standard/edge-online.com.txt
new file mode 100644
index 00000000..461d909c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/edge-online.com.txt
@@ -0,0 +1,13 @@
1title: //meta[@property="og:title"]/@content
2body: //h2[@class='strapline'] | //article[contains(@class, 'node-article')]
3date: //time[@pubdate]/@datetime
4author: //span[@class='author-name']
5prune: no
6tidy: no
7strip: //footer
8
9replace_string(<p>[ pagebreak ]</p>): <!-- pagebreak -->
10
11single_page_link: //a[contains(@href, '?page=show')]
12
13test_url: http://www.edge-online.com/features/telling-modern-warfares-story \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/edge.org.txt b/inc/3rdparty/site_config/standard/edge.org.txt
new file mode 100644
index 00000000..9980000d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/edge.org.txt
@@ -0,0 +1,5 @@
1title: //div[@class='HomeLeftPannel IMGCTRL']/h2
2body: //div[@class='HomeLeftPannel IMGCTRL']//div[@class='Brownalink' or @id='shortdesc']
3tidy: no
4
5test_url: http://edge.org/print/conversation.php?cid=the-argumentative-theory \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/edition.cnn.com.txt b/inc/3rdparty/site_config/standard/edition.cnn.com.txt
new file mode 100644
index 00000000..dc8ebe14
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/edition.cnn.com.txt
@@ -0,0 +1,9 @@
1body: //div[@id='cnnContentContainer']//div[contains(@class, 'cnn_strycntntlft')]
2strip: //div[@id='cnnCVP2']
3strip_id_or_class: cnn_strylftcexpbx
4strip_id_or_class: cnn_strylctcqrelt
5strip_id_or_class: cnn_strybtntoolsbttm
6strip_id_or_class: cnn_stryftsbttm
7strip_id_or_class: cnn_strybtmcntnt
8prune: no
9test_url: http://edition.cnn.com/2011/US/04/29/severe.weather/index.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ekultura.hu.txt b/inc/3rdparty/site_config/standard/ekultura.hu.txt
new file mode 100644
index 00000000..59f6a711
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/ekultura.hu.txt
@@ -0,0 +1,11 @@
1title: //h1[@class='style6 nevek']
2
3body: //div[@class='bal3']
4
5
6prune: yes
7
8tidy: yes
9convert_double_br_tags: yes
10
11test_url: http://ekultura.hu/olvasnivalo/egyeb/cikk/2010-12-15/interju-galvolgyi-judit-2010-december \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/elance.com.txt b/inc/3rdparty/site_config/standard/elance.com.txt
new file mode 100644
index 00000000..52ffe2d0
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/elance.com.txt
@@ -0,0 +1,3 @@
1body: //div[@id='jobDesc-bd']/p
2
3test_url: http://www.elance.com/j/xml-technical-intergration/23687172/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/elektroniknet.de.txt b/inc/3rdparty/site_config/standard/elektroniknet.de.txt
new file mode 100644
index 00000000..07664719
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/elektroniknet.de.txt
@@ -0,0 +1,27 @@
1title: //h1
2date: //div[@class='datum']
3single_page_link: //a[contains(@href, '?type=99')]
4
5# this hack preserves the intro text, because it would be stripped otherwise if the title is set to //h1
6dissolve: //div[@class='artikelMeldung']
7
8
9strip_id_or_class: anzeige
10strip_id_or_class: top_page_navigation
11strip_id_or_class: cr_image_container
12strip_id_or_class: cr_image_reference
13strip_id_or_class: cr_image_icon
14strip_id_or_class: _close_txt
15strip_id_or_class: _close_ico
16strip_id_or_class: clearer
17
18strip://h1
19strip://h6
20strip://div[contains(@id, 'plista')]
21strip://img[contains(@id,'tiny')]
22strip://img[@class='cr_image']
23
24# strip url at the top
25strip: //p[@style='font-size: 10px;']
26
27test_url: http://www.elektroniknet.de/automotive/technik-know-how/sicherheitselektronik/article/87717/0/Besser_als_die_Wirklichkeit/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/elmalpensante.com.txt b/inc/3rdparty/site_config/standard/elmalpensante.com.txt
new file mode 100644
index 00000000..9fecd663
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/elmalpensante.com.txt
@@ -0,0 +1,4 @@
1single_page_link: //a[contains(@href, 'print_contenido')]
2title: //h2
3author: //div[@class="autor"]
4test_url: http://www.elmalpensante.com/index.php?doc=display_contenido&id=668 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/elpais.com.txt b/inc/3rdparty/site_config/standard/elpais.com.txt
new file mode 100644
index 00000000..32f9fc3f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/elpais.com.txt
@@ -0,0 +1,22 @@
1title: //meta[@name='DC.title']/@content
2title: //div[contains(@class, 'cabecera_noticia')]//h1
3date: //meta[@name='DC.date']/@content
4date: //meta[@name='date']/@content
5body: //div[@class='columna_texto']
6body: //div[@id='cuerpo_noticia']
7body: //div[@class='estructura_2col_1zq']//div[@class='margen_n']
8
9prune: no
10
11strip_id_or_class: disposicion_vertical
12strip_id_or_class: ampliar_foto
13strip_id_or_class: utilidades
14strip_id_or_class: info_relacionada
15strip_id_or_class: m-kiosko
16strip_id_or_class: info_complementa
17
18strip: //div[starts-with(@id, 'sumario') and contains(., 'más información')]
19strip: //div[@id='coment' or @id='foros_not']
20
21test_url: http://elpais.com/elpais/2012/02/06/gente/1328526783_491687.html
22test_url: http://www.elpais.com/articulo/cultura/mano/retrato/materia/elpepicul/20120207elpepicul_2/Tes \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/en.espnf1.com.txt b/inc/3rdparty/site_config/standard/en.espnf1.com.txt
new file mode 100644
index 00000000..c1a91063
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/en.espnf1.com.txt
@@ -0,0 +1,10 @@
1body: //div[@id='content']
2strip: //div[@class='rl']
3strip: //p[@class='authdesc']
4strip: //p[@class='strybtm']
5strip: //div[@id='stryFtrLft']
6strip: //div[@id='f1Conversation']
7strip: //div[@id='cmtSpncrRuler']
8strip: //div[@id='stryComments']
9strip: //div[@id='athrData']
10test_url: http://en.espnf1.com/monaco/motorsport/story/50529.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/engadget.com.txt b/inc/3rdparty/site_config/standard/engadget.com.txt
new file mode 100644
index 00000000..6cc6b14e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/engadget.com.txt
@@ -0,0 +1,7 @@
1title: //meta[@property="og:title"]/@content
2body: //div[@class='post_body']
3date: //*[@class='post_time']
4
5prune: no
6
7test_url: http://www.engadget.com/2011/05/20/screen-grabs-the-mentalist-takes-the-ipad-to-new-heights/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/engineering.tumblr.com.txt b/inc/3rdparty/site_config/standard/engineering.tumblr.com.txt
new file mode 100644
index 00000000..35ace467
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/engineering.tumblr.com.txt
@@ -0,0 +1,7 @@
1title: //h2
2body: //div[@class="post_content"]
3author: //p[@class="author"]/a
4date: //p[@class="date"]
5strip: //h2
6strip: //header
7test_url: http://engineering.tumblr.com/post/21276808338/tumblr-firehose \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/english.aljazeera.net.txt b/inc/3rdparty/site_config/standard/english.aljazeera.net.txt
new file mode 100644
index 00000000..aed3a5f9
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/english.aljazeera.net.txt
@@ -0,0 +1,7 @@
1title: //span[@id='DetailedTitle']
2body: //div[@id='ctl00_cphBody_dvArticleInfoBlock'] | //td[@class='DetailedSummary']
3strip_id_or_class: sidebar
4strip_id_or_class: Skyscrapper_Body
5strip: //td[@class='DetailedSummary']/table[position() != 1]
6prune: no
7test_url: http://english.aljazeera.net//news/middleeast/2011/04/20114681444376835.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/enikos.gr.txt b/inc/3rdparty/site_config/standard/enikos.gr.txt
new file mode 100644
index 00000000..e2b99bfc
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/enikos.gr.txt
@@ -0,0 +1,9 @@
1body: //div[@id='article']//div[contains(@class, 'inside')]
2
3strip_id_or_class: tags
4strip_id_or_class: actions
5strip_id_or_class: google-ads
6
7prune: no
8
9test_url: http://www.enikos.gr/politics/98606,To_oxi_toy_Agorastoy_stoys_Germanoys.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/entertainment.timesonline.co.uk.txt b/inc/3rdparty/site_config/standard/entertainment.timesonline.co.uk.txt
new file mode 100644
index 00000000..3e7fba09
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/entertainment.timesonline.co.uk.txt
@@ -0,0 +1,10 @@
1author://div[@class = 'article-author']/span[@class = 'byline']
2title://h1[@class = 'heading']
3body://div[@id = 'related-article-links']
4strip://div[@id = 'comment-sort-order']
5strip://div[@id = 'my-profile']
6strip://div[@class = 'article-author']
7strip://div[@class = 'bg-f8f1d8 width-385 text-left']
8strip://div[@id = 'login-status']
9strip://div[@class = 'puff-padding']
10test_url: http://entertainment.timesonline.co.uk/tol/arts_and_entertainment/the_tls/article7177738.ece \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/es.hu.txt b/inc/3rdparty/site_config/standard/es.hu.txt
new file mode 100644
index 00000000..19a1e9dd
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/es.hu.txt
@@ -0,0 +1,11 @@
1title: concat(//div[@class='doc_author'], ' - ', upper-case(//div[@class='doc_title']))
2
3body: //div[@class='doc']
4
5prune: yes
6
7tidy: yes
8convert_double_br_tags: yes
9
10strip: //a[contains(@href, 'www.facebook.com/pages/Elet-es-Irodalom/')]
11test_url: http://www.es.hu/2010-12-08_vissza-a-partpenzt \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/escapistmagazine.com.txt b/inc/3rdparty/site_config/standard/escapistmagazine.com.txt
new file mode 100644
index 00000000..7e17a04d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/escapistmagazine.com.txt
@@ -0,0 +1,2 @@
1strip_comments: no
2test_url: http://www.escapistmagazine.com/articles/view/columns/extraconsideration/8717-Extra-Consideration-The-Story \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/espn.go.com.txt b/inc/3rdparty/site_config/standard/espn.go.com.txt
new file mode 100644
index 00000000..319d352b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/espn.go.com.txt
@@ -0,0 +1,12 @@
1title: //div[@class='headline'] | //div[@class='mod-header']/h3
2body: //div[contains(@class, 'article')]
3strip: //div[contains(@class, 'mod-inline')]
4strip: //*/span[@class='page-actions']
5strip: //div[@class='page-actions']/*
6strip: //div[@class='headline'] | //div[@class='mod-header']/h3
7strip: //div[@class='mod-blog-navigation']
8strip: //div[@class='monthday']
9strip: //div[@class='time']
10strip: //div[@class='timeofday']
11strip: //div[contains(@class, 'mod-conversations')]
12test_url: http://espn.go.com/boston/mlb/story/_/id/7092528/terry-francona-victim-latest-red-sox-smear-campaign \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/esquire.com.txt b/inc/3rdparty/site_config/standard/esquire.com.txt
new file mode 100644
index 00000000..7566e8cc
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/esquire.com.txt
@@ -0,0 +1,10 @@
1title: //h1
2author: //div[@id='byline']
3
4body: //div[@id='printBody']
5
6single_page_link: concat('http://www.esquire.com/print-this/', substring-after(//link[@rel='canonical']/@href, 'esquire.com/'))
7
8prune: no
9
10test_url: http://www.esquire.com/features/impossible/price-is-right-perfect-bid-0810 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/essentialpublicradio.org.txt b/inc/3rdparty/site_config/standard/essentialpublicradio.org.txt
new file mode 100644
index 00000000..88c8c560
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/essentialpublicradio.org.txt
@@ -0,0 +1,6 @@
1title: //*[@itemprop='headline']
2author: //*[@itemprop='author']
3date: //*[@itemprop='datePublished']
4body: //*[@itemprop='articleBody']
5strip: //*[contains(@class, 'instapaper_ignore')]
6test_url: http://www.essentialpublicradio.org/story/2011-11-14/volunteers-sought-federal-tax-assistance-program-pennsylvania-9421 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/etc.se.txt b/inc/3rdparty/site_config/standard/etc.se.txt
new file mode 100644
index 00000000..58da5ef7
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/etc.se.txt
@@ -0,0 +1,6 @@
1strip_id_or_class: 'left'
2strip_id_or_class: 'right'
3strip_id_or_class: 'block-belowcontent'
4author: //span[@class = 'name']/a
5date: //div[@class= 'datum']
6test_url: http://www.etc.se/intervju/lonsamt-att-radda-jorden \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/eternabuenosaires.com.txt b/inc/3rdparty/site_config/standard/eternabuenosaires.com.txt
new file mode 100644
index 00000000..bfa2c5dc
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/eternabuenosaires.com.txt
@@ -0,0 +1,2 @@
1body: //*[(@class = "historia")]
2test_url: http://eternabuenosaires.com/2011/09/calle-adolfo-bioy-casares \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/eurogamer.net.txt b/inc/3rdparty/site_config/standard/eurogamer.net.txt
new file mode 100644
index 00000000..6ecdf6bd
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/eurogamer.net.txt
@@ -0,0 +1,8 @@
1body: //div[ @class='content' ] | //div[ @class='blog-entry' ]
2
3strip: //h2/abbr | //div[ @class='lowleader' ] | //*[ @class='discussion' ] | //img[ @class='play-button' ] | //div[ @class='boxout' ] | //h2/a | //h2 | //h2/div | //p[ @class='timestamp' ] | //a[ @class='eurogamer-author' ] | //p[ @class='aPager' ] | //h1 | //div[ @id='lowleader' ] | //a[ @class='next' ] | //div[contains(concat(' ', normalize-space(@class), ' '), ' pullquote ')]
4
5date://p[ @class='timestamp' ]
6
7author://a[ @class='eurogamer-author' ]
8test_url: http://www.eurogamer.net/articles/digitalfoundry-vs-unreal-engine-4 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/evo.co.uk.txt b/inc/3rdparty/site_config/standard/evo.co.uk.txt
new file mode 100644
index 00000000..07162513
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/evo.co.uk.txt
@@ -0,0 +1,11 @@
1author: substring-after(//div[@class='articleauthor'],'By ')
2
3# Blog posts
4date: //div[@class='articledate']
5# News
6date: //div[@class='articledate_b']
7
8body: //div[@class='articletext']
9
10convert_double_br_tags: yes
11test_url: http://www.evo.co.uk/carreviews/evolongtermtests/280072/bmw_330d_sport_touring.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/expressen.se.txt b/inc/3rdparty/site_config/standard/expressen.se.txt
new file mode 100644
index 00000000..d0cb283e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/expressen.se.txt
@@ -0,0 +1,9 @@
1title: //div[@id='article']/div[contains(@class, 'content')]/h1
2body: //div[@id='article']/div[contains(@class, 'content')]
3date: //div[contains(@class, 'article-slot')]/descendant::div[contains(@id, 'articledates')]
4
5strip: //img[contains(@src, 'img/px.gif')]
6prune: no
7# remove Facebook banner and obtrusive ad
8strip: //div[@id='article']/div[contains(@class, 'content')]/div[contains(@class, 'art-right')]
9test_url: http://www.expressen.se/kultur/1.2683904/medan-natet-dras-at \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/extracine.com.txt b/inc/3rdparty/site_config/standard/extracine.com.txt
new file mode 100644
index 00000000..52b598da
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/extracine.com.txt
@@ -0,0 +1,2 @@
1body: //*[(@class = "historia")]
2test_url: http://extracine.com/2011/09/straw-dogs-la-original \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/f1actual.com.txt b/inc/3rdparty/site_config/standard/f1actual.com.txt
new file mode 100644
index 00000000..6ef2738a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/f1actual.com.txt
@@ -0,0 +1,2 @@
1body: //*[(@class = "historia")]
2test_url: http://f1actual.com/2011/09/previo-gran-premio-de-singapur \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/facta.co.jp.txt b/inc/3rdparty/site_config/standard/facta.co.jp.txt
new file mode 100644
index 00000000..c17e0b8c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/facta.co.jp.txt
@@ -0,0 +1,3 @@
1body: //div[@class='content']
2
3test_url: http://facta.co.jp/blog/archives/20111026001026.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/falter.at.txt b/inc/3rdparty/site_config/standard/falter.at.txt
new file mode 100644
index 00000000..b941b740
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/falter.at.txt
@@ -0,0 +1,18 @@
1title: //h2[@class='related relatedTitle']
2author: //a[contains(@href, 'liste.php?author_id')]
3
4# can't think of a better way unfortunately, really bad markup on this site
5date: substring-after(//td[@style='width:85%;'], 'vom')
6
7# not sure why, but instapaper seems to suck up the teaser paragraph
8# not solved!
9body: //div[contains(@class, 'teaser')]
10body: //div[@id='content']
11
12# cleanup
13strip: //img[@src='http://www.falter.at/web/_pics/falterlogo_dblau.gif']
14strip: //div[@class='servicebox']
15strip: //h1
16strip: //br
17strip: //td[@id='adcol']
18test_url: http://www.falter.at/web/print/detail.php?id=1634 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/fanfiction.net.txt b/inc/3rdparty/site_config/standard/fanfiction.net.txt
new file mode 100644
index 00000000..8d0c4daf
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/fanfiction.net.txt
@@ -0,0 +1,6 @@
1body: //*[@id = 'storytext']
2author: //a[starts-with(@href, '/u/')]
3next_page_link: substring-after(//input[contains(@value, 'Next')]/@onclick, "self.location='")
4autodetect_next_page:yes
5strip_id_or_class: 'a2a_kit'
6test_url: http://www.fanfiction.net/s/6497403/1/Spartan_Love \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/fastcompany.com.txt b/inc/3rdparty/site_config/standard/fastcompany.com.txt
new file mode 100644
index 00000000..5547a76c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/fastcompany.com.txt
@@ -0,0 +1,16 @@
1title: //h1
2author: //h5[@class='byline']//a
3date: //h5[@class='date']
4body: //figure[@class='node-poster'] | //div[contains(@class, "node-content")]
5strip_id_or_class: article-top-wrapper
6strip_id_or_class: footer-message
7strip_id_or_class: print-logo
8strip: //cite
9strip://*[@class='timestamp']
10strip://div[@id='page_right']
11strip://section[@id='header_region']
12strip://h1[@class='node-title']
13strip://div[@class='node-submitted']
14strip_id_or_class: skipnav
15test_url: http://www.fastcompany.com/3000226/link-between-quietness-and-productivity
16test_url: http://www.fastcompany.com/3003586/6-simple-rituals-reach-your-potential-every-day \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/faz.net.txt b/inc/3rdparty/site_config/standard/faz.net.txt
new file mode 100644
index 00000000..4fe5968b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/faz.net.txt
@@ -0,0 +1,30 @@
1# Title
2title: //p[@class='Content HeadlineShort']
3
4# Authors
5# some are known and have a link, others don't
6author: substring-after(//span[@class='Autor'], 'Von')
7
8# Date
9date: //span[@class='Datum']
10
11# Body
12body: //div[@class='Artikel']
13
14# Removals before body text
15strip: //div[@class='Breadcrumbs']
16strip: //div[@class='QuickSearchBox']
17strip: //div[@class='FAZArtikelEinleitung']
18strip: //div[@class='FAZArtikelReiter']
19strip: //div[@class='clear']
20
21# General removals
22strip: //span[@class='Bildnachweis']
23
24# Removals after body text
25strip: //div[@class='ArtikelAbbinder']
26strip: //div[@class='ArtikelKommentieren Artikelfuss GETS;tk;boxen.top-lesermeinungen;tp;content']
27strip: //div[@class='FAZArtikelKommentare FAZArtikelContent']
28strip: //div[@class='FAZArtikelFunktionen']
29strip: //div[@id='FAZContentRight']
30test_url: http://www.faz.net/aktuell/gesellschaft/ehe-haltbarkeitsformel-verliebe-dich-oft-verlobe-dich-selten-heirate-vielleicht-11685306.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/fictionpress.com.txt b/inc/3rdparty/site_config/standard/fictionpress.com.txt
new file mode 100644
index 00000000..4a04e832
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/fictionpress.com.txt
@@ -0,0 +1,5 @@
1body: id('storytext')
2author: //a[starts-with(@href, '/u/')]
3#next_page_link: substring-after(//input[contains(@value, 'Next')]/@onclick, "self.location='")
4strip_id_or_class: 'a2a_kit'
5test_url: http://www.fictionpress.com/s/2897964/1/All_We_Knew \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ficwad.com.txt b/inc/3rdparty/site_config/standard/ficwad.com.txt
new file mode 100644
index 00000000..3dbfe76f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/ficwad.com.txt
@@ -0,0 +1,12 @@
1title: //h4
2author: //span[@class="author"]
3body: //div[@id="story"]
4strip_id_or_class: summary
5strip_id_or_class: meta
6strip_id_or_class: storyfoot
7convert_double_br_tags: yes
8prune: no
9
10# Note: this site still has trouble because single <br> tags are stripped, but I don't see a way to fix that with this interface.
11
12test_url: http://www.ficwad.com/story/158977 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/finance.yahoo.com.txt b/inc/3rdparty/site_config/standard/finance.yahoo.com.txt
new file mode 100644
index 00000000..81c18fd3
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/finance.yahoo.com.txt
@@ -0,0 +1,12 @@
1title: //meta[@property='og:title']/@content
2body: //div[@id='y-article-bd']
3body: //div[contains(@class, 'yom-art-content')]
4strip: //div[contains(@class, 'related-companies')]
5strip: //div[@id='y-article-related']
6strip: //div[@id='ypf-article-related']
7prune: no
8
9single_page_link: //div[@class='ft']//a[contains(@href, 'page=all')]
10
11test_url: http://sg.finance.yahoo.com/news/Motorola-takes-wraps-249-rsg-3508842732.html?x=0&.v=1
12test_url: http://finance.yahoo.com/news/super-young-retirement-savers.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/findtheswagger.tumblr.com.txt b/inc/3rdparty/site_config/standard/findtheswagger.tumblr.com.txt
new file mode 100644
index 00000000..1a5cd2e1
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/findtheswagger.tumblr.com.txt
@@ -0,0 +1,10 @@
1date: //div[@class='notes']/a
2body: //div[@id='content']
3
4strip_id_or_class: tags
5strip_id_or_class: permalink
6strip_id_or_class: notes
7strip_id_or_class: post_nav
8strip: //div[@id='content']//h2
9strip_id_or_class: right_column
10test_url: http://findtheswagger.tumblr.com/post/11589145141/moe-resners-end-of-an-era-1957-giants-final \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/firstthings.com.txt b/inc/3rdparty/site_config/standard/firstthings.com.txt
new file mode 100644
index 00000000..dd56da22
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/firstthings.com.txt
@@ -0,0 +1,7 @@
1title: //div[@class='articleTitle']
2author: //div[@class='articleAuthor']
3body: //div[@class='articleContent']
4prune: no
5convert_double_br_tags: yes
6
7test_url: http://www.firstthings.com/article/2011/05/the-trouble-with-ayn-rand \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/fivechapters.com.txt b/inc/3rdparty/site_config/standard/fivechapters.com.txt
new file mode 100644
index 00000000..d9c5e42e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/fivechapters.com.txt
@@ -0,0 +1,2 @@
1body: //div[@class='entry']
2test_url: http://www.fivechapters.com/2010/paris-part-one/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/fivefilters.org.txt b/inc/3rdparty/site_config/standard/fivefilters.org.txt
new file mode 100644
index 00000000..dc1db432
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/fivefilters.org.txt
@@ -0,0 +1 @@
prune: no \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/fivethirtyeight.com.txt b/inc/3rdparty/site_config/standard/fivethirtyeight.com.txt
new file mode 100644
index 00000000..3d7b45a8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/fivethirtyeight.com.txt
@@ -0,0 +1,7 @@
1title: substring-after(//title, 'Right:')
2body: //div[@class = 'post-body']
3author: substring-after(//*[@class='post-author'], 'by')
4date: concat(//*[@class='date-header'], ' ', //*[@class='post-timestamp']/a)
5convert_double_br_tags: yes
6
7test_url: http://www.fivethirtyeight.com/2010/07/does-rnc-have-structural-problems.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/fm4.orf.at.txt b/inc/3rdparty/site_config/standard/fm4.orf.at.txt
new file mode 100644
index 00000000..32d44c87
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/fm4.orf.at.txt
@@ -0,0 +1,7 @@
1author: //div[@class='authorDescription']/h2
2body: //div[@id='story']
3date: substring-before(substring-after(//p[@class='date'],'Erstellt am:'), '-')
4title: //h1[@class='detail']
5strip: //div[@class='fact']
6
7test_url: http://fm4.orf.at/stories/1689156/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/fnal.gov.txt b/inc/3rdparty/site_config/standard/fnal.gov.txt
new file mode 100644
index 00000000..7faa6bfc
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/fnal.gov.txt
@@ -0,0 +1,15 @@
1title: normalize-space(//h1)
2
3author: //td/p[position()=last()]/em
4
5# I swear, this is really the best way to do this
6date: normalize-space(//td[contains(@style, "color: #ffffff")])
7
8# my god, it's full of tables
9body: /table/tbody/tr[5]//table/tbody//table/tbody/tr/td
10strip: //h1
11
12# the following two lines strip the byline at the end of the article (the byline is a <p> that consists of an em dash and then some text in an <em>). I have no idea why I can't just strip //p[position()=last()], but trying to do so includes a bunch of other crap in the output.
13strip: //p[position()=last()]/em
14strip: //p[position()=last()]/child::text()
15test_url: http://www.fnal.gov/pub/today/archive_2011/today11-11-09_MuonDepartmentReadMore.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/focus.de.txt b/inc/3rdparty/site_config/standard/focus.de.txt
new file mode 100644
index 00000000..3ad5cabf
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/focus.de.txt
@@ -0,0 +1,19 @@
1title: //h1
2
3author: //div[@class='articleContent small']/div[@class='textBlock']//span[@class='created']
4
5date: //div[@class='articleHead']/span[@class='created']
6
7body: //div[@id='article']
8
9strip: //span[@class='markerText']
10strip: //div[@class='articleContent small']/div[@class='textBlock']//span[@class='created']
11strip: //div[@class='sidebar']
12strip: //div[@class='starbar']
13strip: //div[@class='actions clearfix']
14strip: //div[@id='commentForm']
15strip: //div[@id='commentSent']
16strip: //div[@id='comments']
17strip: //div[@class='similarityBlock']
18
19test_url: http://www.focus.de/politik/ausland/ein-jahr-nach-bombenanschlag-u-bahn-attentaeter-von-minsk-hingerichtet_aid_724958.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/fool.com.txt b/inc/3rdparty/site_config/standard/fool.com.txt
new file mode 100644
index 00000000..69867ccb
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/fool.com.txt
@@ -0,0 +1,11 @@
1body: //div[@class='entry-content']
2date: //meta[@name="date"]/@content
3author: //meta[@name="author"]/@content
4
5strip_id_or_class: ecapShell
6strip_id_or_class: noindent
7strip_id_or_class: targetedPromotion
8
9prune: no
10
11test_url: http://www.fool.com/investing/general/2012/01/27/dfc-global-beats-up-on-analysts-yet-again.aspx \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/forbes.com.txt b/inc/3rdparty/site_config/standard/forbes.com.txt
new file mode 100644
index 00000000..2381b56a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/forbes.com.txt
@@ -0,0 +1,16 @@
1title: //hgroup//h1
2title: //span[@class='mainarttitle']
3
4body: //div[@id='leftRail']//div[contains(@class, 'body')]
5
6author: //meta[@name="author"]/@content
7author: //span[@class='mainartauthor']
8
9date: substring-before(//hgroup//h6, '@')
10date: //span[@class='mainartdate']
11
12prune: no
13
14single_page_link: //a[contains(@href, '/print/')]
15
16test_url: http://www.forbes.com/forbes/2011/0509/technology-frog-design-jan-chipchase-ethnographer-birth-cool_print.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/foreignpolicy.com.txt b/inc/3rdparty/site_config/standard/foreignpolicy.com.txt
new file mode 100644
index 00000000..6ab7a091
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/foreignpolicy.com.txt
@@ -0,0 +1,11 @@
1title: //div[@id='art-mast']//h1
2author: substring-after(//span[@id='by-line'], 'BY ')
3date: //span[@id='pub-date']
4body: //div[@id='art-mast']//h2 | //div[@id='art-mast']/h3 | //div[@id='art-body']//div[@class='translateBody']
5strip: //div[@id='share-box']
6prune: no
7
8single_page_link: //span[@id='controls']/a[contains(@href, 'print=yes')]
9
10test_url: http://www.foreignpolicy.com/articles/2011/08/01/a_murderers_manifesto_and_me
11test_url: http://www.foreignpolicy.com/articles/2012/02/29/five_years_in_damascus \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/forsvaret.no.txt b/inc/3rdparty/site_config/standard/forsvaret.no.txt
new file mode 100644
index 00000000..3085c8f2
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/forsvaret.no.txt
@@ -0,0 +1,9 @@
1title: //div[@class="articleHeader"]/h1
2author: //p[@class="byline"]
3date: //p[contains(@class,"publishedDate")]/span
4# remove the right menu
5strip: //div[contains(@class,"aside")]
6# remove some SharePoint webpart label junk
7strip: //div[@id="ctl00_PlaceHolderMain_ArticleLeadField_label"]
8strip: //div[@id="ctl00_PlaceHolderMain_PublishingPageContentField_label"]
9test_url: http://forsvaret.no/aktuelt/publisert/nyheter/Sider/F5-fly-til-Skedsmo.aspx \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/foxnews.com.txt b/inc/3rdparty/site_config/standard/foxnews.com.txt
new file mode 100644
index 00000000..f1ee4851
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/foxnews.com.txt
@@ -0,0 +1,9 @@
1prune: no
2
3author: //meta[@name="dc.publisher"]/@content
4date: //meta[@name="dc.date"]/@content
5strip: //p[contains(@class, 'contributor vcard')]
6replace_string(<ul><li><div class="photo">): <div class="photo">
7strip: //p[a[contains(., 'Click here to read more on this story ')]]
8
9test_url: http://www.foxnews.com/entertainment/2011/05/04/dwayne-johnson-guys-grow-pair-driving-hybrid/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/freelancer.com.txt b/inc/3rdparty/site_config/standard/freelancer.com.txt
new file mode 100644
index 00000000..f3d5425c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/freelancer.com.txt
@@ -0,0 +1,3 @@
1body: //div[@id="projectDetailsContent"]//td
2
3test_url: http://www.freelancer.com/projects/PHP-Website-Design/debug-Forum-website-code.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/freytag-film.com.txt b/inc/3rdparty/site_config/standard/freytag-film.com.txt
new file mode 100644
index 00000000..8dc0dabc
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/freytag-film.com.txt
@@ -0,0 +1,5 @@
1body: //div[@class = 'instapaperbody']
2convert_double_br_tags: no
3date: //div[@class='instadate']
4title: //h2[@class = 'instatitle']
5test_url: http://freytag-film.com/blog/artikel/shooting_a_feature_film_in_10_days \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/friendskorner.com.txt b/inc/3rdparty/site_config/standard/friendskorner.com.txt
new file mode 100644
index 00000000..39a9973f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/friendskorner.com.txt
@@ -0,0 +1,11 @@
1#body: (//div[@class='ftr-yt-vid'])[1]
2body: (//blockquote[contains(@class, 'postcontent')])[1]
3body: (//div[starts-with(@id, 'post_message')])[1]
4
5prune: no
6tidy: no
7
8#replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player"
9#replace_string(</iframe>): </iframe>&nbsp;</div>
10
11test_url: http://www.friendskorner.com/forum/f137/debate-personal-lives-leaders-west-vs-pakistan-must-read-297989/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ft.com.txt b/inc/3rdparty/site_config/standard/ft.com.txt
new file mode 100644
index 00000000..38d9d326
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/ft.com.txt
@@ -0,0 +1,5 @@
1body: //div[contains(@class, 'ft-story-body')]
2
3author: substring-after(//div[contains(@class, 'ft-story-header')]/p[1], 'By ')
4date: substring-before(substring-after(//div[contains(@class, 'ft-story-header')]/p[2], 'Published:'), '|')
5test_url: http://www.ft.com/cms/s/2/e1be4b5a-620c-11e0-8ee4-00144feab49a.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ftd.de.txt b/inc/3rdparty/site_config/standard/ftd.de.txt
new file mode 100644
index 00000000..a58765b0
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/ftd.de.txt
@@ -0,0 +1,5 @@
1body: //div[@class='boxIntroHead']/span[@class='h3'] | //div[@class='section']/div[@class='paragraph' or @class='embObjLeft']
2single_page_link: //a[@class='icon print']
3
4test_url: http://www.ftd.de/it-medien/it-telekommunikation/:mobilfunk-vivendi-und-vodafone-trennen-sich-in-frankreich/60034691.html
5test_url: http://www.ftd.de/it-medien/medien-internet/:verkauf-von-warner-music-musikbranche-auf-dem-sprung/60048185.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/fubiz.net.txt b/inc/3rdparty/site_config/standard/fubiz.net.txt
new file mode 100644
index 00000000..8e6356bf
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/fubiz.net.txt
@@ -0,0 +1,3 @@
1body: //div[@class = 'entry']
2
3test_url: http://www.fubiz.net/2011/05/31/world-press-photo-2011/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/futurezone.at.txt b/inc/3rdparty/site_config/standard/futurezone.at.txt
new file mode 100644
index 00000000..50fc144a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/futurezone.at.txt
@@ -0,0 +1,11 @@
1date: //span[@class='date']
2strip: //div[@class='postsidebar']
3body: //div[@class='singlepost']
4title: //div[@class='singlepost']/h1
5move_into(//div[@class='singlepost']): //div[@class='info']
6strip: //div[@class='gallery']
7strip: //div[@class='biggallery']
8strip: //ul[@class='social']
9strip: //ul[@class='social_mail']
10
11test_url: http://futurezone.at/future/5502-erste-galileo-satelliten-starten-ins-all.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gamasutra.com.txt b/inc/3rdparty/site_config/standard/gamasutra.com.txt
new file mode 100644
index 00000000..35a8762a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/gamasutra.com.txt
@@ -0,0 +1,20 @@
1# default view title
2title: //span[@class='newsTitle']
3# print view title
4title: //h3[@class='title']
5
6# default view author
7author: //span[@class='newsAuth']/a
8author: substring-after(//span[@class='newsAuth'], 'by ')
9
10# default view date
11date: //td[@class='newsDate']
12
13# default view body
14body: //td[@class='featureText']
15body: //td[@class='newsText']
16
17strip: //h3[@class='title']
18
19single_page_link: //a[contains(@href, '?print=1')]
20test_url: http://www.gamasutra.com/view/feature/132559/staying_power_rethinking_feedback_.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gameblog.fr.txt b/inc/3rdparty/site_config/standard/gameblog.fr.txt
new file mode 100644
index 00000000..2cc4b378
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/gameblog.fr.txt
@@ -0,0 +1,10 @@
1title: //meta[@property="og:title"]/@content
2body: //div[@id='GBTVPlayer'] | //div[contains(@class, 'col490')]
3
4prune: no
5
6strip_id_or_class: noprint
7strip: //div[@id='gbNewsTextContent']/following-sibling::*
8
9test_url: http://www.gameblog.fr/news/26330-les-sims-3-showtime-s-annonce-en-video
10test_url: http://www.gameblog.fr/news/26306-mise-a-jour-du-dashboard-de-la-xbox-360-disponible \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/garythink.com.txt b/inc/3rdparty/site_config/standard/garythink.com.txt
new file mode 100644
index 00000000..1791e816
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/garythink.com.txt
@@ -0,0 +1,3 @@
1tidy: no
2
3test_url: http://www.garythink.com/eft/testing.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gasteroprod.com.txt b/inc/3rdparty/site_config/standard/gasteroprod.com.txt
new file mode 100644
index 00000000..ef68082a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/gasteroprod.com.txt
@@ -0,0 +1,4 @@
1# These should work, but don't. They were given by Firefox XPather extension
2title: //article//header//a//h1
3body: //article//section
4test_url: http://gasteroprod.com/blog/faut-il-continuer-a-supporter-internet-explorer-6.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gatopardo.com.txt b/inc/3rdparty/site_config/standard/gatopardo.com.txt
new file mode 100644
index 00000000..74346328
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/gatopardo.com.txt
@@ -0,0 +1,8 @@
1body: //div[@class='panel']
2strip: //div[@style='float:right']
3strip: //span[@class='titulosHomePublicidad']
4strip: //div[@id='TitTop5Der']
5strip: //img[@src='/ImagesGatoPardo/LogoGatopardo.png']
6
7prune: yes
8test_url: http://www.gatopardo.com/ReportajesGP.php?R=95 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gawker.com.txt b/inc/3rdparty/site_config/standard/gawker.com.txt
new file mode 100644
index 00000000..6531d81a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/gawker.com.txt
@@ -0,0 +1,6 @@
1body: //div[@class="post-body"]
2
3# Remove 'content is restricted'
4strip: //div[@id='agegate_IDHERE']
5
6test_url: http://gawker.com/#!5782070/russian-bomb-squad-successfully-defuses-sex-toy \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/geeksofdoom.com.txt b/inc/3rdparty/site_config/standard/geeksofdoom.com.txt
new file mode 100644
index 00000000..55586e1c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/geeksofdoom.com.txt
@@ -0,0 +1,3 @@
1author: substring-after(//span[@class='storyauthor'],'Posted by')
2date: //span[@class='storydate']
3test_url: http://www.geeksofdoom.com/2012/03/14/robert-rodriguez-says-machete-kills-and-sin-city-2-will-film-this-year/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/geenstijl.nl.txt b/inc/3rdparty/site_config/standard/geenstijl.nl.txt
new file mode 100644
index 00000000..f6dccf48
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/geenstijl.nl.txt
@@ -0,0 +1,3 @@
1body: //div[@id = 'article']
2strip: //div[@id = 'klasbox']
3test_url: http://www.geenstijl.nl/mt/archieven/2010/10/vrouw_lange_frans_wou_baas_b_d.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/getnews.jp.txt b/inc/3rdparty/site_config/standard/getnews.jp.txt
new file mode 100644
index 00000000..537b4c2e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/getnews.jp.txt
@@ -0,0 +1,3 @@
1body: //div[@class='post']
2strip: //ul[@id='bookmark_single']
3test_url: http://getnews.jp/archives/117312 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/giantbomb.com.txt b/inc/3rdparty/site_config/standard/giantbomb.com.txt
new file mode 100644
index 00000000..8a54bc07
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/giantbomb.com.txt
@@ -0,0 +1,11 @@
1# 2011-11-19 - carlo@... - Initial setup.
2
3strip_id_or_class: user-review-detail
4strip: //h1
5
6body: //div[@class="wiki-content"] | //div[@class="section-bd"] | //div[@class="news-story"]
7
8author: //span[@class="reviewer"] | //p[@class="byline"]/a/text()
9date: //span[@class="dtreviewed"]
10
11test_url: http://www.giantbomb.com/the-elder-scrolls-v-skyrim/61-33394/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/giga.de.txt b/inc/3rdparty/site_config/standard/giga.de.txt
new file mode 100644
index 00000000..f60199ad
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/giga.de.txt
@@ -0,0 +1,20 @@
1tidy:no
2title://h2[@class="title"]
3# author:"Ben Miller"
4date://div[@id="stats"]/span
5strip_id_or_class:stats
6strip_id_or_class:breadcrumbs
7strip_id_or_class:gn-why-content
8strip_id_or_class:single-social
9strip_id_or_class:sidebar-ads
10strip_id_or_class:sidebar-top
11strip_id_or_class:footer
12strip_id_or_class:post_meta
13# strip_id_or_class:
14# strip_id_or_class:
15# strip_id_or_class:
16# strip_id_or_class:
17# strip_id_or_class:
18# strip_id_or_class:
19
20test_url: http://www.giga.de/benm/2011/10/17/probleme-mit-ios-5-wenn-die-daten-weg-sind/#more-58033 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gigaom.com.txt b/inc/3rdparty/site_config/standard/gigaom.com.txt
new file mode 100644
index 00000000..348bdf23
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/gigaom.com.txt
@@ -0,0 +1,17 @@
1date: //meta[@name='DC.date.issued']/@content
2date: //span[@class='post-meta the-date']
3
4title: //meta[@property='og:title']/@content
5
6author: //meta[@name='DC.creator']/@content
7
8body: //div[contains(@class, 'post-sub-head') or starts-with(@id, 'post-content-')]
9
10find_string: id="content"
11replace_string: id="content-ignore"
12
13strip_id_or_class: sharedaddy
14
15prune: no
16
17test_url: http://gigaom.com/2011/10/24/groupon-google-lawsuit/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gihyo.jp.txt b/inc/3rdparty/site_config/standard/gihyo.jp.txt
new file mode 100644
index 00000000..478b23a3
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/gihyo.jp.txt
@@ -0,0 +1,3 @@
1single_page_link: //p[@id='skip']//a[contains(@href, 'skip')]
2
3test_url: http://gihyo.jp/dev/serial/01/machine-learning/0010 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gist.github.com.txt b/inc/3rdparty/site_config/standard/gist.github.com.txt
new file mode 100644
index 00000000..53095b34
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/gist.github.com.txt
@@ -0,0 +1,6 @@
1body: //div[@class="highlight"]/pre
2
3prune: no
4tidy: no
5
6test_url: https://gist.github.com/1258908 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/givemesomethingtoread.com.txt b/inc/3rdparty/site_config/standard/givemesomethingtoread.com.txt
new file mode 100644
index 00000000..144ce045
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/givemesomethingtoread.com.txt
@@ -0,0 +1,3 @@
1single_page_link: //div[@id="content"]//h2/a
2
3test_url: http://givemesomethingtoread.com/post/6285838917/the-baddest-lawyer-in-the-history-of-jersey \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gizmodo.co.uk.txt b/inc/3rdparty/site_config/standard/gizmodo.co.uk.txt
new file mode 100644
index 00000000..285e76c0
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/gizmodo.co.uk.txt
@@ -0,0 +1,7 @@
1body: //div[@id="leadimage" or @class="postcontent"]
2author: //div[@class="contentauthor"]
3date: //div[@class="timestamp"]
4
5prune: no
6
7test_url: http://www.gizmodo.co.uk/2013/02/bbc-forcing-poor-old-sir-david-attenborough-to-go-on-twitter/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gizmodo.com.txt b/inc/3rdparty/site_config/standard/gizmodo.com.txt
new file mode 100644
index 00000000..c9536255
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/gizmodo.com.txt
@@ -0,0 +1,7 @@
1body: //div[@class="post-body" or contains(@class, 'illustration top')]
2author: (//cite//span[@class="plus-icon"])[1]
3date: //span[@class="date"]
4
5prune: no
6
7test_url: http://gizmodo.com/5880147/kuhn-rikon-improves-their-spice-grinder-with-grade-school-science \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gizmologia.com.txt b/inc/3rdparty/site_config/standard/gizmologia.com.txt
new file mode 100644
index 00000000..d2c7c9f9
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/gizmologia.com.txt
@@ -0,0 +1,2 @@
1body: //*[(@class = "historia")]
2test_url: http://gizmologia.com/2011/09/amd-trinity-el-sucesor-de-llano-en-una-demostracion-muy-interesante \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gizmovil.com.txt b/inc/3rdparty/site_config/standard/gizmovil.com.txt
new file mode 100644
index 00000000..5fc204b8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/gizmovil.com.txt
@@ -0,0 +1,2 @@
1body: //*[(@class = "historia")]
2test_url: http://gizmovil.com/2011/09/hipertextual-labs-receptor-bluetooth-nokia-bh-214 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/global.txt b/inc/3rdparty/site_config/standard/global.txt
new file mode 100644
index 00000000..135ed500
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/global.txt
@@ -0,0 +1,4 @@
1# Look for Open Graph data - http://ogp.me
2title: //meta[@property="og:title"]/@content
3date: //meta[@property="article:published_time"]/@content
4# article:author is sometimes a URL, e.g. on guardian.co.uk \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/globalissues.org.txt b/inc/3rdparty/site_config/standard/globalissues.org.txt
new file mode 100644
index 00000000..95d4becf
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/globalissues.org.txt
@@ -0,0 +1,15 @@
1body: //div[@id='content']
2
3strip: //p[@class='top']
4strip: //h2[.='Where next?']
5strip_id_or_class: where-next
6strip_id_or_class: social-bookmarks
7strip_id_or_class: link-to-here
8strip_id_or_class: options-heading
9strip_id_or_class: page-options-content
10strip_id_or_class: page-info-bottom
11
12tidy: no
13prune: no
14
15test_url: http://www.globalissues.org/article/39/a-primer-on-neoliberalism \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/goal.com.txt b/inc/3rdparty/site_config/standard/goal.com.txt
new file mode 100644
index 00000000..075c4d2b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/goal.com.txt
@@ -0,0 +1,16 @@
1title: //div[@id='article_headline']//h1
2date: //div[contains(@class, 'articleDate')]//h4
3body: //div[@id='article_headline']/h2 | //div[@id='large_article_image' or @id='article_content']
4
5strip_id_or_class: relatedLinksBox
6strip_id_or_class: betting-widget
7strip_image_src: install_flash.gif
8
9strip: //table[contains(@style, 'float: right; width: 285px;')]
10strip: //div[@class='caption']
11
12tidy: no
13prune: no
14
15test_url: http://www.goal.com/en-gb/news/3284/euro-2012/2012/05/31/3139032/video-profile-back-to-his-very-best-for-bayern-frances-flair-and-
16test_url: http://www.goal.com/en-gb/news/3284/euro-2012/2012/05/31/3139869/lampard-injury-a-bitter-blow-for-england-and-sorry-way-to# \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/golem.de.txt b/inc/3rdparty/site_config/standard/golem.de.txt
new file mode 100644
index 00000000..6c5d1c4f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/golem.de.txt
@@ -0,0 +1,25 @@
1# Jens Kohl, jens.kohl@...
2# - Added publication date
3# - Stripped pagination block
4# - Added single page link
5# - Added XPath queries for the printer friendly version
6
7title: //h1
8body: //div[@class='formatted']
9prune: no
10
11date: substring-after(//li[2][@class="text1"], 'Datum:')
12strip: //ol[@class="list-chapters"]
13strip_comments: yes
14
15# next: commands for printer friendly pages
16single_page_link: //a[contains(@href, 'print.php?a=')]/@href
17title: //body/h3
18strip_image_src: staticrl/images/logo.jpg
19strip_image_src: http://cpx.golem.de/cpx.php?class=7
20strip: //body/h3
21strip: //body/b[1]
22strip: //body/b[2]
23strip: //body/b[3]
24strip: //div[1]
25test_url: http://www.golem.de/1112/88696.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/good.is.txt b/inc/3rdparty/site_config/standard/good.is.txt
new file mode 100644
index 00000000..5cf67011
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/good.is.txt
@@ -0,0 +1,4 @@
1title: //div[@class="title"]/div/h1
2body: //div[@class="body"]
3date: //li[@class="date-time"]
4test_url: http://www.good.is/post/why-amazon-is-the-next-top-tech-company/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gossip-tv.gr.txt b/inc/3rdparty/site_config/standard/gossip-tv.gr.txt
new file mode 100644
index 00000000..c2fe4e40
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/gossip-tv.gr.txt
@@ -0,0 +1,14 @@
1date: //meta[@name='og:article:published_time']/@value
2
3body: //div[@class='itemIntroText' or @class='itemImageBlock' or @class='itemFullText']
4
5strip_id_or_class: itemImageGallery
6
7# remove extras at end of post content
8find_string: <div style="margin:5px 0 10px;">
9replace_string: </div></body></html><!--
10
11prune: no
12
13test_url: http://www.gossip-tv.gr/story/158902/aggelike-daliane-semera-duskoleuontai-oloi-sta-epaggelmatika-tous
14test_url: http://www.gossip-tv.gr/lifestyle/Taste/story/230266/lahtaristo-kai-ygieino-tost-sokolatas \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gothamist.com.txt b/inc/3rdparty/site_config/standard/gothamist.com.txt
new file mode 100644
index 00000000..5179fc12
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/gothamist.com.txt
@@ -0,0 +1,7 @@
1title: //div[@class='entry-header']
2author: //span[@class='vcard author']
3date: //abbr[@class='published']
4#move_into(//div[@class='entry-body']): //img[@id='photo_1']
5body: //div[@class='entry-body']
6strip: //div[@class='galleryEaseThumbs']
7test_url: http://gothamist.com/2012/03/15/fancy_cocktail_lounge_the_randolph.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gotomanager.com.txt b/inc/3rdparty/site_config/standard/gotomanager.com.txt
new file mode 100644
index 00000000..7fb0ee03
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/gotomanager.com.txt
@@ -0,0 +1,21 @@
1title: //span[@id="showTitle"]
2author: //span[@id="showAuthor"]
3date: //span[@id="showRefDate"]
4
5strip: //span[@class="black_bold"]
6strip: //div[@id="sectionName"]
7strip: //div[@id="storyHeader"]
8
9body: //div[@id="newsBodyText"]
10
11strip_image_src: "http://www.gotomanager.com/img/mgrm/space.gif"
12strip_image_src: "http://www.gotomanager.com/images/separator.gif"
13strip_image_src: "http://www.gotomanager.com/images/spaces.gif"
14
15convert_double_br_tags: yes
16tidy: yes
17
18strip: //div[@id="smallLeadImage"]
19strip: //div[@id="truehitsSurvey"]
20strip: //table[@id="relatedInfoTable"]
21test_url: http://www.gotomanager.com/news/details.aspx?id=86759 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gq.com.txt b/inc/3rdparty/site_config/standard/gq.com.txt
new file mode 100644
index 00000000..233c4a7f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/gq.com.txt
@@ -0,0 +1,9 @@
1next_page_link: //div[@class='pagination']//span[@class='paginationNext']/a
2strip_id_or_class: utility
3strip_id_or_class: keywords
4strip_id_or_class: pagination
5strip_id_or_class: position2_content
6body: //div[@class='article']
7title: //h1[@class='content-headline']
8author: //span[@class='contributor']//a
9test_url: http://www.gq.com/news-politics/newsmakers/201203/terry-thompson-ohio-zoo-massacre-chris-heath-gq-february-2012 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/grantland.com.txt b/inc/3rdparty/site_config/standard/grantland.com.txt
new file mode 100644
index 00000000..3269e086
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/grantland.com.txt
@@ -0,0 +1,20 @@
1# this is fragile with footnotes -- leave it for now
2
3#tidy: no
4#prune: no
5#move_into(//article): //aside[@id='footnotes']
6author: //cite/a
7date: //time
8
9strip: //a[text()='Grantland']
10strip_id_or_class: ad-wrapper
11strip_id_or_class: fb-connect-link
12strip_id_or_class: fb-status
13strip: //li[@class='print']
14strip: //cite
15strip: //a[contains(text(), '[+]')]
16strip: //a[@id='jump-nav-link']
17strip: //h1[text()='Share This']
18strip: //h1[text()='Top Stories']
19strip: //div[@id="update-text-size"]
20test_url: http://www.grantland.com/story/_/id/8421241/examining-new-albums-rock-veterans-no-doubt-green-day \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/greatergreaterwashington.org.txt b/inc/3rdparty/site_config/standard/greatergreaterwashington.org.txt
new file mode 100644
index 00000000..a5258030
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/greatergreaterwashington.org.txt
@@ -0,0 +1,11 @@
1title: //div[@class="blogpost"]/h2
2author: //div[@class="blogpost"]/p[@class="byline"]/a
3date: //div[@class="blogpost"]/p[@class="byline"]/span[@class="time_posted"]
4body: //div[@class="blogpost"]
5strip_id_or_class: flag
6strip_id_or_class: byline
7strip_id_or_class: post_footer
8strip_id_or_class: related_posts
9strip_id_or_class: post_author_bios
10strip: //h2
11test_url: http://greatergreaterwashington.org/post/12457/ask-ggw-what-will-happen-to-the-1000-series-railcars/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/groups.drupal.org.txt b/inc/3rdparty/site_config/standard/groups.drupal.org.txt
new file mode 100644
index 00000000..7e15a5c1
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/groups.drupal.org.txt
@@ -0,0 +1,5 @@
1title://h1
2author://span[@class="submitted"]/a
3date:substring-after(//span[@class="submitted"],'on ')
4body://div[@class="content"]
5test_url: http://groups.drupal.org/node/36816 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/guardian.co.uk.txt b/inc/3rdparty/site_config/standard/guardian.co.uk.txt
new file mode 100644
index 00000000..71d84306
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/guardian.co.uk.txt
@@ -0,0 +1,7 @@
1title: //div[@id='main-article-info']//h1
2body: //div[@id='article-wrapper']
3date: //li[@class='publication']//time[@pubdate] | //li[@class='publication']//data[@pubdate]
4author: //li[@class='byline']
5prune: no
6tidy: no
7test_url: http://www.guardian.co.uk/business/2011/oct/06/quantitative-easing-75bn-bank-of-england \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gulfnews.com.txt b/inc/3rdparty/site_config/standard/gulfnews.com.txt
new file mode 100644
index 00000000..e69044b3
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/gulfnews.com.txt
@@ -0,0 +1,5 @@
1body: //div[@class='wrapper_half']//ul[@class='details'] | //div[@class='wrapper_half']//p[@class='synopsis'] | //div[@class='wrapper_half']//div[@class='image'] | //div[@class='wrapper_half']//div[@class='article']
2strip: //div[@class='wrapper_half']//ul[@class='details']/li[position()>1]
3prune: no
4tidy: no
5test_url: http://gulfnews.com/news/gulf/uae/government/abu-dhabi-centre-offers-useful-information-1.811084 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/guokr.com.txt b/inc/3rdparty/site_config/standard/guokr.com.txt
new file mode 100644
index 00000000..00255eb8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/guokr.com.txt
@@ -0,0 +1,22 @@
1# To administrator:
2# Please change the hostname to "www.guokr.com/article/*"
3# Not working for "www.guokr.com/post/" pages configured by carlosliu913@gmail.com
4
5# This filter is tested on:
6# http://www.guokr.com/article/274325/
7# http://www.guokr.com/article/275013/
8
9title://h1
10author://div[contains(@class, 'content-th-info')]/a
11date://div[contains(@class, 'content-th-info')]/span
12body://div[contains(@class, 'Content')]
13
14strip://div[contains(@class, 'bottom-i')]
15strip://div[contains(@class, 'copyright')]
16strip://div[contains(@class, 'fr')]
17strip://div[contains(@class, 'content-th-info')]
18strip://h1[contains(@id, 'articleTitle')]
19strip://div[contains(@class, 'side')]
20strip://div[contains(@class, 'top-wp')]
21test_url: http://www.guokr.com/article/275013/
22test_url: http://www.guokr.com/article/338387/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/haberler.com.txt b/inc/3rdparty/site_config/standard/haberler.com.txt
new file mode 100644
index 00000000..bc1ce689
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/haberler.com.txt
@@ -0,0 +1,5 @@
1title: //div[@id="habermetni"]/h1[@id="haber_baslik"]
2body: //div[@id="habermetni"]/p
3strip: //img[@class='newsDetailLeft']
4strip_image_src: /haber-resimleri/
5test_url: http://www.haberler.com/emniyete-atacakti-elinde-patladi-3198733-haberi/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/halo.bungie.org.txt b/inc/3rdparty/site_config/standard/halo.bungie.org.txt
new file mode 100644
index 00000000..7989d09f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/halo.bungie.org.txt
@@ -0,0 +1,5 @@
1title:substring-before(id("maincontent")/table, 'Posted')
2body:id("maincontent")/p
3# eventually convert linebreaks better
4
5test_url: http://halo.bungie.org/fanfic/?story=Delahunt0312112316071.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hammers.theoffside.com.txt b/inc/3rdparty/site_config/standard/hammers.theoffside.com.txt
new file mode 100644
index 00000000..747f90a1
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/hammers.theoffside.com.txt
@@ -0,0 +1,7 @@
1# Remove right column
2strip: //*[(@class = 'right_col')]
3
4# Remove comments etc.
5strip: //*[(@class = 'category')]
6strip: /html/body/div[1][@class='absolute_content_high']/div[1][@class='wrapper']/div[1][@class='main_col']/div[@class='main_content']/h3
7test_url: http://hammers.theoffside.com/carling-cup/a-funny-thing-happened-on-the-way-to-4-nil.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hanselman.com.txt b/inc/3rdparty/site_config/standard/hanselman.com.txt
new file mode 100644
index 00000000..d3ffeab1
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/hanselman.com.txt
@@ -0,0 +1,4 @@
1date: //span[@class="item-date"]
2body: //div[@class="item-content"]
3strip_comments: no
4test_url: http://www.hanselman.com/blog/BrainBytesBackBunsTheProgrammersPriorities.aspx \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hardware.fr.txt b/inc/3rdparty/site_config/standard/hardware.fr.txt
new file mode 100644
index 00000000..318885c8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/hardware.fr.txt
@@ -0,0 +1,6 @@
1title: //h1
2author: //a[@class='a_aut']
3body: //div[@class='content_dossier']
4strip: //div[@id='pagination']
5next_page_link: //div[@class='sommaire_colonne']//span[@class='page_actuelle']/following::span[@class='autres_page']//a/@href
6test_url: http://www.hardware.fr/articles/850-1/pci-express-3-0-impact-performances.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hbr.org.txt b/inc/3rdparty/site_config/standard/hbr.org.txt
new file mode 100644
index 00000000..fd6145e7
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/hbr.org.txt
@@ -0,0 +1,6 @@
1title: //div[@id='article-title']
2author: //div[@id='articleAuthors']
3body: //div[@id='article']
4strip: //div[@class='module wide']
5next_page_link: //a[@title='Next Page']
6test_url: http://hbr.org/2012/04/the-real-leadership-lessons-of-steve-jobs/ar/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/healthland.time.com.txt b/inc/3rdparty/site_config/standard/healthland.time.com.txt
new file mode 100644
index 00000000..204d8da0
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/healthland.time.com.txt
@@ -0,0 +1,10 @@
1date: //span[@class = 'date']
2body: //div[@class = 'entry-content']
3strip://div[@class='more-ways']
4strip://div[@id = 'stayConnected']
5strip://p[child::a[@rel = 'bookmark']]
6strip://p[starts-with(string(.),'(MORE:')]
7strip://p[starts-with(string(.),'(PHOTOS:')]
8move_into(//p[../@class = 'entry-content'][position() = last()])://div[@id = 'featbox']
9
10test_url: http://healthland.time.com/2011/07/24/amy-winehouse-and-the-pain-of-addiction/?preview=true&preview_id=39210&preview_nonce=0777d4e408 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/heise-online.mobi.txt b/inc/3rdparty/site_config/standard/heise-online.mobi.txt
new file mode 100644
index 00000000..1da82ac7
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/heise-online.mobi.txt
@@ -0,0 +1,3 @@
1body: //div[@id='content']/div
2date: //p[@class='author_date']/span[@class='date']
3test_url: http://heise-online.mobi/newsticker/meldung/Amazons-Appstore-in-der-Kritik-Ein-Desaster-fuer-Kunden-und-Entwickler-1273936.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/heise.de.txt b/inc/3rdparty/site_config/standard/heise.de.txt
new file mode 100644
index 00000000..5f19d3f8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/heise.de.txt
@@ -0,0 +1,7 @@
1single_page_link: //p[@class='news_option']/a
2
3date: //p[@class='news_datum']
4title: //h1
5body: //div[@class='meldung_wrapper']
6
7test_url: http://www.heise.de/newsticker/meldung/Europa-soll-Grundrechteschutz-im-Netz-staerken-1392664.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hespress.com.txt b/inc/3rdparty/site_config/standard/hespress.com.txt
new file mode 100644
index 00000000..d866f629
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/hespress.com.txt
@@ -0,0 +1,7 @@
1body: //div[@id='article_holder']//div[@class='image'] | //div[@id='article_body']
2
3prune: no
4tidy: no
5
6test_url: http://hespress.com/videos/73684.html
7test_url: http://hespress.com/permalink/73678.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/highscalability.com.txt b/inc/3rdparty/site_config/standard/highscalability.com.txt
new file mode 100644
index 00000000..fd50b6ad
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/highscalability.com.txt
@@ -0,0 +1,3 @@
1body: //div[@class='journal-entry-text']
2
3test_url: http://highscalability.com/blog/2011/3/14/6-lessons-from-dropbox-one-million-files-saved-every-15-minu.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hiperpop.com.txt b/inc/3rdparty/site_config/standard/hiperpop.com.txt
new file mode 100644
index 00000000..b5eb062e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/hiperpop.com.txt
@@ -0,0 +1,2 @@
1body: //*[(@class = "historia")]
2test_url: http://hiperpop.com/2011/09/marc-anthony-celebra-su-cumpleanos-con-jennifer-lopez \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hiphopleeft.nl.txt b/inc/3rdparty/site_config/standard/hiphopleeft.nl.txt
new file mode 100644
index 00000000..c57c1aa9
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/hiphopleeft.nl.txt
@@ -0,0 +1,4 @@
1body: //div[@class = 'pd']
2strip: //div[@id = 'overzicht-albumrecensies']
3strip: //div[@id = 'jc']
4test_url: http://hiphopleeft.nl/index.php?option=com_content&view=article&id=2767:mark-ronson-record-collection&catid=66:m&Itemid=142 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/historytoday.com.txt b/inc/3rdparty/site_config/standard/historytoday.com.txt
new file mode 100644
index 00000000..dc687f3f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/historytoday.com.txt
@@ -0,0 +1,10 @@
1body://div[@id = 'content']
2author://span[@class = 'authors']
3author://span[@class = 'ht-vtag'][1]
4date:substring-before(//meta[@name = 'dc.date']/@content,'T')
5strip://div[contains(@class, 'region-ubercontent')]
6strip://h1
7strip://div[@id = 'ht-author']
8strip://ul[@class = 'links inline']
9strip://div[@id = 'ht-tools']
10test_url: http://www.historytoday.com/carol-dyhouse/skin-deep-fall-fur \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hmercer.com.txt b/inc/3rdparty/site_config/standard/hmercer.com.txt
new file mode 100644
index 00000000..eeee1594
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/hmercer.com.txt
@@ -0,0 +1,5 @@
1title: //*[@class='ptitle']
2date: //span[@class='date']
3body: //div[@class='body']
4prune: no
5test_url: http://hmercer.com/2011/07/why-i-switched-to-jekyll/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hometheaterreview.com.txt b/inc/3rdparty/site_config/standard/hometheaterreview.com.txt
new file mode 100644
index 00000000..d43e6448
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/hometheaterreview.com.txt
@@ -0,0 +1,4 @@
1body: //div[@id='entry-body']
2strip_id_or_class: paginate
3strip: //p[contains(., 'Additional Resources')]
4test_url: http://hometheaterreview.com/dreamvision-starlight-3-three-chip-d-ila-projector-reviewed/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hosted.ap.org.txt b/inc/3rdparty/site_config/standard/hosted.ap.org.txt
new file mode 100644
index 00000000..e19dd526
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/hosted.ap.org.txt
@@ -0,0 +1,5 @@
1body: //table[@class='ap-smallphoto-table'] | //div[@class='body']//*[@class='entry-content']
2tidy: no
3strip_image_src: analytics.apnewsregistry
4
5test_url: http://hosted.ap.org/dynamic/stories/U/US_SPENDING_SHOWDOWN?SITE=FLPET&SECTION=HOME&TEMPLATE=DEFAULT&CTIME=2011-04-06-07-46-50 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hs.fi.txt b/inc/3rdparty/site_config/standard/hs.fi.txt
new file mode 100644
index 00000000..67125fb5
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/hs.fi.txt
@@ -0,0 +1,3 @@
1prune: yes
2tidy: yes
3test_url: http://www.hs.fi/kotimaa/Teollisuushallin%20palo%20levitt%C3%A4%C3%A4%20vaarallista%20savua%20Tuusulassa/a1305571582405 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ht.ly.txt b/inc/3rdparty/site_config/standard/ht.ly.txt
new file mode 100644
index 00000000..a8412d2a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/ht.ly.txt
@@ -0,0 +1,3 @@
1single_page_link: //iframe[@id='hootFrame']/@src
2
3test_url: http://ht.ly/bOiZV \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/huffingtonpost.com.txt b/inc/3rdparty/site_config/standard/huffingtonpost.com.txt
new file mode 100644
index 00000000..d40513b2
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/huffingtonpost.com.txt
@@ -0,0 +1,16 @@
1title: //meta[@property="og:title"]/@content
2body: //div[img[starts-with(@id, 'img_caption')]] | //div[@class="big_photo"] | //div[contains(@class, 'entry_body_text')]
3date: //meta[@name="publish_date"]/@content
4author: //a[@rel="author"]
5author: //meta[@name="author"]/@content
6prune: no
7tidy: no
8strip: //footer
9strip_id_or_class: ps-slideshow
10strip_id_or_class: fs-slideshow
11strip: //p[contains(., 'Related on HuffPost:')]
12# end early
13replace_string(<div class="sbm-main): </body></html><div class="not-interested
14
15test_url: http://www.huffingtonpost.com/mitch-moxley/tracking-beijings-boom-th_b_1209828.html
16test_url: http://www.huffingtonpost.com/2012/09/11/president-obama-iphone-throwdown_n_1873826.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/humantransit.org.txt b/inc/3rdparty/site_config/standard/humantransit.org.txt
new file mode 100644
index 00000000..ec7d3c06
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/humantransit.org.txt
@@ -0,0 +1,5 @@
1title: //h3[@class="entry-header"]
2date: //h2[@class="date-header"]
3body: //div[contains(@class, 'entry')]
4
5test_url: http://www.humantransit.org/2012/06/can-network-primers-reduce-grief-about-network-design.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hurriyet.com.tr.txt b/inc/3rdparty/site_config/standard/hurriyet.com.tr.txt
new file mode 100644
index 00000000..ccf09dcc
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/hurriyet.com.tr.txt
@@ -0,0 +1,7 @@
1title: //div[@class='HaberDetayTitleHold Title']/h1
2body: //div[@id='YazarDetayText']
3author: //div[@class='HaberDetayTitleHold Title']/h1
4prune: no
5
6test_url: http://www.hurriyet.com.tr/ekonomi/19490260.asp
7test_url: http://www.hurriyet.com.tr/yazarlar/22078439.asp \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hvg.hu.txt b/inc/3rdparty/site_config/standard/hvg.hu.txt
new file mode 100644
index 00000000..06fa98d8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/hvg.hu.txt
@@ -0,0 +1,9 @@
1title: //div[@id='pg-content']//h1
2body: //div[@id='articleBody0']
3replace_string(</table>): </table><br /><br />
4
5single_page_link: //div[@class="up-header"]/a
6
7prune: no
8
9test_url: http://hvg.hu/w/20111125_sparta \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hypebeast.com.txt b/inc/3rdparty/site_config/standard/hypebeast.com.txt
new file mode 100644
index 00000000..49b46da5
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/hypebeast.com.txt
@@ -0,0 +1,10 @@
1body: //div[@id='content']//div[contains(@class, 'wp-image-') or contains(@class, 'entry')][1]
2author: //span[@class='author']/a
3
4strip_id_or_class: disqus
5strip_id_or_class: paginator
6strip_id_or_class: photo-number
7
8prune: no
9
10test_url: http://hypebeast.com/2012/11/stussy-2012-fall-winter-november-releases/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/idlewords.com.txt b/inc/3rdparty/site_config/standard/idlewords.com.txt
new file mode 100644
index 00000000..e1badef7
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/idlewords.com.txt
@@ -0,0 +1,7 @@
1title: //a[@class='post_title']
2body: //div[@class='entrybox']
3strip_id_or_class: post_title
4date: //div[@class='entrybox']/b[1]
5strip: //div[@class='entrybox']/b[1]
6author: string('Maciej Cegłowski')
7test_url: http://idlewords.com/2011/08/why_arabic_is_terrific.htm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/igeneration.fr.txt b/inc/3rdparty/site_config/standard/igeneration.fr.txt
new file mode 100644
index 00000000..d7ec2da1
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/igeneration.fr.txt
@@ -0,0 +1,5 @@
1author: substring-after(substring-after(//span[@class='submitted'],'- '),'- ')
2date: substring-before(//span[@class='submitted'], concat('- ',substring-after(substring-after(//span[@class='submitted'],'- '),'- ')))
3body: //div[@class='content clear-block zoneApple']
4
5test_url: http://www.igeneration.fr/iphone/l-iphone-et-l-ipad-chouchous-des-tpe-et-pme-55112 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ignoredbydinosaurs.com.txt b/inc/3rdparty/site_config/standard/ignoredbydinosaurs.com.txt
new file mode 100644
index 00000000..f74178a9
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/ignoredbydinosaurs.com.txt
@@ -0,0 +1,7 @@
1title://h1[@class='page-title']
2body://*[@id='content']//div[contains(@class,'node-content')]
3
4author://*[@id='content']//div[contains(@class,'node-submitted')]/a
5
6date:substring-after(//div[contains(@class,'node-submitted')],' on ')
7test_url: http://ignoredbydinosaurs.com/2011/09/great-lie-lorem-ipsum \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ilounge.com.txt b/inc/3rdparty/site_config/standard/ilounge.com.txt
new file mode 100644
index 00000000..ca1e54a8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/ilounge.com.txt
@@ -0,0 +1,13 @@
1# Get proper Title, Author and Date info
2title: substring-before(//title, '|')
3author: substring-after(//h4/a[@href='http://www.ilounge.com/index.php/ilounge/aboutus/'], 'By')
4date: //span[@class='instapaper_date']
5
6# For Reviews & First Looks, get the intro paragraph and put it in front of the main body.
7move_into(//div[@id='instapaper_para1']): //div[@id='instapaper_body']
8body: //div[@id='instapaper_para1']
9strip: //div[@class='reviewinfo']
10
11# We don't use footnotes, so why bother checking for them?
12footnotes: no
13test_url: http://www.ilounge.com/index.php/reviews/entry/luxa2-alum-x-for-iphone-4-4s/?utm_source=twitterfeed&utm_medium=twitter \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ilyabirman.ru.txt b/inc/3rdparty/site_config/standard/ilyabirman.ru.txt
new file mode 100644
index 00000000..da6a60f6
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/ilyabirman.ru.txt
@@ -0,0 +1,5 @@
1title: //div[@class='published visible e2-smart-title']//span
2author: //span[@id='e2-blog-title']
3date: //p[@class='super-h']
4body: //div[@class='text published visible']
5test_url: http://ilyabirman.ru/meanwhile/2011/11/15/2/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/inc.com.txt b/inc/3rdparty/site_config/standard/inc.com.txt
new file mode 100644
index 00000000..0589aaae
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/inc.com.txt
@@ -0,0 +1,21 @@
1author: substring-after(substring-before(//div[@id='byline'],'|'),'By')
2author: //div[@class='byline']/a
3date: //span[@class='pubdate']
4# print friendly page
5body: //div[@id='text']
6# regular page
7body: //div[@id= 'articlecontent']
8
9strip: //div[@id= 'articlecontent']/h1
10strip: //div[@id='articlecontent']/p[@class='deck']
11strip: //div[@id='articlecontent']/div[@class='byline']
12strip: //div[@id='articlespacer']
13strip: //div[@id='incsharebox']
14strip: //div[@id='articlesidebar']
15
16prune: no
17
18single_page_link: //a[contains(@href, 'Printer_Friendly.html')]
19strip: //a[contains(., 'Dig Deeper')]
20test_url: http://www.inc.com/guides/2010/11/seven-tips-for-lobbying-politicians.html
21test_url: http://www.inc.com/eric-schurenberg/startups-are-we-geting-irrationally-exuberant.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/independent.co.uk.txt b/inc/3rdparty/site_config/standard/independent.co.uk.txt
new file mode 100644
index 00000000..47baf36b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/independent.co.uk.txt
@@ -0,0 +1,9 @@
1title: //meta[@property='og:title']/@content
2body: //div[contains(@class, 'articleContent')]
3date: //meta[@property='article:published_time']/@content
4author: //div[@id='main']//div[@class='byline']//span[@class='authorName']
5
6strip_id_or_class: RelatedArtTag
7
8tidy: no
9test_url: http://www.independent.co.uk/news/world/middle-east/syria-could-face-human-rights-probe-2274326.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/index.php b/inc/3rdparty/site_config/standard/index.php
index a3d5f739..a1b767fd 100644
--- a/inc/3rdparty/site_config/standard/index.php
+++ b/inc/3rdparty/site_config/standard/index.php
@@ -1,3 +1,3 @@
1<?php 1<?php
2// this is here to prevent directory listing over the web 2// this is here to prevent directory listing over the web
3?> \ No newline at end of file 3?> \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/indiatimes.com.txt b/inc/3rdparty/site_config/standard/indiatimes.com.txt
new file mode 100644
index 00000000..e7a35e84
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/indiatimes.com.txt
@@ -0,0 +1,6 @@
1body: //figure[@class='mainVideo']
2strip: //figcaption
3
4prune: no
5
6test_url: http://www.indiatimes.com/bollywood/kareena-insecure-about-saif-working-with-bipasha-23386.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/inessential.com.txt b/inc/3rdparty/site_config/standard/inessential.com.txt
new file mode 100644
index 00000000..312cec4b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/inessential.com.txt
@@ -0,0 +1,5 @@
1title: //div[@class='weblogPost']/h3[1]
2author: ("Brent Simmons")
3date: //span[@class="weblogPostDisplayDate"]
4body: //div[@class='weblogPostBody']
5test_url: http://inessential.com/2011/10/25/why_just_store_the_app_data_on_dropbo \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/info.abril.com.br.txt b/inc/3rdparty/site_config/standard/info.abril.com.br.txt
new file mode 100644
index 00000000..64cf3c8e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/info.abril.com.br.txt
@@ -0,0 +1,4 @@
1title://h1
2body://div[@id='texto_link']
3
4test_url: http://info.abril.com.br/noticias/internet/filme-do-youtube-vai-estrear-nos-cinemas-22042011-6.shl \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/infoq.com.txt b/inc/3rdparty/site_config/standard/infoq.com.txt
new file mode 100644
index 00000000..3a4e402d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/infoq.com.txt
@@ -0,0 +1,14 @@
1body: //div[@id="intTranscript"]
2body: //div[@class="box-content"]
3title: //div[@class="box-content"]//h1[1]
4author: //p[@class="info"]/strong
5date: substring-before(substring-after(//p[@class="info"], "on"), "Length")
6strip: //div[@class="box-content"]//h1[1]
7strip: //div[@class="box-content"]//p[@class="info"]
8strip_id_or_class: vendor-content-box
9strip_id_or_class: tags2
10strip_id_or_class: instructions
11strip_id_or_class: comments
12strip_id_or_class: forum-list-tree
13strip: //div[@class="addthis_toolbox addthis_default_style"]
14test_url: http://www.infoq.com/interviews/oleg-zhurakousky-javaone2011-interview \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/informador.com.mx.txt b/inc/3rdparty/site_config/standard/informador.com.mx.txt
new file mode 100644
index 00000000..eedec24f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/informador.com.mx.txt
@@ -0,0 +1,9 @@
1title: //div[@class='tituloInt']
2body: //div[@class='notaPortada']
3strip: //img[@id='imgHorizontalInt imgDetalleImg imagenNota']
4date: //span[@class='publi']
5author: //span[@class='autor']
6tidy: no
7prune: no
8
9test_url: http://www.informador.com.mx/tecnologia/2011/337606/6/iran-desarrolla-antivirus-tras-afectaciones-por-duqu.htm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/information.dk.txt b/inc/3rdparty/site_config/standard/information.dk.txt
new file mode 100644
index 00000000..6e3c3b1a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/information.dk.txt
@@ -0,0 +1,7 @@
1title: //meta[@property='og:title']/@content
2author: //*[@property='dc:creator']
3date: //*[@property='dc:date']/@content
4body: //div[@id='page-content']//div[contains(@class, 'article-body')]
5
6tidy: no
7test_url: http://www.information.dk/282307 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/informationarchitects.net.txt b/inc/3rdparty/site_config/standard/informationarchitects.net.txt
new file mode 100644
index 00000000..134306cd
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/informationarchitects.net.txt
@@ -0,0 +1,10 @@
1title://h1[@class="post_title"]
2body://article[@class="post"]
3date://h1[@class="section_separator"]
4author://span[@class="post_author"]
5strip://nav[@class="arrow_nav"]
6strip://section[@id="contact"]
7strip_id_or_class:post_title
8strip_id_or_class:post_author
9strip_id_or_class:section_separator
10test_url: http://informationarchitects.net/blog/nzz-relaunch-a-quick-review/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/informationclearinghouse.info.txt b/inc/3rdparty/site_config/standard/informationclearinghouse.info.txt
new file mode 100644
index 00000000..0879e9e6
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/informationclearinghouse.info.txt
@@ -0,0 +1,6 @@
1title: //head/title
2body: //table[@id='table3']//div[@class='postContent']
3prune: no
4tidy: no
5
6test_url: http://www.informationclearinghouse.info/article28238.htm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/informit.com.txt b/inc/3rdparty/site_config/standard/informit.com.txt
new file mode 100644
index 00000000..84c1fdcf
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/informit.com.txt
@@ -0,0 +1,7 @@
1title: //div[@id='content']/h1
2body: //div[@id="content"]
3strip: //img[contains(@src, 'informit_printer.png')]
4single_page_link: //div[contains(@class, 'articleTools')]//a[contains(@href, '/printerfriendly.')]
5prune: no
6
7test_url: http://www.informit.com/articles/article.aspx?p=1729268 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/infoworld.com.txt b/inc/3rdparty/site_config/standard/infoworld.com.txt
new file mode 100644
index 00000000..dd588ed8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/infoworld.com.txt
@@ -0,0 +1,12 @@
1body: //div[@id='main_text']
2title: //div[@id='main_text']/h1
3strip: //div[@id='main_text']/h1
4strip: //div[@id='main_text']/h2
5strip_id_or_class: tools
6strip_id_or_class: articleTools
7strip_id_or_class: pagination
8strip_id_or_class: byline
9strip_id_or_class: tweet
10date: //div[@class='date']
11strip: //div[@class='date']
12test_url: http://www.infoworld.com/d/the-industry-standard/it-jobs-the-rise-both-offshore-and-in-us-187689 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/infzm.com.txt b/inc/3rdparty/site_config/standard/infzm.com.txt
new file mode 100644
index 00000000..012c873f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/infzm.com.txt
@@ -0,0 +1,9 @@
1# This filter is tested on:
2# http://www.infzm.com/content/71068
3# http://www.infzm.com/content/41577
4
5author://em[contains(@class, 'toAuthor')]
6date:substring(//em[contains(@class, 'pubTime')],1)
7body://section[contains(@id, 'articleContent')]
8title://h1[contains(@class ,'articleHeadline clearfix')]
9test_url: http://www.infzm.com/content/41577 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/inhabitat.com.txt b/inc/3rdparty/site_config/standard/inhabitat.com.txt
new file mode 100644
index 00000000..6629dafe
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/inhabitat.com.txt
@@ -0,0 +1,8 @@
1# set body
2body: //div[@class='post-listing']
3
4# remove clutter
5strip: //a/big
6strip: //a/em
7strip: //p/em
8test_url: http://inhabitat.com/2010/11/18/sliding-walls-transform-this-tokyo-house-into-an-office/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/instagr.am.txt b/inc/3rdparty/site_config/standard/instagr.am.txt
new file mode 100644
index 00000000..ad9e8214
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/instagr.am.txt
@@ -0,0 +1,6 @@
1title: //div[@class='caption']
2author: //p[@class='username']
3
4strip: //div[@class='contents']/h3
5strip: //div[@class='location']
6test_url: http://instagr.am/p/G-s_aciyDJ/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/interest.co.nz.txt b/inc/3rdparty/site_config/standard/interest.co.nz.txt
new file mode 100644
index 00000000..28c3310a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/interest.co.nz.txt
@@ -0,0 +1,2 @@
1body: //div[@id='content']
2test_url: http://www.interest.co.nz/opinion/opinion-when-our-fear-corporate-way-and-our-love-small-business-man-dangerous-thing \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/iolanguage.com.txt b/inc/3rdparty/site_config/standard/iolanguage.com.txt
new file mode 100644
index 00000000..231875ad
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/iolanguage.com.txt
@@ -0,0 +1,2 @@
1body: //center/table
2test_url: http://www.iolanguage.com/scm/io/docs/IoGuide.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ipadclub.nl.txt b/inc/3rdparty/site_config/standard/ipadclub.nl.txt
new file mode 100644
index 00000000..d196059e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/ipadclub.nl.txt
@@ -0,0 +1,7 @@
1body: //div[@id = 'post']
2strip: //div[@class = 'postinfo']
3strip: //div[@id = 'postmetanew']
4strip: //div[@class = 'paginator']
5strip: //div[@class = 'col-2']
6strip: //div[@id = 'adfactor-label']
7test_url: http://www.ipadclub.nl/15808/text-writer-ipad-tekstverwerker-met-functieknoppen/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ipadplanet.nl.txt b/inc/3rdparty/site_config/standard/ipadplanet.nl.txt
new file mode 100644
index 00000000..a2e49005
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/ipadplanet.nl.txt
@@ -0,0 +1,7 @@
1body: //div[@id = 'post']
2strip: //div[@class = 'postinfo']
3strip: //div[@id = 'postmetanew']
4strip: //div[@class = 'paginator']
5strip: //div[@class = 'col-2']
6strip: //div[@id = 'adfactor-label']
7test_url: http://www.ipadplanet.nl/11723/steve-jobs-bevestigt-verdwijnen-fysieke-rotatieschakelaar-in-ios-4-2/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/iphoneclub.nl.txt b/inc/3rdparty/site_config/standard/iphoneclub.nl.txt
new file mode 100644
index 00000000..f8d4f6a6
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/iphoneclub.nl.txt
@@ -0,0 +1,7 @@
1body: //div[@id = 'post']
2strip: //div[@class = 'postinfo']
3strip: //div[@id = 'postmetanew']
4strip: //div[@class = 'paginator']
5strip: //div[@class = 'col-2']
6strip: //div[@id = 'adfactor-label']
7test_url: http://www.iphoneclub.nl/105808/t-mobile-mobiel-internet-wordt-duurder-maar-blijft-onbeperkt/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/iphonehacks.com.txt b/inc/3rdparty/site_config/standard/iphonehacks.com.txt
new file mode 100644
index 00000000..c97ff43c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/iphonehacks.com.txt
@@ -0,0 +1,9 @@
1title: //meta[@name='og:title']/@content
2body: //small[@class='postmetadata'] | //div[contains(@class, 'entry-content')]
3
4strip: //span[@vanilla-identifier]
5
6prune: no
7tidy: no
8
9test_url: http://www.iphonehacks.com/2012/07/app-review-process-behind-the-scenes.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/iplaysoft.com.txt b/inc/3rdparty/site_config/standard/iplaysoft.com.txt
new file mode 100644
index 00000000..4a944768
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/iplaysoft.com.txt
@@ -0,0 +1,2 @@
1body: //div[@id='content']//div[@class='entry-banner' or @class='entry-content']
2test_url: http://www.iplaysoft.com/webbrowserpassview.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/isource.com.txt b/inc/3rdparty/site_config/standard/isource.com.txt
new file mode 100644
index 00000000..a1c16a16
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/isource.com.txt
@@ -0,0 +1,6 @@
1# Remove social buttons
2strip: //div[@id='temp_Content_Right']
3
4# Remove duplicate article title
5strip: //*[(@class='storytitle')]
6test_url: http://isource.com/2010/10/24/swearch-a-cool-iphone-web-app/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/itavisen.no.txt b/inc/3rdparty/site_config/standard/itavisen.no.txt
new file mode 100644
index 00000000..8da78cb0
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/itavisen.no.txt
@@ -0,0 +1,6 @@
1author: //p[@class = 'writer']
2
3date: //p[@class = 'published-time']
4
5body: //div[@class = 'text main']
6test_url: http://www.itavisen.no/899786/old-republic-blir-gratis \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/itstactical.com.txt b/inc/3rdparty/site_config/standard/itstactical.com.txt
new file mode 100644
index 00000000..550875ec
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/itstactical.com.txt
@@ -0,0 +1,12 @@
1title: //h1[@class="entry-title"]
2body: //div[@class='format_text entry-content']
3author: //span[@class="author vcard"]/a
4date: //abbr[@class="published"]
5
6strip_id_or_class: related-posts
7strip_id_or_class: membershipbox
8strip_id_or_class: share_this_compact_bt
9
10
11footnotes: no
12test_url: http://www.itstactical.com/warcom/knives/exclusive-triple-aught-design-production-dauntless-knife-video-walkthrough/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/itworld.com.txt b/inc/3rdparty/site_config/standard/itworld.com.txt
new file mode 100644
index 00000000..d4fa604e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/itworld.com.txt
@@ -0,0 +1,5 @@
1title: //*[@id="article-title"]
2author: //*[@id="article-info"]/strong
3date: //*[@class="article-dateline"]/strong
4body: //*[@id="article-content"]
5test_url: http://www.itworld.com/open-source/140916/android-sued-microsoft-not-linux \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/izismile.com.txt b/inc/3rdparty/site_config/standard/izismile.com.txt
new file mode 100644
index 00000000..af3f299a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/izismile.com.txt
@@ -0,0 +1,4 @@
1body: //div[starts-with(@id, 'news-id-')]
2prune: no
3
4test_url: http://izismile.com/2011/06/13/uncanny_factoid_fashion_or_creepy_2_pics.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/jalopnik.com.txt b/inc/3rdparty/site_config/standard/jalopnik.com.txt
new file mode 100644
index 00000000..fc2eef8e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/jalopnik.com.txt
@@ -0,0 +1,2 @@
1author: //span[@class='plus-icon']
2test_url: http://jalopnik.com/5892124/1955-porsche-550-spyder-sells-for-record-3685-million/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/jandan.net.txt b/inc/3rdparty/site_config/standard/jandan.net.txt
new file mode 100644
index 00000000..f1dd3d17
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/jandan.net.txt
@@ -0,0 +1,6 @@
1body: //div[@id='content']//div[@class = 'post f']
2strip_id_or_class: comment-big
3strip_id_or_class: avatar
4strip: //div[@class='time_s']
5
6test_url: http://jandan.net/2011/04/03/iphone-5-sony.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/jetzt.sueddeutsche.de.txt b/inc/3rdparty/site_config/standard/jetzt.sueddeutsche.de.txt
new file mode 100644
index 00000000..6e8af934
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/jetzt.sueddeutsche.de.txt
@@ -0,0 +1,22 @@
1title: //h1
2author: //p[contains(@class, 'author')]/a
3date: //p[contains(@class, 'time')]
4body: //div[@class='content']/div[contains(@class, 'text')]
5
6# prevent "no text" errors on multi-page articles
7tidy: no
8
9# we use a custom next-link detector instead of the print view because
10# it's pretty hard to strip out the unwanted parts in the print view
11autodetect_next_page: no
12next_page_link: //div[contains(@class, 'text')]/div/div[contains(@class, 'paging')]/a[@class='more ']
13
14strip: //h1
15
16strip_id_or_class: meta
17strip_id_or_class: author
18strip_id_or_class: paging
19
20# prevent "Report an Error" from being recognized as footnote
21footnotes: no
22test_url: http://jetzt.sueddeutsche.de/texte/anzeigen/544308/Alles-flicken \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/jjahnke.net.txt b/inc/3rdparty/site_config/standard/jjahnke.net.txt
new file mode 100644
index 00000000..95c45ee7
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/jjahnke.net.txt
@@ -0,0 +1,4 @@
1body: //div[@class='entry']
2prune: no
3
4test_url: http://www.jjahnke.net/rundbr87.html#2514 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/jobbank.gc.ca.txt b/inc/3rdparty/site_config/standard/jobbank.gc.ca.txt
new file mode 100644
index 00000000..af8d7d17
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/jobbank.gc.ca.txt
@@ -0,0 +1,5 @@
1body: //div[@id='formatCont_en']
2
3prune: no
4
5test_url: http://www.jobbank.gc.ca/detail-eng.aspx?Source=JobPosting&OrderNum=6397922 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/joelonsoftware.com.txt b/inc/3rdparty/site_config/standard/joelonsoftware.com.txt
new file mode 100644
index 00000000..75fbee5a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/joelonsoftware.com.txt
@@ -0,0 +1,21 @@
1# Works with old posts too, such as http://www.joelonsoftware.com/articles/fog0000000332.html
2
3author: substring-after(//div[@class="author"], 'by ')
4date: //div[@class="date"]
5
6## Clean stuff at top ##
7
8strip: //h1[1]
9strip: //h2[1]
10strip: //div[@class="date"]
11strip: //div[@class="author"]
12
13## Clean stuff at bottom ##
14
15strip: //blockquote[@class="textmessage"]
16strip: //div[@style="width:500px"]/p[last()]
17strip: //div[@style="width:500px"]/p[last()-1]
18strip: //div[@style="width:500px"]/h4[last()]
19strip: //div[@style="width:500px"]/h4[last()-1]
20strip: //div[@style="width:500px"]/div[last()]
21test_url: http://www.joelonsoftware.com/items/2011/09/15.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/jouire.com.txt b/inc/3rdparty/site_config/standard/jouire.com.txt
new file mode 100644
index 00000000..535a501e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/jouire.com.txt
@@ -0,0 +1,3 @@
1author: //h1
2date: //p[contains(@class,'date')]
3test_url: http://jouire.com/2011/01/exquisite-whispers/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/joystiq.com.txt b/inc/3rdparty/site_config/standard/joystiq.com.txt
new file mode 100644
index 00000000..7fbd467d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/joystiq.com.txt
@@ -0,0 +1,8 @@
1author: //a[@class="byline-author"]
2title: //h1[@class="headline"]
3strip: //div[@id="info-card"]
4strip: //div[@id="breaking-news"]
5strip: //div[@class="rmod list-post-mod"]
6strip: //div[@id="footer"]
7strip: //div[@id="GH_strip"]
8test_url: http://www.joystiq.com/2012/06/20/magic-the-gathering-duels-of-the-planeswalkers-2013-review/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/juedische-allgemeine.de.txt b/inc/3rdparty/site_config/standard/juedische-allgemeine.de.txt
new file mode 100644
index 00000000..be844e57
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/juedische-allgemeine.de.txt
@@ -0,0 +1,19 @@
1body: //div[@id='article_container']
2author: //h4//a[@class='author']
3title: //h1
4
5replace_string(lang="en"): lang="de"
6replace_string(/>1</a>):/></a>
7
8strip_id_or_class: share_toolbox
9strip_id_or_class: article_header
10strip_id_or_class: phototext
11
12strip_image_src: icon_author.gif
13
14strip: //img[@src='']
15strip: //h4[@id='author']
16
17prune: no
18
19test_url: http://www.juedische-allgemeine.de/article/view/id/13366 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/juppy.org.txt b/inc/3rdparty/site_config/standard/juppy.org.txt
new file mode 100644
index 00000000..e2d07f24
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/juppy.org.txt
@@ -0,0 +1,8 @@
1convert_double_br_tags: yes
2
3title: //div[@id="storycredits"]/p/span[@class="title"]
4author: //div[@id="storycredits"]/p/br[1]/following-sibling::text()
5
6strip: //div[@id="storycredits"]
7
8test_url: http://www.juppy.org/santa/stories.php?ForAuthorID=35&Year=2005 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/kachestvo.ru.txt b/inc/3rdparty/site_config/standard/kachestvo.ru.txt
new file mode 100644
index 00000000..34404e96
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/kachestvo.ru.txt
@@ -0,0 +1,3 @@
1body: //div[contains(@class, 'inner_content')]
2
3test_url: http://kachestvo.ru/promtovar/odezhda/denim.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/kenrockwell.com.txt b/inc/3rdparty/site_config/standard/kenrockwell.com.txt
new file mode 100644
index 00000000..e6d100ea
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/kenrockwell.com.txt
@@ -0,0 +1,7 @@
1# Ads
2strip: //table[@align="right"][@width="120"]
3
4# Affiliate link paragraphs
5strip: //a[.="Adorama"]/parent::p[contains(., "goodies")]
6strip: //a[.="Adorama"]/parent::p[contains(., "This free website's biggest source of")]
7test_url: http://www.kenrockwell.com/tech/composition.htm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/kicker.de.txt b/inc/3rdparty/site_config/standard/kicker.de.txt
new file mode 100644
index 00000000..7d5daa4b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/kicker.de.txt
@@ -0,0 +1,21 @@
1# set body
2body: //div[@id='ovArtikel']
3
4# set title
5title: //div[@id='ovArtikel']/h1
6# strip main title and leave sub title
7strip: //div[@id='ovArtikel']/h1
8
9date: //div[@class='publicdate']
10
11#remove captions
12strip: //*/div[@class='bu']
13strip: //*/div[@class='credit']
14
15#remove ads
16strip: //*/div[@class='ad-head']
17strip: //*/div[@class='linksebay']
18
19# remove video content
20strip: //*/div[@class='ovVideo']
21test_url: http://www.kicker.de/news/fussball/frauen/wmfr/frauen-weltmeisterschaft/2011/3/1123662/spielbericht_frankreich-frauen_deutschland-frauen.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/kickstarter.com.txt b/inc/3rdparty/site_config/standard/kickstarter.com.txt
new file mode 100644
index 00000000..c055659f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/kickstarter.com.txt
@@ -0,0 +1,7 @@
1title: //h1[@id='name']
2body: //*[@id='leftcol']
3
4strip_id_or_class: 'share-box'
5strip_id_or_class: 'project-faqs'
6strip_id_or_class: 'report-issue-wrap'
7test_url: http://www.kickstarter.com/projects/hop/elevation-dock-the-best-dock-for-iphone \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/kingarthurflour.com.txt b/inc/3rdparty/site_config/standard/kingarthurflour.com.txt
new file mode 100644
index 00000000..2f6783a3
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/kingarthurflour.com.txt
@@ -0,0 +1,4 @@
1title: //div[@class='post']/h2
2body: //div[@class='entry']
3strip: //p[contains(.,'Tags:')]
4test_url: http://www.kingarthurflour.com/blog/2011/01/28/a-big-sandwich-for-the-big-game/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/kotaku.com.txt b/inc/3rdparty/site_config/standard/kotaku.com.txt
new file mode 100644
index 00000000..be439d75
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/kotaku.com.txt
@@ -0,0 +1,2 @@
1author: //span[@class="plus-icon"]
2test_url: http://kotaku.com/5920211/save-the-furries-on-your-wii-in-this-weeks-nintendo-download \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/kottke.org.txt b/inc/3rdparty/site_config/standard/kottke.org.txt
new file mode 100644
index 00000000..f93a61e7
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/kottke.org.txt
@@ -0,0 +1,6 @@
1title: //h2
2author: //*[@id='main']/div/a[1]
3date: substring-before(substring-after(//div[@class='meta'],'&bull;'),'&bull;')
4body: //div[@id='main']
5strip: //div[@class='meta']
6test_url: http://kottke.org/08/02/king-of-kong-a-fistful-of-quarters \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/kumailplus.com.txt b/inc/3rdparty/site_config/standard/kumailplus.com.txt
new file mode 100644
index 00000000..9e15cc34
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/kumailplus.com.txt
@@ -0,0 +1,3 @@
1body: //div[@class = "entry-full"]
2
3test_url: http://www.kumailplus.com/2011/12/02/24308 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/kumb.com.txt b/inc/3rdparty/site_config/standard/kumb.com.txt
new file mode 100644
index 00000000..3f0d2369
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/kumb.com.txt
@@ -0,0 +1,10 @@
1title: //div[@id='centrediv']/h1
2
3author: substring-after(//div[@id='centrediv']/h3,'By: ')
4
5date: substring-after(substring-before(//div[@id='centrediv']/h3,'By: '),'Filed: ')
6
7body: //div[@class='KonaBody']
8
9convert_double_br_tags: yes
10test_url: http://www.kumb.com/story.php?id=126084 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/kwerfeldein.de.txt b/inc/3rdparty/site_config/standard/kwerfeldein.de.txt
new file mode 100644
index 00000000..879b4d6c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/kwerfeldein.de.txt
@@ -0,0 +1,9 @@
1date: //span[@class='datum']
2title: //div[@class='artikel']/h2
3body: //div[@class='entry']
4strip: //p[@class='tags']
5author: substring-after(//div[@class='authorinfo']/em,'Dies ist ein Artikel von ')
6strip: //div[@class='authorinfo']
7strip: //div[@class='authorpic']
8
9test_url: http://kwerfeldein.de/index.php/2011/10/17/doppelbelichtungen-mit-konzept/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/laphamsquarterly.org.txt b/inc/3rdparty/site_config/standard/laphamsquarterly.org.txt
new file mode 100644
index 00000000..a34e39dd
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/laphamsquarterly.org.txt
@@ -0,0 +1,13 @@
1title: //h1[@class='headline']
2body: //div[@class='article']
3strip: //div[@class='article']//h3[contains(@class, 'section')]
4strip: //div[@class='article']//ul[contains(@class, 'article-actions')]
5strip: //div[@id='syndication-upper']
6strip: //a[@id='syndication']
7strip: //dl[@id='article-tags']
8strip: //div[@id='article-like']
9prune: no
10
11single_page_link: //li[@class='single-page']/a
12
13test_url: http://www.laphamsquarterly.org/essays/balanced-diets.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/laprensagrafica.com.txt b/inc/3rdparty/site_config/standard/laprensagrafica.com.txt
new file mode 100644
index 00000000..e771f81f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/laprensagrafica.com.txt
@@ -0,0 +1,3 @@
1tidy: no
2
3test_url: http://www.laprensagrafica.com/opinion/editorial/229252-reflexiones-sobre-la-educacion-que-necesitamos.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/laquadrature.net.txt b/inc/3rdparty/site_config/standard/laquadrature.net.txt
new file mode 100644
index 00000000..5bad8e65
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/laquadrature.net.txt
@@ -0,0 +1,10 @@
1body: //div[@id='content-content']//div[@class='content']
2title: //h1[@class='title']
3date: substring-after(//*[@class='submitted'],'Submitted on')
4tidy: no
5strip: //div[@class='terms terms-inline']
6strip: //div[@class='more']
7strip: //div[@class='share-links']
8strip: //table[@id='attachments']
9
10test_url: http://www.laquadrature.net/en/finalization-of-eu-parliaments-weak-net-neutrality-resolution \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/lareviewofbooks.org.txt b/inc/3rdparty/site_config/standard/lareviewofbooks.org.txt
new file mode 100644
index 00000000..504dbea1
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/lareviewofbooks.org.txt
@@ -0,0 +1,12 @@
1#meta data
2title:substring-after(title,'|')
3
4author:substring-before( substring-after(//meta[@name = 'description']/@content, normalize-space(substring-after(//title,'|'))),' respond ')
5date://h5[@class = 'postDate']
6
7#text
8body://div[@class = 'articleBody']
9
10#clean up
11strip://center
12test_url: http://lareviewofbooks.org/post/14066007115/literary-transactions-and-their-vicissitudes \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/latimes.com.txt b/inc/3rdparty/site_config/standard/latimes.com.txt
new file mode 100644
index 00000000..0d6ac851
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/latimes.com.txt
@@ -0,0 +1,11 @@
1strip: //div[@id="tugs_story_display"]
2strip: //div[@id="search_overlay"]
3strip: //div[@id="adv_search"]
4body: //div[@class='story']
5tidy: no
6convert_double_br_tags: yes
7single_page_link: //a[contains(@href, ',print.')]
8strip: //p[starts-with(., 'latimes.com')]
9strip: //h1[starts-with(., 'latimes.com')]
10strip_id_or_class: cubead
11test_url: http://www.latimes.com/news/opinion/commentary/la-oe-gartonash-wilders-20110512,0,2876761.story \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/laughingsquid.com.txt b/inc/3rdparty/site_config/standard/laughingsquid.com.txt
new file mode 100644
index 00000000..1814988a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/laughingsquid.com.txt
@@ -0,0 +1,3 @@
1title: //h1[@class='entry-title']
2body: //div[@class='entry-content']
3test_url: http://laughingsquid.com/mysterious-tiny-doors-appearing-around-san-francisco/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/leancrew.com.txt b/inc/3rdparty/site_config/standard/leancrew.com.txt
new file mode 100644
index 00000000..0a4c84ba
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/leancrew.com.txt
@@ -0,0 +1,9 @@
1title: //div[@id="content"]/h1[1]
2date: substring-before(//p[@class="postdate"], ' at ')
3author: ("Dr. Drang")
4
5strip: //div[@id="content"]/h1[1]
6strip: //p[@class="postdate"]
7strip: //h2[@id="respond"]
8strip: //blockquote[@class="bbpTweet"]/p/span/a/img
9test_url: http://www.leancrew.com/all-this/2011/12/more-shell-less-egg/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/lefigaro.fr.txt b/inc/3rdparty/site_config/standard/lefigaro.fr.txt
new file mode 100644
index 00000000..f5494b96
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/lefigaro.fr.txt
@@ -0,0 +1,8 @@
1title: //meta[@name='title']/@content
2author: //span[@class='sign']//a[@class='journaliste']
3author: //meta[@name='author']/@content
4body: //*[@id='article']/div[@class='photo'] | //*[@id='article']/h2 | //*[@id='article']/div[@class='texte']
5date: //time[@pubdate]/@datetime
6prune: no
7test_url: http://www.lefigaro.fr/environnement/2011/11/10/01029-20111110ARTFIG00801-la-chine-confrontee-a-un-immense-defi-ecologique.php
8test_url: http://www.lefigaro.fr/conjoncture/2012/11/20/20002-20121120ARTFIG00609-l-usager-devrait-payer-plus-pour-financer-les-transports.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/lemonde.fr.txt b/inc/3rdparty/site_config/standard/lemonde.fr.txt
new file mode 100644
index 00000000..eb205275
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/lemonde.fr.txt
@@ -0,0 +1,13 @@
1title: //h1
2
3# they have a single component containing both author and date
4#author: //p[@class='source']
5#date: //p[@class='source']
6
7body: //div[@class='contenu_article']
8#Shoot the insane "conjugaison.lemonde.fr" links :
9strip: //a[contains(@class, 'listLink')]
10
11prune: no
12
13test_url: http://www.lemonde.fr/economie/article/2011/07/05/moody-s-abaisse-la-note-du-portugal-de-quatre-crans_1545237_3234.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/lesnumeriques.com.txt b/inc/3rdparty/site_config/standard/lesnumeriques.com.txt
new file mode 100644
index 00000000..9b57f726
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/lesnumeriques.com.txt
@@ -0,0 +1,9 @@
1title: //h1/following::span[@class='fn']
2# Author: should stop parsing once <br> is reached, but I don't know how to do this.
3author: //following::div[@class='PDate2']
4date: //following::div[@class='PDate2']/strong
5
6body: //div[@class='ArTexte']
7body: //div[@id='prod_txt_b']
8body: //div[@class='ArPhotoP']
9test_url: http://www.lesnumeriques.com/disque-dur-multimedia/popcorn-hour-300-p12231/test.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/letemps.ch.txt b/inc/3rdparty/site_config/standard/letemps.ch.txt
new file mode 100644
index 00000000..c4bee7ec
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/letemps.ch.txt
@@ -0,0 +1,3 @@
1title: //h2
2strip_image_src: logo.gif
3test_url: http://www.letemps.ch/Facet/print/Uuid/7c9f912c-07c9-11e0-9b50-4d96c9eca37f \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/lifeandculture.fr.txt b/inc/3rdparty/site_config/standard/lifeandculture.fr.txt
new file mode 100644
index 00000000..c3888aa8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/lifeandculture.fr.txt
@@ -0,0 +1,3 @@
1title: //h2[@class="entry-title"]
2body: //div[@class="entry-content"]
3test_url: http://www.lifeandculture.fr/digital/facebook-and-the-epiphanator-an-end-to-endings/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/lifehacker.com.txt b/inc/3rdparty/site_config/standard/lifehacker.com.txt
new file mode 100644
index 00000000..32ade14a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/lifehacker.com.txt
@@ -0,0 +1,42 @@
1# Adds author text: Gawker sites commonly show as "Author: View Profile"
2author://a[@class="plus-icon modfont"]
3
4# Add date and time
5date: //span[@class="date"]
6
7# Remove date and time from article text
8strip: //span[@class="date"]
9
10# Remove login/comment text
11strip: //*[(@class="presence_control_external smalltype")]
12
13strip: //div[@class="nodebyline modfont"]
14
15# Remove right sidebar
16strip: //div[@id="rightwrapper"]
17
18# Remove print header
19strip: //div[@id='printhead']/h1
20
21# Remove 'content is restricted'
22strip: //div[@id='agegate_IDHERE']
23
24# Remove follow text
25strip: //*[(@class="permalink_ads")]
26
27# Remove view/comment count
28strip: //div[@id='wrapper']/div[2][@class='postmeta_permalink_wrapper']/div[1][@class='postmeta_permalink']/div[2][@class='pm_line']
29
30# Remove contact text
31strip: //div[@id='wrapper']/div[1][@class='content permalink']/p[6][@class='contactinfo']
32
33# Remove medium duplicates of the article image
34strip_image_src: medium.jpg
35
36# Remove "arrow" class at bottom of page
37strip: //p[@class="arrow"]
38
39# Remove "track" image from article body
40strip: //img[@alt="track"]
41test_url: http://lifehacker.com/5925801/how-can-i-turn-vague-goals-into-actionable-to+dos
42test_url: http://lifehacker.com/5941600/hack-an-old-computer-mouse-into-a-wireless-bluetooth-mouse \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/linkedin.com.txt b/inc/3rdparty/site_config/standard/linkedin.com.txt
new file mode 100644
index 00000000..37e83cf6
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/linkedin.com.txt
@@ -0,0 +1,2 @@
1single_page_link: //ul[@class='util-nav']//a[@class='close']
2test_url: http://www.linkedin.com/news?actionBar=&articleID=894735221&ids=0Rdj4Qe3wQejwIczAOc3sRdzwUb3wScPoPdzkVe2MNcz8RcPsQejwIcPASdjwTcjwU&aag=true&freq=weekly \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/longform.org.txt b/inc/3rdparty/site_config/standard/longform.org.txt
new file mode 100644
index 00000000..48d5e1a7
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/longform.org.txt
@@ -0,0 +1,3 @@
1single_page_link: //div[@class="post"]/div[@class="title"]/a
2
3test_url: http://longform.org/2011/05/06/disconcerting-new-answers-in-models-suicide/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/loopinsight.com.txt b/inc/3rdparty/site_config/standard/loopinsight.com.txt
new file mode 100644
index 00000000..08ad90c3
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/loopinsight.com.txt
@@ -0,0 +1,9 @@
1body: //div[@class='container_16']//div[@class='grid_11']
2strip: //h2[@class='mast']
3strip: //div[@class='container_16']//div[@class='grid_11']/h1
4strip: //div[@class='container_16']//div[@class='grid_11']/p[1]
5strip: //div[@class='container_16']//div[@class='grid_11']/div
6author: //a[starts-with(@title, 'Posts by')]
7date: substring-before(substring-after(//time, 'Posted on '), ' at')
8test_url: http://www.loopinsight.com/2012/09/13/forget-iphone-5-naysayers-this-thing-is-big/
9test_url: http://www.loopinsight.com/2011/05/20/playbook-returns-high-misses-sales-targets-by-90/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/lostgarden.com.txt b/inc/3rdparty/site_config/standard/lostgarden.com.txt
new file mode 100644
index 00000000..a823e649
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/lostgarden.com.txt
@@ -0,0 +1,3 @@
1prune: no
2convert_double_br_tags: yes
3test_url: http://www.lostgarden.com/2012/04/loops-and-arcs.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/lrb.co.uk.txt b/inc/3rdparty/site_config/standard/lrb.co.uk.txt
new file mode 100644
index 00000000..ce5053d4
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/lrb.co.uk.txt
@@ -0,0 +1,8 @@
1title: substring-before(//title, ' · LRB')
2
3body: //div[@class="article-body indent"]
4
5date: substring-after(//p[@class="meta-info"]/a, '· ')
6
7prune: no
8test_url: http://www.lrb.co.uk/v33/n18/james-meek/its-already-happened \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/luminous-landscape.com.txt b/inc/3rdparty/site_config/standard/luminous-landscape.com.txt
new file mode 100644
index 00000000..92ccf3ba
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/luminous-landscape.com.txt
@@ -0,0 +1,6 @@
1title: //h2
2
3body: // div[@id='content']
4
5strip: //div[@class='sidebar_wrapper']
6test_url: http://www.luminous-landscape.com/tutorials/optimizing_exposure.shtml \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/m.bbc.co.uk.txt b/inc/3rdparty/site_config/standard/m.bbc.co.uk.txt
new file mode 100644
index 00000000..a8af5438
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/m.bbc.co.uk.txt
@@ -0,0 +1,8 @@
1title: //div[@class="story-body"]/div[@class="story-inner"]/h1
2body: //div[@class="story-body"]
3date: //p[@class='date']/strong
4author: substring-after(//div[@class="story-inner"]/div[@class="byline"]//span[@class='name'], 'By')
5
6strip: //div[@class="story-inner"]/div[@class="byline"]
7
8test_url: http://m.bbc.co.uk/news/science-environment-19144464 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/m.guardian.co.uk.txt b/inc/3rdparty/site_config/standard/m.guardian.co.uk.txt
new file mode 100644
index 00000000..f5f0dfca
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/m.guardian.co.uk.txt
@@ -0,0 +1,12 @@
1title: //p[@class="txhead"]
2author: //div[@class='txb']
3wrap_in(p): //div[@class='para']
4date: //div[@class='txb']/following-sibling::p/text()[substring(., 14)]
5strip: //table[@class="tlogo"]
6strip: //div[@class="cookieText"]
7strip: //*[@class="sltb"]
8strip: //*[@class="ijobs-x-link"]
9strip: //*[@class="sponscolour"]
10strip: //*[@class="sponsouter"]
11strip: //div[@id="bottom-nav-block"]/following::*
12test_url: http://m.guardian.co.uk/ms/p/gnm/op/s3OOwgO3yIhGuj41C1_S3Xg/view.m?id=15&gid=world/2012/jul/26/arctic-climate-change&cat=top-stories \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mac4ever.com.txt b/inc/3rdparty/site_config/standard/mac4ever.com.txt
new file mode 100644
index 00000000..892b47f5
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/mac4ever.com.txt
@@ -0,0 +1,5 @@
1author: substring-after(//div[@class='author'],'Par ')
2date: //div[@class='date']
3body: //div[@class='content']
4
5test_url: http://www.mac4ever.com/news/64182/icloud_les_prix_en_euros_et_en_chf/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/macdrifter.com.txt b/inc/3rdparty/site_config/standard/macdrifter.com.txt
new file mode 100644
index 00000000..fd1ede7d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/macdrifter.com.txt
@@ -0,0 +1,2 @@
1title: substring-before(//title,' « Macdrifter')
2test_url: http://www.macdrifter.com/2012/03/instacast-on-my-mac/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/macformat.techradar.com.txt b/inc/3rdparty/site_config/standard/macformat.techradar.com.txt
new file mode 100644
index 00000000..109eae45
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/macformat.techradar.com.txt
@@ -0,0 +1,9 @@
1# Remove news feed
2strip: //div[@id='news_feed_front']
3
4# Remove pull quote
5strip: //div[@class='field field-type-text field-field-pull-quote']
6
7# Remove login
8strip: //div[@class='right_bar_login']
9test_url: http://macformat.techradar.com/blog/solid-state-storage-bringing-parity-back-mac-29-10-10&article=89189666 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/macgeneration.com.txt b/inc/3rdparty/site_config/standard/macgeneration.com.txt
new file mode 100644
index 00000000..e6bbe28e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/macgeneration.com.txt
@@ -0,0 +1,5 @@
1author: substring-before(substring-after(//div[@class='dateNews'],'par '),' le')
2date: substring-after(//div[@class='dateNews'],' le ')
3body: //div[@class='singleNews zoneApple']
4
5test_url: http://www.macgeneration.com/news/voir/211162/dropbox-encore-un-mac-et-deux-comptes-dropbox \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/macmagazine.com.br.txt b/inc/3rdparty/site_config/standard/macmagazine.com.br.txt
new file mode 100644
index 00000000..47ebfd79
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/macmagazine.com.br.txt
@@ -0,0 +1,21 @@
1# Remove sliders
2strip: //*[(@class="slides_container")]
3strip: //div[(@id="slides_two")]
4
5# Remove tag cloud
6strip: //span[(@class="secao")]
7
8# Fix date article
9# TODO
10
11# Remove other stuff
12strip: //div[(@id="idc-container")]
13strip: //div[(@id="idc-noscript")]
14strip: //div[(@class="linkwithin_div")]
15strip: //div[(@class="navPosts")]
16strip: //div[(@id="lateral")]
17strip: //div[(@id="autor")]
18strip: //div[(@id="rodape")]
19strip: //div[(@id="post")]/h1
20strip: //div[(@id="post")]/div[(@id="boxInformacoes")]
21test_url: http://macmagazine.com.br/2011/08/01/skype-para-ipad-esta-finalmente-chegando-a-app-store/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/macrumors.com.txt b/inc/3rdparty/site_config/standard/macrumors.com.txt
new file mode 100644
index 00000000..76f999d3
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/macrumors.com.txt
@@ -0,0 +1,10 @@
1author: substring-after(//div[@class='byline'], " by ")
2date: substring-before(//div[@class='byline'], " by ")
3
4# set body
5body: //div[@class='content']
6
7# set title
8title: //h3
9#strip: //div[@class='content']/h3
10test_url: http://www.macrumors.com/2010/11/10/apple-debuts-new-apple-tv-and-itunes-movie-content-in-japan/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/macstories.net.txt b/inc/3rdparty/site_config/standard/macstories.net.txt
new file mode 100644
index 00000000..6e651ca0
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/macstories.net.txt
@@ -0,0 +1,8 @@
1strip: //*[(@id = "featured")]
2
3author:substring-after( //div[@class='posttitle']/h2[@class='author'],'by ')
4
5date: concat(//div[@class='month'],' ',//div[@class='day'])
6
7#macstories doesn't provide a year, but month/day is better than nothing
8test_url: http://www.macstories.net/news/instapaper-4-0-available-completely-redesigned-ipad-ui-new-features-search-subscription/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mactalk.com.au.txt b/inc/3rdparty/site_config/standard/mactalk.com.au.txt
new file mode 100644
index 00000000..e8d60522
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/mactalk.com.au.txt
@@ -0,0 +1,4 @@
1author://div[@class="article_username_container_full"]
2date://div[@class="article_username_container"]
3body://div[@class="article cms_clear restore postcontainer"]
4test_url: http://www.mactalk.com.au/content/chat-basil-shkara-developer-taptax-2452/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mactechnews.de.txt b/inc/3rdparty/site_config/standard/mactechnews.de.txt
new file mode 100644
index 00000000..c3fc0e44
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/mactechnews.de.txt
@@ -0,0 +1,3 @@
1title: substring-after(substring-after(//title, '>'), '>')
2body: //div[@class='NewsArticleContent']
3test_url: http://www.mactechnews.de/news/index/Apple-Pressekonferenz-zum-iPhone-4-147316.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/macworld.com.txt b/inc/3rdparty/site_config/standard/macworld.com.txt
new file mode 100644
index 00000000..96175872
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/macworld.com.txt
@@ -0,0 +1,24 @@
1title: //article//h1
2date: //meta[@name="date"]/@content
3author: //div[@class="author-name" or @class="article-byline"]/a[1]
4
5body: //section[@class="page"]
6
7# remove 'From the Lab' and 'Recent posts' text
8strip: //div[@class='blogLabel']
9
10# remove byline and meta info
11strip: //div[@class="article-meta"]
12strip: //div[@class="author-info"]
13
14#strip tags and categories
15strip: //div[@class="department"]
16
17#strip product cap links
18strip: //div[@class="cap-main"]
19strip: //div[@id="compare-lede"]
20
21prune: no
22
23# copes less well with Review pages, seems fine for News
24test_url: http://www.macworld.com/article/163184/2011/10/the_ipod_as_an_iconic_cultural_force.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mainichi.jp.txt b/inc/3rdparty/site_config/standard/mainichi.jp.txt
new file mode 100644
index 00000000..e701207f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/mainichi.jp.txt
@@ -0,0 +1,3 @@
1body: //div[@class='NewsArticle']
2
3test_url: http://mainichi.jp/select/weathernews/20110311/news/20110520k0000e040062000c.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mainpost.de.txt b/inc/3rdparty/site_config/standard/mainpost.de.txt
new file mode 100644
index 00000000..a2d25d56
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/mainpost.de.txt
@@ -0,0 +1,28 @@
1title: substring-before(//title, '|')
2body: //*[@id='content-left']
3
4# Why is this not working here?
5# body: //*[@id='content-left']/div[@class='content-container'][2]/div[@class='content-body']/div[@class='inner-container']/div[@class='detail']
6
7
8#Header
9strip_id_or_class: 'subHead'
10strip_id_or_class: 'fl_right'
11strip_id_or_class: 'infolink'
12strip_id_or_class: 'content-head'
13strip_id_or_class: 'tab'
14strip_id_or_class: 'tab-active'
15strip: //*[contains(@class,'trenner')]
16
17# Headline
18strip: //h1/*
19strip_id_or_class: 'font16'
20
21#Images
22strip_id_or_class: 'leftimage'
23strip_id_or_class: 'rightimage'
24
25#Comments
26strip: //table
27strip: //p/following-sibling::*[0]
28test_url: http://www.mainpost.de/ueberregional/meinung/Dioxin-Skandal-bringt-Agrarministerin-in-Bedraengnis;art9517,5920211 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/makeuseof.com.txt b/inc/3rdparty/site_config/standard/makeuseof.com.txt
new file mode 100644
index 00000000..6809afed
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/makeuseof.com.txt
@@ -0,0 +1,3 @@
1tidy: no
2
3test_url: http://www.makeuseof.com/dir/kindle-it-web-pages-kindle-friendly/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/marco.org.txt b/inc/3rdparty/site_config/standard/marco.org.txt
new file mode 100644
index 00000000..ef2e03d3
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/marco.org.txt
@@ -0,0 +1,8 @@
1tidy: no
2prune: no
3date: //article//time[@pubdate]
4title: //article/header/h2
5body: //article
6strip: //header
7test_url: http://www.marco.org/2012/09/08/businessweek-gruber
8test_url: http://www.marco.org/2012/04/24/might-upgrade-someday \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/marksdailyapple.com.txt b/inc/3rdparty/site_config/standard/marksdailyapple.com.txt
new file mode 100644
index 00000000..0077f560
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/marksdailyapple.com.txt
@@ -0,0 +1,2 @@
1strip_id_or_class: wwsgd
2test_url: http://www.marksdailyapple.com/are-detoxes-and-cleanses-safe-and-effective/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/martinfowler.com.txt b/inc/3rdparty/site_config/standard/martinfowler.com.txt
new file mode 100644
index 00000000..8e0e349f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/martinfowler.com.txt
@@ -0,0 +1,8 @@
1date: //div[@id="main"]/p[@class="date"]
2author: string("Martin Fowler")
3body: //div[@id="main"]
4strip_id_or_class: date
5strip_id_or_class: tags
6strip_id_or_class: tagLabel
7strip: //div[@id="main"]/h1[1]
8test_url: http://martinfowler.com/bliki/DatabaseThaw.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mashable.com.txt b/inc/3rdparty/site_config/standard/mashable.com.txt
new file mode 100644
index 00000000..2c5a14a6
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/mashable.com.txt
@@ -0,0 +1,4 @@
1title: //header[@class='entry-title']/h1
2body: //div[@class='description']
3strip: //div[@class='ytm-gallery-box']
4test_url: http://mashable.com/2011/12/05/india-wants-google-and-facebook-to-censor-user-content/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mattcutts.com.txt b/inc/3rdparty/site_config/standard/mattcutts.com.txt
new file mode 100644
index 00000000..76b1eac6
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/mattcutts.com.txt
@@ -0,0 +1,2 @@
1date: //*[@class = 'published']
2test_url: http://www.mattcutts.com/blog/internet-censorship-sopa/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mbl.is.txt b/inc/3rdparty/site_config/standard/mbl.is.txt
new file mode 100644
index 00000000..fd26f091
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/mbl.is.txt
@@ -0,0 +1,2 @@
1body: //div[@class="frett-main"]
2test_url: http://mbl.is/frettir/innlent/2012/02/21/litill_munur_a_fargjaldaverdi/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/medialens.org.txt b/inc/3rdparty/site_config/standard/medialens.org.txt
new file mode 100644
index 00000000..94f27b71
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/medialens.org.txt
@@ -0,0 +1,2 @@
1strip: //div[contains(@class, 'article-tools')]
2test_url: http://www.medialens.org/index.php/alerts/alert-archive/2012/713-the-illusion-of-democracy.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/menshealth.com.txt b/inc/3rdparty/site_config/standard/menshealth.com.txt
new file mode 100644
index 00000000..e7e1e269
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/menshealth.com.txt
@@ -0,0 +1,16 @@
1# need to find a way to eliminate <span> content for "related content" without eliminating important content
2
3convert_double_br_tags: yes
4#body: //div[@id='leftside']
5title: //h1
6title: //h2
7author: substring-after(//h4, 'By ')
8author: substring-after(//h4, 'By: ')
9#Strip: //span
10strip_id_or_class: morefromcat
11strip_id_or_class: mostpopular
12strip_id_or_class: articlepagination
13strip_id_or_class: toolbar
14body: //div[@id='zmodcontent']
15single_page_link: //li[@class='onepage'] //a[contains (@href, 'printer.php')]
16test_url: http://www.menshealth.com/mhlists/pursuit_of_happiness/index.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mikeash.com.txt b/inc/3rdparty/site_config/standard/mikeash.com.txt
new file mode 100644
index 00000000..af8a7d30
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/mikeash.com.txt
@@ -0,0 +1,5 @@
1title: //div[@class="blogtitle"]
2strip: //div[@class="blogtitle"]
3
4author: substring-after(//span[@class="blogheader"], 'Author: ')
5test_url: http://www.mikeash.com/pyblog/friday-qa-2012-01-13-the-mac-toolbox.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mikeindustries.com.txt b/inc/3rdparty/site_config/standard/mikeindustries.com.txt
new file mode 100644
index 00000000..3d488e13
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/mikeindustries.com.txt
@@ -0,0 +1,9 @@
1title: //div[@class='post_content']/h2
2date: //div[@class='dateline']
3body: //div[@class='entry']
4
5strip: //div[@class='closer']
6strip: //div[@class='navigation']
7strip: //div[@class='aux_pane']
8strip: //div[@class='aux_aux_pane']
9test_url: http://www.mikeindustries.com/blog/archive/2011/10/never-be-another \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/minnesota.publicradio.org.txt b/inc/3rdparty/site_config/standard/minnesota.publicradio.org.txt
new file mode 100644
index 00000000..7e43d63c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/minnesota.publicradio.org.txt
@@ -0,0 +1,10 @@
1title: //*[@class="article"]/h1
2date: //*[@class="article"]/div[@class="date"]
3
4# strip the title and date from the article text
5strip: //*[@class="article"]/h1
6strip: //*[@class="article"]/div[@class="date"]
7
8# strip annoying <br> between metadata and article
9strip: //*[@class="article"]/div[@class="date"]/following-sibling::br
10test_url: http://minnesota.publicradio.org/display/web/2012/06/19/health/senators-want-health-care-ruling-on-tv/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/minnpost.com.txt b/inc/3rdparty/site_config/standard/minnpost.com.txt
new file mode 100644
index 00000000..51a0630b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/minnpost.com.txt
@@ -0,0 +1,5 @@
1title: //*[@id="content-header"]/h1
2author: //*[contains(@class, 'byline')]/a/text()
3date: substring-after(//*[contains(@class, 'byline')]/text()[2], '|')
4body: //*[contains(@class, 'node-body')]
5test_url: http://www.minnpost.com/eric-black-ink/2012/06/overturning-obamacare-would-be-game-changer-supreme-court \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mirrorfootball.co.uk.txt b/inc/3rdparty/site_config/standard/mirrorfootball.co.uk.txt
new file mode 100644
index 00000000..4215a051
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/mirrorfootball.co.uk.txt
@@ -0,0 +1,3 @@
1# Remove extra links
2strip: //*[@class='appended_html']
3test_url: http://www.mirrorfootball.co.uk/news/West-Ham-crisis-Carlton-Cole-slams-diabolical-performance-and-rips-into-Avram-Grant-lack-of-tactical-nous-following-Liverpool-mauling-article636151.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mises.org.txt b/inc/3rdparty/site_config/standard/mises.org.txt
new file mode 100644
index 00000000..ae542aa6
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/mises.org.txt
@@ -0,0 +1,5 @@
1strip_id_or_class: 'book-ad'
2strip_id_or_class: 'bigger pullquote'
3strip_id_or_class: 'subscribe'
4strip_id_or_class: 'blog-link'
5test_url: http://mises.org/daily/4804 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mlb.mlb.com.txt b/inc/3rdparty/site_config/standard/mlb.mlb.com.txt
new file mode 100644
index 00000000..30e8aff2
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/mlb.mlb.com.txt
@@ -0,0 +1,14 @@
1title: //h1[@class='article-headline']
2date: //span[@class='timeStamp']
3author: substring-before(//p[@class='article-byline'], '/')
4body: //div[@id='article']
5#strip: //div[@class='inner']
6strip: //div[@id='article_head']
7strip: //p[@class='tagLine']
8strip: //div[@id='article_related_links']
9strip: //div[@id='article_related_mlb']
10strip: //span[@class='more']
11strip: //div[@class='article_component']
12strip: //span[@class='screen_reader']
13strip: //ul[@class='columnists_blurb']
14test_url: http://mlb.mlb.com/news/article.jsp?ymd=20120403&content_id=27880830 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mlb.sbnation.com.txt b/inc/3rdparty/site_config/standard/mlb.sbnation.com.txt
new file mode 100644
index 00000000..c4e3389e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/mlb.sbnation.com.txt
@@ -0,0 +1,14 @@
1title: //h1[@id = 'stream_title']
2author: //p[@class = 'byline']/a
3date: //span[@class = 'datetime']
4
5body: //div[@id = 'stream_container']
6strip: //p[@class = 'byline']
7strip_id_or_class: stream_summary
8strip_id_or_class: social-spoken
9strip_id_or_class: datetime
10strip_id_or_class: author-mini-profile
11strip_id_or_class: social-tools
12strip_id_or_class: entry-tags
13strip_id_or_class: fb-like-box
14test_url: http://mlb.sbnation.com/2011/10/17/2495845/2011-world-series-st-louis-cardinals-texas-rangers-home-field-advantage \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mlssoccer.com.txt b/inc/3rdparty/site_config/standard/mlssoccer.com.txt
new file mode 100644
index 00000000..41e15136
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/mlssoccer.com.txt
@@ -0,0 +1,6 @@
1title: //*[@class="header_title"]/h1
2date: //*[@class="field-date"]
3author: //*[@class="field-author"]
4body: //div[contains(@class, 'content')]
5
6test_url: http://www.mlssoccer.com/news/article/2012/06/19/lack-depth-front-forces-arena-alter-las-formation \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mmo-champion.com.txt b/inc/3rdparty/site_config/standard/mmo-champion.com.txt
new file mode 100644
index 00000000..918fae36
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/mmo-champion.com.txt
@@ -0,0 +1,5 @@
1title: //h1
2body: //div[@id = 'article_content']/div[contains(@class,'article')]
3author: //sub[@class = 'article_promoted_text']/a[starts-with(@href, 'member')]
4date: //div[@class = 'article_username_container']
5test_url: http://www.mmo-champion.com/content/2688-Other-Press-Tour-Interviews-A-Night-in-Mists-of-Pandaria-Blue-Posts-MoP-Screenshot \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mnn.com.txt b/inc/3rdparty/site_config/standard/mnn.com.txt
new file mode 100644
index 00000000..ddfe6fa2
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/mnn.com.txt
@@ -0,0 +1,11 @@
1tidy: no
2author: //div[@id="above-content"]//img/@alt | //div[@class="comment-auth"]/span[1]/a/text()
3date: //div[@class="comment-auth"]/div | //div[@class="comment-auth"]/span[2]
4body: //div[@class="node"]
5
6strip_id_or_class: vertical-social-bar
7strip_id_or_class: blogs_paginator
8strip_id_or_class: horizontal-social-links
9strip_id_or_class: servicelinksdiv
10
11test_url: http://www.mnn.com/green-tech/research-innovations/blogs/5-breakthroughs-that-will-make-solar-power-cheaper-than-coal \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mno.hu.txt b/inc/3rdparty/site_config/standard/mno.hu.txt
new file mode 100644
index 00000000..ba158953
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/mno.hu.txt
@@ -0,0 +1,14 @@
1title: //title
2
3author: //div[@class="author"]
4
5strip_id_or_class: 'header'
6strip_id_or_class: 'cikk_ajanlo'
7strip_id_or_class: 'buttons'
8strip_id_or_class: 'related'
9strip_id_or_class: 'adbox ad_cikk_kozepre'
10strip_id_or_class: 'cikk-cimkek'
11strip_id_or_class: 'cikk_ertekeles'
12
13strip_comments: yes
14test_url: http://mno.hu/grund/a-gumibottal-hadonaszo-rendort-joval-konnyebb-utalni-1055351 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mobile.slate.com.txt b/inc/3rdparty/site_config/standard/mobile.slate.com.txt
new file mode 100644
index 00000000..d5d81034
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/mobile.slate.com.txt
@@ -0,0 +1,5 @@
1title: //h2[@class="article_title"]
2strip: //a[@class="houseAdLink"]
3strip: //h1
4strip: //div[@class="more_articles"]
5test_url: http://mobile.slate.com/rss.jsp?rssid=411&item=http%3a%2f%2fwww.slate.com%2fdefault.aspx%3fdisplaymode%3d201%26id%3d2293749%26device%3drss \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mobileopportunity.blogspot.com.txt b/inc/3rdparty/site_config/standard/mobileopportunity.blogspot.com.txt
new file mode 100644
index 00000000..a1cc5317
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/mobileopportunity.blogspot.com.txt
@@ -0,0 +1,11 @@
1body: //div[@class='post uncustomized-post-template']
2
3# remove duplicate of post title, which is a link
4strip: //h3[@class='post-title']
5
6# remove permalink and timestamp, which isn't useful as it's a time with no date
7strip: //span[@class='post-timestamp']
8
9# remove labels (tags)
10strip: //span[@class='post-labels']
11test_url: http://mobileopportunity.blogspot.com/2010/12/rims-q3-financials-tale-of-two.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/modernghana.com.txt b/inc/3rdparty/site_config/standard/modernghana.com.txt
new file mode 100644
index 00000000..4c93d0cf
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/modernghana.com.txt
@@ -0,0 +1,8 @@
1title: //meta[@property="og:title"]/@content
2author: //meta[@name="author"]/@content
3date: //span[@class='date1']
4body: //div[@id='newsimage'] | //div[@id='bodytext']
5tidy: no
6prune: no
7
8test_url: http://www.modernghana.com/news/323765/1/039ghost039-teachers-removed-salaries-allowances-p.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/money.cnn.com.txt b/inc/3rdparty/site_config/standard/money.cnn.com.txt
new file mode 100644
index 00000000..a0d1628a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/money.cnn.com.txt
@@ -0,0 +1,24 @@
1title: //meta[@property="og:title"]/@content
2title: //h1[@class='storyheadline']
3author: //meta[@name="AUTHOR"]/@content
4date: //span[@class='cnnDateStamp']
5date: //meta[@name="DATE"]/@content
6body: //div[@id='storytext' or @class='storytext']
7
8strip_id_or_class: ie_column
9strip_id_or_class: sharewidgets
10strip_image_src: bug.gif
11
12strip: //div[@class="hed_side"]
13strip: //span[@class="byline"]
14strip: //a[@class="soc-twtname"]
15strip: //span[@class="cnnDateStamp"]
16strip: //div[@class="storytimestamp"]
17strip: //div[@class="cnnCol_side"]
18
19prune: no
20tidy: no
21
22test_url: http://money.cnn.com/2011/03/15/news/companies/steve_jobs_thought_process.fortune/index.htm?section=money_topstories&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fmoney_topstories+%28Top+Stories%29
23test_url: http://money.cnn.com/2012/01/27/markets/markets_newyork/index.htm
24test_url: http://money.cnn.com/2012/05/13/technology/yahoo-ceo-out-rumor/index.htm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/monkeyzen.com.txt b/inc/3rdparty/site_config/standard/monkeyzen.com.txt
new file mode 100644
index 00000000..f779c38e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/monkeyzen.com.txt
@@ -0,0 +1,2 @@
1body: //*[(@class = "historia")]
2test_url: http://monkeyzen.com/2011/09/siluetas-de-clasicos-a-modo-de-vinilos \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/moonsault.de.txt b/inc/3rdparty/site_config/standard/moonsault.de.txt
new file mode 100644
index 00000000..061a8d5c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/moonsault.de.txt
@@ -0,0 +1,13 @@
1strip_image_src: menu
2strip_image_src: templates
3strip: //div/a
4strip: //div/b
5strip: //div/strong
6strip: //td[@width='30%']
7strip: //br[1]
8strip: //br[2]
9strip: //br[3]
10strip: //br[4]
11strip: //a[@href='http://www.moonsault.de/newzboard/index.php?act=home']
12strip_id_or_class: cse-branding-right
13test_url: http://www.moonsault.de/newzboard/index.php?news=22321&act=previous \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/moreintelligentlife.com.txt b/inc/3rdparty/site_config/standard/moreintelligentlife.com.txt
new file mode 100644
index 00000000..a7e59c30
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/moreintelligentlife.com.txt
@@ -0,0 +1,7 @@
1title: //h1[@class='print-title']
2body: //div[@class='print-submitted' or @class='print-created' or @class='print-content']
3prune: no
4
5single_page_link: //li[@class='print']/a
6
7test_url: http://moreintelligentlife.com/content/places/paul-markillie/they-trash-cars-dont-they \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/motherboard.vice.com.txt b/inc/3rdparty/site_config/standard/motherboard.vice.com.txt
new file mode 100644
index 00000000..6faf1c9a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/motherboard.vice.com.txt
@@ -0,0 +1,5 @@
1author: //span[@class="author"]/a
2date: //span[@class="date"]
3body: //div[@class="story-content"]
4strip: //aside
5test_url: http://motherboard.vice.com/blog/you-can-carry-a-copy-of-the-pirate-bay-in-your-pocket \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mothering.com.txt b/inc/3rdparty/site_config/standard/mothering.com.txt
new file mode 100644
index 00000000..a9d9195f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/mothering.com.txt
@@ -0,0 +1,7 @@
1title: //h2[contains(@class,'post_headline')]
2body: //div[@class='entry']
3convert_double_br_tags: yes
4strip_image_src: _selected.gif
5strip_id_or_class: addthis_
6strip: //a[contains(@href,'feedburner.com')]
7test_url: http://mothering.com/all-things-mothering/inspiration/motherhood-brings-me-down \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/motherjones.com.txt b/inc/3rdparty/site_config/standard/motherjones.com.txt
new file mode 100644
index 00000000..d58c7d2c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/motherjones.com.txt
@@ -0,0 +1,15 @@
1title: //h1
2body: //div[@id = 'content-area']
3next_page_link: //div[@class='node-pager']/a[contains(@class, 'next')]
4tidy: no
5author: //p[contains(@class, 'byline')]/a
6
7strip_id_or_class: node-header
8strip_id_or_class: hdr-tools
9strip_id_or_class: node-body-break
10strip_id_or_class: pullquote
11strip_id_or_class: node-pager
12strip_id_or_class: author-bio
13strip_id_or_class: node-footer
14
15test_url: http://motherjones.com/politics/2012/02/mac-mcclelland-free-online-shipping-warehouses-labor \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/motorfull.com.txt b/inc/3rdparty/site_config/standard/motorfull.com.txt
new file mode 100644
index 00000000..c6bec7e9
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/motorfull.com.txt
@@ -0,0 +1,2 @@
1body: //*[(@class = "historia")]
2test_url: http://motorfull.com/2011/09/aparca-valeo-park4u-remote \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/msdn.microsoft.com.txt b/inc/3rdparty/site_config/standard/msdn.microsoft.com.txt
new file mode 100644
index 00000000..f4f20450
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/msdn.microsoft.com.txt
@@ -0,0 +1,3 @@
1body: //div[@class="mainBody"]
2footnotes: no
3test_url: http://msdn.microsoft.com/en-us/library/hh542796(VS.103).aspx \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/msnbc.msn.com.txt b/inc/3rdparty/site_config/standard/msnbc.msn.com.txt
new file mode 100644
index 00000000..ad89cda8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/msnbc.msn.com.txt
@@ -0,0 +1,21 @@
1title: //title
2author: //div[@id='byline']
3
4date: //div[contains(@class,'timestamp')]/abbr/text()
5
6body: //div[@id='intellitTXT']
7
8strip: //div[@id='byline']
9strip: //div[contains(@class,'timestamp')]
10strip: //div[contains(@class, 'ad-label')]
11strip: //div[contains(@class, 'ad-break')]
12strip: //span[contains(@class, 'x-video')]
13strip: //span[contains(@class, 'inline')]
14strip: //div[contains(@class, 'video')]
15strip: //div[contains(@class, 'discuss')]
16strip: //div[@id='most-popular']
17strip: //div[contains(@class,'drawer')]
18strip: //*[contains(@class, 'hide')]
19
20footnotes: no
21test_url: http://www.msnbc.msn.com/id/44748412/ns/business-world_business/#.TolUv-vfDbE \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/myfoxboston.com.txt b/inc/3rdparty/site_config/standard/myfoxboston.com.txt
new file mode 100644
index 00000000..1a35b4fc
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/myfoxboston.com.txt
@@ -0,0 +1,4 @@
1body: //div[@class="col1"]//div[@class="photo"] | //div[@class="detail"]/p[@class="fontStyle21"] | //div[@class="story last"]
2tidy: no
3
4test_url: http://www.myfoxboston.com/dpp/news/local/transit-police-say-woman-spat-on-mbta-bus-driver-2010611 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/myrecipes.com.txt b/inc/3rdparty/site_config/standard/myrecipes.com.txt
new file mode 100644
index 00000000..8b99d22d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/myrecipes.com.txt
@@ -0,0 +1,12 @@
1title: //h2[contains(@class, 'name')]
2body: //div[@class='printFullPageContentContainer']//div[contains(@class, 'recipe')]
3
4strip_id_or_class: photoBy
5strip_id_or_class: link
6
7single_page_link: //li[@class='print']/a[contains(@href, '/print/')]
8
9prune: no
10tidy: no
11
12test_url: http://www.myrecipes.com/recipe/hummingbird-cake-10000000387218/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/narenji.ir.txt b/inc/3rdparty/site_config/standard/narenji.ir.txt
new file mode 100644
index 00000000..6c3d0c24
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/narenji.ir.txt
@@ -0,0 +1,2 @@
1body: //div[@class='node']
2test_url: http://www.narenji.ir/2806 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/nasa.gov.txt b/inc/3rdparty/site_config/standard/nasa.gov.txt
new file mode 100644
index 00000000..d95530f3
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/nasa.gov.txt
@@ -0,0 +1,8 @@
1title: //div[@class='address']/span
2author: substring-before(//span[@class='credits'],',')
3date: //div[@class='promodatepress']/span
4body: //div[@class='default_style_wrap']
5strip: //div[@class='text_adjust']
6strip: //div[@class='skiplink']
7strip: //h2
8test_url: http://www.nasa.gov/mission_pages/kepler/news/kepler-21b.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/nbweekly.com.txt b/inc/3rdparty/site_config/standard/nbweekly.com.txt
new file mode 100644
index 00000000..0b722d33
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/nbweekly.com.txt
@@ -0,0 +1,10 @@
1date://span[contains(@class,'date')]
2
3body://div[contains(@class,'contWarp')]
4
5strip://div[contains(@class,'keyWord')]
6strip://div[contains(@class,'submitComt')]
7strip://div[contains(@class,'cmts')]
8strip://div[contains(@class,'notice')]
9strip://div[contains(@class,'part pt-second')]
10test_url: http://www.nbweekly.com/news/china/201203/29316.aspx \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/neh.gov.txt b/inc/3rdparty/site_config/standard/neh.gov.txt
new file mode 100644
index 00000000..45136a2b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/neh.gov.txt
@@ -0,0 +1,17 @@
1#host configuration should be http://www.neh.gov/news/humanities/
2
3
4#meta data
5title:substring-after(substring-after(//title,':'),':')
6author:substring-after(//h2[@class = 'subHead'],'By')
7date:substring-before(substring-after(//title,':'),':')
8
9#img and caption handling
10wrap_in(small)://div[@id = 'mainContent']/table/descendant::p/descendant::text()
11wrap_in(fieldset)://div[@id = 'mainContent']/table
12
13# clean up
14strip: //table[@class = 'marginpaddingTop']
15strip: //h2[@class = 'subHead']
16
17test_url: http://www.neh.gov/news/humanities/2011-11/IslamicScholar.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/neomoney.co.txt b/inc/3rdparty/site_config/standard/neomoney.co.txt
new file mode 100644
index 00000000..564d5492
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/neomoney.co.txt
@@ -0,0 +1,3 @@
1title: //*[@class="header_title"]/h1
2body: //div[contains(@class, 'content')]
3test_url: http://neomoney.co/personal/expatriate-and-migrant-loans/expatriate-loans/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/net-security.org.txt b/inc/3rdparty/site_config/standard/net-security.org.txt
new file mode 100644
index 00000000..4e6d66d4
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/net-security.org.txt
@@ -0,0 +1,7 @@
1title: //div[@class='content-title']
2#date: substring-after(//div[@class='dernek-text-under'],'Posted on')
3body: //div[@class='content-item']
4next_page_link: //li[@class='next']/a
5convert_double_br_tags: yes
6
7test_url: http://www.net-security.org/article.php?id=1732 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/netmagazine.com.txt b/inc/3rdparty/site_config/standard/netmagazine.com.txt
new file mode 100644
index 00000000..86885445
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/netmagazine.com.txt
@@ -0,0 +1,16 @@
1title: //h1
2author: //div[@class="submitted"]/span
3
4# seems like this should work, but nothing is returned. Issue with xpath parser?
5date: //div[@class="submitted"]/time
6
7body: //div[@id="main-content"]
8
9strip_comments: no
10
11strip: //h1
12strip: //div[@class="submitted"]
13strip: //dd[@class="profile-avatar"]
14strip: //div[@class="author-profile"]/dl/dt[1]
15strip: //div[@id="right-col"]
16test_url: http://www.netmagazine.com/opinions/nielsen-wrong-mobile \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/netzpolitik.org.txt b/inc/3rdparty/site_config/standard/netzpolitik.org.txt
new file mode 100644
index 00000000..87dc3cdf
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/netzpolitik.org.txt
@@ -0,0 +1,6 @@
1title: //h1[@class='entry-title']
2author: //a[@rel='author']
3date: //span[@class='entry-date']
4body: //div[@class='entry-content']
5
6test_url: http://netzpolitik.org/2011/buch-generation-facebook/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/newmatilda.com.txt b/inc/3rdparty/site_config/standard/newmatilda.com.txt
new file mode 100644
index 00000000..ab766847
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/newmatilda.com.txt
@@ -0,0 +1,9 @@
1title: //div[@id="maincontent"]/h1
2body: //div[@id="maincontent"]
3date: //div[@id="maincontent"]/p[2]
4author: //ul[@id="contributors"]/li/p/b
5
6strip: //p[@*]
7strip: //h1
8strip: //div[@id="maincontent"]/div
9test_url: http://newmatilda.com/2011/07/22/turnbull-makes-sense-climate \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/news-gazette.com.txt b/inc/3rdparty/site_config/standard/news-gazette.com.txt
new file mode 100644
index 00000000..1f1e5d3a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/news-gazette.com.txt
@@ -0,0 +1,8 @@
1title: //div[@id="main-content"]//h2
2
3author: //div[@id="main-content"]//span[@class="authors"]
4
5date: //div[@id="main-content"]//span[@class="timestamp"]
6
7body: //div[@id="main-content"]//div[@class="content"]
8test_url: http://www.news-gazette.com/news/business/economy/2011-08-08/ibm-drops-out-blue-waters-project.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/news.cnet.com.txt b/inc/3rdparty/site_config/standard/news.cnet.com.txt
new file mode 100644
index 00000000..b7ab224a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/news.cnet.com.txt
@@ -0,0 +1,12 @@
1#This should apply to *.cnet.com, not just news.cnet.com.
2title: //h1
3author: //img[@class="mugshot"]/@alt
4strip: //h1
5strip_id_or_class: breadcrumb
6strip: //p[@id="introP"]
7strip: //div[@class="postByline"]
8strip: //div[@class="editorBio"]
9strip: //div[@class="inline-slideshow"]
10strip: //div[@class="related"]
11body: //div[@class="postBody txtWrap"]
12test_url: http://news.cnet.com/8301-27076_3-57405303-248/apple-ipad-charging-fine-keep-it-plugged-in/?tag=mncol;posts \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/news.detik.com.txt b/inc/3rdparty/site_config/standard/news.detik.com.txt
new file mode 100644
index 00000000..3ed1dc85
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/news.detik.com.txt
@@ -0,0 +1,8 @@
1title://div[@class="content_detail"]/h1
2
3author://div[@class="author"]/strong
4
5date:substring-before(substring-after(//div[@class="content_detail"]/span[@class="date"], ','), ' WIB')
6
7body://div[@class="text_detail"]
8test_url: http://news.detik.com/read/2012/05/22/225531/1922307/10/menkeu-cek-soal-lolosnya-315-kg-sabu-dari-bea-cukai \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/news.kanaloco.jp.txt b/inc/3rdparty/site_config/standard/news.kanaloco.jp.txt
new file mode 100644
index 00000000..6fc86137
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/news.kanaloco.jp.txt
@@ -0,0 +1,9 @@
1body: //div[@id='main']
2strip: //div[@id='sbs']
3strip: //div[@id='fsizeSwitch']
4strip: //div[@id='googleAd']
5strip: //div[@id='detailFoot']
6strip_image_src: counter?key
7convert_double_br_tags: yes
8
9test_url: http://news.kanaloco.jp/localnews/article/1105200018/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/news.mynavi.jp.txt b/inc/3rdparty/site_config/standard/news.mynavi.jp.txt
new file mode 100644
index 00000000..ded680f1
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/news.mynavi.jp.txt
@@ -0,0 +1,11 @@
1title: //h2[@class="lyt-hdg-02-04"]
2
3author: //div[@class="lyt-namearea"]/a
4
5date: //div[@class="lyt-namearea"]/text()
6
7body: //div[@class="articleContent"]
8
9strip: //div[@id="tab-aside"]
10
11test_url: http://news.mynavi.jp/articles/2011/12/07/nico/index.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/news.orf.at.txt b/inc/3rdparty/site_config/standard/news.orf.at.txt
new file mode 100644
index 00000000..b60deea4
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/news.orf.at.txt
@@ -0,0 +1,11 @@
1single_page_link: //div[@id='content']//p[@class='readMore']/a
2
3title: //div[@class='hidden offscreen']/h2
4body: //div[@id="storyText"]
5move_into(//div[@id='storyText']): //div[@class='fact']
6strip: //small[@class='credit']
7strip: //small[@class='caption']
8date: substring-after(//div[@id='storyMeta']//p[@class='date'],'Publiziert am')
9strip: //p[@class='toplink']
10
11test_url: http://news.orf.at/stories/2084731/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/news.rambler.ru.txt b/inc/3rdparty/site_config/standard/news.rambler.ru.txt
new file mode 100644
index 00000000..743245f8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/news.rambler.ru.txt
@@ -0,0 +1,9 @@
1body: //article
2title: //h1
3author: //span[@class='b-article-source-dropdown']
4strip: //span[@class='b-article-photo-incut__source']
5strip: //a[@class='b-read-more b-read-more_bottom']
6
7
8tidy:no
9test_url: http://news.rambler.ru/12972208/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/news.techmeme.com.txt b/inc/3rdparty/site_config/standard/news.techmeme.com.txt
new file mode 100644
index 00000000..c80c3327
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/news.techmeme.com.txt
@@ -0,0 +1,4 @@
1body: //div[@class='main']/div[@class='item']
2strip: //div[@class='right']
3
4test_url: http://news.techmeme.com/110516/fh-rip \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/news.yahoo.com.txt b/inc/3rdparty/site_config/standard/news.yahoo.com.txt
new file mode 100644
index 00000000..5ee04049
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/news.yahoo.com.txt
@@ -0,0 +1,12 @@
1title: //meta[@property='og:title']/@content
2title: //h1[@class='headline']
3author: //cite[contains(@class, 'byline') and contains(@class, 'vcard')]//span[@class='fn']
4date: //cite[contains(@class, 'byline') and contains(@class, 'vcard')]//abbr/@title
5body: //div[@id='mediaarticlelead']//a[@class='media'] | //div[contains(@class,'yom-art-content')]
6#strip: //cite/abbr
7strip_id_or_class: action
8strip_id_or_class: prefetch
9tidy: no
10prune: no
11
12test_url: http://news.yahoo.com/cold-la-nina-winter-forecast-west-coast-183535067.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/news.ycombinator.com.txt b/inc/3rdparty/site_config/standard/news.ycombinator.com.txt
new file mode 100644
index 00000000..0b01f8a1
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/news.ycombinator.com.txt
@@ -0,0 +1,3 @@
1strip_comments: no
2strip: //a[. = 'reply']
3test_url: http://news.ycombinator.com/item?id=1516461 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/newsbomb.gr.txt b/inc/3rdparty/site_config/standard/newsbomb.gr.txt
new file mode 100644
index 00000000..0500890f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/newsbomb.gr.txt
@@ -0,0 +1,9 @@
1date: //meta[@name='og:article:published_time']/@value
2
3body: //div[@class='itemIntroText' or @class='itemImageBlock' or @class='itemFullText']
4
5strip_id_or_class: itemImageGallery
6
7prune: no
8
9test_url: http://www.newsbomb.gr/gossip/story/257234/i-proin-moy-protimoyse-na-serfarei-apo-to-na-kanoyme-sex \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/newsle.com.txt b/inc/3rdparty/site_config/standard/newsle.com.txt
new file mode 100644
index 00000000..e500ddcc
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/newsle.com.txt
@@ -0,0 +1,2 @@
1single_page_link: //iframe/@src
2test_url: http://newsle.com/article/0/15831103/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/newsmill.se.txt b/inc/3rdparty/site_config/standard/newsmill.se.txt
new file mode 100644
index 00000000..eb7d3350
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/newsmill.se.txt
@@ -0,0 +1,12 @@
1title: //h1
2body: (//div[@class='articleImg']//img)[1] | //p[contains(@class, 'commentTextArticle') or contains(@class, 'articlePublished')] | //div[@id='articleLeftContent']
3author: //div[@class='byline']//a[contains(@href, '/user/')]
4
5strip_id_or_class: facts
6strip_id_or_class: articleBlogsHolder
7strip_id_or_class: byline
8
9prune: no
10tidy: no
11
12test_url: http://www.newsmill.se/artikel/2012/05/06/medielogiken-v-ger-tyngre-n-reportrarnas-sikter \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/newsunspun.org.txt b/inc/3rdparty/site_config/standard/newsunspun.org.txt
new file mode 100644
index 00000000..860ad66b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/newsunspun.org.txt
@@ -0,0 +1,10 @@
1body: //div[@class='right']//div[@class='articles']
2author: //div[@id='artinfo']//a[contains(@href, '/author/')]
3strip: //div[@id='artinfo']
4strip: //table[//a[contains(@href, 'twitter.com')]]
5strip_id_or_class: twitter
6
7prune: no
8tidy: no
9
10test_url: http://www.newsunspun.org/eotn/bbc-headline-change-iran-goes-from-not-building-to-undecided-on-nuclear-bomb \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/newyorker.com.txt b/inc/3rdparty/site_config/standard/newyorker.com.txt
new file mode 100644
index 00000000..5624aa8c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/newyorker.com.txt
@@ -0,0 +1,10 @@
1title: //h1[@id='articlehed'] | //h2[@id="articleintro"]
2body: //div[@id='articletext']
3
4strip: //ul[@id="bc"] | //div[@id="yrail"] | //div[@class="entry-keywords"] | //div[@class="entry-categories"] | //div[@class="socialUtils"] | //div[@id="footer"]
5
6date: //h4[@id='articleauthor']/span[@class='dd dds']
7date: //div[@id="pagebody"]/div[@class='hentry entry']/div[@class='published']
8
9single_page_link: //div[@class='paginationViewSinglePage']/a
10test_url: http://www.newyorker.com/online/blogs/culture/2012/06/mug-shot-web-sites.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/next-gen.biz.txt b/inc/3rdparty/site_config/standard/next-gen.biz.txt
new file mode 100644
index 00000000..806a3dfd
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/next-gen.biz.txt
@@ -0,0 +1,16 @@
1# 2011-08-22 [carlo@...] initial version
2# 2011-08-22 [carlo@...] removed comments & social links
3
4tidy: no
5
6single_page_link: //a[@class="single active"]
7
8body: //div[@id="main"]//div[@class="content-region"]/article
9author: //span[@class="author-name"]
10date: //time/text()
11
12strip: //aside[@id="related"]
13strip: //footer
14
15title: //h1
16test_url: http://www.next-gen.biz/reviews/deus-ex-human-revolution-review \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/nfl.com.txt b/inc/3rdparty/site_config/standard/nfl.com.txt
new file mode 100644
index 00000000..70f92473
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/nfl.com.txt
@@ -0,0 +1,11 @@
1# selecting an attribute value does not seem to work (note: the expression below lacks the "/" before @value)
2# author: //meta[@id="authorName"]@value
3
4author: substring-after(//li[@id="article-hdr-meta-author"]/text(), "By ")
5date: //abbr[@id="article-time"]
6title: //div[@id="article-hdr"]/h1
7body: //div[@class="articleText"]
8
9# strip miscellaneous teasers, etc.
10strip: //div[@class="removeformobile"]
11test_url: http://www.nfl.com/news/story/09000d5d82388707/article/close-shave-chiefs-haley-perseveres-through-rough-start?module=HP11_content_stream \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ngm.nationalgeographic.com.txt b/inc/3rdparty/site_config/standard/ngm.nationalgeographic.com.txt
new file mode 100644
index 00000000..60834862
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/ngm.nationalgeographic.com.txt
@@ -0,0 +1,7 @@
1next_page_link: //div[@class='nextpage_continue']/a
2strip: //div[@class='nextpage_continue']
3strip_id_or_class: nextpage
4title: //div[@class='article_title']//h1
5body: //div[@class='article_title']/..
6body: //div[@class='content']
7test_url: http://ngm.nationalgeographic.com/2012/02/tsunami/folger-text \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/nhk.or.jp.txt b/inc/3rdparty/site_config/standard/nhk.or.jp.txt
new file mode 100644
index 00000000..0a3bb913
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/nhk.or.jp.txt
@@ -0,0 +1,2 @@
1body: //div[@id = 'news_right']
2test_url: http://www.nhk.or.jp/news/html/20110309/t10014559982000.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/nintendoworldreport.com.txt b/inc/3rdparty/site_config/standard/nintendoworldreport.com.txt
new file mode 100644
index 00000000..409a8977
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/nintendoworldreport.com.txt
@@ -0,0 +1,13 @@
1body: //div[@id="main"]
2title: //div[@id="main"]/h3
3
4# Remove ‘Review’ and ‘Wii’.
5strip: //div[@class="badge"]
6
7# Remove duplicate title and country flag.
8strip: //h3
9
10# Commented out below are attempts to extract the author and date, which did not work.
11# author: //p[@class="extra "]/a
12# date: //p[@class="extra "]/span[@class="when"]
13test_url: http://www.nintendoworldreport.com/review/28400 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/nojesguiden.se.txt b/inc/3rdparty/site_config/standard/nojesguiden.se.txt
new file mode 100644
index 00000000..ae2d7e41
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/nojesguiden.se.txt
@@ -0,0 +1,5 @@
1author: //span[@class='meta']/span[@class='username']
2body: //div[@class='article-content']
3
4strip_id_or_class: 'article-actions'
5test_url: http://nojesguiden.se/blogg/maja-bredberg/maja-laser-tidningen-en-helt-vanlig-lordag-i \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/northumberlandview.ca.txt b/inc/3rdparty/site_config/standard/northumberlandview.ca.txt
new file mode 100644
index 00000000..04a0a34d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/northumberlandview.ca.txt
@@ -0,0 +1,11 @@
1title: //h1
2body: //div[@id='pn-maincontent']
3strip_id_or_class: z-menu
4strip_id_or_class: news_category
5strip_id_or_class: news_title
6strip_id_or_class: news_modify
7strip_id_or_class: news_morearticlesincat
8strip_id_or_class: ezc_comments
9strip_comments: yes
10
11test_url: http://www.northumberlandview.ca/index.php?module=news&func=display&sid=5972 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/nplusonemag.com.txt b/inc/3rdparty/site_config/standard/nplusonemag.com.txt
new file mode 100644
index 00000000..205b1af4
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/nplusonemag.com.txt
@@ -0,0 +1,6 @@
1title: /html/body/div[3]/div/div/h1
2
3body: //*[@id="article-body"]
4
5
6test_url: http://nplusonemag.com/the-outskirts-of-progress \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/npr.org.txt b/inc/3rdparty/site_config/standard/npr.org.txt
new file mode 100644
index 00000000..afab0eb3
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/npr.org.txt
@@ -0,0 +1,32 @@
1title: //div[contains(@class, 'storytitle')]//h1
2author: //p[@class="byline"]/span
3body: //div[@id='storyspan02']//*[@class='duration' or @class='download' or contains(@class, 'photo')] | //div[@id='storytext'] | //div[@class='transcript']
4date: //meta[@name="date"]/@content
5
6strip: //div[@class='enlarge_measure']
7strip: //div[@class='enlarge_html']
8strip: //a[@class='enlargeicon']
9strip: //div[contains(@class, 'bookedition')]
10strip: //div[@class='textsize']
11strip: //ul[@class='genres']
12strip: //span[@class='bull']
13strip_id_or_class: secondary
14strip_id_or_class: con1col
15strip: //h3[@class='conheader']
16
17replace_string(<a name="more">&nbsp;</a>): <!-- no more -->
18replace_string(<div class="transcript">): <div class="transcript"><h2>Transcript</h2>
19
20prune: no
21strip://div[@class="ecommercepop"]
22strip://span[@class="bull"]
23strip://span[@class="purchaseLink"]
24strip://div[@class="enlarge_html"]
25strip://div[@class="enlarge_measure"]
26strip://div[@class="container con1col small"]
27strip://a[contains(@class, "enlargebtn")]
28strip://div[contains(@class, "bucketwrap internallink")]
29
30test_url: http://www.npr.org/blogs/thetwo-way/2011/07/12/137799301/sports-loses-its-escapist-gleam-in-a-summer-of-court-dates
31test_url: http://www.npr.org/2012/07/04/156190948/feeling-under-siege-catholic-leadership-shifts-right
32test_url: http://www.npr.org/2012/12/13/166480907/the-years-best-sci-fi-crosses-galaxies-and-genres \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/nybooks.com.txt b/inc/3rdparty/site_config/standard/nybooks.com.txt
new file mode 100644
index 00000000..8ecb8961
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/nybooks.com.txt
@@ -0,0 +1,13 @@
1strip_id_or_class: sIFR-alternate
2title: //div[@id='page-title-wrapper']/div[@id='page-title']/h2
3single_page_link: //a[contains(@href, 'pagination=false') and not(contains(@href, 'printpage=true'))]
4
5body: //div[@id = 'article-body']
6strip_id_or_class:article-tools
7strip_id_or_class:js_target
8strip_id_or_class:marker
9author://div[@id = 'page-title']/h3
10date://div[@id = 'page-title']/h5/a[starts-with(@href,'/issues/')]
11
12
13test_url: http://www.nybooks.com/articles/archives/2012/feb/23/were-more-unequal-you-think/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/nymag.com.txt b/inc/3rdparty/site_config/standard/nymag.com.txt
new file mode 100644
index 00000000..f664c93d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/nymag.com.txt
@@ -0,0 +1,8 @@
1title: //h2[contains(@class, 'primary')]
2body: //div[@id='story']
3author: //*[@class='by']/a
4date: substring-after(//*[@class='date'], 'Published')
5
6next_page_link: //div[@class='page-navigation']//li[@class='next']/a
7
8test_url: http://nymag.com/news/features/wall-street-2012-2/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/nyteknik.se.txt b/inc/3rdparty/site_config/standard/nyteknik.se.txt
new file mode 100644
index 00000000..8c9e37f4
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/nyteknik.se.txt
@@ -0,0 +1,8 @@
1title: //div[@class="article default-article"]/h1
2author: //p[@class="author"]/a[2]
3
4# Article introduction:
5#move_into(//div[@class="article-bread"]): //p[@class="lead"]
6
7body: //div[@class="article-bread"]
8test_url: http://www.nyteknik.se/nyheter/energi_miljo/energi/article3391426.ece \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/nytimes.com.txt b/inc/3rdparty/site_config/standard/nytimes.com.txt
new file mode 100644
index 00000000..8d9a794a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/nytimes.com.txt
@@ -0,0 +1,36 @@
1title://h1[@class="articleHeadline"]
2body://div[@id="article"]
3strip_id_or_class:articleTools
4strip_id_or_class:readerscomment
5#strip://div[contains(@class, "articleInline runaroundLeft")]
6strip: //div[contains(@class, "doubleRule")]
7# strip image credit - appears as a bold heading
8strip: //div[contains(@class, "articleInline")]//h6
9strip_id_or_class:enlargeThis
10strip_id_or_class:pageLinks
11strip_id_or_class:memberTools
12strip_id_or_class:articleExtras
13strip_id_or_class:singleAd
14strip_id_or_class:byline
15strip_id_or_class:dateline
16strip_id_or_class:articleheadline
17strip_id_or_class:articleBottomExtra
18strip://a[contains(@href, 'nytimes.com/adx/')]
19strip: //nyt_byline
20strip: //span[contains(@class, 'slideshow') or contains(@class, 'video')]
21strip: //p[@class='caption']//a[contains(., 'More Photos')]
22
23prune: no
24tidy: no
25
26date: substring-after(//*[contains(@class, 'dateline')], 'Published:')
27
28single_page_link: //link[contains(@href, 'pagewanted=all')]
29#single_page_link: //a[contains(@href, 'pagewanted=all') and not(contains(@href, 'login'))]
30
31strip://ul[@id = 'toolsList']
32strip://h6[@class = 'kicker']
33author:substring-after(//h6[@class='byline'],'By ')
34
35test_url: http://www.nytimes.com/2011/07/24/books/review/an-academic-authors-unintentional-masterpiece.html
36test_url: http://www.nytimes.com/2012/06/10/arts/television/the-newsroom-aaron-sorkins-return-to-tv.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/nzz.ch.txt b/inc/3rdparty/site_config/standard/nzz.ch.txt
new file mode 100644
index 00000000..81faabae
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/nzz.ch.txt
@@ -0,0 +1,12 @@
1body: //*[@class='article-full']
2title: //h3
3strip: //header[@class='group']
4#body: //p[@class='lead']
5#move_into(//p[@class='lead']): //*[@class='article-full']/figure
6#move_into(//p[@class='lead']): //div[@id='articleBodyText']
7strip: //div[@id='social-media-floater']
8strip: //div[@class='advertisement']
9strip: //div[@class='infobox']
10strip: //div[@id='articleComments']
11
12test_url: http://www.nzz.ch/wissen/wissenschaft/sonnenschutz-fuer-die-erde-1.17282213 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/observer.com.txt b/inc/3rdparty/site_config/standard/observer.com.txt
new file mode 100644
index 00000000..e409ca2e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/observer.com.txt
@@ -0,0 +1,7 @@
1body: //article[contains(@class, 'instapaper_body')]
2
3prune: no
4
5single_page_link: //a[@id='print-button']
6
7test_url: http://www.observer.com/2008/would-you-take-tumblr-man \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/off.net.mk.txt b/inc/3rdparty/site_config/standard/off.net.mk.txt
new file mode 100644
index 00000000..a2fb5f21
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/off.net.mk.txt
@@ -0,0 +1,7 @@
1body: //div[(@id = "content")]
2strip: //div[(@class = "links-bar")]
3strip: //div[(@class = "povrzani")]
4strip: //div[(@class = "povrzani-dolu")]
5strip: //div[(@class = "tags")]
6strip: //h1[(@id = "page-title")]
7test_url: http://off.net.mk/zhivot-i-zabava/gadzheti/dzhabe-raboti-dzhabe-ne-dishi \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/omaha.com.txt b/inc/3rdparty/site_config/standard/omaha.com.txt
new file mode 100644
index 00000000..53db061d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/omaha.com.txt
@@ -0,0 +1,2 @@
1body: //div[@class='story']
2test_url: http://www.omaha.com/article/20111031/BIGRED/111039984#pelini-tremendous-challenge-ahead-for-huskers \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/omiliya.org.txt b/inc/3rdparty/site_config/standard/omiliya.org.txt
new file mode 100644
index 00000000..1b39b625
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/omiliya.org.txt
@@ -0,0 +1,9 @@
1title: //div[@id='squeeze']/h1
2strip: //div[@id='squeeze']/h1
3author: //div[@class='submitted']/a
4strip: //div[@class='submitted']/a
5convert_double_br_tags: yes
6
7
8
9test_url: http://omiliya.org/content/predchuvstvie.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/on.net.mk.txt b/inc/3rdparty/site_config/standard/on.net.mk.txt
new file mode 100644
index 00000000..be7a17ef
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/on.net.mk.txt
@@ -0,0 +1,5 @@
1body: //div[(@class = "statija")]
2strip: //div[(@class = "relatedBlock")]
3strip: //div[(@class = "swftools")]
4strip: //table[(@class = "links")]
5test_url: http://on.net.mk/video/na-trkala/lamborghini-aventador-avionot-shto-ne-leta \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/online.wsj.com.txt b/inc/3rdparty/site_config/standard/online.wsj.com.txt
new file mode 100644
index 00000000..edb52855
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/online.wsj.com.txt
@@ -0,0 +1,23 @@
1title: //meta[@property="og:title"]/@content
2body: //div[@id='article_story_body']
3
4author: //h3[@class='byline']/a
5# for slide show content
6body: //ul[@id='imageSlide']//li[@class='firstSlide']//img | (//div[@class='txt_body']//p)[1]
7date: //li[@class='dateStamp']/small
8
9strip_id_or_class: insetFullBracket
10strip_id_or_class: insettipBox
11#strip_id_or_class: legacyInset
12strip_id_or_class: recipeACShopAndBuyText
13
14strip: //div[contains(@class, 'insetContent')]//cite
15strip: //*[contains(@style, 'visibility: hidden;')]
16strip: //div[contains(@class, 'insetContent') and not(contains(@class, 'image'))]
17
18prune: no
19tidy: no
20
21test_url: http://online.wsj.com/article/SB10001424052970203363504577185322849515102.html
22# slide show
23test_url: http://online.wsj.com/article/SB10001424052970204791104577110550376458164.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/onlinewelten.com.txt b/inc/3rdparty/site_config/standard/onlinewelten.com.txt
new file mode 100644
index 00000000..1609fa83
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/onlinewelten.com.txt
@@ -0,0 +1,2 @@
1body: //div[@id='news_detail']//div[@class='contents clearfix']
2test_url: http://www.onlinewelten.com/games/aliens-colonial-marines/news/offizielle-spiel-ankuendigung-nintendos-wii-u-103690/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/onstartups.com.txt b/inc/3rdparty/site_config/standard/onstartups.com.txt
new file mode 100644
index 00000000..cccce8cd
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/onstartups.com.txt
@@ -0,0 +1,2 @@
1strip: //div[@id="dnn_LeftPane"] | //div[@id="dnn_ContentPane"]//h1 | //div[@id="dnn_ContentPane"]//p[@class="Normal"] | //div[@class="Submissions"] | //div[@id="listing"]//h3 | //div[@id="listing"][2] | //div[@id="emart-fail"] | //div[@id="emart-success"] | //div[@id="emart-form"]
2test_url: http://onstartups.com/tabid/3339/bid/37737/Secrets-Of-Freemium-Pricing-Make-The-Cheapskates-Pay.aspx \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/opensource.org.txt b/inc/3rdparty/site_config/standard/opensource.org.txt
new file mode 100644
index 00000000..2bd3ccdb
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/opensource.org.txt
@@ -0,0 +1,2 @@
1body: //div[@class='content clear-block']
2test_url: http://opensource.org/node/537 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/openthemagazine.com.txt b/inc/3rdparty/site_config/standard/openthemagazine.com.txt
new file mode 100644
index 00000000..510eb252
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/openthemagazine.com.txt
@@ -0,0 +1,4 @@
1body: //div[@id = 'content-inner']
2strip: //div[@id = 'content-bottom']
3strip_id_or_class: print_sharebutton
4test_url: http://openthemagazine.com/article/nation/sania-vs-saina \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/openwebx.org.txt b/inc/3rdparty/site_config/standard/openwebx.org.txt
new file mode 100644
index 00000000..b7663540
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/openwebx.org.txt
@@ -0,0 +1,4 @@
1body: //div[@class="chapter"]
2prune: no
3tidy: no
4test_url: http://openwebx.org/docs/springext.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/orf.at.txt b/inc/3rdparty/site_config/standard/orf.at.txt
new file mode 100644
index 00000000..ff16ca79
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/orf.at.txt
@@ -0,0 +1,11 @@
1single_page_link: //div[@id='content']//p[@class='readMore']/a
2
3title: //div[@class='hidden offscreen']/h2
4body: //div[@id="storyText"]
5move_into(//div[@id='storyText']): //div[@class='fact']
6strip: //small[@class='credit']
7strip: //small[@class='caption']
8date: substring-after(//div[@id='storyMeta']//p[@class='date'],'Publiziert am')
9strip: //p[@class='toplink']
10
11test_url: http://orf.at/stories/2084731/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/origo.hu.txt b/inc/3rdparty/site_config/standard/origo.hu.txt
new file mode 100644
index 00000000..0dedac3d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/origo.hu.txt
@@ -0,0 +1,18 @@
1title: /html/body/div[5]/div[2]/h1
2body: /html/body/div[5]/div[2]/div[6]/div/div
3body: //*[@id="cikk"]
4strip: /html/body/div[5]/div[2]/h1
5strip: /html/body/div[5]/div[2]/div[4]
6strip: //*[@id="multidoboz"]
7strip: /html/body/div[5]/div[2]/div[6]/div[2]
8strip: //*[@id="comments"]
9strip: //*[@id="rating-doboz"]
10strip: /html/body/div[5]/div[2]/div[10]
11strip: /html/body/div[5]/div[2]/a
12strip: /html/body/div[5]/div[2]/span
13strip: /html/body/div[5]/div[2]/span[2]
14strip: /html/body/div[5]/div[2]/span[3]
15strip: /html/body/div[5]/div[2]/span[4]
16strip: /html/body/div[5]/div[2]/span[5]
17strip: //*[@id="kommentszam"]
18test_url: http://www.origo.hu/itthon/20110119-lemondott-a-kulturaert-felelos-helyettes-allamtitkar.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pakistantvdekho.com.txt b/inc/3rdparty/site_config/standard/pakistantvdekho.com.txt
new file mode 100644
index 00000000..f03c9551
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/pakistantvdekho.com.txt
@@ -0,0 +1,11 @@
1#body: (//div[@class='ftr-yt-vid'])[1]
2body: (//blockquote[contains(@class, 'postcontent')])[1]
3body: (//div[starts-with(@id, 'post_message')])[1]
4
5prune: no
6tidy: no
7
8#replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player"
9#replace_string(</iframe>): </iframe>&nbsp;</div>
10
11test_url: http://pakistantvdekho.com/showthread.php?647741-Sitam-Gar-by-HUM-TV-Episode-07&p=659080#post659080 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pandagon.net.txt b/inc/3rdparty/site_config/standard/pandagon.net.txt
new file mode 100644
index 00000000..d0d2a5d0
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/pandagon.net.txt
@@ -0,0 +1,5 @@
1title://h2
2author://div[@class="posted"]/a
3date://div[@class="date"]
4body://div[@class="entry"]
5test_url: http://pandagon.net/index.php/site/its-okay-to-admit-that-mass-hysteria-is-real \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pandodaily.com.txt b/inc/3rdparty/site_config/standard/pandodaily.com.txt
new file mode 100644
index 00000000..7d1c2183
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/pandodaily.com.txt
@@ -0,0 +1,5 @@
1tidy: no
2body: //article
3date: //time/@datetime
4strip_id_or_class: sharedaddy
5test_url: http://pandodaily.com/2012/01/19/ibooks-author-is-not-going-to-hurt-publishers-it-might-even-help-them/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/panic.com.txt b/inc/3rdparty/site_config/standard/panic.com.txt
new file mode 100644
index 00000000..0361f06d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/panic.com.txt
@@ -0,0 +1,3 @@
1body: //div[@class='entry']
2date: //h3[@class='postDate']
3test_url: http://www.panic.com/blog/2011/07/panic-is-ready-for-lion/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/parislemon.com.txt b/inc/3rdparty/site_config/standard/parislemon.com.txt
new file mode 100644
index 00000000..a3bd4b0f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/parislemon.com.txt
@@ -0,0 +1,6 @@
1title: //h2[@class="post-title"]
2author: substring-after(//div[@class="description"],'Words by ')
3date: //li[@class="date"]
4strip: //h2[@class="post-title"]
5body: //div[@class="copy"]
6test_url: http://parislemon.com/post/13462682469/the-15-inch-air \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/parliament.uk.txt b/inc/3rdparty/site_config/standard/parliament.uk.txt
new file mode 100644
index 00000000..478a669f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/parliament.uk.txt
@@ -0,0 +1,3 @@
1title: //h1
2body: //div[@id='news-article']
3test_url: http://www.parliament.uk/business/committees/committees-a-z/commons-select/backbench-business-committee/news/guidance-for-e-petitioners/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pastebin.com.txt b/inc/3rdparty/site_config/standard/pastebin.com.txt
new file mode 100644
index 00000000..89d13b2a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/pastebin.com.txt
@@ -0,0 +1,6 @@
1title://div[@class="paste_box_line1"]/h1
2author://div[@class="paste_box_line2"]/a
3body://div[@class="text"]
4date:substring-before(substring-after(//div[@class="paste_box_line2"],'|'),'|')
5dissolve://li
6test_url: http://pastebin.com/LAykd1es \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pastepad.fivefilters.org.txt b/inc/3rdparty/site_config/standard/pastepad.fivefilters.org.txt
new file mode 100644
index 00000000..40a049e0
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/pastepad.fivefilters.org.txt
@@ -0,0 +1,5 @@
1title: //h1
2body: //div[@id='ff-pastepad-content']
3prune: no
4# todo: add test file
5test_url: http://pastepad.fivefilters.org/test.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pathawks.com.txt b/inc/3rdparty/site_config/standard/pathawks.com.txt
new file mode 100644
index 00000000..1a4cd25b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/pathawks.com.txt
@@ -0,0 +1,8 @@
1title://*[contains(@class,'post-title')]
2body://div[contains(@class,'post-body')]
3body://div[contains(@class,'entry-content')]
4strip_comments:no
5prune:no
6convert_double_br_tags:yes
7tidy:yes
8test_url: http://www.pathawks.com/2011/06/crazyawesomecoloradotrip.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pcast.me.txt b/inc/3rdparty/site_config/standard/pcast.me.txt
new file mode 100644
index 00000000..ae38e8e1
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/pcast.me.txt
@@ -0,0 +1,2 @@
1prune: no
2test_url: http://pcast.me/shownotes/get/16t \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pcmag.com.txt b/inc/3rdparty/site_config/standard/pcmag.com.txt
new file mode 100644
index 00000000..cebea4d7
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/pcmag.com.txt
@@ -0,0 +1,10 @@
1prune:yes
2
3date://*[contains(@class,'date')]
4
5body://div[contains(@id,'content')]
6
7next_page_link://a[contains(.,'Next >')]
8
9strip_id_or_class:sponsors
10test_url: http://www.pcmag.com/article2/0,2817,2401676,00.asp \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pcworld.com.txt b/inc/3rdparty/site_config/standard/pcworld.com.txt
new file mode 100644
index 00000000..30ccbb5f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/pcworld.com.txt
@@ -0,0 +1,19 @@
1title: //div[@class='articleHead']//h1
2author: //div[@class="author-name"]/a[1]
3body: //div[@class="main"]
4
5# remove 'From the Lab' and 'Recent posts' text
6strip: //div[@class='blogLabel']
7
8# remove byline and meta info
9strip: //h1
10strip: //div[@class="article-meta"]
11strip: //div[@class="author-info"]
12
13#strip tags and categories
14strip: //div[@class="department"]
15
16#strip product cap links
17strip: //div[@class="cap-main"]
18strip: //div[@id="compare-lede"]
19test_url: http://www.pcworld.com/article/262034/are-printer-companies-gouging-us-on-laser-toner-pricing.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/penny-arcade.com.txt b/inc/3rdparty/site_config/standard/penny-arcade.com.txt
new file mode 100644
index 00000000..f97615f1
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/penny-arcade.com.txt
@@ -0,0 +1,23 @@
1# 2012-01-14 carlo@... - fixed title, body; added author, date
2
3title: //div[@class="title"]/h2/a
4# body: //div[@class="post"]
5# author: //p[@class="iconEmail"]/a
6# date: //p[@class="iconDate"]
7
8# 1/24/2013 yosoyju - fixed author, date, and body, added support for PA Report
9
10# Penny Arcade
11
12author: //li[@class="iconEmail"]/a
13date: //li[@class="iconDate"]
14body: //div[@class="body"]
15
16# PA Report
17
18author: //div[@class="meta"]/p/a
19date: substring-after(//div[@class="meta"]/p, '/ ')
20title: substring-after(//title, '- ')
21
22test_url: http://penny-arcade.com/2012/01/13/i-put-some-news-in-your-news
23test_url: http://penny-arcade.com/report/editorial-article/the-dystopian-future-of-casual-games-personalized-targeted-pricing-and-mech \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pentaxforums.com.txt b/inc/3rdparty/site_config/standard/pentaxforums.com.txt
new file mode 100644
index 00000000..00f61a48
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/pentaxforums.com.txt
@@ -0,0 +1,2 @@
1next_page_link: //a[contains(., 'Next:')]
2test_url: http://www.pentaxforums.com/reviews/long-exposure-handhelds/introduction.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/philadelphiaeagles.com.txt b/inc/3rdparty/site_config/standard/philadelphiaeagles.com.txt
new file mode 100644
index 00000000..a369fd65
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/philadelphiaeagles.com.txt
@@ -0,0 +1,6 @@
1prune: no
2tidy: no
3body: //div[@class='article-content']
4dissolve: //nobr/a
5dissolve: //nobr
6test_url: http://www.philadelphiaeagles.com/news/article-1/Jacksons-Light-Shined-On-Sunday-Night/51a862de-42b4-40f1-a5a8-ba0fb8a435b7 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/philly.com.txt b/inc/3rdparty/site_config/standard/philly.com.txt
new file mode 100644
index 00000000..41318f63
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/philly.com.txt
@@ -0,0 +1,10 @@
1title: //h1[@class='entry-title']
2author: //p[@class='byline']/span
3body: //*[@id='body-content']
4date: //div[@class='article_timestamp']/span
5
6strip: //*[@class='b-group']
7strip: //*[contains(@style, 'none')]
8strip: //a[contains(@href, 'comments')]
9strip: //*[contains(@class, 'comment')]
10test_url: http://www.philly.com/philly/sports/eagles/20120127_Ohio_State_s_Posey_didn_t_waste_time_lost_to_suspension.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/photo.tutsplus.com.txt b/inc/3rdparty/site_config/standard/photo.tutsplus.com.txt
new file mode 100644
index 00000000..4e2ccb01
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/photo.tutsplus.com.txt
@@ -0,0 +1,6 @@
1author: substring-before(//div[@class='post_meta'],' on')
2date: substring-after(substring-before(//div[@class='post_meta'],'with'),' on')
3title: //h1[@class='post_title']
4body: //div[@class='article']
5
6test_url: http://photo.tutsplus.com/articles/news/a-brilliant-beginners-guide-to-architectural-photography/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/php.net.txt b/inc/3rdparty/site_config/standard/php.net.txt
new file mode 100644
index 00000000..7c57a84d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/php.net.txt
@@ -0,0 +1,6 @@
1body: //div[@id='content']
2strip_id_or_class: manualnavbar
3
4prune: no
5
6test_url: http://www.php.net/manual/en/migration5.incompatible.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/physicstoday.org.txt b/inc/3rdparty/site_config/standard/physicstoday.org.txt
new file mode 100644
index 00000000..a8163995
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/physicstoday.org.txt
@@ -0,0 +1,7 @@
1title: //div[@class='abstitle']//h1
2author: //div[@class='authorList']
3body: //div[@id='fulltext_body']
4
5prune: no
6
7test_url: http://www.physicstoday.org/resource/1/phtoad/v64/i10/p48_s1?bypassSSO=1 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pitchfork.com.txt b/inc/3rdparty/site_config/standard/pitchfork.com.txt
new file mode 100644
index 00000000..3decc538
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/pitchfork.com.txt
@@ -0,0 +1,16 @@
1title:concat(//h1,' - ',//h2,' - ',//h3)
2author://address
3date://span[@class='pub-date']
4body://div[@id='main']
5single_page_link://link[@rel='canonical']
6strip://div[@class='info']
7strip_id_or_class:'object-grid related-content'
8strip_id_or_class:'object-prevnext'
9strip_id_or_class:'object-header'
10strip_id_or_class:'source'
11strip_id_or_class:'label'
12strip_id_or_class:'title'
13dissolve://ul
14strip://li[@class='next']
15strip://li[@class='prev']
16test_url: http://pitchfork.com/features/why-we-fight/8796-on-the-far-slope-of-the-uncanny-valley/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pittnews.com.txt b/inc/3rdparty/site_config/standard/pittnews.com.txt
new file mode 100644
index 00000000..92777073
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/pittnews.com.txt
@@ -0,0 +1,8 @@
1title: //h2[@class='post-title']
2author: substring-before(substring-after(//h3[@class='post-byline'],'By:'),'/')
3date: substring-before(substring-after(//p[@class='post-details'],'Posted on '),'in')
4strip: //h2[@class='post-title']
5strip: //p[@class='post-details']
6strip: //h3[@class='post-byline']
7body: //div[@id='content']
8test_url: http://pittnews.com/newsstory/mens-basketball-pitt-recruit-robinson-to-bring-leadership/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pittsburgh.pirates.mlb.com.txt b/inc/3rdparty/site_config/standard/pittsburgh.pirates.mlb.com.txt
new file mode 100644
index 00000000..824cb064
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/pittsburgh.pirates.mlb.com.txt
@@ -0,0 +1,15 @@
1title: substring-before(//title,'pirates.com')
2date: //span[@class='timeStamp']
3author: substring-before(substring-after(//div[@class='byLine'],'By'),'/')
4body: //div[@id='article']
5#strip: //div[@class='inner']
6strip: //div[@id='article_head']
7strip: //p[@class='tagLine']
8strip: //div[@id='article_related_links']
9strip: //div[@id='article_related_mlb']
10strip: //div[@id='article_related_club']
11strip: //span[@class='more']
12strip: //div[@class='article_component']
13strip: //span[@class='screen_reader']
14strip: //ul[@class='columnists_blurb']
15test_url: http://pittsburgh.pirates.mlb.com/news/article.jsp?ymd=20120330&content_id=27759040&vkey=news_pit&c_id=pit \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pittsburghlive.com.txt b/inc/3rdparty/site_config/standard/pittsburghlive.com.txt
new file mode 100644
index 00000000..b3e66166
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/pittsburghlive.com.txt
@@ -0,0 +1,7 @@
1title: substring-before(//title,'- Pittsburgh Tribune')
2author: substring-before(substring-after(//div[@class='byline'],'By '),',')
3date: substring-after(substring-after(//div[@class='byline'],','),',')
4body: //div[@id='storyBody']
5strip: //div[@class='morestories']
6dissolve: //p[@class='subheader']
7test_url: http://www.pittsburghlive.com/x/pittsburghtrib/sports/columnists/s_785654.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pittsburghmagazine.com.txt b/inc/3rdparty/site_config/standard/pittsburghmagazine.com.txt
new file mode 100644
index 00000000..dd715d8f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/pittsburghmagazine.com.txt
@@ -0,0 +1,8 @@
1title: //title
2author: substring-after(//div[@class='by-line'],'BY')
3
4body: //div[@id='article-body']
5
6strip: //div[@class='by-line']
7strip: //div[@id='article-body']/h1
8test_url: http://www.pittsburghmagazine.com/Pittsburgh-Magazine/May-2012/Verde-Lights-the-Night/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pittsburghpanthers.com.txt b/inc/3rdparty/site_config/standard/pittsburghpanthers.com.txt
new file mode 100644
index 00000000..6113b96e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/pittsburghpanthers.com.txt
@@ -0,0 +1,4 @@
1title: //span[@class='StoryHeadline']
2strip: //div[@class='fivevert']
3body: //div[@id='Content']
4test_url: http://www.pittsburghpanthers.com/sports/m-baskbl/recaps/031412aaa.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pittscriptblog.com.txt b/inc/3rdparty/site_config/standard/pittscriptblog.com.txt
new file mode 100644
index 00000000..3936310d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/pittscriptblog.com.txt
@@ -0,0 +1,8 @@
1title: //h1[@class='articletitle']
2author: substring-after(//span[@class='author'],'by')
3date: //span[@class='created']
4body: //div[@class='article']
5strip: //div[@class='headline']
6strip: //p[@class='articleinfo']
7#dissolve: //p[@class='subheader']
8test_url: http://www.pittscriptblog.com/2012-articles/march/2012-football-opponents-set-and-the-attendance-dilemma.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/playboy.com.txt b/inc/3rdparty/site_config/standard/playboy.com.txt
new file mode 100644
index 00000000..07b347a0
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/playboy.com.txt
@@ -0,0 +1,6 @@
1author: //article//*[@class="author"]
2date: //article//*[@class="publication-date"]
3body: //article
4strip: //article/header
5strip: //article/section
6test_url: http://www.playboy.com/playground/view/playboy-interview-jon-hamm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/plus.google.com.txt b/inc/3rdparty/site_config/standard/plus.google.com.txt
new file mode 100644
index 00000000..50a5dbf5
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/plus.google.com.txt
@@ -0,0 +1,17 @@
1body: //div[@id='contentPane']//div[@class='vg']
2body: //div[@id='contentPane']
3
4# Grab the author by finding the first profile pic, then backing up a node and getting the title of <a> tag which will be the author hopefully. Sorry can't test this due to parser errors, thanks google :(
5
6author: //div[@id='contentPane']//img[contains(@alt, 'profile photo')][1]/../@title
7
8
9strip: //*[@title="People who +1'd this"]/../..
10strip: //*[contains(@class, 'a-b-f-i-Hg-Uf')]
11strip: //*[@role='menu']
12strip: //img[contains(@alt, 'profile photo')]
13strip: //*[@class='a-f-i-Ad']
14
15tidy: no
16
17test_url: http://plus.google.com/u/0/117840649766034848455/posts/FddaP6jeCqp \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/plzkthxbai.com.txt b/inc/3rdparty/site_config/standard/plzkthxbai.com.txt
new file mode 100644
index 00000000..bb9be0a9
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/plzkthxbai.com.txt
@@ -0,0 +1,4 @@
1title: //h2[@class='jcw-pagetitle']
2date: //p[@class='postinfo']
3body: //div[@class='contenttext']
4test_url: http://plzkthxbai.com/blog/2011/06/28/1password-and-internet-security/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pogue.blogs.nytimes.com.txt b/inc/3rdparty/site_config/standard/pogue.blogs.nytimes.com.txt
new file mode 100644
index 00000000..880311d3
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/pogue.blogs.nytimes.com.txt
@@ -0,0 +1,4 @@
1body: //div[@id="content"]/div[1]
2
3title: //h1[@class="entry-title"]
4test_url: http://pogue.blogs.nytimes.com/2011/05/12/the-future-of-skype/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/politico.com.txt b/inc/3rdparty/site_config/standard/politico.com.txt
new file mode 100644
index 00000000..121fd5b9
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/politico.com.txt
@@ -0,0 +1,13 @@
1title://div[contains(@class, "article")]/h1
2body://div[contains(@class,"story-text")]
3
4# Why doesn't this work? next_page_link://ul[contains(@class,"pagination")]/li/a[@rel="next"]
5
6next_page_link://ul[contains(@class,"pagination")]/li[contains(@class, "current")]/following-sibling::node()/a
7date://meta[@name="publish_date"]/@content
8
9strip://div[contains(@class, "breadcrumbs")]
10strip://a[contains(@class, "hidden")]
11strip://div[contains(@class, "story-embed")]
12strip://div[contains(@class, "story-text")]//p/a[contains(text(), "Also on POLITICO:")]/..
13test_url: http://www.politico.com/news/stories/0712/78105.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/politifact.com.txt b/inc/3rdparty/site_config/standard/politifact.com.txt
new file mode 100644
index 00000000..fd247b5b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/politifact.com.txt
@@ -0,0 +1,4 @@
1body: //div[@id="content"]
2
3strip: //div[@class="pfcontentmid"]/div[position()>4]|//div[@class="pfad"]
4test_url: http://www.politifact.com/truth-o-meter/statements/2011/may/30/barbara-boxer/barbara-boxer-says-medicare-overhead-far-lower-pri/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/politiken.dk.txt b/inc/3rdparty/site_config/standard/politiken.dk.txt
new file mode 100644
index 00000000..8deecbca
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/politiken.dk.txt
@@ -0,0 +1,13 @@
1# 21/10-2011:
2# Added Author+Date
3# Remove fakta-boks if found
4# Deleted 'Læs også...' filter
5# - Change in markup caused it to strip too much.
6
7author://span[@class='autor-name']
8date:substring-after(//div[@class='art-created'], ' ')
9title: //h1[contains(@class, 'stor-type')]
10body: //div[@id='art-body']
11strip: //div[@class='art-fakta article-box']
12
13test_url: http://politiken.dk/kultur/boger/skonlitteratur_boger/ECE1426386/makabre-tegneserie-zombier-aeder-alt-levende/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/popularmechanics.com.txt b/inc/3rdparty/site_config/standard/popularmechanics.com.txt
new file mode 100644
index 00000000..85b7656b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/popularmechanics.com.txt
@@ -0,0 +1,8 @@
1next_page_link: //div[@id='longPagination']/a[@class='next']
2
3title: //div[@id='contentHeader']//h1
4
5body: //div[@id='articleBody']
6# this is so sad
7body: //div[@id='intelliTXT']
8test_url: http://www.popularmechanics.com/technology/aviation/crashes/what-really-happened-aboard-air-france-447-6611877 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/positioningmag.com.txt b/inc/3rdparty/site_config/standard/positioningmag.com.txt
new file mode 100644
index 00000000..21cd833c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/positioningmag.com.txt
@@ -0,0 +1,19 @@
1title: //div[@id="newsDetailTitle"]
2author: //span[@id="showAuthor"]
3date: //span[@id="showRefDate"]
4
5strip: //div[@id="breadcrumbs"]
6strip: //span[@id="PageTitle"]
7strip: //div[@id="newsDetailAuthorPublish"]
8
9strip: //div[@class="leadPix"]
10
11strip: //span[@id="ctl00_PageTitle"]
12strip: //div[@id="newsDetailTitle"]
13convert_double_br_tags:yes
14
15strip: //div[@id="newsDetailCredential"]
16strip: //div[@id="sidebar2"]
17strip: //div[@id="footer"]
18
19test_url: http://www.positioningmag.com/magazine/details.aspx?id=41083 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/post-gazette.com.txt b/inc/3rdparty/site_config/standard/post-gazette.com.txt
new file mode 100644
index 00000000..1ea945a0
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/post-gazette.com.txt
@@ -0,0 +1,26 @@
1title: //div[@class='story_headline']
2author: substring-before(substring-after(//div[@class='story_byline'],'By'),'/')
3date: //div[@class='story_lastupdate']
4body: //div[@id='story']
5strip: //div[@class='story_byline']
6strip: //div[@class='story_lastupdate']
7strip: //div[@class='story_headline']
8strip: //div[@id='abuse']
9strip: //h2
10strip: //div[@class='pagenumbers_wrap']
11strip: //ul[@class='pagenumbers']
12strip: //div[starts-with(., 'To report inappropriate comments')]
13
14strip_id_or_class: story_share
15strip_id_or_class: OUTBRAIN
16strip_id_or_class: story_box_right
17strip: //div[a[@href='http://www.post-gazette.com/pg/12062/1213990-42.stm']]
18strip: //ul[@id='pikame']/li[position()>1]
19
20prune: no
21tidy: no
22
23single_page_link: //a[contains(@href, '?p=0')]
24
25test_url: http://www.post-gazette.com/stories/sports/penguins/pens-crosby-expects-to-return-thursday-226648/
26test_url: http://www.post-gazette.com/stories/sports/pirates/pirates-fork-over-changes-for-fans-at-pnc-park-629789 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/posta.com.tr.txt b/inc/3rdparty/site_config/standard/posta.com.tr.txt
new file mode 100644
index 00000000..86cb5d0b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/posta.com.tr.txt
@@ -0,0 +1,15 @@
1title: //div[@id='divAdnetKeyword']/h1
2body: //div[@id='_middle_content_bottom']
3
4wrap_in(fieldset)://div[@id='_middle_content_bottom_child2']/img
5
6strip: //div[@id='_middle_content_bottom_child1']
7strip: //div[@id='_middle_content_bottom_child4']
8strip: //div[@class='cls']
9strip: //div[@class='iphoneBox']
10strip: //ul[@class='ilgiliHaber']
11strip: //div[@class='yorumlar']
12strip: //div[@class='kategoriler']
13strip: //div[@class='textSize']
14strip: //span[@class='tarih']
15test_url: http://www.posta.com.tr/yasam/teknoloji/HaberDetay/Fedailer_Istanbul_da.htm?ArticleID=101044 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/prb.org.txt b/inc/3rdparty/site_config/standard/prb.org.txt
new file mode 100644
index 00000000..7f7a5031
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/prb.org.txt
@@ -0,0 +1,8 @@
1title: //h1
2date: /html/head/meta[@name="date"]/@content
3body: //div[@id="featuredlinksbox"]
4strip: //div[@class="relatedbox"]
5strip: //h1
6strip: //br
7strip_image_src: "/images"
8test_url: http://www.prb.org/Journalists/Webcasts/2011/military-families.aspx \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/prog21.dadgum.com.txt b/inc/3rdparty/site_config/standard/prog21.dadgum.com.txt
new file mode 100644
index 00000000..906c27a0
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/prog21.dadgum.com.txt
@@ -0,0 +1,9 @@
1title: //h1
2body: //div[@id='left']
3strip: //h1
4convert_double_br_tags: yes
5strip_id_or_class: entry-footer
6strip: //h1[. = 'Previously']/following::*
7author: string('James Hague')
8date: //div[@class = 'entry-footer']/text()
9test_url: http://prog21.dadgum.com/105.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/prolost.com.txt b/inc/3rdparty/site_config/standard/prolost.com.txt
new file mode 100644
index 00000000..cef811d4
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/prolost.com.txt
@@ -0,0 +1,4 @@
1body: //div[@class='body']
2title: //h2[@class='title']
3date: //span[@class='posted-on']
4test_url: http://prolost.com/blog/2011/10/13/real-men-comp-with-film.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/propublica.org.txt b/inc/3rdparty/site_config/standard/propublica.org.txt
new file mode 100644
index 00000000..11e63bd0
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/propublica.org.txt
@@ -0,0 +1,11 @@
1title: //h1[@class="article-title"]
2author: //meta[@name="author"]/@content
3body: //div[@class="article-full"]
4strip_id_or_class: sidebar_inject
5strip_id_or_class: callout
6strip_id_or_class: content-inset
7strip_id_or_class: byline-block
8strip_id_or_class: photo-caption
9strip_id_or_class: foot-tools
10
11test_url: http://www.propublica.org/article/pardon-applicants-benefit-from-friends-in-high-places \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/prosa.dk.txt b/inc/3rdparty/site_config/standard/prosa.dk.txt
new file mode 100644
index 00000000..dedd33d3
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/prosa.dk.txt
@@ -0,0 +1,4 @@
1author: //p[@class='name']
2date: substring-before(//p[@class='date'], ' | ')
3body: //div[@class='news_single_item']
4test_url: http://www.prosa.dk/aktuelt/nyhed/artikel/internetaktivisten-uden-maske/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/prospectmagazine.co.uk.txt b/inc/3rdparty/site_config/standard/prospectmagazine.co.uk.txt
new file mode 100644
index 00000000..19059c4a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/prospectmagazine.co.uk.txt
@@ -0,0 +1,26 @@
1#basics
2author: (//div[contains(@class,'author')])[1]
3date: substring-before(//a[@class='issue'], '—')
4#body://div[@class = 'entry']
5# use this until move_into support is ready
6body: //div[@class = 'entry' or @class='standfirst' or @class='lead_image']
7
8#moves header image and tagline into body
9move_into(//div[@class='entry']/div)://div[@class = 'lead_image']
10move_into(//div[@class='entry']/div)://div[@class = 'standfirst']
11
12
13# moves author info to end of text
14move_into(//p[strong[string(.) = 'Follow Prospect on Twitter']])://div[@id='sidebar_content']/p/em
15
16prune: no
17
18# strips social links
19strip_id_or_class:login-status
20strip_id_or_class:shareinpost
21strip_id_or_class:content_subscribe
22strip_id_or_class:postinfo
23strip_id_or_class:postutils
24strip_id_or_class:comments
25strip://strong[string(.) = 'Follow Prospect on Twitter']
26test_url: http://www.prospectmagazine.co.uk/2011/07/postmodernism-is-dead-va-exhibition-age-of-authenticism/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/psychologytoday.com.txt b/inc/3rdparty/site_config/standard/psychologytoday.com.txt
new file mode 100644
index 00000000..3da3cea3
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/psychologytoday.com.txt
@@ -0,0 +1,9 @@
1title: //div[@class="page-title"]/h1
2author: //a[@title="View Bio"]
3date: substring-before(substring-after(//span[@class="submitted"], 'Published on '), ' by')
4strip://div[@class="page-title"]/h1
5strip://div[@class="article-abstract"]
6strip://div[@class="article-meta"]
7strip://div[@id="rightColumn"]
8strip://div[@id="inline-content-bottom-left"]
9test_url: http://www.psychologytoday.com/blog/how-happiness/201205/my-quibble-facebook \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/publications.parliament.uk.txt b/inc/3rdparty/site_config/standard/publications.parliament.uk.txt
new file mode 100644
index 00000000..fa099473
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/publications.parliament.uk.txt
@@ -0,0 +1,4 @@
1author: //meta[@name="Author"]/@content
2date: //meta[@name="Date"]/@content
3strip: //h5
4test_url: http://www.publications.parliament.uk/pa/ld201011/ldhansrd/text/111109-0003.htm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/purpleplanetmedia.com.txt b/inc/3rdparty/site_config/standard/purpleplanetmedia.com.txt
new file mode 100644
index 00000000..126f9e27
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/purpleplanetmedia.com.txt
@@ -0,0 +1,4 @@
1title: //div[@class='title']
2body: //div[@class='body']
3next_page_link: //div[@class='source']/text()[contains(., 'page')]/following-sibling::a
4test_url: http://purpleplanetmedia.com/eye/inte/ngaiman.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/quantumdiaries.org.txt b/inc/3rdparty/site_config/standard/quantumdiaries.org.txt
new file mode 100644
index 00000000..a366c1b3
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/quantumdiaries.org.txt
@@ -0,0 +1,14 @@
1title: //div[contains(@class, "hentry")]/h3
2
3author: //div[contains(@class, "hentry")]/h2[contains(@class, "author_bio")]
4
5date: substring-before(substring-after(normalize-space(//p[contains(@class, "postmetadata")]/small), "was posted on "), " and is filed under")
6
7body: //div[contains(@class, "entry")]
8
9strip_id_or_class: addtoany_share_save_container
10strip_id_or_class: postmetadata
11strip_id_or_class: author_bio
12strip_id_or_class: author_bio_2
13strip: //div[contains(@class, "hentry")]/h3
14test_url: http://www.quantumdiaries.org/2011/10/25/piling-up/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/queerty.com.txt b/inc/3rdparty/site_config/standard/queerty.com.txt
new file mode 100644
index 00000000..655f8b80
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/queerty.com.txt
@@ -0,0 +1,3 @@
1body: //div[@class='copy']
2title: //h1[@class='hed']
3test_url: http://www.queerty.com/rawhide-radicals-meet-five-heroes-from-the-leather-community-20120302/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/quepasa.cl.txt b/inc/3rdparty/site_config/standard/quepasa.cl.txt
new file mode 100644
index 00000000..fae4e6a3
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/quepasa.cl.txt
@@ -0,0 +1,6 @@
1title: //h1
2
3body: //div[@class="cuerpoArticulo"]
4
5
6test_url: http://www.quepasa.cl/magazine/articulo/print.html?id=5299 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/quora.com.txt b/inc/3rdparty/site_config/standard/quora.com.txt
new file mode 100644
index 00000000..3d34f2f8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/quora.com.txt
@@ -0,0 +1,17 @@
1tidy: no
2prune: no
3body: //div[contains(@class, 'main_col')]
4title: //h1
5
6strip_id_or_class: hidden
7strip_id_or_class: item_action_bar
8strip_id_or_class: answer_voters
9strip_id_or_class: question_topics
10strip_id_or_class: answer_header_text
11strip_id_or_class: editor_link
12strip_id_or_class: view_tag
13strip_id_or_class: include_details
14strip_id_or_class: sig_edit
15strip_id_or_class: profile_photo_img
16
17test_url: http://www.quora.com/What-everyday-habit-do-you-wish-you-had-developed-earlier-in-life \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/radar.oreilly.com.txt b/inc/3rdparty/site_config/standard/radar.oreilly.com.txt
new file mode 100644
index 00000000..99ab4bb1
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/radar.oreilly.com.txt
@@ -0,0 +1,3 @@
1date://span[@class='date']
2body://div[@class='entry-body']
3test_url: http://radar.oreilly.com/2012/01/genome-cloud-digital-humanities-hadoop-world-strata.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/radionz.co.nz.txt b/inc/3rdparty/site_config/standard/radionz.co.nz.txt
new file mode 100644
index 00000000..e2617dc5
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/radionz.co.nz.txt
@@ -0,0 +1,3 @@
1body: //div[@class='body']
2title: //div[@class='newsstory']/h2
3test_url: http://www.radionz.co.nz/news/stories/2010/07/18/12481029a86d \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/randsinrepose.com.txt b/inc/3rdparty/site_config/standard/randsinrepose.com.txt
new file mode 100644
index 00000000..f0c91c51
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/randsinrepose.com.txt
@@ -0,0 +1,11 @@
1title: //div[@id='center-col']/h4
2author: substring-before(//title,'In')
3date: substring-after(//div[@class='commenttext']/span,'#')
4body: //div[@id='center-col']
5strip: //div[@id='center-col']/h4
6strip: //div[@class='graytext']
7
8# Anthony Perez-Sanz 2012.3.14
9# Removed long gif from the end
10strip: //img[@src='http://www.randsinrepose.com/spreader.gif']
11test_url: http://www.randsinrepose.com/archives/2012/03/13/hacking_is_important.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/readability.com.txt b/inc/3rdparty/site_config/standard/readability.com.txt
new file mode 100644
index 00000000..80337291
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/readability.com.txt
@@ -0,0 +1,3 @@
1single_page_link: //link[@rel='canonical']/@href
2
3test_url: http://www.readability.com/read?url=http://feeds.gawker.com/~r/lifehacker/full/~3/jaxAjSay_Rw/add-a-rain-gutter-to-a-picnic-table-for-a-built+in-drink-cooler \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/readwriteweb.com.txt b/inc/3rdparty/site_config/standard/readwriteweb.com.txt
new file mode 100644
index 00000000..ff799aa0
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/readwriteweb.com.txt
@@ -0,0 +1,8 @@
1title: //h1[@class="titlelink"]
2date: //span[@class="timestamp"]/@data-published
3body: //div[@class="asset-content"]
4strip_id_or_class: related-entries
5strip_id_or_class: like-and-retweet
6
7author: //div[@id="submeta"]/a[1]
8test_url: http://www.readwriteweb.com/archives/why_facebook_terrifies_google.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/real.gr.txt b/inc/3rdparty/site_config/standard/real.gr.txt
new file mode 100644
index 00000000..fe5ab672
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/real.gr.txt
@@ -0,0 +1,3 @@
1body: //div[@id='_ctl12__ctl0_Article']
2prune: no
3autodetect_on_failure: no \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/recipe.com.txt b/inc/3rdparty/site_config/standard/recipe.com.txt
new file mode 100644
index 00000000..8c8f0e0c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/recipe.com.txt
@@ -0,0 +1,10 @@
1body: //div[@class='recipedetailsleft' or @id='recipePrepAndServe' or @id='recipeingredients']
2
3strip_id_or_class: location
4strip_id_or_class: savings
5strip_id_or_class: recipeDetailDescButton
6
7prune: no
8tidy: no
9
10test_url: http://www.recipe.com/avocado-basil-pasta/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/red-hot-girls.com.txt b/inc/3rdparty/site_config/standard/red-hot-girls.com.txt
new file mode 100644
index 00000000..3ae959b1
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/red-hot-girls.com.txt
@@ -0,0 +1,5 @@
1body: //div[@class='short-text' or starts-with(@id, 'news-id-')]
2prune: no
3tidy: no
4
5test_url: http://red-hot-girls.com/2011/06/10/the_red_hot_natalia_maria_53_pics.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/reddit.com.txt b/inc/3rdparty/site_config/standard/reddit.com.txt
new file mode 100644
index 00000000..58ca9ece
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/reddit.com.txt
@@ -0,0 +1,16 @@
1# This setup grabs the text from a Reddit self post. It ignores all comments etc.
2
3title: //p[@class="title"]/a/text()
4
5author: //p[@class="tagline"]/a
6
7# this doesn't work for some reason...?
8date: //p[@class="tagline"]//@datetime
9
10body: //div[@class="expando"]//div[@class="usertext-body"]
11
12strip_id_or_class: tagline
13strip_id_or_class: unvotable-message
14strip_id_or_class: buttons
15
16test_url: http://www.reddit.com/r/truegaming/comments/wfe7r/i_wrote_about_the_problems_i_honestly_feel_that/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/redmondpie.com.txt b/inc/3rdparty/site_config/standard/redmondpie.com.txt
new file mode 100644
index 00000000..12a96187
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/redmondpie.com.txt
@@ -0,0 +1,13 @@
1title: //div[@class='posthead']//h2
2body: //div[contains(@class, 'postcontent') or @class='posthead']
3author: //div[@class='posthead']//a[@rel='author']
4
5strip: //div[@class='posthead']//h2
6replace_string(>Advertisements</div>): ></div>
7replace_string(<p>You can follow us on): <p style="display:none;">
8strip_id_or_class: likeThisPost
9
10prune: no
11tidy: no
12
13test_url: http://www.redmondpie.com/how-to-play-music-directly-from-home-screen-folders-on-iphone/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/redtape.msnbc.msn.com.txt b/inc/3rdparty/site_config/standard/redtape.msnbc.msn.com.txt
new file mode 100644
index 00000000..4f195a06
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/redtape.msnbc.msn.com.txt
@@ -0,0 +1,20 @@
1# Think there might be something up with your parser that it strips out 'print' from the title :)
2
3title: //meta[@name='title']/@content
4author: //meta[@name='author']/@content
5date: //meta[@name='date']/@content
6
7body: //div[@class='articleText']
8
9strip: //div[contains(@class, 'day')]
10strip: //div[contains(@class, 'month')]
11strip: //div[contains(@class, 'year')]
12strip: //div[contains(@class, 'time')]
13strip: //h1[@class='gl_headline']
14strip: //div[@class='byline']
15strip: //div[@id='left_ear']
16strip: //div[@id='right_ear']
17strip: //div[contains(@class, 'PopularPosts')]
18strip: //div[@class='discuss_page_break']
19strip: //div[contains(@class, 'p-content_TagList')]
20test_url: http://redtape.msnbc.msn.com/_news/2011/09/28/8020661-sprint-raises-fee-but-wont-free-users-from-two-year-contracts?preview=true \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/reflets.info.txt b/inc/3rdparty/site_config/standard/reflets.info.txt
new file mode 100644
index 00000000..4a9fab67
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/reflets.info.txt
@@ -0,0 +1,5 @@
1body://div[@class='storycontent']
2date://div[@class='date']
3strip://li[@class='sharing_label']
4strip://a[@class='FlattrButton']
5test_url: http://reflets.info/orange-nokia-siemens-deep-packet-inspection/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/renenekuda.cz.txt b/inc/3rdparty/site_config/standard/renenekuda.cz.txt
new file mode 100644
index 00000000..0b3dee1d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/renenekuda.cz.txt
@@ -0,0 +1,3 @@
1title: //*[@class='entry-title']
2body: //div[@class='entry-content']
3test_url: http://www.renenekuda.cz/recept-na-produktivitu/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/retrieverweekly.com.txt b/inc/3rdparty/site_config/standard/retrieverweekly.com.txt
new file mode 100644
index 00000000..1264ee3f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/retrieverweekly.com.txt
@@ -0,0 +1,6 @@
1single_page_link://a[contains(@href, 'print')]
2
3# Grab metadata from the "printer-friendly" page, after specifying single_page_link
4title://h2
5date://cite
6test_url: http://www.retrieverweekly.com/?cmd=displaystory&story_id=7548&format=html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/reuters.com.txt b/inc/3rdparty/site_config/standard/reuters.com.txt
new file mode 100644
index 00000000..c5c94a4f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/reuters.com.txt
@@ -0,0 +1,10 @@
1title: //h1[@class='headline3']
2author: substring-after(//p[@class="byline"], 'By ')
3date: //meta[@name="REVISION_DATE"]/@content
4body: //div[@id='articleImage' or @id='frame_fd1fade'] | //span[@id='articleText'] | //div[@class='pageNavigation']
5strip: //li[@class='next']
6strip: //span[@class='articleLocation']
7prune: no
8tidy: no
9
10test_url: http://www.reuters.com/article/2011/04/08/us-ivorycoast-killings-idUSTRE73732A20110408 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/revistapiaui.estadao.com.br.txt b/inc/3rdparty/site_config/standard/revistapiaui.estadao.com.br.txt
new file mode 100644
index 00000000..dbe42932
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/revistapiaui.estadao.com.br.txt
@@ -0,0 +1,10 @@
1title: //div[@class="article_header"]/h3
2author: //div[@class="autor"]/p/*
3date: substring-after(substring-after(//div[@class="flt-left"],"> "), "> ")
4
5move_into(//div[@class="new_article"]): //div[@class="img_article"]/img
6
7body: //div[@class="article_content"]
8convert_double_br_tags: yes
9
10test_url: http://revistapiaui.estadao.com.br/edicao-68/questoes-latino-americanas/filhos-da-guerra-suja \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/richardmuscat.wordpress.com.txt b/inc/3rdparty/site_config/standard/richardmuscat.wordpress.com.txt
new file mode 100644
index 00000000..904a11dd
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/richardmuscat.wordpress.com.txt
@@ -0,0 +1,5 @@
1body: //div[@id="post"]
2strip: //div[@id="author-description"]
3date: //span[@class="entry-date"]
4author: //span[@class="author vcard"]
5test_url: http://richardmuscat.wordpress.com/2011/06/20/the-price-of-free/?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+TheBrooksReview+%28The+Brooks+Review%29 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ritemail.blogspot.com.txt b/inc/3rdparty/site_config/standard/ritemail.blogspot.com.txt
new file mode 100644
index 00000000..82cfaf27
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/ritemail.blogspot.com.txt
@@ -0,0 +1,5 @@
1body: //div[@class='post-body entry-content']
2strip: //div[@id='lws_0']
3prune: no
4
5test_url: http://ritemail.blogspot.com/2011/06/hayden-panettiere-candids-in-los.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/rockpapershotgun.com.txt b/inc/3rdparty/site_config/standard/rockpapershotgun.com.txt
new file mode 100644
index 00000000..3035527c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/rockpapershotgun.com.txt
@@ -0,0 +1,8 @@
1title: //h2
2
3strip: //div[ contains(@class, 'respond') ] | //h2 | //h1
4
5date: substring-after(//p[@class='info'], ' on ')
6
7author: //p[@class='info']//a
8test_url: http://www.rockpapershotgun.com/2010/07/29/rps-half-verdict-starcraft-2/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/rodrigo.sharpcube.com.txt b/inc/3rdparty/site_config/standard/rodrigo.sharpcube.com.txt
new file mode 100644
index 00000000..abe70351
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/rodrigo.sharpcube.com.txt
@@ -0,0 +1,7 @@
1author: //article/header/span[@class='author']
2title://article/header/h1
3body: //article
4strip: //article/header
5strip: //article/p[@class='metadata']
6footnotes: yes
7test_url: http://rodrigo.sharpcube.com/2010/06/20/using-and-sharing-a-vpn-connection-on-your-mac/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/rogerebert.com.txt b/inc/3rdparty/site_config/standard/rogerebert.com.txt
new file mode 100644
index 00000000..26792330
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/rogerebert.com.txt
@@ -0,0 +1,8 @@
1title: substring-before(//title,':')
2author: substring-after(substring-before(//div[@class='text']/b,'/'),'BY')
3
4body: //div[@class='text']
5
6strip: //a[contains(@href,'printart')]
7strip_id_or_class: enlarge_photo
8test_url: http://rogerebert.com/apps/pbcs.dll/article?AID=/20120411/REVIEWS/120419998/1005/GLOSSARY \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/rolfinjapan.nl.txt b/inc/3rdparty/site_config/standard/rolfinjapan.nl.txt
new file mode 100644
index 00000000..d618c23f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/rolfinjapan.nl.txt
@@ -0,0 +1,6 @@
1body: //div[contains(@class, 'inhoud')]
2date: //span[@class ='published']
3author: //span[@class ='author']
4strip: //div[@class = 'grid_2']
5strip: //div[@class = 'block-citation-text']
6test_url: http://www.rolfinjapan.nl/2011/06/duizend-kraanvogels/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/rollingstone.com.txt b/inc/3rdparty/site_config/standard/rollingstone.com.txt
new file mode 100644
index 00000000..9a10a69e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/rollingstone.com.txt
@@ -0,0 +1,10 @@
1title: //h1
2author: //h3[@class="byline"]/strong
3
4body: //div[@id='main']/h2 | //div[@id='main']//div[@class='body']
5
6prune: no
7
8single_page_link: //a[@class='print-page']
9
10test_url: http://www.rollingstone.com/politics/news/the-plastic-bag-wars-20110725 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/rottentomatoes.com.txt b/inc/3rdparty/site_config/standard/rottentomatoes.com.txt
new file mode 100644
index 00000000..b5b29fe4
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/rottentomatoes.com.txt
@@ -0,0 +1,11 @@
1body: //div[@class='movie_content_area']
2strip_id_or_class: tomatometer_bar_help
3strip_id_or_class: critic-links
4strip_id_or_class: top-critics-numbers
5strip_id_or_class: fan_side
6strip_id_or_class: fblike
7strip_id_or_class: rating_widget
8strip_id_or_class: friend_reviews
9prune: no
10
11test_url: http://www.rottentomatoes.com/m/thor/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/roughtype.com.txt b/inc/3rdparty/site_config/standard/roughtype.com.txt
new file mode 100644
index 00000000..f2f00392
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/roughtype.com.txt
@@ -0,0 +1,5 @@
1body: //div[@class='content']
2strip: //p[@class='postmeta']/following::*
3strip: //p[@class='postmeta']
4strip: //p[@align='left']
5test_url: http://www.roughtype.com/archives/2012/01/power_to_the_da.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/roy.gbiv.com.txt b/inc/3rdparty/site_config/standard/roy.gbiv.com.txt
new file mode 100644
index 00000000..6ff03de8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/roy.gbiv.com.txt
@@ -0,0 +1,2 @@
1strip_comments: no
2test_url: http://roy.gbiv.com/untangled/2008/rest-apis-must-be-hypertext-driven \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/rpgsite.net.txt b/inc/3rdparty/site_config/standard/rpgsite.net.txt
new file mode 100644
index 00000000..e7f29bbe
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/rpgsite.net.txt
@@ -0,0 +1,4 @@
1body: //div[@id='news-text']
2prune: no
3test_url: http://www.rpgsite.net/news/1964-tetsuya-nomura-says-hell-soon-show-the-future-of-final-fantasy
4test_url: http://www.rpgsite.net/news/1965-new-atelier-totori-plus-screens-and-artwork \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/rubysfera.pl.txt b/inc/3rdparty/site_config/standard/rubysfera.pl.txt
new file mode 100644
index 00000000..d9df7684
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/rubysfera.pl.txt
@@ -0,0 +1,9 @@
1author: //div[contains(@class, 'author_text')]/h4/text()
2date: //li[@class='date']
3
4# stripping excessive tags
5strip: //div[contains(@class, 'entry_meta')]
6strip: //div[contains(@class, 'single_meta')]
7strip: //br[contains(@class, 'clear')]
8strip: //h3[contains(., 'Komentarz')]
9test_url: http://rubysfera.pl/2011/09/10-porad-o-rvm/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ruhlman.com.txt b/inc/3rdparty/site_config/standard/ruhlman.com.txt
new file mode 100644
index 00000000..7a21c4af
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/ruhlman.com.txt
@@ -0,0 +1,6 @@
1title: //h1[@class='entry-title']
2author: //span[@class='author vcard']
3date: //abbr[@class='published']
4body: //div[@class='entry-content']
5
6test_url: http://ruhlman.com/2009/05/cookbooks-that-teach/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ruttloff.org.txt b/inc/3rdparty/site_config/standard/ruttloff.org.txt
new file mode 100644
index 00000000..c036dcf8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/ruttloff.org.txt
@@ -0,0 +1,3 @@
1author: //a[@class='author']
2tidy: no
3test_url: http://ruttloff.org/2012/06/13/intervention \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/salon.com.txt b/inc/3rdparty/site_config/standard/salon.com.txt
new file mode 100644
index 00000000..04f8afd5
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/salon.com.txt
@@ -0,0 +1,11 @@
1title: //meta[@property='og:title']/@content
2author: (//span[@class="byline"]/a)[1]
3date: //span[contains(@class, "toLocalTime")]
4body: (//div[contains(@class, "articleInner")]//img[contains(@src, 'media.salon.com') and contains(@src, '460x')])[1] | //div[contains(@class, "articleContent") or contains(@class, "writerMeta")]
5
6prune: no
7
8# deal with singleton links
9single_page_link: (//h1/a[contains(@href, '/singleton')])[1]
10
11test_url: http://www.salon.com/2011/10/25/occupying_the_rust_belt/singleton/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/salzburg.com.txt b/inc/3rdparty/site_config/standard/salzburg.com.txt
new file mode 100644
index 00000000..31067481
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/salzburg.com.txt
@@ -0,0 +1,6 @@
1body: //p[@class='teaser1 darkgrey myriad']
2move_into(//p[@class='teaser1 darkgrey myriad']): //div[@class='artikel clear']
3strip: //div[@class='hidden']
4strip: //div[@id='article_related_source']
5
6test_url: http://www.salzburg.com/nachrichten/oesterreich/politik/sn/artikel/deutliche-nachbesserungen-bei-lehrerdienstrecht-19469/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/saveyourself.ca.txt b/inc/3rdparty/site_config/standard/saveyourself.ca.txt
new file mode 100644
index 00000000..354f5911
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/saveyourself.ca.txt
@@ -0,0 +1,25 @@
1title://h1
2
3# my section divs seem to interfere with the Instapaper parser, so I ditch 'em
4dissolve://div[contains(@class, 'section')]
5
6#these don't seem to be necessary, but just in case
7strip_id_or_class:'masthead'
8strip_id_or_class:'footer'
9
10#again, Instapaper seems to understand where my content is, but just in case
11body://div[@id='content']
12
13# in general, I want the Instapaper view to look like my print CSS, so I remove things specified for the screen or non-printing
14strip_id_or_class:'screen-only'
15strip_id_or_class:'no-print'
16
17#other misc removals and simplifications
18strip_id_or_class:'popup'
19strip_id_or_class:'ZoomSpin'
20
21#I have a lot of content in sidebars and "meta" asides that can work inline just fine, but has to be distinguished somehow with some minimal formatting, so I put them in blockquotes
22wrap_in(blockquote)://div[contains(@class, 'sidebar')]
23wrap_in(blockquote)://div[contains(@class, 'meta')]
24wrap_in(blockquote)://p[contains(@class, 'meta')]
25test_url: http://saveyourself.ca/tutorials/low-back-pain.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sbnation.com.txt b/inc/3rdparty/site_config/standard/sbnation.com.txt
new file mode 100644
index 00000000..c213843c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/sbnation.com.txt
@@ -0,0 +1,28 @@
1title: //h1[@id='stream_title']
2
3# Author and date don't work
4author: //div[@class='byline']
5date: //div[@class='date-stamp']
6
7body: //div[@class='node-article']
8
9strip_id_or_class: fb-like-box
10strip_id_or_class: stream-fb-like
11strip_id_or_class: social-meta
12strip_id_or_class: social-spoken
13strip_id_or_class: twitter-share-button
14strip_id_or_class: twitter-follow-button
15strip_id_or_class: spinner_node_list
16strip_id_or_class: node-sort-link
17strip_id_or_class: stream_title
18strip_id_or_class: stream_summary
19strip_id_or_class: update-count-container
20strip_id_or_class: major-updates
21strip_id_or_class: newsletter-slide
22strip_id_or_class: author-mini-profile
23strip_id_or_class: byline
24strip_id_or_class: header
25strip_id_or_class: footer
26
27# Works, but "no text" errors on: http://www.sbnation.com/nba/2012/3/9/2856780/nba-scores-dwight-howard-bulls-magic-mavs-suns
28test_url: http://www.sbnation.com/nba/2012/3/13/2867226/dwight-howard-trade-rumors-2012-faq-orlando-magic \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/schneier.com.txt b/inc/3rdparty/site_config/standard/schneier.com.txt
new file mode 100644
index 00000000..67181b65
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/schneier.com.txt
@@ -0,0 +1,25 @@
1author: //p[@class='mastname']
2
3body: //div[@class='indivbody']
4date: //div[@class='indivbody']/h2[1]
5
6# Remove blog title. Specify first occurrence in case h1 is used in article
7strip: //div[@class='indivbody']/h1[1]
8
9# Remove blog description (the first p element)
10strip: //div[@class='indivbody']/p[1]
11
12# Remove navigation (second p element)
13strip: //div[@class='indivbody']/p[2]
14
15# Remove duplicate of article title. Specify first occurrence in case h3 is used in article
16strip: //div[@class='indivbody']/h3[1]
17
18# Remove publishing date, it's extracted by rule above
19strip: //div[@class='indivbody']/h2[1]
20
21# Remove duplicate of date at end, and newsletter signup
22strip: //p[@class='posted']
23
24# Leave date at top
25test_url: http://www.schneier.com/blog/archives/2010/12/security_in_202.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/science.orf.at.txt b/inc/3rdparty/site_config/standard/science.orf.at.txt
new file mode 100644
index 00000000..89ebfe08
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/science.orf.at.txt
@@ -0,0 +1,11 @@
1body: //div[@class="storybox"]
2title: //div[@class="storybox"]//h1
3strip: //p[@class='metaline']
4date: substring-after(//*[@class='time'],'Erstellt am')
5strip: //div[@class='fact']
6strip: //p[@class='backlink']
7strip: //div[@class='mailto']
8strip: //div[@id='forumDisclaimer']
9strip: //div[@class='forum']
10
11test_url: http://science.orf.at/stories/1700900/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/scienceblogs.de.txt b/inc/3rdparty/site_config/standard/scienceblogs.de.txt
new file mode 100644
index 00000000..08c16842
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/scienceblogs.de.txt
@@ -0,0 +1,12 @@
1single_page_link: //div[@class='c2c1']/div[@class='toptheme further line']//ul//li/a
2
3author: //div[@class='details clear']//a[@class='hi']
4body: //div[@class='title']
5strip: //p[@class='entrypagination']
6strip: //p[@class='details_top']
7date: //p[@class='details_top']
8title: //div[@class='title']/h1
9strip: //p[@class='details']
10strip: //p[@class='details_bottom']
11
12test_url: http://www.scienceblogs.de/astrodicticum-simplex/2011/10/weltuntergang-reloaded-das-jungste-gericht-findet-am-21-oktober-statt.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/scienceticker.info.txt b/inc/3rdparty/site_config/standard/scienceticker.info.txt
new file mode 100644
index 00000000..75a52824
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/scienceticker.info.txt
@@ -0,0 +1,11 @@
1body: //div[@class='post']
2title: //h1[@id='singlePageTitle']
3date: substring-before(//small,'• Rubrik')
4
5strip: //div[@class='post-ratings']
6strip: //div[@class='post-ratings-loading']
7strip: //a[@title='Empfehlen Sie den Text weiter!']
8strip: //a[@title='Drucken']
9strip: //div[@class='share']
10
11test_url: http://www.scienceticker.info/2011/11/24/forscher-finden-gedachtnismolekul/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/scientificamerican.com.txt b/inc/3rdparty/site_config/standard/scientificamerican.com.txt
new file mode 100644
index 00000000..d510407d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/scientificamerican.com.txt
@@ -0,0 +1,25 @@
1#
2# After site revisions at SciAm, this configuration does
3# not work, especially for multi-page articles. For
4# every article there is now a "Print" link which
5# is far more reliable. So this configuration should be
6# removed or disabled.
7# 2/3/13
8#
9
10# meta data
11title://h1[@class = 'articleTitle']
12author:substring-after(//span[@class = 'byline'],'By ')
13date:substring-before(//span[@class = 'datestamp'],'|')
14
15#body content
16body://div[@id = 'articleContent']
17#next_page_link://li[@id = 'flairPagination']/a[last()]
18
19single_page_link: //a[contains(@href, 'print=true')]
20
21#cleanup
22strip://div[@class = 'fsgBooks']
23
24test_url: http://www.scientificamerican.com/article.cfm?id=do-brain-scans-comatose-patients-reveal-conscious-state
25test_url: http://www.scientificamerican.com/article.cfm?id=solar-wind-transforms-venus-into-shape-of-comet \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/scotusblog.com.txt b/inc/3rdparty/site_config/standard/scotusblog.com.txt
new file mode 100644
index 00000000..f29e37f9
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/scotusblog.com.txt
@@ -0,0 +1,8 @@
1title: //title
2author: //p[@id='author-name-role']/a
3date: substring-after(//p[@class='time'],'Posted')
4body: //div[@id='main']
5strip: //div[@id='author-info']
6strip: //div[@id='author-links']
7strip: //h1
8test_url: http://www.scotusblog.com/2012/04/shaken-baby-case-an-update/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/scraplab.net.txt b/inc/3rdparty/site_config/standard/scraplab.net.txt
new file mode 100644
index 00000000..84be27f9
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/scraplab.net.txt
@@ -0,0 +1,3 @@
1title: //h2
2body: //div[@class='body']
3test_url: http://scraplab.net/2010/10/26/please-keep-your-belongings-with-you-at-all-times/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/scripting.com.txt b/inc/3rdparty/site_config/standard/scripting.com.txt
new file mode 100644
index 00000000..d8b969b1
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/scripting.com.txt
@@ -0,0 +1,8 @@
1strip: //a[starts-with(@href, '#')]
2strip: //*[@class='storyByline']
3body: //*[@class='storyPageText']/..
4author: string('Dave Winer')
5date: substring-before(substring-after(//*[@class='storyByline'], 'on'), 'at')
6title: //h1
7footnotes: no
8test_url: http://scripting.com/stories/2011/07/08/yeahImStillYawning.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sct.temple.edu.txt b/inc/3rdparty/site_config/standard/sct.temple.edu.txt
new file mode 100644
index 00000000..9927675b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/sct.temple.edu.txt
@@ -0,0 +1,5 @@
1body: //*[@class="entry-content"]
2title: //h1[@class="entry-title"]
3date: //*[@class="entry-date"]
4author: //*[@class="author vcard"]
5test_url: http://sct.temple.edu/blogs/news-events/2011/05/congratulations-sct-class-of-2011/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/searchengineland.com.txt b/inc/3rdparty/site_config/standard/searchengineland.com.txt
new file mode 100644
index 00000000..f176d7c7
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/searchengineland.com.txt
@@ -0,0 +1,20 @@
1body: //div[@class="storyBox"]
2title: //div[@class="storyBox"]/h1
3author: //a[@rel="author"]
4date: substring-before(//span[@class="dateline"], 'by')
5
6#Removes related content but cleans up article text
7strip: //h1
8strip: //p[@class="homeStory tdmSideInfo"]
9strip: //div[@id="bylineShare"]
10strip: //script
11strip: //hr
12
13strip_id_or_class: homeStory
14strip_id_or_class: authorpic
15strip_id_or_class: insideComments
16strip_id_or_class: authorbio
17strip_id_or_class: gpt-ad-sel-cube
18strip_id_or_class: smxTextAd
19
20test_url: http://searchengineland.com/googles-jaw-dropping-sponsored-post-campaign-for-chrome-106348 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/seattletransitblog.com.txt b/inc/3rdparty/site_config/standard/seattletransitblog.com.txt
new file mode 100644
index 00000000..5129c069
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/seattletransitblog.com.txt
@@ -0,0 +1,5 @@
1title: //h3[@class="storytitle"]
2date: //div[@class='meta']
3body: //div[@class='storycontent']
4
5test_url: http://seattletransitblog.com/2012/06/19/times-st-louis-interested-in-buying-waterfront-streetcars/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sebbo.net.txt b/inc/3rdparty/site_config/standard/sebbo.net.txt
new file mode 100644
index 00000000..3e800a16
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/sebbo.net.txt
@@ -0,0 +1,4 @@
1title: substring-before(//title, '«')
2body: //div[@class = 'entry']
3strip_id_or_class: 'postmetabox'
4test_url: http://sebbo.net/2010/12/akkus/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/seriouseats.com.txt b/inc/3rdparty/site_config/standard/seriouseats.com.txt
new file mode 100644
index 00000000..d7b4788c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/seriouseats.com.txt
@@ -0,0 +1,15 @@
1body: //div[@id='content']
2
3# clean up recipe pages
4strip: //h2[@class='fn'] | //h2[@class='double-lined'] | //h3 | //div[@id='threeColumn2'] | //div[@id='threeColumn3']
5
6#recipe pages
7strip_id_or_class: "recipe-feedback"
8strip_id_or_class: "comments"
9strip_id_or_class: "procedure-number"
10strip_id_or_class: "more-with-author"
11
12#slice
13strip_id_or_class: "inner"
14
15test_url: http://www.seriouseats.com/recipes/2010/09/peking-duck-mandarin-pancakes-plum-sauce-recipe.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sf.curbed.com.txt b/inc/3rdparty/site_config/standard/sf.curbed.com.txt
new file mode 100644
index 00000000..9f443d5c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/sf.curbed.com.txt
@@ -0,0 +1,7 @@
1title: //h1[@class='post-title']
2author: //div[@class='post-byline']/a
3date: substring-before(//div[@class='post-byline'], ', by')
4
5body: //div[@class='post-body']
6dissolve: //noscript
7test_url: http://sf.curbed.com/archives/2011/10/17/lower_haight_loft_would_really_really_really_like_a_buyer.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sf.eater.com.txt b/inc/3rdparty/site_config/standard/sf.eater.com.txt
new file mode 100644
index 00000000..fca656d2
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/sf.eater.com.txt
@@ -0,0 +1,7 @@
1title: //h1[@class="post-title"]
2author: //div[@class="post-byline"]/a
3date: substring-before(//div[@class='post-byline'], ', by')
4
5body: //div[@class='post-body']
6strip_id_or_class: post-kicker
7test_url: http://sf.eater.com/archives/2012/05/22/nate_pollack_talks_about_the_american_grilled_cheese_kitchen_moving_into_the_mission.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sfgate.com.txt b/inc/3rdparty/site_config/standard/sfgate.com.txt
new file mode 100644
index 00000000..5f73fbcb
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/sfgate.com.txt
@@ -0,0 +1,12 @@
1title: /html/head/title
2
3body: //div[@id = 'articlecontent']/div[contains(@class, 'bodytext')]
4author: //div[@class = 'articleheadings']/p[contains(@class,'author')]/span[@class = 'fn']
5date: //div[@class = 'articleheadings']/span[@class = 'updated']
6strip: //div[div[contains(@class, 'imgbox')]]
7
8body: //div[@class = 'blogitem']
9author: //p[@class="credit"]/span[@class="author"]/a[position() = 1]
10date: //span[@class = 'pubdate']
11
12test_url: http://www.sfgate.com/columnists/garchik/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sfweekly.com.txt b/inc/3rdparty/site_config/standard/sfweekly.com.txt
new file mode 100644
index 00000000..a11fe4cb
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/sfweekly.com.txt
@@ -0,0 +1,3 @@
1body: //div[contains(@class, 'content_body')]
2strip_id_or_class: det_rel
3test_url: http://www.sfweekly.com/2012-03-14/news/cia-lsd-wayne-ritchie-george-h-white-mk-ultra/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/shabayek.com.txt b/inc/3rdparty/site_config/standard/shabayek.com.txt
new file mode 100644
index 00000000..b175720e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/shabayek.com.txt
@@ -0,0 +1,3 @@
1date: //span[@class='date']
2body: //div[@class='post_content']
3test_url: http://www.shabayek.com/blog/2011/10/16/%D8%AF%D8%B1%D9%88%D8%B3-%D9%85%D9%86-%D9%82%D8%B5%D8%A9-%D8%AA%D8%A3%D8%B3%D9%8A%D8%B3-%D8%AA%D9%88%D9%8A%D8%AA%D8%B1-%E2%80%93%D8%AC3/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/shawnblanc.net.txt b/inc/3rdparty/site_config/standard/shawnblanc.net.txt
new file mode 100644
index 00000000..b536fc3a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/shawnblanc.net.txt
@@ -0,0 +1,11 @@
1title://*[@class='primary']/h1
2date: //*[@class='articledate']
3author: substring-before(substring-after(//*[@class='block first']/p,'2012 '),'.')
4body: //div[@class='primary']
5footnotes: yes
6strip: //*[@class='primary']/h1
7strip: //*[@class='articledate']
8strip: //*[@class='detailsarticle']
9strip: //*[@class='endnav']
10strip: //*[@class='endmeta']
11test_url: http://shawnblanc.net/2011/11/kindle-touch-review/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/shifteleven.com.txt b/inc/3rdparty/site_config/standard/shifteleven.com.txt
new file mode 100644
index 00000000..68059ae1
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/shifteleven.com.txt
@@ -0,0 +1,6 @@
1body: //div[ @class='entry-content' ]
2
3strip: //div[ contains(@class, 'sharing') ]
4
5date: //div[ @class='entry-meta' ]/a
6test_url: http://shifteleven.com/articles/2008/05/10/issue-tracking-git-ticgit \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/siasat.pk.txt b/inc/3rdparty/site_config/standard/siasat.pk.txt
new file mode 100644
index 00000000..a82ce69c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/siasat.pk.txt
@@ -0,0 +1,11 @@
1#body: (//div[@class='ftr-yt-vid'])[1]
2body: (//blockquote[contains(@class, 'postcontent')])[1]
3body: (//div[starts-with(@id, 'post_message')])[1]
4
5prune: no
6tidy: no
7
8#replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player"
9#replace_string(</iframe>): </iframe>&nbsp;</div>
10
11test_url: http://www.siasat.pk/forum/showthread.php?107668-Policy-Matters-17th-March-2012-Dr-Shahid-Masood-Gen-Hameed-gul-amp-Fawad-Chudhary-Pak-US-Relationship&p=787733 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/simonwillison.net.txt b/inc/3rdparty/site_config/standard/simonwillison.net.txt
new file mode 100644
index 00000000..e3ad6e41
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/simonwillison.net.txt
@@ -0,0 +1,5 @@
1body: //div[contains(@class, "entry")]
2
3date: //div[contains(@class, "entryFooter")]/a
4
5test_url: http://simonwillison.net/2009/Oct/22/redis/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/singaporeanstocksinvestor.blogspot.com.txt b/inc/3rdparty/site_config/standard/singaporeanstocksinvestor.blogspot.com.txt
new file mode 100644
index 00000000..a1b6b673
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/singaporeanstocksinvestor.blogspot.com.txt
@@ -0,0 +1,5 @@
1body: //div[@class='post-body']
2strip: //div[@id='lws_0']
3prune: no
4
5test_url: http://singaporeanstocksinvestor.blogspot.com/2011/04/aims-amp-capital-industrial-reit.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/singularityhub.com.txt b/inc/3rdparty/site_config/standard/singularityhub.com.txt
new file mode 100644
index 00000000..3999d4d4
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/singularityhub.com.txt
@@ -0,0 +1,2 @@
1body://div[contains(@class,"entry-content")]
2test_url: http://singularityhub.com/2011/05/21/google-invades-your-home-android-phones-control-your-appliances-and-accessories-video/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sintagoulis.gr.txt b/inc/3rdparty/site_config/standard/sintagoulis.gr.txt
new file mode 100644
index 00000000..822bbeb0
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/sintagoulis.gr.txt
@@ -0,0 +1,6 @@
1title: //div[@class='headline']//h2
2body: //div[contains(@class, 'storycontent')]
3
4prune: no
5
6test_url: http://sintagoulis.gr/sokolatenia/sokolatenia-mpompa-me-amaretti- \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/slashfilm.com.txt b/inc/3rdparty/site_config/standard/slashfilm.com.txt
new file mode 100644
index 00000000..78d38ecf
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/slashfilm.com.txt
@@ -0,0 +1,15 @@
1title: substring-before(//title,'| /Film')
2date: substring-before(substring-after(//p[@class='post-info'],'Posted on '),'by')
3strip: //div[@class='pm-left']
4strip: //div[@class='pm-right']
5strip: //h2/span
6next_page_link: //h2/strong/a
7strip: //h2/strong/a
8strip: //p[contains(text(),'we have to split this post over')]
9strip: //p[@class='post-info']
10strip: //h1/a
11strip: //img[contains(@src,'siteimages/authors')]
12strip: //div[@id='header']
13strip: //div[@class='topad-right']
14strip: //strong[contains(text(),'Cool Posts From Around the Web:')]
15test_url: http://www.slashfilm.com/superhero-bits-206/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/slate.com.txt b/inc/3rdparty/site_config/standard/slate.com.txt
new file mode 100644
index 00000000..e92f6a06
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/slate.com.txt
@@ -0,0 +1,19 @@
1title: //h1[@class="sl-art-head-dek"]
2body: //article//div[@class='sl-art-body']/div[contains(@class, 'body')]
3strip: //div[@class="department_kicker"]
4strip: //div[@id="insider_ad_wrapper" or @id="insider_ad_inner"]
5strip: //div[@id="bottom_sponsored_links"]
6strip: //div[@class="sl-art-ad-midflex"]
7#strip: //dl
8#strip: //p[em/a[contains(@href, 'facebook.com')]]
9prune: no
10
11author: //div[@id='author_bio']//a[contains(@href, '/author/')]
12author: //a[contains(@href, '/authors.')]
13
14date: substring-before(substring-after(//span[@class='sl-art-byline'], 'Posted '), ', at ')
15
16single_page_link: //a[@class='sl-art-sinpage']
17
18test_url: http://www.slate.com/id/2274583/pagenum/all/
19test_url: http://www.slate.com/id/2293116/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/slice.seriouseats.com.txt b/inc/3rdparty/site_config/standard/slice.seriouseats.com.txt
new file mode 100644
index 00000000..1a902b96
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/slice.seriouseats.com.txt
@@ -0,0 +1,15 @@
1body: //div[@id='content']
2
3# clean up recipe pages
4strip: //h2[@class='fn'] | //h2[@class='double-lined'] | //h3 | //div[@id='threeColumn2'] | //div[@id='threeColumn3']
5
6#recipe pages
7strip_id_or_class: "recipe-feedback"
8strip_id_or_class: "comments"
9strip_id_or_class: "procedure-number"
10strip_id_or_class: "more-with-author"
11
12#slice
13strip_id_or_class: "inner"
14
15test_url: http://slice.seriouseats.com/archives/2010/10/the-pizza-lab-how-to-make-great-new-york-style-pizza.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/slog.thestranger.com.txt b/inc/3rdparty/site_config/standard/slog.thestranger.com.txt
new file mode 100644
index 00000000..daa5e31b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/slog.thestranger.com.txt
@@ -0,0 +1,4 @@
1strip_id_or_class: postCategory
2title: //h3[@class='postTitle']
3body: //div[@class='postBody']
4test_url: http://slog.thestranger.com/slog/archives/2010/10/12/sl-letter-of-the-day-leave-it-alone \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/smartinvestor.de.txt b/inc/3rdparty/site_config/standard/smartinvestor.de.txt
new file mode 100644
index 00000000..ec6c55c8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/smartinvestor.de.txt
@@ -0,0 +1,5 @@
1title: //td[@class='hweissblau2']
2body: //p[@class='copy'] | //div[@class='Section1']
3prune: no
4
5test_url: http://www.smartinvestor.de/news/smartinvestor/detail.hbs?itemid=item949496655&recnr=14593 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sme.sk.txt b/inc/3rdparty/site_config/standard/sme.sk.txt
new file mode 100644
index 00000000..c3d01ffb
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/sme.sk.txt
@@ -0,0 +1,3 @@
1title: //meta[@property='og:title']/@content
2date: //p[@class='autor_line']/b/text()
3test_url: http://www.sme.sk/c/6268206/lipsic-vidi-malcharkove-uplatky.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/smithsonianmag.com.txt b/inc/3rdparty/site_config/standard/smithsonianmag.com.txt
new file mode 100644
index 00000000..10a3f717
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/smithsonianmag.com.txt
@@ -0,0 +1,20 @@
1# meta data
2title://h1[@id = 'articleTitle']
3author:substring-after(//ul[@id = 'byLine']/li[1],'By ')
4date:substring-before(substring-after(//ul[@id = 'byLine']/li[last()],','),',')
5body://div[@id = 'article-body']
6
7# full content
8single_page_link://td/li[@class = 'article-singlepage']/a
9
10# caption clean up
11wrap_in(i)://span[@class='articleImageCaptionwide']
12move_into (//span[@class='articleImageCaptionwide'])://div[@id = 'articleImage']/p
13
14
15# clean up
16strip://p[@id = 'articlePaginationWrapper']
17strip://ul[contains(@class, 'cat-breadcrumb')]
18strip://div [@class= 'viewMorePhotos']
19
20test_url: http://www.smithsonianmag.com/history-archaeology/The-Goddess-Goes-Home.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/smokingapples.com.txt b/inc/3rdparty/site_config/standard/smokingapples.com.txt
new file mode 100644
index 00000000..e22af7a9
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/smokingapples.com.txt
@@ -0,0 +1,5 @@
1title: //h2[@class='custom-entry-title']
2author: substring-after(//span[@class='author vcard'],'by ')
3date: substring-after(//span[@class='publ'],'Published on ')
4body: //div[@class='postentry-content']
5test_url: http://smokingapples.com/software/popclip-for-mac/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sourcebooks.com.txt b/inc/3rdparty/site_config/standard/sourcebooks.com.txt
new file mode 100644
index 00000000..668fc44a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/sourcebooks.com.txt
@@ -0,0 +1,4 @@
1#grab the actual content div
2body: //div[@class='rt-article']
3
4test_url: http://www.sourcebooks.com/next/sourcebooks-next-our-blog/1601-another-piece-of-the-e-puzzle-or-when-good-ebook-promotions-go-bad.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/spectator.co.uk.txt b/inc/3rdparty/site_config/standard/spectator.co.uk.txt
new file mode 100644
index 00000000..a05c8395
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/spectator.co.uk.txt
@@ -0,0 +1,7 @@
1author: /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body']/a[@class='author-link']/child::text()
2
3body: /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body']
4
5# Not very helpful: the title and author are contained by the same element that contains the body
6strip: /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body']/h2 | /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body']/a[@class='author-link']
7test_url: http://www.spectator.co.uk/arts-and-culture/night-and-day/7449683/spotify-sunday-my-personal-soundtrack.thtml \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/spectrum.ieee.org.txt b/inc/3rdparty/site_config/standard/spectrum.ieee.org.txt
new file mode 100644
index 00000000..4b0704a8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/spectrum.ieee.org.txt
@@ -0,0 +1,3 @@
1body://div[@class="articleBody"]
2author://p[@class="articleBodyTtl"]
3test_url: http://spectrum.ieee.org/semiconductors/processors/behind-intels-new-randomnumber-generator/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/speirs.org.txt b/inc/3rdparty/site_config/standard/speirs.org.txt
new file mode 100644
index 00000000..3bf859e3
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/speirs.org.txt
@@ -0,0 +1,2 @@
1body://div[@class="body"]
2test_url: http://speirs.org/blog/2011/5/5/ipad-trials-at-oklahoma-state.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/spiegel.de.txt b/inc/3rdparty/site_config/standard/spiegel.de.txt
new file mode 100644
index 00000000..390c075c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/spiegel.de.txt
@@ -0,0 +1,75 @@
1# A. Niepel, narya.de@...
2# - added single_page_link
3# - added author for default and single page view
4# - added date for single page view
5# fforst@...
6# - Fixed it
7# bode2104@...
8# - Fixed single_page_link
9# - Included intro text in single page view
10# - Added body in default view
11
12# set body
13tidy: no
14# body in single page view
15body: //div[@id="spArticleContent"]
16# body in default view
17body: //div[@id="spArticleSection"]
18# body in "Fotostrecke"
19body: //div[@id="spBigaContent"]
20
21# set date in single page view
22date: //div[@id="spArticleContent"]/h3
23# strip date
24strip: //div[@id="spArticleContent"]/h3
25# set date in "Fotostrecke"
26date: //div[@id="spBigaDatum"]
27
28#set title in single page view
29title: //div[@id='spArticleContent']/h2
30# strip title
31strip: //div[@id='spArticleContent']/h1
32strip: //div[@id='spArticleContent']/h2
33#set title in "Fotostrecke"
34title: //div[@class='spBigaHeadline']
35
36# set author
37author: //p[@class="spAuthor"]/a
38author: substring-after(//p[@class="spAuthor"], 'Von ')
39# strip author
40strip: //p[@class='spAuthor']
41
42# remove captions
43strip: //*/span[@class='spPicLayerText']
44strip: //*/div[@class='spPanoPlayerPaneControl']
45strip: //*/div[@class='spCredit']
46strip: //*/div[@class='spCredit']/following-sibling::p
47
48# remove ads
49strip: //div[@class='spMInline']
50
51# remove photogalleries and extras
52strip: //div[@class='spPhotoGallery']
53strip: //div[@class='spPhotoGallery']/following-sibling::br
54strip: //div[@class='spAssetAlignleft']
55strip: //div[contains(@class,'spAsset')]
56strip: //br[@clear='all']
57
58# remove community functions
59strip: //div[@id='spSocialBookmark']
60strip: //div[contains(@class, 'spCommunityBox')]
61strip: //div[contains(@class, 'spArticleNewsfeedBox')]
62strip: //div[@class='spArticleCredit']
63
64# remove clutter in "Fotostrecke"
65strip: //div[@id='spBreadcrumb']
66strip: //div[@id='spBigaLatestEntries']
67strip: //div[contains(@class, 'spBigaNavi')]
68strip: //div[@class='spDottedLine']
69
70# Use link to print article for single page view
71single_page_link: //a[contains(@href, '-druck')]
72
73# use next link in "Fotostrecke"
74next_page_link: //a[@class='spBigaControlForw']
75test_url: http://www.spiegel.de/politik/deutschland/0,1518,787602,00.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/spin.com.txt b/inc/3rdparty/site_config/standard/spin.com.txt
new file mode 100644
index 00000000..66f6192b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/spin.com.txt
@@ -0,0 +1,5 @@
1tidy: no
2body: //section[contains(@class, 'main')]
3strip: //footer
4strip: //a[@class='paginated']
5test_url: http://www.spin.com/articles/bathlands-deep-heart-americas-new-drug-nightmare \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/splatf.com.txt b/inc/3rdparty/site_config/standard/splatf.com.txt
new file mode 100644
index 00000000..d5671652
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/splatf.com.txt
@@ -0,0 +1,5 @@
1author:string('Dan Frommer/SplatF')
2date://div[@class='postdate']
3body://div[@class='entry']
4title://div[@class='post']/h1
5test_url: http://www.splatf.com/2012/02/month-six/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/splitsider.com.txt b/inc/3rdparty/site_config/standard/splitsider.com.txt
new file mode 100644
index 00000000..d1d392e7
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/splitsider.com.txt
@@ -0,0 +1,4 @@
1author: //div[@class='byline']/a
2date: //div[@id='date']
3body: //div[@class='entry']
4test_url: http://splitsider.com/2011/10/saturday-nights-children-rob-riggle-2004-2005/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sport.detik.com.txt b/inc/3rdparty/site_config/standard/sport.detik.com.txt
new file mode 100644
index 00000000..b404b829
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/sport.detik.com.txt
@@ -0,0 +1,8 @@
1title://div[@class="content_detail"]/h1
2
3author://div[@class="author"]/strong
4
5date:substring-before(substring-after(//div[@class="content_detail"]/*[@class="date"], ','), ' WIB')
6
7body://div[@class='text_detail']
8test_url: http://sport.detik.com/sepakbola/read/2012/05/23/065011/1922350/71/agen-silva-ingin-bertahan-di-milan?b99220270 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sport.orf.at.txt b/inc/3rdparty/site_config/standard/sport.orf.at.txt
new file mode 100644
index 00000000..a794ded9
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/sport.orf.at.txt
@@ -0,0 +1,11 @@
1single_page_link: //div[@id='content']//p[@class='readMore']/a
2
3title: //div[@class='hidden offscreen']/h2
4body: //div[@id="storyText"]
5move_into(//div[@id='storyText']): //div[@class='fact']
6strip: //small[@class='credit']
7strip: //small[@class='caption']
8date: substring-after(//div[@id='storyMeta']//p[@class='date'],'Publiziert am')
9strip: //p[@class='toplink']
10
11test_url: http://sport.orf.at/stories/2084851/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sports.espn.go.com.txt b/inc/3rdparty/site_config/standard/sports.espn.go.com.txt
new file mode 100644
index 00000000..e0f8223c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/sports.espn.go.com.txt
@@ -0,0 +1,12 @@
1title: //div[@class='headline'] | //div[@class='mod-header']/h3
2body: //div[contains(@class, 'article')]
3strip: //div[contains(@class, 'mod-inline')]
4strip: //*/span[@class='page-actions']/a
5strip: //*/span[@class='page-actions']/a
6strip: //div[@class='page-actions']/*
7strip: //div[@class='headline'] | //div[@class='mod-header']/h3
8strip: //div[@class='mod-blog-navigation']
9strip: //div[@class='monthday']
10strip: //div[@class='time']
11strip: //div[@class='timeofday']
12test_url: http://sports.espn.go.com/espn/page2/story?page=simmonsnfl2010/lebron_james_return_clevelend&sportCat=nba \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sports.yahoo.com.txt b/inc/3rdparty/site_config/standard/sports.yahoo.com.txt
new file mode 100644
index 00000000..96a3bb71
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/sports.yahoo.com.txt
@@ -0,0 +1,9 @@
1title: //div[@id='article']/div[@class='hd']/h1
2body: //p[@id='byline'] | //div[@id='article']//div[@class='body_copy 0']
3strip: //div[@class='foot']
4strip: //div[@id='sidebar']//div[@class='ft']
5strip: //p[@id='byline']//em
6tidy: no
7prune: no
8
9test_url: http://sports.yahoo.com/nba/news?slug=ap-nbafinals \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sportschau.de.txt b/inc/3rdparty/site_config/standard/sportschau.de.txt
new file mode 100644
index 00000000..6500e75c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/sportschau.de.txt
@@ -0,0 +1,22 @@
1title://div[@id='ardContent']/h1
2
3author://p[@id='ardAutor']
4author://span[@id='ardQuelle']
5author:string('sportschau.de')
6
7date:substring-after(//span[@id='ardStand'], 'Stand: ')
8
9body://div[@id='ardContent']
10
11strip://div[@id='ardContent']/h1
12strip://p[@id='ardAutor']
13strip: //div[@class='embeddedPlayer_clipinfo']
14strip: //div[@class='ardMehrZumThemaRechts']
15strip: //*[contains(@class, 'inv')]
16
17strip: //p[@id='ardAbbinder']
18strip: //div[@class='socialBookmarks']
19strip: //div[@id='ardContentEnd']
20strip: //div[@id='ardDisclaimer']
21strip: //div[@id='ardRechteSpalte']
22test_url: http://www.sportschau.de/sp/fussball/news201203/17/analyse_leverkusen_gladbach.jsp \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sportsillustrated.cnn.com.txt b/inc/3rdparty/site_config/standard/sportsillustrated.cnn.com.txt
new file mode 100644
index 00000000..afc5879f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/sportsillustrated.cnn.com.txt
@@ -0,0 +1,26 @@
1# main sportsillustrated.com articles
2#
3body: //div[@id="cnnStoryContent"]
4title: //div[@id="cnnStoryHeadline"]//h1
5author: //div[@id="cnnSubBanner"]//strong
6date: substring-after(//div[@id="cnnTimeStamp"], "Updated: ")
7date: substring-after(//div[@id="cnnTimeStamp"], "Posted: ")
8
9# kill ugly font buttons
10strip: //div[@id="cnnSCFontButtons"]
11
12# kill misc filler videos & etc
13strip: //div[@class="cnnDivideContent"]
14strip: //*[@class="cnnTMbox"]
15
16# si vault articles
17# -------------
18body: //div[@class="siv_artPara"]
19title: //div[@class="siv_artHeader"]//h1
20author: //div[@class="byline"]
21date: //div[@class="date"]
22
23next_page_link: //div[@id='cnnStoryContinue']/a
24strip_id_or_class: cnnstorypagination
25
26test_url: http://sportsillustrated.cnn.com/2012/writers/peter_king/02/27/combine/index.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sprengsatz.de.txt b/inc/3rdparty/site_config/standard/sprengsatz.de.txt
new file mode 100644
index 00000000..16636bc5
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/sprengsatz.de.txt
@@ -0,0 +1,5 @@
1title: //h2
2author: string('Michael Spreng')
3date: //div[@class='date']
4body: //div[@class='entry']
5test_url: http://www.sprengsatz.de/?p=3691 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sqlite.org.txt b/inc/3rdparty/site_config/standard/sqlite.org.txt
new file mode 100644
index 00000000..4872519a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/sqlite.org.txt
@@ -0,0 +1,7 @@
1body: //div[@id='ff-body']
2
3replace_string(<h1 align=center>): <div id="ff-body"><h1 align=center>
4
5prune: no
6
7test_url: http://www.sqlite.org/fileformat2.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/squashed.tumblr.com.txt b/inc/3rdparty/site_config/standard/squashed.tumblr.com.txt
new file mode 100644
index 00000000..388209a9
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/squashed.tumblr.com.txt
@@ -0,0 +1,4 @@
1body: //div[@class='content']
2date: substring-before( //div[@class='unit dateAndNotes'], 'with')
3title: //h3
4test_url: http://squashed.tumblr.com/post/17613522228/lets-stop-blaming-the-victims-of-predatory-lending \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/stackoverflow.com.txt b/inc/3rdparty/site_config/standard/stackoverflow.com.txt
new file mode 100644
index 00000000..e5317bac
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/stackoverflow.com.txt
@@ -0,0 +1,14 @@
1body: //div[@class='post-text' or @class='user-action-time' or @class='user-details' or @class='vote'] | //div[@id='answers-header']//h2
2
3replace_string(<div class="user-details"><br></div>): <!-- nothing -->
4replace_string(<div class="vote">): <div class="vote"><h3>Vote count:
5
6strip_id_or_class: vote-up
7strip_id_or_class: vote-down
8strip_id_or_class: star-off
9strip_id_or_class: favoritecount
10strip_id_or_class: -share
11strip_id_or_class: badgecount
12
13
14test_url: http://stackoverflow.com/questions/4484289/id-like-to-understand-the-jquery-plugin-syntax \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/stalbansreview.co.uk.txt b/inc/3rdparty/site_config/standard/stalbansreview.co.uk.txt
new file mode 100644
index 00000000..bde14217
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/stalbansreview.co.uk.txt
@@ -0,0 +1,14 @@
1title: //div[@class='articleLeft']/h3
2
3author: substring-after(//span[@class='articleAuthor']/a,'By ')
4
5date: substring-before(//span[@class='articleDateTime'],'in ')
6
7body: //div[@class='articleLeft']
8strip: //div[@class='articleMoreNews']
9strip: //div[@class='articleLeft']/h3
10strip: //div[@class='articleLeft']/p[@class='articleInfo clearfix']
11
12# Remove duplicate title from text
13strip: //div[@id='site']/div[5][@class='holder']/div[1][@class='hBlock']/div[1][@class='sglCol article']/h3
14test_url: http://www.stalbansreview.co.uk/news/9581446.New_roundabout_in_King_Harry_Lane/r/?ref=rss \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/standard.co.uk.txt b/inc/3rdparty/site_config/standard/standard.co.uk.txt
new file mode 100644
index 00000000..22a33484
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/standard.co.uk.txt
@@ -0,0 +1,16 @@
1autodetect_next_page: no
2footnotes: no
3dissolve: //div[@class="column-2"]//div[@class="widget"]
4dissolve: //div[@class="column-2"]//div
5
6author: //div[@class="innerbyline"]/a
7strip: //div[@class="innerbyline"]/a
8
9strip: //p[@class="dateline"]
10date: //p[@class="dateline"]
11
12title: //h1[@class="title"]
13author: //div[@class="innerbyline"]/a
14date: //p[@class="dateline"]
15body: //div[@class="column-2"]
16test_url: http://www.standard.co.uk/lifestyle/esmagazine/grace-and-flavour-pizarro-7938350.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/staradvertiser.com.txt b/inc/3rdparty/site_config/standard/staradvertiser.com.txt
new file mode 100644
index 00000000..0579455f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/staradvertiser.com.txt
@@ -0,0 +1,11 @@
1title: //h1[@id='storyTitle']
2author: substring-after(//span[@class='hsa_postCredit'], 'By ')
3date://span[@class='hsa_dateStamp']
4body: //div[@class='storytext']
5strip_id_or_class: insideStoryAd
6strip_id_or_class: printDesc
7strip_id_or_class: sb_2010_story_tools
8strip_id_or_class: FBConnectButton_Text
9strip_id_or_class: breadcrumbs
10prune: no
11test_url: http://www.staradvertiser.com/news/20111112_World_leaders_step_onto_isle_stage.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/stephenfry.com.txt b/inc/3rdparty/site_config/standard/stephenfry.com.txt
new file mode 100644
index 00000000..1169984f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/stephenfry.com.txt
@@ -0,0 +1,8 @@
1title: /html/head/meta[@name='title']/@content
2author: //span[contains(concat(' ',normalize-space(@class),' '),' article_author ')]/a
3date: //span[contains(concat(' ',normalize-space(@class),' '),' article_date ')]
4
5body: //div[@class='entry-content']
6
7single_page_link: //p[@class='pagination']/a
8test_url: http://www.stephenfry.com/2011/10/06/steve-jobs/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/stlbeacon.org.txt b/inc/3rdparty/site_config/standard/stlbeacon.org.txt
new file mode 100644
index 00000000..d66fee9f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/stlbeacon.org.txt
@@ -0,0 +1,5 @@
1title: //article/h1
2author: //p[@class='byline']
3date: //p[@class='date']
4body: //div[@class='body']
5test_url: https://www.stlbeacon.org/#!/content/23404/mogop_caucus_031712 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/stockholm.etc.se.txt b/inc/3rdparty/site_config/standard/stockholm.etc.se.txt
new file mode 100644
index 00000000..073043d5
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/stockholm.etc.se.txt
@@ -0,0 +1,5 @@
1strip_id_or_class: 'left'
2strip_id_or_class: 'right'
3strip_id_or_class: 'block-belowcontent'
4
5test_url: http://stockholm.etc.se/reportage/bakom-stangda-dorrar-pa-fas-3-massa \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/streetsblog.net.txt b/inc/3rdparty/site_config/standard/streetsblog.net.txt
new file mode 100644
index 00000000..0b62a3d6
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/streetsblog.net.txt
@@ -0,0 +1,7 @@
1title: //h2[@class="post-title"]
2date: //span[@class="post-date"]
3body: //div[@class="post-entry"]
4
5#This is also good for *.streetsblog.org, for example:
6#http://dc.streetsblog.org/2011/10/21/friday-job-market/
7test_url: http://streetsblog.net/2011/10/20/look-out-below-one-in-nine-bridges-structurally-deficient-reports-t4a/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/stuff.co.nz.txt b/inc/3rdparty/site_config/standard/stuff.co.nz.txt
new file mode 100644
index 00000000..12fd0939
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/stuff.co.nz.txt
@@ -0,0 +1,22 @@
1title://div[@id='left_col']/h1
2author:substring-after(//span[contains(@class,'storycredit')],'BY ')
3author://span[contains(@class,'storycredit')]
4date:substring-after(//div[contains(@class,'toolbox_date')],'Last updated ')
5date://div[contains(@class,'toolbox_date')]
6body://div[@id='left_col']
7
8strip_id_or_class: toolbox
9strip_id_or_class: story_features
10strip_id_or_class: sharebox_new
11strip_id_or_class: related_box
12strip_id_or_class: sponsored_links
13strip_id_or_class: hidden_ad
14strip_id_or_class: story_content_top
15strip_id_or_class: total_number
16strip_id_or_class: sort_order
17strip_id_or_class: subscribe_order
18
19strip://div[contains(@class,'ad_story')]
20
21test_url: http://www.stuff.co.nz/national/politics/3930344/PM-issues-challenge
22test_url: http://www.stuff.co.nz/entertainment/7045944/International-praise-for-Ladyhawke \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/stumbleupon.com.txt b/inc/3rdparty/site_config/standard/stumbleupon.com.txt
new file mode 100644
index 00000000..85682166
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/stumbleupon.com.txt
@@ -0,0 +1,3 @@
1single_page_link: //iframe[@id='stumbleFrame']/@src
2
3test_url: http://www.stumbleupon.com/su/35V0wB/zouchmagazine.com/poetry-violet/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/subtraction.com.txt b/inc/3rdparty/site_config/standard/subtraction.com.txt
new file mode 100644
index 00000000..454e37b1
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/subtraction.com.txt
@@ -0,0 +1,17 @@
1title: //*[@id='posts']/div[1]/h2
2author: //*[@id='posts']/div[1]/div[2]/span[2]/a
3date: //*[@class='date']
4body: //div[@class='body-lead']
5
6# take out the bit saying 'body'
7strip: //div[@class='body-lead']/div[@class='info-label']
8
9
10
11
12
13
14
15
16
17test_url: http://www.subtraction.com/2011/02/01/unnecessary-explanations \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sueddeutsche.de.txt b/inc/3rdparty/site_config/standard/sueddeutsche.de.txt
new file mode 100644
index 00000000..4aa9410c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/sueddeutsche.de.txt
@@ -0,0 +1,18 @@
1# 2012-12-04: complete rewrite after Süddeutsche.de relaunch - carlo@...
2
3single_page_link: //a[ contains( @href, "/2.220/" ) ]
4
5body: //article[@id="sitecontent"]/section[@class="body"]
6author: //address[@class="author"]
7date: //div[@class="header"]//h1//span[@class="updated"]
8wrap_in(small): //div[@class="footer"]
9wrap_in(i): //figcaption/h3
10dissolve: //figcaption//h3
11dissolve: //figure/div[@class="body"]
12dissolve: //figure/a
13
14strip: //figure[ not( contains(@class, "zoomimage" ) ) ]
15strip: //div[@data-onlineonly="true"]
16strip: //address[@class="author"]
17
18test_url: http://www.sueddeutsche.de/muenchen/mietshaus-am-gaertnerplatz-alles-muss-raus-1.1556693 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/summify.com.txt b/inc/3rdparty/site_config/standard/summify.com.txt
new file mode 100644
index 00000000..1128e1bb
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/summify.com.txt
@@ -0,0 +1,2 @@
1strip_id_or_class: toolbar
2test_url: http://summify.com/story/Tmt1YQ0JBgKTAHwK/www.nybooks.com/articles/archives/2003/jan/16/fixed-opinions-or-the-hinge-of-history/?pagination=false \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/suntimes.com.txt b/inc/3rdparty/site_config/standard/suntimes.com.txt
new file mode 100644
index 00000000..13390e4f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/suntimes.com.txt
@@ -0,0 +1,14 @@
1title: //div[@class='story-details']/h1
2date: //span[@class='date-time']
3Author: substring-after(//p[@class='by-line'], 'By ')
4
5strip: //div[@class='videoThumbnails']
6strip: //div[@class='ad-square2-container']
7strip: //div[@class='homeDeliveryContainer5']
8
9strip: //div[@class='image-description']
10strip: //div[@id='internal-side-bar']
11
12strip: //span[@class='hide']
13strip: //div[@class='date']
14test_url: http://www.suntimes.com/technology/ihnatko/8816567-452/review-kindle-fire-is-no-ipad-killer-but-it-is-a-killer-device.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/svd.se.txt b/inc/3rdparty/site_config/standard/svd.se.txt
new file mode 100644
index 00000000..02b5b8ca
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/svd.se.txt
@@ -0,0 +1,4 @@
1# Ads
2strip_id_or_class: articlead
3
4test_url: http://www.svd.se/nyheter/inrikes/manga-huggormsbitna-golfare_5004031.svd \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sydsvenskan.se.txt b/inc/3rdparty/site_config/standard/sydsvenskan.se.txt
new file mode 100644
index 00000000..da6772aa
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/sydsvenskan.se.txt
@@ -0,0 +1,11 @@
1title: //h1
2
3author: //a[contains(@href, '/sok/?')]/text()
4
5date: substring-after(//span[@class='date'], 'Publicerad ')
6
7body: //div[@class='two_column_left']
8strip_id_or_class: story
9strip: //div[@class='leadText saplo:lead']/h5
10
11test_url: http://www.sydsvenskan.se/kultur-och-nojen/-jag-vill-garna--stanna-- \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/symmetrymagazine.org.txt b/inc/3rdparty/site_config/standard/symmetrymagazine.org.txt
new file mode 100644
index 00000000..3109c0e7
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/symmetrymagazine.org.txt
@@ -0,0 +1,12 @@
1title: //div[contains(@class, "post")]/h2
2
3author: //div[contains(@class, "post")]/p[position()=last()]/text()[1]
4
5date: //div[contains(@class, "post")]/p[1]
6
7body: //div[contains(@class, "post")]
8
9strip: //div[contains(@class, "post")]/h2[1]
10strip: //div[contains(@class, "post")]/p[1]
11strip: //div[contains(@class, "post")]/p[position()=last()]
12test_url: http://www.symmetrymagazine.org/breaking/?p=12784 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sz-magazin.sueddeutsche.de.txt b/inc/3rdparty/site_config/standard/sz-magazin.sueddeutsche.de.txt
new file mode 100644
index 00000000..c3e34977
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/sz-magazin.sueddeutsche.de.txt
@@ -0,0 +1,15 @@
1title: //h1
2body://div[@class='drucken']
3author: substring-after(//span[@class='autor'], 'Von ')
4author: //span[@class='autor']
5
6single_page_link://a[contains(@href, '/drucken/')]
7convert_double_br_tags:yes
8
9dissolve://div[@class='vorspann']
10
11strip://h1
12strip_id_or_class: klassifizierung
13strip_id_or_class: source
14strip_id_or_class: autor
15test_url: http://sz-magazin.sueddeutsche.de/texte/anzeigen/37567 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tagesschau.de.txt b/inc/3rdparty/site_config/standard/tagesschau.de.txt
new file mode 100644
index 00000000..8ce8a90e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/tagesschau.de.txt
@@ -0,0 +1,23 @@
1title://h1[1]
2
3author: substring-after(//em, 'Von ')
4author:string('tagesschau.de')
5
6date:substring-after(//div[@class='standDatum'], 'Stand: ')
7
8body://div[contains(@class, 'article')] | //div[contains(@class, 'centerCol')]
9
10strip://h1[1]
11strip: //div[contains(@class, 'directLinks')]
12strip: //div[contains(@class, 'zitatBox')]
13strip: //div[contains(@class, 'teaserBox metaBlock')]
14strip: //*[contains(@class, 'inv')]
15strip: //span[@class='imgSubline']
16strip: //*[contains(@class, 'topline')][1]
17strip: //div[@id='rightCol'][1]
18strip: //div[@id="footer"][1]
19strip: //div[@class="fPlayer"]
20strip: //div[@id='seitenanfang']
21strip: //div[@class='standDatum']
22strip: //em
23test_url: http://www.tagesschau.de/ausland/wahlkampffrankreich102.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tampabay.com.txt b/inc/3rdparty/site_config/standard/tampabay.com.txt
new file mode 100644
index 00000000..bfe841c6
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/tampabay.com.txt
@@ -0,0 +1,5 @@
1title: //span[@class="entry-title"]
2author: //*[contains(@class, 'item')]/p/a/text()
3date: substring-after(//*[contains(@class, 'item')]/p/text()[3], 'Posted:')
4body: //div[@class="entry-content"]
5test_url: http://www.tampabay.com/news/salvador-dali-leaders-want-st-petersburg-city-council-to-put-brakes-on/1236349 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/taptaptap.com.txt b/inc/3rdparty/site_config/standard/taptaptap.com.txt
new file mode 100644
index 00000000..13de70e9
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/taptaptap.com.txt
@@ -0,0 +1,4 @@
1title: //h3[@class="storytitle"]
2body: //div[@class="post"]
3strip: //div[@class="blurbBox"]
4test_url: http://taptaptap.com/blog/apples-precedents-vs-apples-guidelines/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tasteofhome.com.txt b/inc/3rdparty/site_config/standard/tasteofhome.com.txt
new file mode 100644
index 00000000..77773363
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/tasteofhome.com.txt
@@ -0,0 +1,15 @@
1title: //span[@id='ctl00_ctl00_MainContent_MainContent_RecipeImage1_lblRecipeTitle']
2body: //div[@id='RDNEW']//*[@class='Recipe-imgCon' or @class='Recipe-Intro' or @class='recipeDetails']
3strip_id_or_class: rec-ExRightPanel
4strip_id_or_class: divCarousel
5strip_id_or_class: preptimeOuter
6strip_id_or_class: cooktimeOuter
7strip_id_or_class: durationOuter
8strip_id_or_class: divImageFooter
9strip_id_or_class: microFormatFnIngred
10strip: //span[@class='Recipe-Intro']//*[@class='link' or @class='rating']
11
12prune: no
13tidy: no
14
15test_url: http://www.tasteofhome.com/recipes/Grinch-Punch \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/taz.de.txt b/inc/3rdparty/site_config/standard/taz.de.txt
new file mode 100644
index 00000000..6e84527b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/taz.de.txt
@@ -0,0 +1,8 @@
1date: //div[@class='secthead']
2body: //div[@class='sectbody']
3title: concat(//div[@class='sectbody']/h4,': ',//div[@class='sectbody']/h1)
4author: //span[@class='author']
5strip: //p[@class='caption']
6strip_id_or_class: rack
7
8test_url: http://www.taz.de/Protestbewegung-Occupy/!80188/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tbray.org.txt b/inc/3rdparty/site_config/standard/tbray.org.txt
new file mode 100644
index 00000000..fbe94fa4
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/tbray.org.txt
@@ -0,0 +1,5 @@
1body: //div[@id='centercontent']
2strip: //div[@id='rightcontent']
3date: substring-before( //div[@id='cats'], '·')
4title: //h1
5test_url: http://www.tbray.org/ongoing/When/201x/2012/03/04/Mobile-Money \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tcng.org.txt b/inc/3rdparty/site_config/standard/tcng.org.txt
new file mode 100644
index 00000000..765224e4
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/tcng.org.txt
@@ -0,0 +1,4 @@
1title: //div[@id='main-content']/h1
2body: //div[@id='main-content']
3strip: //div[@id='main-content']/h1
4test_url: http://www.tcng.org/index.php/blog/view/teaching-basic-health-cutting-down-costs \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tech.fortune.cnn.com.txt b/inc/3rdparty/site_config/standard/tech.fortune.cnn.com.txt
new file mode 100644
index 00000000..b6d17da4
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/tech.fortune.cnn.com.txt
@@ -0,0 +1,4 @@
1title: //h1[@class='storyheadline']
2body: //div[@class='storytext']
3strip: //strong
4test_url: http://tech.fortune.cnn.com/2011/03/17/why-startups-dont-go-public-anymore/?section=money_topstories&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fmoney_topstories+%28Top+Stories%29 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tech.sina.com.cn.txt b/inc/3rdparty/site_config/standard/tech.sina.com.cn.txt
new file mode 100644
index 00000000..f7228ebf
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/tech.sina.com.cn.txt
@@ -0,0 +1,11 @@
1title://h1[contains(@id,'artibodyTitle')]
2
3date://span[contains(@id,'pub_date')]
4
5body://div[contains(@id,'artibody')]
6
7strip://div[contains(@class,'otherContent')]
8
9next_page_link://p[@class='page']/a[contains(.,'下一页')]
10
11test_url: http://tech.sina.com.cn/mobile/n/2012-03-22/07476863046.shtml \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/techcrunch.com.txt b/inc/3rdparty/site_config/standard/techcrunch.com.txt
new file mode 100644
index 00000000..f436acb5
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/techcrunch.com.txt
@@ -0,0 +1,18 @@
1body: //div[contains(@class, 'media-container') or contains(@class, 'body-copy')]
2
3author: //a[@class="name"]
4
5date: //div[@class="post-time"]
6
7title: //h1[@class="headline"]
8strip_id_or_class: module-crunchbase
9
10# The following is for the mobile site
11body: //div[@id="singlentry"]
12author: substring-after(//span[@class="single-post-meta-top"],'rsaquo; ')
13date: substring-before(//div[@class="single-post-meta-top"],' @')
14title: //a[@class="sh2"]
15
16prune: no
17
18test_url: http://techcrunch.com/2011/10/18/apples-insanely-great-q1-2012/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/techdirt.com.txt b/inc/3rdparty/site_config/standard/techdirt.com.txt
new file mode 100644
index 00000000..727f3701
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/techdirt.com.txt
@@ -0,0 +1,12 @@
1body: //div[@class='story']
2title: //div[@class='story']/h1
3strip: //div[@class='story']/h1
4
5author: //div[@class='details']/p[contains(., 'by ')]/a
6date: //p[@class='storydate']
7
8strip: //p[a[contains(., 'Leave a Comment')]]
9strip_id_or_class: share
10strip_id_or_class: maincolumn_head
11strip_id_or_class: maincolmod
12test_url: http://www.techdirt.com/articles/20120112/17455117394/sega-gets-it-right-about-sopa-its-time-hard-reset-copyright-law-congress.shtml \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/techmeme.com.txt b/inc/3rdparty/site_config/standard/techmeme.com.txt
new file mode 100644
index 00000000..8644e00f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/techmeme.com.txt
@@ -0,0 +1,3 @@
1single_page_link_in_feed: //b/a
2
3test_url_feed: http://www.techmeme.com/feed.xml \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/technicallyjordan.tumblr.com.txt b/inc/3rdparty/site_config/standard/technicallyjordan.tumblr.com.txt
new file mode 100644
index 00000000..cc26ee4c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/technicallyjordan.tumblr.com.txt
@@ -0,0 +1,8 @@
1title: //h2
2author: //meta[@name="author"]/@content
3date: //h3
4body: //div[@class="postBody"]
5strip: //h1
6strip: //h2
7strip: //h3
8test_url: http://technicallyjordan.tumblr.com/post/22914659822/facebook-to-launch-app-store-knock-off \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/technologyreview.com.txt b/inc/3rdparty/site_config/standard/technologyreview.com.txt
new file mode 100644
index 00000000..41f21d46
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/technologyreview.com.txt
@@ -0,0 +1,16 @@
1title: //header[@class='article-meta']/h1
2title: substring-before(//title, '|')
3
4body: //section[contains(@class, 'body')]
5
6# Author & Date for News and Featured Stories
7author: //ul[@class='byline']/li/a
8author: substring-before(substring-after(//ul[@class='byline']/li, 'By '), ' on')
9date: substring-after(//ul[@class='byline']/li, 'on ')
10
11# Author & Date for "Views"
12author: //div[@class='view-byline']/div[@class='meta']/h2[1]
13date: //div[@class='view-byline']/div[@class='meta']/h2[2]
14
15next_page_link: //section[@class='pagination']/a[contains(@class, 'continue')]
16test_url: http://www.technologyreview.com/news/427567/facebooks-telescope-on-human-behavior/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/techpinions.com.txt b/inc/3rdparty/site_config/standard/techpinions.com.txt
new file mode 100644
index 00000000..89ed8349
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/techpinions.com.txt
@@ -0,0 +1,7 @@
1body: //div[@class="post"]
2
3strip: //div[@class="post-meta"]
4strip: //div[@id="socialicons"]
5strip: //div[@id="authorbox"]
6
7test_url: http://techpinions.com/why-google-and-microsoft-hate-siri/3572 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/techradar.com.txt b/inc/3rdparty/site_config/standard/techradar.com.txt
new file mode 100644
index 00000000..ed92a974
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/techradar.com.txt
@@ -0,0 +1,12 @@
1# Title without news/reviews etc. appended
2title: //div[@id='subColumn1Pad']/div[1][@class='article']/div[1][@class='articleHead']/h1
3
4# Remove home link
5strip: //div[@id='page_logo']/a
6
7# Remove utilities
8strip: //*[(@id = "utilities")]
9
10# Remove comments link
11strip: //div[@id='subColumn1Pad']/div[1][@class='article']/div[1][@class='articleHead']/p[@class='tiny']
12test_url: http://www.techradar.com/news/television/sky-to-rebrand-living-as-sky-living-903105 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/telegraaf.nl.txt b/inc/3rdparty/site_config/standard/telegraaf.nl.txt
new file mode 100644
index 00000000..ff3cd06e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/telegraaf.nl.txt
@@ -0,0 +1,9 @@
1body: //div[@id='artikelKolom']
2strip: //div[@class='broodMediaBox']/div[@class='docbox' or @class='artBannerWrapper']
3strip: //div[@id='artikeltoolbar']
4strip: //div[@class='reactiebalk artspacer' or @class='bannercenter clearfix artspacer']
5strip: //div[@id='artikelKolomRechts' or @id='TMGTweetWidget']
6tidy: no
7prune: no
8
9test_url: http://www.telegraaf.nl/binnenland/10275097/__Identiteit_man_in_sloot_onbekend__.html?cid=rss \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/telegraph.co.uk.txt b/inc/3rdparty/site_config/standard/telegraph.co.uk.txt
new file mode 100644
index 00000000..e1faf23b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/telegraph.co.uk.txt
@@ -0,0 +1,10 @@
1body: //div[@class='byline' or @id='storyEmbSlide' or @id='mainBodyArea']
2strip: //p[@class='comments']
3strip: //div[@id='storyEmbSlide']//div[contains(@class, "hide")]
4strip: //div[@id='tmg-related-links' or @id='outbrain-related-links' or @id='onespot-related-links']
5strip: //p[@class='bbpTweet']/span[@class='timestamp']
6strip: //p[@class='bbpTweet']/span[@class='metadata']//img
7tidy: no
8prune: no
9
10test_url: http://www.telegraph.co.uk/news/worldnews/europe/ireland/8663451/Is-Ireland-divorcing-from-the-Catholic-Church.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/theappleblog.com.txt b/inc/3rdparty/site_config/standard/theappleblog.com.txt
new file mode 100644
index 00000000..3bd555f1
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/theappleblog.com.txt
@@ -0,0 +1,3 @@
1# Remove home link
2strip: //div[@id='blog-title']/a
3test_url: http://theappleblog.com/2010/10/21/the-new-macbook-air-is-underwhelming/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/theatlantic.com.txt b/inc/3rdparty/site_config/standard/theatlantic.com.txt
new file mode 100644
index 00000000..267fd39c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/theatlantic.com.txt
@@ -0,0 +1,18 @@
1title: //div[@id='article']/h1
2title: //h1
3
4body: //div[@class='articleText']
5body: //div[@class='articleContent']
6body: //div[@id='article']
7date: //*[contains(@class, 'date')]
8author: //div[@id='profile']//*[@class='authors']//a[1]
9author: //*[@class='author']/span
10prune: no
11
12strip: //div[@class='moreOnBoxWithImages']
13
14single_page_link: //a[@class='print']
15
16test_url: http://www.theatlantic.com/technology/archive/2011/04/want-to-see-how-crazy-a-bot-run-market-can-be/237773/
17test_url: http://www.theatlantic.com/magazine/archive/2007/11/the-autumn-of-the-multitaskers/6342/
18test_url: http://www.theatlantic.com/entertainment/archive/2012/04/30-rock-live-a-funny-reminder-of-why-sitcoms-arent-shot-live-anymore/256447/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thebostonchannel.com.txt b/inc/3rdparty/site_config/standard/thebostonchannel.com.txt
new file mode 100644
index 00000000..64df90c1
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/thebostonchannel.com.txt
@@ -0,0 +1,7 @@
1title: //meta[@name='og:title']/@content
2date: //meta[@name='created']/@content
3body: //div[@class="StoryBody" or @class="storyTeaser"]
4
5replace_string(<p></p>): <br /><br />
6
7test_url: http://www.thebostonchannel.com/slideshow/news/28210648/detail.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thebrowser.com.txt b/inc/3rdparty/site_config/standard/thebrowser.com.txt
new file mode 100644
index 00000000..c3c20504
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/thebrowser.com.txt
@@ -0,0 +1,10 @@
1title: //h2[contains(@class, 'page-title')]
2body: //div[@id='content']/div[contains(@id, 'node-')]/div[@class='content']
3
4prune: no
5
6strip: //div[contains(@class, 'node-book')]//a[@class='button']
7
8single_page_link: //a[@class='tool-print']
9
10test_url: http://thebrowser.com/interviews/yotam-ottolenghi-on-his-favourite-cookery-books \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thecarton.net.txt b/inc/3rdparty/site_config/standard/thecarton.net.txt
new file mode 100644
index 00000000..9ef4ed8b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/thecarton.net.txt
@@ -0,0 +1,10 @@
1title: substring-before(//title, ' &ndash; ')
2author:string('Shawn')
3date: //*/time/@pubdate
4
5
6strip: //header
7strip: //div[@id='prev_next']
8strip: //div[@id='masthead']
9
10test_url: http://thecarton.net/2012/12/20/imdb \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thedaily.com.txt b/inc/3rdparty/site_config/standard/thedaily.com.txt
new file mode 100644
index 00000000..24ebbbac
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/thedaily.com.txt
@@ -0,0 +1,24 @@
1#keep all body text
2prune: no
3
4#title, body, metadata
5title: //div[@class='story_header']/h1
6body: //div[@id='content']
7author: substring-after(//span[@class='byline'], "by ")
8author: substring-after(//span[@class='byline'], "By ")
9author: //span[@class='byline']
10date: //span[@class='date']
11
12#formatting
13convert_double_br_tags: yes
14dissolve: //div[@class='slides_full']/ul/li
15
16# cleanup
17strip: //a[@id='story_note']
18strip: //br
19strip: //div[@class='intro']
20strip: //div[@class='share-block']
21strip: //div[@class='sidebar-social']
22strip: //div[@class='top-stories']
23strip: //div[@class='prevnext']
24test_url: http://www.thedaily.com/page/2012/01/09/010912-news-college-costs-1-5/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thedailybeast.com.txt b/inc/3rdparty/site_config/standard/thedailybeast.com.txt
new file mode 100644
index 00000000..4781c65a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/thedailybeast.com.txt
@@ -0,0 +1,7 @@
1title: //h1
2body: //article/div[contains(@class, 'article-body')]
3#strip: //header/hgroup/h1
4strip: //footer[@class='storyFooter']
5single_page_link: //li[@class='print']/a
6prune: no
7test_url: http://www.thedailybeast.com/articles/2010/04/06/how-mastercard-predicts-divorce.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thedailymash.co.uk.txt b/inc/3rdparty/site_config/standard/thedailymash.co.uk.txt
new file mode 100644
index 00000000..0f15558d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/thedailymash.co.uk.txt
@@ -0,0 +1,14 @@
1# Remove duplicated title
2strip: //div[@id='content']/div[1][@class='full_intro']/h2
3
4# Remove links, ads etc.
5strip: //*[(@class= "aside")]
6
7# Remove the date and add it to the date published field in Instapaper
8strip: //div[@class="date"]
9date: //div[@class="date"]
10
11# There is no byline on The Daily Mash.
12
13convert_double_br_tags: yes
14test_url: http://www.thedailymash.co.uk/index.php?option=com_content&task=view&id=4994&Itemid=81&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+thedailymash+%28The+Daily+Mash.+It%27s+news+to+us.%29 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thefilmexperience.net.txt b/inc/3rdparty/site_config/standard/thefilmexperience.net.txt
new file mode 100644
index 00000000..e6b5115a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/thefilmexperience.net.txt
@@ -0,0 +1,2 @@
1body: //div[@class='body']
2test_url: http://thefilmexperience.net/blog/2011/12/30/distant-relatives-2001-a-space-odyssey-and-the-tree-of-life.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/theglobalmail.org.txt b/inc/3rdparty/site_config/standard/theglobalmail.org.txt
new file mode 100644
index 00000000..fae0fb29
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/theglobalmail.org.txt
@@ -0,0 +1,41 @@
1title: //h1[@id="headline"]
2author: //div[contains(@class, "editorial-byline-author")]/a
3date: substring-after(//div[contains(@class, "editorial-byline-meta")], " | ")
4
5# The article body contains a mix of article and non-article elements, so a lot of manual tweaks are needed
6body: //div[@id="template"]
7strip_id_or_class: editorial-byline-pic
8strip_id_or_class: editorial-byline
9strip_id_or_class: headline
10
11# Include the leadin paragraph in the body text, but remove quotes because they're out of context
12dissolve: //div[contains(@id, "leadin")]
13strip_id_or_class: pullquote
14
15# Image captions removed because they're confusing in body text
16strip_id_or_class: image-caption-content
17
18# Remove header and footer
19strip_id_or_class: header
20strip_id_or_class: footer
21
22# Remove the hidden logo that seems to be used to cause Facebook to show the logo instead of a random article image
23strip: /html/body/span[contains(@style, "display: none")]
24
25# Remove search box
26strip_id_or_class: searchContainer
27strip: //div[contains(@class, "searchInstruction")]
28strip: //div[contains(@class, "searchResults")]/h4
29
30# Remove the 'Letters to the Editor' section
31strip_id_or_class: letter-text
32strip_id_or_class: letter-from
33strip_id_or_class: letter-date
34
35# Remove Like/Tweet links
36strip_id_or_class: social-tab
37
38# Remove 'divider' which causes an inexplicable slash to appear in the article body
39strip_id_or_class: divider
40
41test_url: http://www.theglobalmail.org/feature/tiramisu-time-in-pyongyang/88/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/theglobeandmail.com.txt b/inc/3rdparty/site_config/standard/theglobeandmail.com.txt
new file mode 100644
index 00000000..90634a08
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/theglobeandmail.com.txt
@@ -0,0 +1,5 @@
1single_page_link: //div[contains(@class, 'pagination')]//a[contains(@title, 'ingle page')]
2tidy: no
3prune: no
4
5test_url: http://www.theglobeandmail.com/report-on-business/rob-magazine/how-a-novice-miner-survived-a-summer-in-the-klondike/article2345350/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/theindychannel.com.txt b/inc/3rdparty/site_config/standard/theindychannel.com.txt
new file mode 100644
index 00000000..3544f247
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/theindychannel.com.txt
@@ -0,0 +1,13 @@
1title: //h1[@class="Headline"]
2date: substring-after(//div[@class="posted"], 'EDT ')
3body: //div[@class="storyBody"]
4
5strip: //td[@class="AssocContentTD"]
6strip: //div[@id="pageTitle"]
7strip: //div[@class="posted"]
8strip: //div[@class="updated"]
9strip: //div[@class="js-kit-disclaimer"]
10strip: //table[@class="row3table"]
11strip: //div[@class="container2"]
12strip: //div[@id="delta"]
13test_url: http://www.theindychannel.com/news/31050840/detail.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/themillions.com.txt b/inc/3rdparty/site_config/standard/themillions.com.txt
new file mode 100644
index 00000000..e3e57fea
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/themillions.com.txt
@@ -0,0 +1,10 @@
1title: /html/body/div/div[2]/div/div/div/h3
2
3body: /html/body/div/div[2]/div/div/div/div[2]
4
5strip: /html/body/div/div[2]/div/div/div/div[6]/div[3]/div/div/div
6
7tidy: no
8
9# any way to get rid of this word character garbage?
10test_url: http://www.themillions.com/2010/07/at-the-movies-with-david-mitchell-the-thousand-autumns-of-jacob-de-zoet.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/themuseumofinnocence.com.txt b/inc/3rdparty/site_config/standard/themuseumofinnocence.com.txt
new file mode 100644
index 00000000..518bff93
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/themuseumofinnocence.com.txt
@@ -0,0 +1,7 @@
1body: single-review
2strip_id_or_class: featured-review
3strip_id_or_class: resources
4strip_id_or_class: rate-the-book
5strip_id_or_class: write-review
6
7test_url: http://themuseumofinnocence.com/review.php?id=1179 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thenation.com.txt b/inc/3rdparty/site_config/standard/thenation.com.txt
new file mode 100644
index 00000000..d88bcdd6
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/thenation.com.txt
@@ -0,0 +1,11 @@
1title: //h1[@class='print-title']
2body: //div[@class='print-content']
3author: //a[contains(@href, '/authors')]
4author: substring-before(//div[@class='print-created'], '|')
5date: //span[@class='article-date']
6date: substring-after(//div[@class='print-created'], '|')
7prune: no
8
9single_page_link: //ul[contains(@class, 'article-actions-bar')]//a[contains(@href, '/print/article/')]
10
11test_url: http://www.thenation.com/article/162331/hard-against-time-roy-fisher \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thenetworkgarden.blogs.com.txt b/inc/3rdparty/site_config/standard/thenetworkgarden.blogs.com.txt
new file mode 100644
index 00000000..846b8a8a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/thenetworkgarden.blogs.com.txt
@@ -0,0 +1,4 @@
1body: //div[@id="beta-inner"]
2title: //h3[@class="entry-header"]
3
4test_url: http://thenetworkgarden.blogs.com/weblog/2011/09/microsoft-metro-and-the-next-wave-in-computing.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thenextweb.com.txt b/inc/3rdparty/site_config/standard/thenextweb.com.txt
new file mode 100644
index 00000000..fdc70005
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/thenextweb.com.txt
@@ -0,0 +1,12 @@
1body: //div[@class= 'article-body']
2author: //div[@class='featured mb-1']//a[starts-with(@href,'/author')]
3
4strip: //div[@class = 'bargo']
5strip: //div[@class = 'tf']
6strip: //div[@class = 'article']/div[@class = 'blue-box']
7strip_id_or_class: respond
8
9tidy: no
10next_page_link: //div[@class='pages-wrapper']//span/following-sibling::a/@href
11
12test_url: http://thenextweb.com/apple/2011/10/12/tnw-review-a-complete-guide-to-apples-ios-5-with-icloud-an-os-14-years-in-the-making/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/theoaklandpress.com.txt b/inc/3rdparty/site_config/standard/theoaklandpress.com.txt
new file mode 100644
index 00000000..c7132321
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/theoaklandpress.com.txt
@@ -0,0 +1,3 @@
1body: //div[@id='fullstory']
2strip: //div[@id='page_leftbar']
3test_url: http://theoaklandpress.com/articles/2011/04/25/news/doc4db5330e0bce9220005852.txt \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/theonion.com.txt b/inc/3rdparty/site_config/standard/theonion.com.txt
new file mode 100644
index 00000000..12918b88
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/theonion.com.txt
@@ -0,0 +1,11 @@
1title: //h2[@class='title']
2date: substring-before(//p[@class='meta'], '|')
3body: //div[@class='story']
4#body: //div[@class='article_body']
5
6strip: //h2[@class='title']
7strip: //p[@class='meta']
8strip: //div[@class='ga_section']
9strip: //div[@id='recent_slider']
10
11test_url: http://www.theonion.com/articles/pathetic-bobcats-owner-again-regaling-players-with,27572/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thepioneerwoman.com.txt b/inc/3rdparty/site_config/standard/thepioneerwoman.com.txt
new file mode 100644
index 00000000..f89f3a87
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/thepioneerwoman.com.txt
@@ -0,0 +1,11 @@
1title: //h1[@class='post-title']
2body: //div[@class='post']
3author: //p[@class='posted-by']
4date: //div[@class='sprite post-date']
5
6# The body of the post doesn't have its own div so we have to strip out the metadata
7strip: //div[@class='author_avatar']
8strip: //div[@class='sprite post-date']
9strip: //h1[@class='post-title']
10strip: //p[@class='posted-by']
11test_url: http://thepioneerwoman.com/cooking/2011/08/pie-fats-a-comparison/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/theregister.co.uk.txt b/inc/3rdparty/site_config/standard/theregister.co.uk.txt
new file mode 100644
index 00000000..ebcc55d5
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/theregister.co.uk.txt
@@ -0,0 +1,5 @@
1title: //div[@id="article"]/h2
2author: //div[@id="article"]/p[@class="byline"]/a[1]
3date: //div[@id="article"]/p[@class="dateline"]/a[2]
4body: //div[@id="article"]/div[@id="body"]
5test_url: http://www.theregister.co.uk/2011/10/06/gas_bill_shocker/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/theroot.com.txt b/inc/3rdparty/site_config/standard/theroot.com.txt
new file mode 100644
index 00000000..ebff662d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/theroot.com.txt
@@ -0,0 +1,3 @@
1body: //div[@id='node-content']
2strip_id_or_class: pager
3test_url: http://www.theroot.com/views/why-i-am-male-feminist \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/therumpus.net.txt b/inc/3rdparty/site_config/standard/therumpus.net.txt
new file mode 100644
index 00000000..d01a89bb
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/therumpus.net.txt
@@ -0,0 +1,4 @@
1title: /html/body/div/div[2]/div/div/h1
2
3body: /html/body/div/div[2]/div/div/div[2]
4test_url: http://therumpus.net/2010/07/the-rumpus-interview-with-david-means/?full=yes \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thesiasat.com.txt b/inc/3rdparty/site_config/standard/thesiasat.com.txt
new file mode 100644
index 00000000..ab9a99e8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/thesiasat.com.txt
@@ -0,0 +1,11 @@
1#body: (//div[@class='ftr-yt-vid'])[1]
2body: (//blockquote[contains(@class, 'postcontent')])[1]
3body: (//div[starts-with(@id, 'post_message')])[1]
4
5prune: no
6tidy: no
7
8#replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player"
9#replace_string(</iframe>): </iframe>&nbsp;</div>
10
11test_url: http://www.thesiasat.com/showthread.php?19220-Dunya-News-HASB-E-HAAL-16-06-2012-Part-1-5 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thesimpledollar.com.txt b/inc/3rdparty/site_config/standard/thesimpledollar.com.txt
new file mode 100644
index 00000000..d5c6c9e0
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/thesimpledollar.com.txt
@@ -0,0 +1,4 @@
1title: //h3[@class='post-title']/a[@class='post-title-link']
2body: //div[@class='post-content']
3author: //div[@class='post-meta-under-title']/a
4test_url: http://www.thesimpledollar.com/2011/09/13/determining-the-size-of-your-emergency-fund/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thespoiler.co.uk.txt b/inc/3rdparty/site_config/standard/thespoiler.co.uk.txt
new file mode 100644
index 00000000..e2ed1e63
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/thespoiler.co.uk.txt
@@ -0,0 +1,3 @@
1strip: //*[(@id = "content")]/h2
2strip: //*[(@class = "wp-notable-line")]
3test_url: http://www.thespoiler.co.uk/index.php/2010/10/21/wayne-rooney-tells-man-utd-its-not-me-its-you \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thespoof.com.txt b/inc/3rdparty/site_config/standard/thespoof.com.txt
new file mode 100644
index 00000000..409dc0c9
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/thespoof.com.txt
@@ -0,0 +1,9 @@
1title: //h1[contains(@class, 'cTitle')]
2body: //div[contains(@class, 'KonaBody') or @id='articleimageright']
3author: //meta[@name='Author']/@content
4date: //meta[@name='OriginalPublicationDate']/@content
5
6prune: no
7tidy: no
8
9test_url: http://www.thespoof.com/news/spoof.cfm?headline=s8i108389 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thestranger.com.txt b/inc/3rdparty/site_config/standard/thestranger.com.txt
new file mode 100644
index 00000000..0f9855c8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/thestranger.com.txt
@@ -0,0 +1,12 @@
1# savage* filtering is for Savage Love, such as: http://www.thestranger.com/seattle/SavageLove?oid=5135029
2
3# other filtering is for plain articles, such as: http://www.thestranger.com/seattle/the-stranger-election-control-board/Content?oid=5142885
4
5title: //div[@id='savageColumn_head']/h1
6title: //h1[@class="headlineLarge"]
7
8strip: //div[@id='savage_right'] | //div[@id='savageColumn_head'] | //div[@id='savageArticleRight'] | //div[@id='articleRight'] | //div[@class='savAppBanner']
9
10body: //div[@id='savageColumn']
11body: //div[@id='story_text']
12test_url: http://www.thestranger.com/seattle/SavageLove?oid=5135029 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thestreet.com.txt b/inc/3rdparty/site_config/standard/thestreet.com.txt
new file mode 100644
index 00000000..5de75637
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/thestreet.com.txt
@@ -0,0 +1,25 @@
1title: //div[@id='storyHdr']/h1
2title: //div[@id='print']//h2
3body: //div[@class="virtualpage"]
4body: //div[@id='print']//div[@id='bd']
5author: //meta[@name="AUTHOR"]/@content
6author: (//div[@id='print']//div[@id='bd']/h4)[1]
7date: //meta[@name="DATE"]/@content
8date: //div[@id='print']//div[@id='dte']
9
10strip_id_or_class: articleFooter
11strip_id_or_class: sidebar
12strip_id_or_class: ie6PrintSubhead
13strip_id_or_class: subHdr
14
15
16replace_string(<P/>): </p><p>
17
18prune: no
19
20#TODO: redirects back - perhaps needs referer to work
21single_page_link: //div[@id='storyDetail']//a[contains(@href, '/print/')]
22
23test_url: http://www.thestreet.com/story/11386556/1/which-of-these-10-dividend-stocks-is-worth-the-risk.html
24# multi page
25test_url: http://www.thestreet.com/story/11387090/1/7-ubs-stock-picks-for-2012.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thethaovanhoa.vn.txt b/inc/3rdparty/site_config/standard/thethaovanhoa.vn.txt
new file mode 100644
index 00000000..6b3277eb
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/thethaovanhoa.vn.txt
@@ -0,0 +1,2 @@
1strip:/html/body/form/div[2]/div[3]/div/div/div/div/div/div/div/div/div/div[2]/div[3]/div[2]/div/p[2]
2test_url: http://thethaovanhoa.vn/151N20110519085606745T129/levante-quyet-giu-caicedo.htm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/theverge.com.txt b/inc/3rdparty/site_config/standard/theverge.com.txt
new file mode 100644
index 00000000..11c5c153
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/theverge.com.txt
@@ -0,0 +1,31 @@
1title: //h1[contains(@class, "headline")]
2
3author: //p[contains(@class, "byline")]/a[contains(@class, "author")]
4
5date: substring-after(normalize-space(//p[contains(@class, "byline")]/span[contains(@class, "publish-date")]), "on ")
6
7body: //article[contains(@class, 'feature-entry')]
8body: //article
9prune: no
10tidy: no
11
12strip: //article/header
13strip: //*[@id='sticky-menu']
14strip: //aside
15strip: //nav
16
17strip_id_or_class: gallery
18strip_id_or_class: article-meta
19strip_id_or_class: story-navigation
20strip_id_or_class: slegend
21strip_id_or_class: related-product-meta
22strip_id_or_class: comments
23strip_id_or_class: ui-jump-list
24strip_id_or_class: pullquote
25
26strip: //q
27
28strip: //a[contains(@class, 'entry-section-title')]
29
30test_url: http://www.theverge.com/2012/2/29/2821763/lytro-review
31test_url: http://www.theverge.com/2011/11/3/2534861/nokia-lumia-800-review \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/theweek.com.txt b/inc/3rdparty/site_config/standard/theweek.com.txt
new file mode 100644
index 00000000..27281ceb
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/theweek.com.txt
@@ -0,0 +1,4 @@
1body: //div[@class="briefingEntry"]
2prune: no
3
4test_url: http://theweek.com/article/index/215763/insider-trading-on-capitol-hill \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thinkprogress.org.txt b/inc/3rdparty/site_config/standard/thinkprogress.org.txt
new file mode 100644
index 00000000..8934b68e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/thinkprogress.org.txt
@@ -0,0 +1,4 @@
1author: //p[@class="byline"]/a
2body: //div[@class="post"]
3
4test_url: http://thinkprogress.org/special/2011/11/12/367040/harvard-law-professor-criticizes-homeland-security-feel-of-overreaction-to-occupy-harvard/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thisdaylive.com.txt b/inc/3rdparty/site_config/standard/thisdaylive.com.txt
new file mode 100644
index 00000000..958d4b27
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/thisdaylive.com.txt
@@ -0,0 +1,2 @@
1body: //div[@class='main-content-panel']/div[@class='img'] | //div[@id='page_content_Content9_oModuleContent_2_div_Body']
2test_url: http://www.thisdaylive.com/articles/australia-pm-talks-human-rights-with-chinas-wen/90394/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thisismynext.com.txt b/inc/3rdparty/site_config/standard/thisismynext.com.txt
new file mode 100644
index 00000000..6850b4be
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/thisismynext.com.txt
@@ -0,0 +1,8 @@
1author: //div[@class='meta clearfix']/a
2body: //div[@class='post']
3
4strip: //div[@class='metaCat']
5strip: //div[@class='post']/h1
6strip: //div[@class='post']/div[@class='meta clearfix']
7strip: //div[@class='post']/div[@class='social-bar clearfix']
8test_url: http://thisismynext.com/2011/10/18/galaxy-nexus-android-ice-cream-sandwich-pictures-video-hands-on/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tidbits.com.txt b/inc/3rdparty/site_config/standard/tidbits.com.txt
new file mode 100644
index 00000000..8bcf2ec1
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/tidbits.com.txt
@@ -0,0 +1,3 @@
1author: //span[@class='fn']
2date: substring-before(substring-after(//*[@id='center_ajax_sub']/div/div[3],'|'),'|')
3test_url: http://tidbits.com/article/12651 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/time.com.txt b/inc/3rdparty/site_config/standard/time.com.txt
new file mode 100644
index 00000000..fd3fe08c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/time.com.txt
@@ -0,0 +1,14 @@
1# 2011-10-25 - carlo@... - Initial setup.
2
3single_page_link: //li[@class='print']/a/@href
4
5title: //h1
6author: //meta[@name="byline"]/@content
7date: //meta[@name="date"]/@content
8
9strip: //span[@class="see"]
10strip: //div[@class="byline"]
11strip: //div[@id="date2"]
12strip: //h1
13
14test_url: http://www.time.com/time/specials/packages/article/0,28804,2094921_2094923_2094924,00.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/timeshighereducation.co.uk.txt b/inc/3rdparty/site_config/standard/timeshighereducation.co.uk.txt
new file mode 100644
index 00000000..17297732
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/timeshighereducation.co.uk.txt
@@ -0,0 +1,6 @@
1title: //h1
2body: //div[@class="storytext"]
3strip: //div[@id="thelogin"]
4strip: //*[@class="hide"]
5strip: //div[@id="anchored"]
6test_url: http://www.timeshighereducation.co.uk/story.asp?sectioncode=26&storycode=416124&c=1 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tipb.com.txt b/inc/3rdparty/site_config/standard/tipb.com.txt
new file mode 100644
index 00000000..9533eb0f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/tipb.com.txt
@@ -0,0 +1,9 @@
1body: //div[@id='content']
2
3strip_id_or_class: featured-box
4strip_id_or_class: postmeta
5strip_id_or_class: respond
6
7author: //a[contains(@href, '/author/') and contains(@title, 'Posts by')]
8date: substring-before(//a[contains(@href, '/author/') and contains(@title, 'Posts by')]/.., ' by ')
9test_url: http://www.tipb.com/2011/10/17/iphone-4s-review/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tnr.com.txt b/inc/3rdparty/site_config/standard/tnr.com.txt
new file mode 100644
index 00000000..65a1899f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/tnr.com.txt
@@ -0,0 +1,17 @@
1title: //div[contains(@class, 'article_detail')]/div[@class='entry_header']/h1
2title: //div[contains(@class, 'article_detail')]//h1
3title: //h1
4
5body: //div[contains(@class, 'article_detail')]
6
7author: //div[@class='article_detail']/div[@class='entry_header']/li/div[@class='author']//h3
8author: div[@class='author']//h3
9strip: //div[contains(@class, 'field-field-book-cover')]
10
11date: translate(//*[@class='post_date' and contains(., ' 20')], '|', '')
12
13prune: no
14
15single_page_link: //a[@class='print-page']
16
17test_url: http://www.tnr.com/blog/jonathan-chait/92991/did-obama-get-rolled \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tomdispatch.com.txt b/inc/3rdparty/site_config/standard/tomdispatch.com.txt
new file mode 100644
index 00000000..d8548c78
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/tomdispatch.com.txt
@@ -0,0 +1,6 @@
1title: //div[@id='maincontent']//div[@class='title']
2body: //div[@id='maincontent']//div[@class='byline'] | //div[@id='maincontent']//div[@class='meat']
3
4tidy: no
5
6test_url: http://www.tomdispatch.com/post/175436/tomgram:_noam_chomsky%2C_the_imperial_mentality_and_9_11/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tomshardware.com.txt b/inc/3rdparty/site_config/standard/tomshardware.com.txt
new file mode 100644
index 00000000..2bba6de8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/tomshardware.com.txt
@@ -0,0 +1,8 @@
1tidy: no
2title: //title
3author: //a[@itemprop = 'author']
4date: //time[@itemprop = 'datePublished']
5body: //div[@id = 'intelliTXT']
6
7next_page_link: //li[@class="pagin next"]/a
8test_url: http://www.tomshardware.com/reviews/gaming-graphics-card-review,3107.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tomshardware.de.txt b/inc/3rdparty/site_config/standard/tomshardware.de.txt
new file mode 100644
index 00000000..e910003c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/tomshardware.de.txt
@@ -0,0 +1,12 @@
1body://div[@id="news-content"]/div[@id="intelliTXT"][1]
2
3author://div[@id="header-news-infos"]/a[1]
4
5date: //div[@id="header-news-infos"]/span[1]
6
7title://h1[@id="header-news-title" and @class="hardwareTitle"][1]
8
9strip://div[@id="news-content"]/div[@id="intelliTXT"]/table
10
11footnotes: no
12test_url: http://www.tomshardware.de/DDR4-DDR3-ISSCC-Samsung-Hynix,news-247133.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/toolsandtoys.net.txt b/inc/3rdparty/site_config/standard/toolsandtoys.net.txt
new file mode 100644
index 00000000..dbe60b15
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/toolsandtoys.net.txt
@@ -0,0 +1,6 @@
1body: //div[@class='post']
2
3strip: //div[@class='social']
4strip: //span[@class='next']
5strip: //span[@class='previous']
6test_url: http://toolsandtoys.net/noble-tonic-02/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/trailer.web-view.net.txt b/inc/3rdparty/site_config/standard/trailer.web-view.net.txt
new file mode 100644
index 00000000..e7a9c82d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/trailer.web-view.net.txt
@@ -0,0 +1,2 @@
1title: concat(substring-before(//title,':'),': ',//div[@class='Date2'])
2test_url: http://trailer.web-view.net/Show/0XC4EFE5D648B716BA2E134BC7CE61B9CC001E04F11E9434438186735DBD637488.htm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/traningslara.se.txt b/inc/3rdparty/site_config/standard/traningslara.se.txt
new file mode 100644
index 00000000..96e491fa
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/traningslara.se.txt
@@ -0,0 +1,8 @@
1title: //div[@class="Post-body"]//span[@class="PostHeader"]
2author: //div[@class="PostHeaderIcons metadata"]/a[@title="Author"]
3date: substring-before(//div[@class="PostHeaderIcons metadata"], '|')
4body: //div[@class="Post-body"]
5strip_id_or_class: print1
6strip_id_or_class: metadata
7strip_id_or_class: authorbox
8test_url: http://traningslara.se/skoinlagg-och-skador-finns-det-nagot-samband/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/triblive.com.txt b/inc/3rdparty/site_config/standard/triblive.com.txt
new file mode 100644
index 00000000..82797db9
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/triblive.com.txt
@@ -0,0 +1,13 @@
1title: //title
2author: //span/a
3date: substring-after(//small,'Published:')
4
5strip: //h1[@class='vert_class']
6strip: //h1[@class='headline']
7strip: //img[contains(@src,'logo_triblive.gif')]
8
9#strip: //h6
10#strip_img_src: logo_triblive.gif
11
12single_page_link: //a[@class='stprint']
13test_url: http://triblive.com/sports/2819913-85/lemieux-deal-penguins-burkle-nhl-owners-team-mario-bettman-case \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/truthdig.com.txt b/inc/3rdparty/site_config/standard/truthdig.com.txt
new file mode 100644
index 00000000..e7c1a4bc
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/truthdig.com.txt
@@ -0,0 +1,10 @@
1title: //div[@class='printbody']/h1
2body: //div[@class='printbody']
3prune: no
4
5strip: //div[@class='printbody']/a[@href='http://www.truthdig.com/']
6strip: //table[@class='footer']
7
8single_page_link: //div[@class='article_tools']//a[contains(@href, '/print/')]
9
10test_url: http://www.truthdig.com/report/item/the_election_march_of_the_trolls_20110829/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tthfanfic.org.txt b/inc/3rdparty/site_config/standard/tthfanfic.org.txt
new file mode 100644
index 00000000..0dab5b0f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/tthfanfic.org.txt
@@ -0,0 +1,4 @@
1title: //h2
2author: //a[starts-with(@href, '/AuthorStories')]
3body: //div[@id='storyinnerbody']
4test_url: http://www.tthfanfic.org/Story-6512/Kudra+Journeys.htm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tthor.com.txt b/inc/3rdparty/site_config/standard/tthor.com.txt
new file mode 100644
index 00000000..902fcd13
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/tthor.com.txt
@@ -0,0 +1,2 @@
1prune: no
2test_url: http://www.tthor.com/06/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tuaw.com.txt b/inc/3rdparty/site_config/standard/tuaw.com.txt
new file mode 100644
index 00000000..b86f8ccb
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/tuaw.com.txt
@@ -0,0 +1,6 @@
1title: //h1[@class='posttitle']
2author: //span[@class='author']/a
3date: //span[@class='timestamp']
4body: //div[@class='body']
5
6test_url: http://www.tuaw.com/2011/10/19/apple-posts-fans-memories-of-steve-jobs/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tuckreview.com.txt b/inc/3rdparty/site_config/standard/tuckreview.com.txt
new file mode 100644
index 00000000..a3946cbc
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/tuckreview.com.txt
@@ -0,0 +1,6 @@
1title: //h1[@class='post-title']
2author: //div[@class='display-name']
3date: //div[@class='date']
4body: //div[@class='body']
5footnotes: no
6test_url: http://tuckreview.com/2012/8/14/migrating-to-v6 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tvtropes.org.txt b/inc/3rdparty/site_config/standard/tvtropes.org.txt
new file mode 100644
index 00000000..08dbba59
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/tvtropes.org.txt
@@ -0,0 +1,20 @@
1# Google Custom Search
2strip_id_or_class: google_branding_style
3
4# Avoid double title
5strip_id_or_class: pagetitle
6
7# external links are labelled
8strip_image_src: http://static.mediatropes.info/pmwiki/pub/external_link.gif
9
10title: //div[@class="pagetitle"]
11body: //div[@id="wikitext"]
12
13# don't get clever.
14strip_comments: no
15prune: no
16
17# navigation in footer lives inside the wikitext div, annoyingly.
18strip_id_or_class: pathholder
19
20test_url: http://tvtropes.org/pmwiki/pmwiki.php/Main/WithinParameters \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/twitter.com.txt b/inc/3rdparty/site_config/standard/twitter.com.txt
new file mode 100644
index 00000000..12ab1546
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/twitter.com.txt
@@ -0,0 +1,9 @@
1title: //title
2body: (//p[contains(@class, 'js-tweet-text')])[1]
3author: (//strong[contains(@class, 'fullname')])[1]
4date: //span[contains(@class, 'js-short-timestamp')]/@data-time
5
6prune: no
7tidy: no
8
9test_url: https://twitter.com/medialens/status/216883678582804480 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/uefa.com.txt b/inc/3rdparty/site_config/standard/uefa.com.txt
new file mode 100644
index 00000000..088d6586
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/uefa.com.txt
@@ -0,0 +1,6 @@
1body: //div[@class='d3cmsCBody']//div[@class='pubText pubDate' or @class='newsComment' or contains(@class, 'newsPhoto') or @class='newsText']
2strip: //div[contains(@class, 'mpindex')]
3prune: no
4tidy: no
5
6test_url: http://www.uefa.com/uefaeuropaleague/news/newsid=1617320.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/uk.xbox360.ign.com.txt b/inc/3rdparty/site_config/standard/uk.xbox360.ign.com.txt
new file mode 100644
index 00000000..29e19565
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/uk.xbox360.ign.com.txt
@@ -0,0 +1,23 @@
1# applies to uk.ds.ign.com, uk.wii.ign.com etc.
2# possibly to non-UK versions, but I can't test that
3
4title: //h1[@class="headline"]
5author: //div[@class="hdr-sub byline"]/a
6date: //h2[@class="publish-date"]/span
7body: //div[@id="main-article-content"]
8
9strip: //ul[@class="lnks-readmore"]
10
11strip: //div[@class="inlineImageCaption"]
12# can't make the images appear, so remove the captions
13
14strip: //div[@style="width:468px"]
15# video caption links
16
17convert_double_br_tags: yes
18
19strip_comments: no
20# otherwise the 'Closing Comments' are removed
21
22# Ratings box could do with some rearranging, but it's tricky
23test_url: http://uk.xbox360.ign.com/articles/121/1210717p1.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/uni-watch.com.txt b/inc/3rdparty/site_config/standard/uni-watch.com.txt
new file mode 100644
index 00000000..cbe87d19
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/uni-watch.com.txt
@@ -0,0 +1,17 @@
1author: substring-before(substring-after(//div[@class='post-byline'], 'By '), ', on')
2date: substring-after(//div[@class='post-byline'], ', on')
3
4# for some reason, the following is producing a "no text [48]" error
5#title: //div[@class='post-headline']
6
7# for some reason, the following doesn't appear to isolate just the body copy
8body: //div[@class='post-bodycopy']
9
10# we solve the above issue by stripping out everything else we don't want
11# these can probably all be removed if the body: command above worked
12strip_id_or_class: reply
13strip_id_or_class: left
14strip_id_or_class: post-headline
15strip_id_or_class: post-byline
16strip_id_or_class: footer
17test_url: http://www.uni-watch.com/2011/10/18/the-curious-case-of-steve-debergs-microphone-and-speaker/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/urbandictionary.com.txt b/inc/3rdparty/site_config/standard/urbandictionary.com.txt
new file mode 100644
index 00000000..86061f77
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/urbandictionary.com.txt
@@ -0,0 +1,3 @@
1title: //title
2body: //td[@id='content']
3test_url: http://www.urbandictionary.com/define.php?term=Grown-Ass \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/usccb.org.txt b/inc/3rdparty/site_config/standard/usccb.org.txt
new file mode 100644
index 00000000..eb10a48f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/usccb.org.txt
@@ -0,0 +1,6 @@
1body: //div[@id='CS_Element_maincontent']
2
3tidy: no
4prune: no
5
6test_url: http://www.usccb.org/bible/readings/072412.cfm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/useit.com.txt b/inc/3rdparty/site_config/standard/useit.com.txt
new file mode 100644
index 00000000..f6be84c4
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/useit.com.txt
@@ -0,0 +1,8 @@
1title: //h1
2
3date: substring-after(//p[@class='overline']/strong, ',')
4body: //div[@class="maintext"]
5strip: //p[@class='overline']
6strip: //h1
7tidy: no
8test_url: http://www.useit.com/alertbox/mobile-startup-screen.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ux.artu.tv.txt b/inc/3rdparty/site_config/standard/ux.artu.tv.txt
new file mode 100644
index 00000000..a893bda0
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/ux.artu.tv.txt
@@ -0,0 +1,7 @@
1author: ("Arturo Toledo")
2title: //div[@class="post"]/h2
3body: //div[@class="entry"]
4
5# Remove Twitter button
6strip: //div[@class="entry"]/p[2]/a/img
7test_url: http://ux.artu.tv/?p=192 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/uzivatelsketestovani.cz.txt b/inc/3rdparty/site_config/standard/uzivatelsketestovani.cz.txt
new file mode 100644
index 00000000..3661b06a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/uzivatelsketestovani.cz.txt
@@ -0,0 +1,2 @@
1title:h1
2test_url: http://www.uzivatelsketestovani.cz/wiki/doku.php/skoleni-axure-rp \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/vanityfair.com.txt b/inc/3rdparty/site_config/standard/vanityfair.com.txt
new file mode 100644
index 00000000..bfc47d1f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/vanityfair.com.txt
@@ -0,0 +1,30 @@
1title: //meta[@property="og:title"]/@content
2author: //div[contains(@class, 'byline')]//span[contains(@class, 'name')]
3date: //div[contains(@class, 'cn_date_time')]
4body: //div[contains(@class, 'pageContainers')]
5body: //article[@id='items-container']
6#body: //h2[@class='sub-header'] | //div[contains(@class, 'contributor-type') or @class='display-date' or @class='content-container']
7
8strip_id_or_class: bc
9strip_id_or_class: utilities
10strip_id_or_class: list-supporting
11strip_id_or_class: yrail
12strip_id_or_class: urail
13
14prune: no
15#tidy: no
16
17strip_id_or_class: super-rubric-section
18strip_id_or_class: cn_date_time
19strip_id_or_class: cn_contributors
20strip_id_or_class: cn_pagination_controls
21strip_id_or_class: cn_features_container
22strip_id_or_class: global-footer
23strip_id_or_class: cn_ecom_placement
24strip: //li[@class='blogNavPrev']
25
26single_page_link: //a[@title='Print this page']
27
28test_url: http://www.vanityfair.com/politics/features/2011/05/egypt-revolutionaries-201105
29test_url: http://www.vanityfair.com/politics/features/2008/08/hitchens200808
30test_url: http://www.vanityfair.com/style/2012/01/prisoners-of-style-201201 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/varingen.no.txt b/inc/3rdparty/site_config/standard/varingen.no.txt
new file mode 100644
index 00000000..6b5e0ae0
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/varingen.no.txt
@@ -0,0 +1,5 @@
1title: //div[@class='ArticleHeadlineDetailedView']
2date: //span[@class='ArticlePublicationDateTimeDetailedView']
3author://span[@class='ArticleBylineDetailedView']
4body: //div[@class='ArticleTextDetailedView']
5test_url: http://www.varingen.no/Nyheter/tabid/392/Default.aspx?ModuleId=56651&articleView=true \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/varsity.co.uk.txt b/inc/3rdparty/site_config/standard/varsity.co.uk.txt
new file mode 100644
index 00000000..b1db4c35
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/varsity.co.uk.txt
@@ -0,0 +1,4 @@
1# FB comments are inside an h2. Weird. Without this, the line 'Comments' is preserved by the text parser
2
3strip: //h2
4test_url: http://www.varsity.co.uk/reviews/2662 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/vedomosti.ru.txt b/inc/3rdparty/site_config/standard/vedomosti.ru.txt
new file mode 100644
index 00000000..ba999171
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/vedomosti.ru.txt
@@ -0,0 +1,3 @@
1title: //td[@class='second_content']/h1
2body: //td[@class='second_content']/div[@class='article_text']
3test_url: http://www.vedomosti.ru/newspaper/article/259377/rasprodazha_mailru \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/veggbilder.no.txt b/inc/3rdparty/site_config/standard/veggbilder.no.txt
new file mode 100644
index 00000000..14144c0f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/veggbilder.no.txt
@@ -0,0 +1,5 @@
1author: //div[@class="blogginnleggForfatter"]
2date: concat(//div[@class='blogginnleggDatoDag'],' ',//div[@class='blogginnleggDatoMnd'])
3strip: //div[contains(@id,"bloggDelingslenker")]
4strip: //div[contains(@id,"bloggDelingslenker")]
5test_url: http://veggbilder.no/blogginnlegg/fristelser \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/vemedio.com.txt b/inc/3rdparty/site_config/standard/vemedio.com.txt
new file mode 100644
index 00000000..294ace9c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/vemedio.com.txt
@@ -0,0 +1,6 @@
1title: //h2
2date: substring-before(//small," &bull; Permalink")
3author:string('Martin Hering')
4
5Strip: //p/small
6test_url: http://vemedio.com/blog/posts/state-of-support-and-icloud \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/venturebeat.com.txt b/inc/3rdparty/site_config/standard/venturebeat.com.txt
new file mode 100644
index 00000000..41bfa8c5
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/venturebeat.com.txt
@@ -0,0 +1,6 @@
1title: //h1[@class="entry-title"]
2author: //div[@class="author-name"]
3date: //span[@class="the-time"]
4body: //div[@class="entry-content"]
5strip: //div[@class="vb-gallery"]
6test_url: http://venturebeat.com/2012/07/17/marissa-mayer-yahoo/#s:mayer-1 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/version.php b/inc/3rdparty/site_config/standard/version.php
index e61807ed..34a87357 100644
--- a/inc/3rdparty/site_config/standard/version.php
+++ b/inc/3rdparty/site_config/standard/version.php
@@ -1,2 +1 @@
1<?php <?php return 4; ?> \ No newline at end of file
2return 1; \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/version.txt b/inc/3rdparty/site_config/standard/version.txt
new file mode 100644
index 00000000..bf0d87ab
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/version.txt
@@ -0,0 +1 @@
4 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/version2.dk.txt b/inc/3rdparty/site_config/standard/version2.dk.txt
new file mode 100644
index 00000000..74203cad
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/version2.dk.txt
@@ -0,0 +1,12 @@
1title: //article/header/h1
2
3author: //article/header/section[@class='byline']/span[contains(@class, 'author')]/a
4date: //article/header/section[@class='byline']/span[@class='published']/span
5
6body: //article/section[@class='body']
7
8convert_double_br_tags: yes
9
10# This is required, because Tidy chokes on the HTML5 tags...
11tidy: no
12test_url: http://www.version2.dk/artikel/17069-amerikansk-hit-investor-er-vild-med-danske-net-ivaerksaettere \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/verybestbaking.com.txt b/inc/3rdparty/site_config/standard/verybestbaking.com.txt
new file mode 100644
index 00000000..4cdd0c0f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/verybestbaking.com.txt
@@ -0,0 +1,7 @@
1title: //title
2body: //div[contains(@class, 'printRecipe')]
3strip: //div[@class='recipeHeader']
4prune: no
5tidy: no
6single_page_link: //ul[@class='printOptions']//a[contains(@href, 'detail.aspx?p=1&showphoto=true')]
7test_url: http://www.verybestbaking.com/recipes/143190/Penne-Pasta-with-Sun-dried-Tomato-Cream-Sauce/detail.aspx \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/vg.no.txt b/inc/3rdparty/site_config/standard/vg.no.txt
new file mode 100644
index 00000000..fceeea09
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/vg.no.txt
@@ -0,0 +1,3 @@
1body: //div[@id='artikkelspalte']
2strip_id_or_class: 'breadcrumb'
3test_url: http://www.vg.no/spill/artikkel.php?artid=10003628 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/video.forbes.com.txt b/inc/3rdparty/site_config/standard/video.forbes.com.txt
new file mode 100644
index 00000000..1dca55a3
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/video.forbes.com.txt
@@ -0,0 +1,9 @@
1title: concat("Video: ", //div[@id='currentVideoTitleDivId'])
2body: //div[@id='currentVideoDescriptionId']
3author: //meta[@name='author']/@content
4
5replace_string(<div id="currentVideoDescriptionId" style="display): <div id="currentVideoDescriptionId" style="displayitplease
6
7replace_string(<div id="currentVideoTitleDivId" style="display): <div id="currentVideoTitleDivId" style="displayitplease
8
9test_url: http://video.forbes.com/fvn/business/wells-fargo-inside-the-bank-that-works \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/videogum.com.txt b/inc/3rdparty/site_config/standard/videogum.com.txt
new file mode 100644
index 00000000..a1663813
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/videogum.com.txt
@@ -0,0 +1,6 @@
1title: //h2[@class='posttitle']
2date: substring-before(substring-after(//span[@class='postdate'], 'on '), ' by')
3date: //span[@class='postdate']
4author: //span[@class='postdate']/a
5body: //div[@class='entry line_top']
6test_url: http://videogum.com/395042/here-are-some-afternoon-links-92/list/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/villagevoice.com.txt b/inc/3rdparty/site_config/standard/villagevoice.com.txt
new file mode 100644
index 00000000..df374602
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/villagevoice.com.txt
@@ -0,0 +1,9 @@
1title: //h2[@class='headline']
2
3body: //div[@class='ContentPrint']
4
5prune: no
6
7single_page_link: //a[contains(@href, '/printVersion/')]
8
9test_url: http://www.villagevoice.com/2010-03-16/news/new-york-s-ten-worst-landlords/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/vimeo.com.txt b/inc/3rdparty/site_config/standard/vimeo.com.txt
new file mode 100644
index 00000000..d6c6701a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/vimeo.com.txt
@@ -0,0 +1,17 @@
1title: //title
2body: //iframe
3
4find_string: <html>&lt;iframe
5replace_string: <iframe id="video"
6
7find_string: &gt;&lt;/iframe&gt;</html>
8replace_string: ></iframe>
9
10replace_string(&quot;): "
11
12single_page_link: //link[@type='text/xml+oembed']
13
14prune: no
15tidy: no
16
17test_url: http://vimeo.com/35941909 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/visir.is.txt b/inc/3rdparty/site_config/standard/visir.is.txt
new file mode 100644
index 00000000..0f03198e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/visir.is.txt
@@ -0,0 +1,14 @@
1# Author's name, when present, has 'skrifar:' ('writes:') appended to it.
2# In case of multiple authors, this would be 'skrifa:', hence only 7 characters
3# are stripped off.
4author: substring(//div[@class='paragraph']/div[@class='meta'], 0, string-length(//div[@class='paragraph']/div[@class='meta']) - 7)
5
6date: //span[@class='date']
7title: //h1
8body: //div[@class='paragraph']
9
10# Strip out author string when present
11strip: //div[@class='paragraph']/div[@class='meta']
12
13convert_double_br_tags: yes
14test_url: http://visir.is/esb,-ipa,-bhm-og-bsrb/article/2012701319997 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/vitispr.com.txt b/inc/3rdparty/site_config/standard/vitispr.com.txt
new file mode 100644
index 00000000..8b2a300e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/vitispr.com.txt
@@ -0,0 +1,6 @@
1strip: //*[(@id = "ja-search")]
2body: //*[(@id = "ja-mainbody")]
3body: //*[(@id = "content-mass-bottom")]
4strip://h3[contains(span,'Related Posts')]
5strip://img
6test_url: http://vitispr.com/blog/coventry-is-a-technology-hotspot \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/vivirmexico.com.txt b/inc/3rdparty/site_config/standard/vivirmexico.com.txt
new file mode 100644
index 00000000..e6a72700
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/vivirmexico.com.txt
@@ -0,0 +1,2 @@
1body: //*[(@class = "historia")]
2test_url: http://vivirmexico.com/2011/09/en-veracruz-arrojan-35-cuerpos-a-plena-luz-del-dia-esta-si-es-una-alarma-social \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/vnexpress.net.txt b/inc/3rdparty/site_config/standard/vnexpress.net.txt
new file mode 100644
index 00000000..23c928bf
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/vnexpress.net.txt
@@ -0,0 +1,8 @@
1body: //div[@cpms_content]//h2[@class='Lead'] | //div[@cpms_content]//p[@class='Normal'] | //div[@cpms_content]//table
2strip://div[@class="box-item"]
3strip://div[@id="ARTICLE_BANNER"]
4strip://a
5strip://div[@class="tag-parent"]
6strip://div[@class="email-print txtr"]
7
8test_url: http://vnexpress.net/gl/xa-hoi/2011/04/tim-thay-nan-nhan-cuoi-cung-vu-sap-mo-da-o-len-co/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/voices.washingtonpost.com.txt b/inc/3rdparty/site_config/standard/voices.washingtonpost.com.txt
new file mode 100644
index 00000000..6bd0e855
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/voices.washingtonpost.com.txt
@@ -0,0 +1,3 @@
1title: //h1
2body: //div[@class='entrytext']
3test_url: http://voices.washingtonpost.com/ezra-klein/2010/10/why_isnt_monetary_policy_discr.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/vworker.com.txt b/inc/3rdparty/site_config/standard/vworker.com.txt
new file mode 100644
index 00000000..a39c9f4e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/vworker.com.txt
@@ -0,0 +1,3 @@
1body: //div[contains(@class, 'KonaBody')]
2
3test_url: http://www.vworker.com/RentACoder/misc/BidRequests/ShowBidRequest.asp?lngBidRequestId=1634186 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/waffle.wootest.net.txt b/inc/3rdparty/site_config/standard/waffle.wootest.net.txt
new file mode 100644
index 00000000..afcba0f3
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/waffle.wootest.net.txt
@@ -0,0 +1,4 @@
1title: //h2[@class="title"]
2body: //div[@class="post"]
3
4test_url: http://waffle.wootest.net/2011/06/22/on-reading-news/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/walrusmagazine.com.txt b/inc/3rdparty/site_config/standard/walrusmagazine.com.txt
new file mode 100644
index 00000000..3ab22172
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/walrusmagazine.com.txt
@@ -0,0 +1,14 @@
1title: //div[@id='pr']/h3
2author: //div[@class='dateline']//a[contains(@href, '/author/')]
3
4# print page
5body: //div[@id='prbody']
6# standard page
7body: //div[@id='pgbody']
8
9# for multi-page articles
10single_page_link: //div[@class='tipjar']//a[contains(@href, '/printerFriendly.php?')]
11
12prune: no
13
14test_url: http://www.walrusmagazine.com/articles/2011.12-memoir-kidnapped \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/warnerbros.fr.txt b/inc/3rdparty/site_config/standard/warnerbros.fr.txt
new file mode 100644
index 00000000..a41a3511
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/warnerbros.fr.txt
@@ -0,0 +1,3 @@
1title: //h3
2body: //div[@class="content_wysiwyg"]
3test_url: http://www.warnerbros.fr/game-of-thrones-un-junket-vu-de-l-interieur-268.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/washingtonmonthly.com.txt b/inc/3rdparty/site_config/standard/washingtonmonthly.com.txt
new file mode 100644
index 00000000..edf16422
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/washingtonmonthly.com.txt
@@ -0,0 +1,10 @@
1title://a[@class = 'headline-article']
2
3author: substring-after(//div[@class = 'article']/p[@class = 'author'], 'By ')
4date://div[@class = 'article']/span[@class = 'date']
5body://div[@class = 'article']
6single_page_link://a[@class = 'print']
7strip://p[@class = 'author']
8strip://a[@class = 'headline-article']
9strip://span[@class = 'date']
10test_url: http://www.washingtonmonthly.com/magazine/julyaugust_2011/features/the_trinity_sisters030380.php \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/washingtonpost.com.txt b/inc/3rdparty/site_config/standard/washingtonpost.com.txt
new file mode 100644
index 00000000..2931ca5f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/washingtonpost.com.txt
@@ -0,0 +1,21 @@
1body: //div[@class="article_body"]
2author://meta[@name='DC.creator']/@content
3title://meta[@name='title']/@content
4date://div[contains(@class,'byline')]//span[contains(@class,'published')]/@title
5date://meta[@name="DC.date.issued"]/@content
6strip://div[@class="relative primary-slot padding-top img-border gallery-container photo-wrapper"]
7strip://div[@id="wp-column six end"]
8strip://div[contains(@class,'hidden')]
9strip://div[@id='article-side-rail']
10strip://div[@class="module component todays-paper-module curved"]
11strip://div[@class="module component live-qa curved img-border"]
12strip://div[@class="module component newsletter-signup curved"]
13strip://div[@class="module featured-stories component curved img-border"]
14
15strip_id_or_class: carousel
16strip_id_or_class: toolbar
17strip_id_or_class: module
18
19test_url: http://www.washingtonpost.com/world/europe/in-europe-new-fears-of-german-might/2011/10/19/gIQA3baZ7L_story.html?hpid=z1
20test_url: http://www.washingtonpost.com/national/health-science/radical-theory-of-first-americans-places-stone-age-europeans-in-delmarva-20000-years-ago/2012/02/28/gIQA4mriiR_story.html
21test_url: http://www.washingtonpost.com/lifestyle/magazine/the-sorry-fate-of-a-tech-pioneer-halsey-minor-and-historic-virginia-estate-carters-grove/2012/05/30/gJQAwdJG4U_story.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/web-libre.org.txt b/inc/3rdparty/site_config/standard/web-libre.org.txt
new file mode 100644
index 00000000..dfcd0081
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/web-libre.org.txt
@@ -0,0 +1,6 @@
1body: //div[@id='template_article']
2
3strip_id_or_class: article_more
4strip: //hr
5
6test_url: http://www.web-libre.org/dossiers/jacuzzi-gonflable,8493.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/weblog.bignerdranch.com.txt b/inc/3rdparty/site_config/standard/weblog.bignerdranch.com.txt
new file mode 100644
index 00000000..9e75a8a8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/weblog.bignerdranch.com.txt
@@ -0,0 +1,5 @@
1title://div[@class="post"]/h2
2author://p[@class="postinfo"]/a
3date:substring-before(substring-after(//p[@class="postinfo"],' on '),' under ')
4body://div[@class="contenttext"]
5test_url: http://weblog.bignerdranch.com/?p=304 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/weblogs.asp.net.txt b/inc/3rdparty/site_config/standard/weblogs.asp.net.txt
new file mode 100644
index 00000000..3fabda0b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/weblogs.asp.net.txt
@@ -0,0 +1,9 @@
1title: //h2[@class="pageTitle"]
2strip: //div[@class="postfoot"]
3strip: //h2[@class="pageTitle"]
4strip: //h3[@class="pageTitle"]
5body: //div[@class="post"]
6author: substring-before(substring-after(//div[@class="postfoot"], 'by'), 'Filed')
7date: substring-before(substring-after(//div[@class="postfoot"], 'Published'), 'by')
8
9test_url: http://weblogs.asp.net/scottgu/archive/2011/08/31/html-editor-smart-tasks-and-event-handler-generation-asp-net-vnext-series.aspx \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/webpaper.nzz.ch.txt b/inc/3rdparty/site_config/standard/webpaper.nzz.ch.txt
new file mode 100644
index 00000000..8922b02f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/webpaper.nzz.ch.txt
@@ -0,0 +1,8 @@
1tidy: no
2dissolve: //div[@id="content"]/div/article/header
3body: //div[@id="content"]/div/article
4title: //div[@id="content"]/div/article/h1
5date: //div[@id="content"]/div/article/header/div[@id="issueSelectTrigger"]
6strip: //div[@id="content"]/div/article/h1
7
8test_url: http://webpaper.nzz.ch/2012/06/23/front/JJKMS/aphrodite-und-die-kommunisten?guest_pass=24a3ca5b6d%3AJJKMS%3Ad30e1be8628c099669671d4da56cdce4187790ba \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/welt.de.txt b/inc/3rdparty/site_config/standard/welt.de.txt
new file mode 100644
index 00000000..6e4f828f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/welt.de.txt
@@ -0,0 +1,22 @@
1# set body
2tidy: no
3body: //div[contains(@class, 'articleContent')]
4
5# remove clutter
6strip: //div[@class='advertising']
7strip: //div[@class='themenalarm']
8strip: //div[contains(@class, 'inTextTeaser')]
9
10# remove captions
11strip: //span[@class='copyRight']
12
13# remove photo galleries and extras
14strip: //div[contains(@class, 'textGallery')]
15strip: //div[contains(@class, 'videoGallery')]
16strip: //div[contains(@class, 'imageGallery')]
17strip: //div[contains(@class, 'openContent')]
18
19# remove comments
20strip: //div[@id = 'writeComment']
21
22test_url: http://www.welt.de/vermischtes/weltgeschehen/article11050589/27-Bergleute-in-neuseelaendischer-Mine-vermisst.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/westhamtillidie.com.txt b/inc/3rdparty/site_config/standard/westhamtillidie.com.txt
new file mode 100644
index 00000000..b9343029
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/westhamtillidie.com.txt
@@ -0,0 +1,6 @@
1title: substring-before(//title, '«')
2
3body: //div[@class='entry']
4strip: //div[@class='sharing_label']
5strip: //div[@class='snap_nopreview sharing robots-nocontent']
6test_url: http://www.westhamtillidie.com/2012/03/11/twelve-things-we-learned-from-the-doncaster-game/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/what-if.xkcd.com.txt b/inc/3rdparty/site_config/standard/what-if.xkcd.com.txt
new file mode 100644
index 00000000..a88a02c9
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/what-if.xkcd.com.txt
@@ -0,0 +1,2 @@
1autodetect_next_page: no
2test_url: http://what-if.xkcd.com/1/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/whatever.scalzi.com.txt b/inc/3rdparty/site_config/standard/whatever.scalzi.com.txt
new file mode 100644
index 00000000..52c5cf1b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/whatever.scalzi.com.txt
@@ -0,0 +1,7 @@
1strip: //div[@class="navigation"]
2strip: //div[@id="sidebar"]
3strip: //div[@id="post-extra-content"]
4strip: //div[@id="footer"]
5strip: //div[contains(@class, "sharing")]
6
7test_url: http://whatever.scalzi.com/2011/01/09/quick-giffords-follow-up/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/wheelyric.com.txt b/inc/3rdparty/site_config/standard/wheelyric.com.txt
new file mode 100644
index 00000000..aa9783cf
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/wheelyric.com.txt
@@ -0,0 +1,11 @@
1body://div[contains(@class,'oAndtLyrics')]
2strip://div[contains(@class,'info')]
3strip://div[contains(@id,'romanization')]
4strip://div[contains(@id,'youtube')]
5strip://div[contains(@id,'romanizationSelector')]
6strip://div[contains(@id,'langSelectWrap')]
7strip://div[contains(@id,'requestTranslationWrap')]
8strip://div[contains(@id,'viewMore')]
9strip://div[contains(@class,'lyricsListInMainContent')]
10strip://div[contains(@class,'descIpNoti')]
11test_url: http://wheelyric.com/lyrics/121#2 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/wiki.guildwars.com.txt b/inc/3rdparty/site_config/standard/wiki.guildwars.com.txt
new file mode 100644
index 00000000..1f262a0a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/wiki.guildwars.com.txt
@@ -0,0 +1,8 @@
1title: //h1
2body: //div[@id='content']
3strip_id_or_class: editsection
4strip_id_or_class: toc
5strip: //div[@id='siteNotice']
6strip: //div[@id='content']//table[last()]
7prune: no
8test_url: http://wiki.guildwars.com/wiki/Monk \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/wiki.guildwars2.com.txt b/inc/3rdparty/site_config/standard/wiki.guildwars2.com.txt
new file mode 100644
index 00000000..e176907e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/wiki.guildwars2.com.txt
@@ -0,0 +1,8 @@
1title: //h1
2body: //div[@id='content']
3strip_id_or_class: editsection
4strip_id_or_class: toc
5strip: //div[@id='siteNotice']
6strip: //div[@id='content']//table[last()]
7prune: no
8test_url: http://wiki.guildwars2.com/wiki/Guardian \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/wikitravel.org.txt b/inc/3rdparty/site_config/standard/wikitravel.org.txt
new file mode 100644
index 00000000..da5bd0b5
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/wikitravel.org.txt
@@ -0,0 +1,14 @@
1# copied from .wikipedia.org.txt
2title: //h1[@id='firstHeading' or @class='firstHeading']
3body: //div[@id = 'bodyContent']
4strip_id_or_class: editsection
5#strip_id_or_class: toc
6strip_id_or_class: vertical-navbox
7strip: //table[@id='toc'] | //div[@id='p-toc']
8strip: //div[@id='catlinks' or @id='contentSub']
9strip: //div[@id='jump-to-nav']
10strip: //div[@class='thumbcaption']//div[@class='magnify']
11strip: //table[@class='navbox']
12prune: no
13tidy: no
14test_url: http://wikitravel.org/wiki/en/index.php?title=Bangkok&printable=yes \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/will-self.com.txt b/inc/3rdparty/site_config/standard/will-self.com.txt
new file mode 100644
index 00000000..24467c22
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/will-self.com.txt
@@ -0,0 +1,4 @@
1strip: //div[@class="widget-area"]
2title: //*[@class="entry-title"]
3date: //time[@class="entry-date"]
4test_url: http://will-self.com/2012/02/01/real-meals-dominos-pizza/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/williampfaff.com.txt b/inc/3rdparty/site_config/standard/williampfaff.com.txt
new file mode 100644
index 00000000..fb5f92ed
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/williampfaff.com.txt
@@ -0,0 +1,3 @@
1title: substring-after(//span[@class='itemTitle'], ':')
2body: //div[@id='content']
3test_url: http://www.williampfaff.com/modules/news/article.php?storyid=491 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/winfuture.de.txt b/inc/3rdparty/site_config/standard/winfuture.de.txt
new file mode 100644
index 00000000..bc936370
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/winfuture.de.txt
@@ -0,0 +1,12 @@
1title: //h1/span
2
3body: //div[@id="news_content"]
4
5author: //div[@class="bookmarks_btm"]/p[1]/a[1]/text()
6
7date: //span[@class='date']
8
9# Remove the category image
10strip: //div[@id="news_content"]/a[1]
11
12test_url: http://winfuture.de/news,69672.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/winrumors.com.txt b/inc/3rdparty/site_config/standard/winrumors.com.txt
new file mode 100644
index 00000000..cedb4390
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/winrumors.com.txt
@@ -0,0 +1,6 @@
1title: //h1[@class='page-heading']
2author: //small/strong/a
3#their date string is relative, so if you save the page 2 hours after it is posted it may say 'two hours ago' instead of providing a useful date/time
4date: substring-before(substring-after(//small,'on'),'with')
5body: //div[@class='entry']
6test_url: http://www.winrumors.com/chinese-windows-phone-launch-still-on-track-for-early-2012/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/winsupersite.com.txt b/inc/3rdparty/site_config/standard/winsupersite.com.txt
new file mode 100644
index 00000000..db6a6fc9
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/winsupersite.com.txt
@@ -0,0 +1,3 @@
1date: //*[@class='kicker']
2body: //*[@class='KonaBody']
3test_url: http://www.winsupersite.com/article/paul-thurrotts-wininfo/android-malware-surges-separate-studies-141364 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/wired.com.txt b/inc/3rdparty/site_config/standard/wired.com.txt
new file mode 100644
index 00000000..69bbf5b7
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/wired.com.txt
@@ -0,0 +1,22 @@
1title: //meta[@property="og:title"]/@content
2title: //h1
3title: //*[@class='posttitle']
4author: //*[@class='entryAuthor']/a[1]
5author://*[@class='member-title']
6author://li[@class='author']/a[contains(@href, '/author/')]
7date: substring-after(//div[@class='entryAuthor'], '·')
8date: substring-before(//*[@class='entryDate'], '|')
9body: //div[@class='entry']
10strip: //span[contains(@class, 'nextprev')]
11#strip_id_or_class: ngg-galleryoverview
12# ngg-galleryoverview is the whole content sometimes, e.g. http://www.wired.com/underwire/2011/12/best-mixtapes-of-2011/?pid=5736&viewall=true
13
14strip: //p[span[contains(@class, 'contentjump')]]
15strip: //text()[contains(., 'nextpage')]
16
17prune: no
18
19single_page_link: //a[contains(@href, '/all/1') and contains(@class, 'contentjumpall')]
20
21test_url: http://www.wired.com/cloudline/2011/10/meet-arms-cortex-a15-the-future-of-the-ipad-and-possibly-the-macbook-air/
22test_url: http://www.wired.com/threatlevel/2012/05/ff_counterfeiter/all/1 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/wmnf.org.txt b/inc/3rdparty/site_config/standard/wmnf.org.txt
new file mode 100644
index 00000000..ffb6b2d1
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/wmnf.org.txt
@@ -0,0 +1,13 @@
1title: //div[@class="bodyText"]/h1/text()
2body: //div[@class="bodyText"]
3
4# author and date are separated by only a newline
5# can't figure out how to tokenize that yet
6author: //div[@class="bodyText"]/span[@class="info"]/text()
7date: //div[@class="bodyText"]/span[@class="info"]/text()
8
9# strip metadata from body text
10strip: //div[@class="bodyText"]/h1/text()
11strip: //div[@class="bodyText"]/span[@class="info"]
12strip: //div[@class="bodyText"]/span[@class="info"]
13test_url: http://www.wmnf.org/news_stories/light-rail-advocates-join-forces-to-combat-opposition-in-pinellas \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/wmpoweruser.com.txt b/inc/3rdparty/site_config/standard/wmpoweruser.com.txt
new file mode 100644
index 00000000..d9011d24
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/wmpoweruser.com.txt
@@ -0,0 +1,4 @@
1date://*[@class="entry-date"]
2author://*[@class="author vcard"]
3strip://*[@style="position:relative;left:72px;top:2px;"]|//*[@id="authorbox"]
4test_url: http://wmpoweruser.com/breaking-nokia-announces-nfc-support-in-lumia-610-windows-phone-device/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/worldpoultry.net.txt b/inc/3rdparty/site_config/standard/worldpoultry.net.txt
new file mode 100644
index 00000000..0e42ca5e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/worldpoultry.net.txt
@@ -0,0 +1,5 @@
1title: //div[@class="content article"]/h1
2date: substring-after(//*[@class='date'], '//')
3body: //*[@class='article-content']
4strip: //*[@id='nomodal']
5test_url: http://www.worldpoultry.net/news/kyrgyzstan-restricts-poultry-imports-from-russia-and-kazakhstan-9332.html \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/worldwidewords.org.txt b/inc/3rdparty/site_config/standard/worldwidewords.org.txt
new file mode 100644
index 00000000..733d607f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/worldwidewords.org.txt
@@ -0,0 +1,4 @@
1title: //p[@id='content']
2
3body: //div[@class='contentblock']
4test_url: http://www.worldwidewords.org/weirdwords/ww-gro1.htm \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/wow.joystiq.com.txt b/inc/3rdparty/site_config/standard/wow.joystiq.com.txt
new file mode 100644
index 00000000..759fb81f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/wow.joystiq.com.txt
@@ -0,0 +1,6 @@
1title: //h2[@class="posttitle"]
2body: //div[@class="post"]
3strip: //h2[@class="posttitle"]
4strip: //p[@class="filed-under"]
5convert_double_br_tags: yes
6test_url: http://wow.joystiq.com/2011/06/20/the-overachiever-guide-to-midsummer-festival-2011-achievements/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/www1.folha.uol.com.br.txt b/inc/3rdparty/site_config/standard/www1.folha.uol.com.br.txt
new file mode 100644
index 00000000..0846be2c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/www1.folha.uol.com.br.txt
@@ -0,0 +1,15 @@
1body://div[@id='articleNew']
2strip://div[@id='articleBy']
3strip://div[@id='articleDate']
4strip://td[@class='articleGraphicCredit']
5strip://h1
6strip://div[@id='articleEnd']
7strip://p[@class='tagline']
8strip://div[@class='openBox adslibraryArticle']
9strip_id_or_class:ad-180x150-1
10
11
12title: //div[@id="articleNew"]/h1
13author: //div[@id="articleBy"]/p/b
14date: substring-before(//div[@id="articleDate"], "-")
15test_url: http://www1.folha.uol.com.br/mundo/1115805-ex-ditador-argentino-videla-e-condenado-a-50-anos-de-prisao.shtml \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/www3.imperial.ac.uk.txt b/inc/3rdparty/site_config/standard/www3.imperial.ac.uk.txt
new file mode 100644
index 00000000..71306af2
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/www3.imperial.ac.uk.txt
@@ -0,0 +1,2 @@
1strip_id_or_class: hidelabel
2test_url: http://www3.imperial.ac.uk/newsandeventspggrp/imperialcollege/newssummary/news_14-7-2010-15-53-18 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/wyborcza.pl.txt b/inc/3rdparty/site_config/standard/wyborcza.pl.txt
new file mode 100644
index 00000000..f99467c2
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/wyborcza.pl.txt
@@ -0,0 +1,11 @@
1title:h1
2author: //*[@class = 'author']
3date: //*[@class = 'date']
4body: //*[@id = 'art']
5next_page_link: //*[@id='Str']/a[contains(text(), 'nastepne')]
6strip: //*[@class = 'rel_zdjTOP']
7strip: //*[@id = 'rel']
8strip: //*[@class = 'txt_upl']
9strip: //*[@id='Str']
10strip: //*[@id='source']
11test_url: http://wyborcza.pl/1,123455,11536088,Gdy_peknie_fejs__obryzga_wszystko.html?as=1&startsz=x \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/wyctim.com.txt b/inc/3rdparty/site_config/standard/wyctim.com.txt
new file mode 100644
index 00000000..d8c8713b
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/wyctim.com.txt
@@ -0,0 +1,3 @@
1body: //div[@class='article-body']
2title: //h1
3test_url: http://wyctim.com/icloud-sync-regebbi-rendszereken/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/wz-newsline.de.txt b/inc/3rdparty/site_config/standard/wz-newsline.de.txt
new file mode 100644
index 00000000..fbc1d3d2
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/wz-newsline.de.txt
@@ -0,0 +1,5 @@
1title://h1
2
3date://p[@class='articleDate']
4body://div[@class='articleBody wzStandardArticle']
5test_url: http://www.wz-newsline.de/home/sport/tennis/federer-zum-vierten-mal-sieger-in-indian-wells-1.938050 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/xoeb.us.txt b/inc/3rdparty/site_config/standard/xoeb.us.txt
new file mode 100644
index 00000000..e02960e0
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/xoeb.us.txt
@@ -0,0 +1,4 @@
1title: //h1[@class="entry-title"]
2author: //span[@class="fn"]
3date: //p[@class="meta"]
4test_url: http://xoeb.us/blog/2012/03/16/my-mistakes-with-our-first-release/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/yated.com.txt b/inc/3rdparty/site_config/standard/yated.com.txt
new file mode 100644
index 00000000..13a3ea64
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/yated.com.txt
@@ -0,0 +1,2 @@
1title: //div[@class='pagetitle']
2test_url: http://www.yated.com/content.asp?categoryid=7&contentid=582 \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/yostivanich.com.txt b/inc/3rdparty/site_config/standard/yostivanich.com.txt
new file mode 100644
index 00000000..9e24db3c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/yostivanich.com.txt
@@ -0,0 +1,5 @@
1title://div[@class='entry-title']
2body://div[@class='entry-content']
3strip_comments:yes
4convert_double_br_tags:yes
5test_url: http://www.yostivanich.com/2010/07/11/wired-com-with-world-watching-wikileaks-falls-into-disrepair/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/youtube.com.txt b/inc/3rdparty/site_config/standard/youtube.com.txt
new file mode 100644
index 00000000..d52b7356
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/youtube.com.txt
@@ -0,0 +1,15 @@
1title: //title
2body: //iframe
3
4find_string: <html>&lt;iframe
5replace_string: <iframe id="video"
6
7find_string: &gt;&lt;/iframe&gt;</html>
8replace_string: ></iframe>
9
10single_page_link: //link[@type='text/xml+oembed']
11
12prune: no
13tidy: no
14
15test_url: http://www.youtube.com/watch?v=F6gLH0r3iVU \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/zdnet.com.txt b/inc/3rdparty/site_config/standard/zdnet.com.txt
new file mode 100644
index 00000000..b244b229
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/zdnet.com.txt
@@ -0,0 +1,10 @@
1title: //h1[@class="h s-1"]
2author: substring-before(substring-after(//p[@class="meta s-10"], 'By'), '|')
3author: substring-after(//div[@class="bio"]//h3, 'About ')
4date: substring-after(//p[@class="meta s-10"], '|')
5date: substring-after(//p[@class="meta"], '|')
6body: //div[@class="content-1 entry space-1 clear"]
7body: //div[@class="storyBody"]
8
9test_url: http://www.zdnet.com/blog/microsoft/the-bing-back-end-more-on-cosmos-tiger-and-scope/10920
10test_url: http://www.zdnet.com/researchers-find-web-tracking-up-privacy-down-7000000358/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/zeit.de.txt b/inc/3rdparty/site_config/standard/zeit.de.txt
new file mode 100644
index 00000000..66a7f1ac
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/zeit.de.txt
@@ -0,0 +1,44 @@
1# 2012-12-23 [carlo@...] fixed half-assed headlines in articles, removed inline author profiles, adjusted picture captions
2# 2012-03-17 [dkless@...] Cut metadata parts in the beginning and the ends of the content block; copyright entries for pictures removed; Author fixed, not sure if old entries still valid (I left them); Weird problems with some pages addressed (see last section for removing hidden section)
3# 2011-12-09 [carlo@...] Removed "related articles" block
4# 2011-08-23 [carlo@...] changed single page link to use print version: page works better, less ambiguity. Related cleanups and simplifications.
5# 2011-08-20 [carlo@...] added author, fixed date
6
7
8single_page_link: //a[@title='Druckversion']
9tidy: no
10
11title: //title
12date: substring-before( //li[@class="date"], " " )
13author: //li[@class="author"]/a/text() | //li[@class="author first"]/a/text()
14author: substring-after(//li[@class='source first '], 'Quelle: ')
15
16strip_id_or_class: articleheader
17strip: //div[@id="comments"] | //div[@class="pagination block"] | //p[@class="ressortbacklink"] | //div[@id="relatedArticles"] | // div[@class="inline portrait"]
18
19#Removes author and date from the start
20strip: //ul[@class="tools"]
21#Removes copyright statement - it often shows up as a distracting first line of the article
22strip: //p[@class="copyright"]
23strip: //div[@class="copyright"]
24#Removes pagination links at the end
25strip: //div[@class="pagination"]
26
27# Fix picture captions
28wrap_in(small): //p[@class="caption"]/text()
29
30# Fix sub-headlines
31wrap_in(h2): //p/strong
32dissolve: //h2/strong
33
34#Sometimes things are embedded in the print version that are not displayed on the web, but would be displayed in the mobilized versions and can even cause problems. These sections are removed here.
35strip_id_or_class:"informatives"
36strip_id_or_class:"bottom"
37strip_id_or_class:"teasermosaic"
38strip_id_or_class:"comments"
39strip_id_or_class:"articlefooter af"
40strip_id_or_class:"relateds"
41strip_id_or_class:"pagination"
42
43footnotes: no
44test_url: http://www.zeit.de/kultur/film/2012-12/Kurzfilmtag \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/zerodistraction.com.txt b/inc/3rdparty/site_config/standard/zerodistraction.com.txt
new file mode 100644
index 00000000..d3b60c7d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/zerodistraction.com.txt
@@ -0,0 +1,4 @@
1author: //span[@class='author']//a
2date: //span[@class='date']
3test_url: http://zerodistraction.com/blog/2012/3/11/retina-ipad-that-means-i-am-going-digital-only-for-comic-boo.html
4test_url: http://zerodistraction.com/notes/unreasonably-grumpy \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/zerokspot.com.txt b/inc/3rdparty/site_config/standard/zerokspot.com.txt
new file mode 100644
index 00000000..ea9132aa
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/zerokspot.com.txt
@@ -0,0 +1,3 @@
1title: //h1
2body: //div[@id="primarycontent"]
3test_url: http://zerokspot.com/weblog/2011/06/26/europython2011/ \ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/zingtrain.com.txt b/inc/3rdparty/site_config/standard/zingtrain.com.txt
new file mode 100644
index 00000000..2a2f58a8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/zingtrain.com.txt
@@ -0,0 +1,3 @@
1title: substring-after(id, 'post')/h2
2body://div[@class = 'entry']
3test_url: http://www.zingtrain.com/category/ontrack/january-2007/ \ No newline at end of file