]> git.immae.eu Git - github/wallabag/wallabag.git/commitdiff
updated specific configuration for parsing
authorNicolas Lœuillet <nicolas@loeuillet.org>
Sun, 13 Jul 2014 08:15:40 +0000 (10:15 +0200)
committerNicolas Lœuillet <nicolas@loeuillet.org>
Sun, 13 Jul 2014 08:15:40 +0000 (10:15 +0200)
952 files changed:
inc/3rdparty/site_config/standard/24ways.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/36kr.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/37signals.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/3quarksdaily.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/3voor12.vpro.nl.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/43folders.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/500px.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/512pixels.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/5by5.tv.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/7newsbelize.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/944.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/README.md [new file with mode: 0755]
inc/3rdparty/site_config/standard/aachener-nachrichten.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/aachener-zeitung.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/abc.es.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/abc.net.au.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/abcnews.go.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/accesstoinsight.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/acidcow.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/acquia.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/acroswing.fr.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/aftenposten.no.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/aftonbladet.se.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/aht.seriouseats.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/albayan.ae.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/alex.mullr.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/alexduner.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/alexduner.squarespace.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/alistapart.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/aljazeera.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/allrecipes.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/allthingsd.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/allyou.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/alphabeta.argaam.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/alriyadh.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/alseraj.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/alt1040.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/alternet.org.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/altfoto.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/alumni.stanford.edu.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/amandala.com.bz.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/amazon.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/americandrink.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/americascup.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/americastestkitchenfeed.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/amptoons.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/anandtech.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/androidpolice.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/andyrutledge.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/annatravelling.wordpress.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/applature.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/apple.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/appledaily.com.tw.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/appleinsider.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/appleweblog.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/archdaily.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/archiveofourown.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/arstechnica.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/articles.boston.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/articles.courant.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/articles.washingtonpost.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/asahi.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ascarter.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/astronews.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/asymco.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/autoblog.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/avclub.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/baltimoresun.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/baseballprospectus.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/basicthinking.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/bb.is.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/bbc.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/bbcgoodfood.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/benoitmaison.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/berlingske.dk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/bernama.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/betabeat.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/betanews.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/biography.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/bitelia.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/bizjournals.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/bjango.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blog.arsln.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blog.asmartbear.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blog.cloudflare.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blog.fefe.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blog.instagram.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blog.instapaper.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/blog.jaysalvat.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blog.kaelig.fr.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blog.naver.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blog.pchome.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blog.pinboard.in.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blog.renren.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/blog.sina.com.cn.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blog.spu.edu.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blog.wells.ee.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blogs.aljazeera.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blogs.forbes.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blogs.hbr.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blogs.msdn.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blogs.reuters.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blogs.scientificamerican.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blogs.smithsonianmag.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blogs.technet.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/bluetouff.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/boagworld.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/boingboing.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/boldizsar.palotas.eu.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/book.douban.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/bookforum.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/borderhouseblog.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/bostonglobe.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/bostonreview.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/boundlessline.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/bowdoinorient.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/brainfacts.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/brandeins.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/brandingstrategyinsider.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/brasil.elpais.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/brettterpstra.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/brisbanetimes.com.au.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/brookings.edu.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/brooksreview.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/bt.no.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/buffed.de.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/buquad.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/business2community.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/businessinsider.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/businessnews.com.tn.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/businessweek.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/buzzfeed.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/bygonebureau.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/cable.co.uk.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/cardboardconnection.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/carpeaqua.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/cars.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/catb.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/cbc.ca.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/cbn.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/cbsnews.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/cedarrepublican.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/chareidi.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/chinamining.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/chomsky.info.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/chrisltd.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/christianitytoday.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/christianpf.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/christies.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/chrome.google.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/chronicle.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ciaosamin.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/cicero.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ciperchile.cl.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/cjr.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/classyllama.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/clientk.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/clubic.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/cmswire.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/cn.engadget.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/cn.reuters.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/cnet.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/cnn.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/cnnsi.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/code.activestate.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/code.fivefilters.org.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/code.google.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/codeproject.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/codinghorror.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/collegehumor.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/communities-dominate.blogs.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/community.service-now.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/computer.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/computerbase.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/computerworld.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/computerworld.dk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/contemporist.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/conversaciones.nokia.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/cooper.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/core77.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/counterpunch.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/crazybutable.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/crimemagazine.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/crimethinc.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/crn.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/csmonitor.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/csnbayarea.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/csnphilly.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/css-tricks.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/cucharasonica.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/cw.com.tw.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/da.feedsportal.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/dagogtid.no.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/dailydot.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/dailykos.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/dailymail.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/dailystar.com.lb.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/danleech.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/dansdata.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/dantri.com.vn.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/daringfireball.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/datanami.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/dcurt.is.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/defomicron.net.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/delong.typepad.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/democracynow.org.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/derstandard.at.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/designtagebuch.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/desitvforum.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/details.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/developers.facebook.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/devlinsangle.blogspot.co.at.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/dictionary.reference.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/diepresse.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/digiphoto.techbang.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/digital-photography-school.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/digitalspy.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/dilbert.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/dinamalar.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/dn.se.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/dobreprogramy.pl.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/doctac.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/domusweb.it.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/dou.ua.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/douban.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/dpreview.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/dr.dk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/dramasonline.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/drdobbs.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/drive2.ru.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/dropbox.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/drupal.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/dukebasketballreport.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/dushumashang.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/dvice.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/eamesinerudition.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/eandt.theiet.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/eastoftheweb.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ebay.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ecetia.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/econlog.econlib.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/economia.estadao.com.br.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/economist.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/edge-online.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/edge.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/edition.channel5belize.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/edition.cnn.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/eetimes.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/ekultura.hu.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/elance.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/elderscrollsonline.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/elektroniknet.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/elmalpensante.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/elpais.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/emaratalyoum.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/en.espnf1.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/engadget.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/engineering.tumblr.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/english.aljazeera.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/enikos.gr.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/entertainment.timesonline.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ericsuh.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/es.hu.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/escapistmagazine.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/espn.go.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/esquire.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/essentialpublicradio.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/etc.se.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/eternabuenosaires.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/eurogamer.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/evo.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/expressen.se.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/extracine.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/f1actual.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/facebook.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/facta.co.jp.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/falter.at.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/fanfiction.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/fastcompany.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/faz.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/fertigung.de.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/fictionpress.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ficwad.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/finance.yahoo.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/findtheswagger.tumblr.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/firstthings.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/fivechapters.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/fivefilters.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/fivethirtyeight.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/flyingmachinestudios.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/fm4.orf.at.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/fnal.gov.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/focus.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/folklore.org.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/food.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/fool.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/forbes.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/foreignaffairs.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/foreignpolicy.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/forsvaret.no.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/foxnews.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/freelancer.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/freytag-film.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/fria.nu.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/friatidningen.se.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/friendskorner.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ft.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ftchinese.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/ftd.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/fubiz.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/futurezone.at.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/gamasutra.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/gameblog.fr.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/gamechurch.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/gamer.no.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/gamereactor.no.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/garythink.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/gasteroprod.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/gatopardo.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/gawker.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/geeksofdoom.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/geenstijl.nl.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/getnews.jp.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/giantbomb.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/giga.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/gigaom.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/gihyo.jp.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/gist.github.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/givemesomethingtoread.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/gizmodo.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/gizmodo.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/gizmodo.uol.com.br.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/gizmologia.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/gizmovil.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/global.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/globalissues.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/globoesporte.globo.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/gloswielkopolski.pl.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/goal.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/golem.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/good.is.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/goodfil.ms.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/gossip-tv.gr.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/goteborgsfria.se.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/gothamist.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/gotomanager.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/gov.ky.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/gp.se.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/gq.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/grantland.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/greatergreaterwashington.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/groups.drupal.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/gulfnews.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/guokr.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/haberler.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/hackmake.org.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/halo.bungie.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/hammers.theoffside.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/handelsblatt.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/hanselman.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/hardware.fr.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/hardware.no.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/hbr.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/headrush.typepad.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/heise-online.mobi.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/heise.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/hemmings.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/heroturko.me.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/hespress.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/hiamag.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/highscalability.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/hiperpop.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/hiphopleeft.nl.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/historytoday.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/hmercer.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/hollywoodlife.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/hometheaterreview.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/hosted.ap.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/howtogeek.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/hs.fi.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ht.ly.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/huffingtonpost.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/humantransit.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/hurriyet.com.tr.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/hvg.hu.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/hypebeast.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/icannabis.tumblr.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/idealog.co.nz.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/idlewords.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/igeneration.fr.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ignoredbydinosaurs.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ilounge.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ilyabirman.ru.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/inc.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/independent.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/indiatimes.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/inessential.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/info.abril.com.br.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/infoq.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/informador.com.mx.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/information.dk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/informationarchitects.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/informationclearinghouse.info.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/informit.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/infoworld.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/infzm.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/inhabitat.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/instagr.am.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/interest.co.nz.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/iolanguage.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ipadclub.nl.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ipadplanet.nl.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/iphoneclub.nl.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/iphonehacks.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/iplaysoft.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/isource.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/itavisen.no.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/itmedia.co.jp.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/itstactical.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/itwire.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/itworld.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/izismile.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/jalopnik.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/jandan.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/jetzt.sueddeutsche.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/jjahnke.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/jobbank.gc.ca.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/joelonsoftware.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/jouire.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/joystiq.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/juedische-allgemeine.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/juppy.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/kachestvo.ru.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/kachiblog.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/kathimerini.gr.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/kenrockwell.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/kicker.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/kickstarter.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/kingarthurflour.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/kotaku.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/kottke.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/kumailplus.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/kumb.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/kwerfeldein.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/landetsfria.se.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/laphamsquarterly.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/laprensagrafica.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/laquadrature.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/lareviewofbooks.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/latimes.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/laughingsquid.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/leancrew.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/lefigaro.fr.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/lemonde.fr.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/lesnumeriques.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/letemps.ch.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/libcom.org.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/lifeandculture.fr.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/lifehacker.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/lifestyle.inquirer.net.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/lifeweek.com.cn.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/linkedin.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/livescience.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/longform.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/loopinsight.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/lostgarden.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/lovefm.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/lovetv.com.bz.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/lrb.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/luminous-landscape.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/luxuo.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/m.bbc.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/m.douban.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/m.vanityfair.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/mac4ever.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/macdrifter.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/macformat.techradar.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/macgeneration.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/macmagazine.com.br.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/macrumors.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/macstories.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/mactalk.com.au.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/mactechnews.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/macworld.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/mainichi.jp.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/mainpost.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/makeuseof.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/manager.co.th.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/marco.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/marksdailyapple.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/martinfowler.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/mashable.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/matt.might.net.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/mattcutts.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/mbl.is.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/medialens.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/medium.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/megamp3.eu.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/menshealth.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/metafilter.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/mforum.cari.com.my.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/mikeash.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/mikeindustries.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/minnesota.publicradio.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/minnpost.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/mirrorfootball.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/mises.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/mlb.mlb.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/mlb.sbnation.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/mlssoccer.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/mmo-champion.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/mnn.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/mno.hu.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/mobile.nytimes.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/mobile.slate.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/mobileopportunity.blogspot.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/modernghana.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/money.cnn.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/monkeyzen.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/moonsault.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/moreintelligentlife.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/motherboard.vice.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/mothering.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/motherjones.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/motorfull.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/movie.douban.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/msdn.microsoft.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/msnbc.msn.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/myfoxatlanta.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/myfoxboston.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/myrecipes.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/narenji.ir.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/nasa.gov.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/nbweekly.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/neh.gov.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/neomoney.co.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/net-security.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/netmagazine.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/netzpolitik.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/newleftproject.org.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/newmatilda.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/newrepublic.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/news-gazette.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/news.cnet.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/news.detik.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/news.kanaloco.jp.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/news.mynavi.jp.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/news.orf.at.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/news.rambler.ru.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/news.techmeme.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/news.yahoo.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/news.ycombinator.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/news.zing.vn.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/news247.gr.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/newsbomb.gr.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/newsle.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/newsmill.se.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/newsunspun.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/newsweek.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/newswise.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/newyorker.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/next-gen.biz.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/nfl.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ngm.nationalgeographic.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/nhk.or.jp.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/nintendoworldreport.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/nojesguiden.se.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/northumberlandview.ca.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/nosalty.hu.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/nplusonemag.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/npr.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/nybooks.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/nymag.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/nyteknik.se.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/nytimes.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/nzz.ch.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/observer.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/off.net.mk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/omaha.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/omiliya.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/on.net.mk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/online.wsj.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/onlinewelten.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/onstartups.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ontologicalgeek.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/opensource.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/openthemagazine.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/openwebx.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/orf.at.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/origo.hu.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/oschina.net.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/pakistantvdekho.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/pakmedia.tv.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/pandagon.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/pandodaily.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/panic.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/papodehomem.com.br.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/parislemon.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/parliament.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/pastebin.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/pastepad.fivefilters.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/pathawks.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/pcast.me.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/pcmag.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/pcworld.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/penny-arcade.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/pentaxforums.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/philadelphiaeagles.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/philly.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/photo.tutsplus.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/php.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/physicstoday.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/pinterest.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/pitchfork.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/pittnews.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/pittsburgh.pirates.mlb.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/pittsburghlive.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/pittsburghmagazine.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/pittsburghpanthers.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/pittscriptblog.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/planetvita.de.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/playboy.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/plus.google.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/plzkthxbai.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/pogue.blogs.nytimes.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/politico.com.txt
inc/3rdparty/site_config/standard/politifact.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/politiken.dk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/polygon.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/popularmechanics.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/portertech.ca.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/positioningmag.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/post-gazette.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/posta.com.tr.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/prb.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/prog21.dadgum.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/prolost.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/propublica.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/prosa.dk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/prospectmagazine.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/protothema.gr.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/psychologytoday.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/publications.parliament.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/publico.pt.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/purpleplanetmedia.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/qctimes.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/quantumdiaries.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/queerty.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/quepasa.cl.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/quora.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/racjonalista.pl.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/radar.oreilly.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/radionz.co.nz.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/randsinrepose.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/readability.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/readwriteweb.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/real.gr.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/recipe.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/red-hot-girls.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/reddit.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/redmondpie.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/redtape.msnbc.msn.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/reflets.info.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/renenekuda.cz.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/resume.se.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/retrieverweekly.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/reuters.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/revistapiaui.estadao.com.br.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/rezeptwelt.de.txt [new file with mode: 0644]
inc/3rdparty/site_config/standard/richardmuscat.wordpress.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ritemail.blogspot.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ritholtz.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/robertsspaceindustries.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/robots.thoughtbot.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/rockpapershotgun.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/rodrigo.sharpcube.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/rogerebert.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/rolfinjapan.nl.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/rollingstone.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/rottentomatoes.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/roughtype.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/roy.gbiv.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/rpgsite.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/rubysfera.pl.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ruhlman.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ruttloff.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/salon.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/salzburg.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sanpedrosun.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/saveyourself.ca.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sayidaty.net.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/sbnation.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/schneier.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/science.orf.at.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/scienceblogs.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/scienceticker.info.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/scientificamerican.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/scilogs.de.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/scotusblog.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/scraplab.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/scripting.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sct.temple.edu.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/searchenginejournal.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/searchengineland.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/seattletransitblog.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sebbo.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/select.yeeyan.org.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/seriouseats.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sf.curbed.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sf.eater.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sfgate.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sfweekly.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/shabayek.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/shawnblanc.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/shifteleven.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/siasat.pk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/signalscv.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/simonwillison.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/singaporeanstocksinvestor.blogspot.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/singularityhub.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sintagoulis.gr.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sivers.org.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/skanesfria.se.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/slashfilm.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/slate.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/slice.seriouseats.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/slog.thestranger.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/smartinvestor.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sme.sk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/smithsonianmag.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/smokingapples.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/somethingawful.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/songshuhui.net.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/sourcebooks.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/spectator.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/spectrum.ieee.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/speirs.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/spiegel.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/spiked-online.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/spin.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/splatf.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/splitsider.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sport.detik.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sport.orf.at.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sport365.fr.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/sports.espn.go.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sports.yahoo.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sportschau.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sportsillustrated.cnn.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sprengsatz.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sqlite.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/squashed.tumblr.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/stackoverflow.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/stalbansreview.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/standard.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/staradvertiser.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/stephenfry.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/stlbeacon.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/stockholm.etc.se.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/stockholmsfria.nu.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/straightdope.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/streetsblog.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/stuff.co.nz.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/stumbleupon.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/subtraction.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sueddeutsche.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/summify.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/suntimes.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/svd.se.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/svt.se.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/sydsvenskan.se.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/symmetrymagazine.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sz-magazin.sueddeutsche.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sz.de.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/tagesschau.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/tampabay.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/taptaptap.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/tasteofhome.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/taz.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/tbray.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/tcmanila.tk.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/tcng.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/tech.fortune.cnn.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/tech.gilt.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/tech.sina.com.cn.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/techcrunch.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/techdirt.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/techhive.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/techmeme.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/technicallyjordan.tumblr.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/technologizer.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/technologyreview.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/techpinions.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/techradar.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/telegraaf.nl.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/telegraph.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thanhnien.com.vn.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/the-magazine.org.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/theage.com.au.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/theamericanscholar.org.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/theappleblog.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/theatlantic.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/theatlanticcities.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/thebostonchannel.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thebrowser.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thecarton.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thedaily.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thedailybeast.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thedailymash.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thedisneyblog.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/theeuropean-magazine.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/thefilmexperience.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thegamedesignforum.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/theglobalmail.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/theglobeandmail.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thegreatdiscontent.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/theguardian.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/theindychannel.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/themarker.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/themillions.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/themuseumofinnocence.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thenation.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thenetworkgarden.blogs.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thenextgeneration.org.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/thenextweb.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/theoaklandpress.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/theonion.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thepioneerwoman.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/theregister.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/theroot.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/therumpus.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thesiasat.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thesimpledollar.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thespoiler.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thespoof.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thestranger.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thestreet.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thethaovanhoa.vn.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/theverge.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/theweek.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thinkprogress.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thisdaylive.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thisismynext.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/tidbits.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/time.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/timeshighereducation.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/tipb.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/tnr.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/tomdispatch.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/tomshardware.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/tomshardware.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/toolsandtoys.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/tracks.ranea.org.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/trailer.web-view.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/trailerzone.de.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/traningslara.se.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/triblive.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/truthdig.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/tthfanfic.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/tthor.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/tuaw.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/tuckreview.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/tvtropes.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/twitter.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/uefa.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/uk.xbox360.ign.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/uni-watch.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/unwinnable.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/uppsalafria.se.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/urbandictionary.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/usatoday.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/usccb.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/useit.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/usfirst.org.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/utdailybeacon.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/ux.artu.tv.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/uzivatelsketestovani.cz.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/vanityfair.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/varingen.no.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/varsity.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/vea.gov.vn.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/vedomosti.ru.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/veggbilder.no.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/vemedio.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/venturebeat.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/version2.dk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/verybestbaking.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/vg.no.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/video.forbes.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/videogum.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/villagevoice.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/vimeo.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/viply.de.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/visir.is.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/vitispr.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/vivirmexico.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/vnexpress.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/voices.washingtonpost.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/vworker.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/waffle.wootest.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/walrusmagazine.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/warnerbros.fr.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/washingtoninstitute.org.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/washingtonmonthly.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/washingtonpost.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/web-libre.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/weblog.bignerdranch.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/weblogs.asp.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/webpaper.nzz.ch.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/webwereld.nl.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/welt.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/westhamtillidie.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/what-if.xkcd.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/whatever.scalzi.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/wheelyric.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/wiki.guildwars.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/wiki.guildwars2.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/wikihow.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/wikitravel.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/will-self.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/williampfaff.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/winfuture.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/winrumors.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/winsupersite.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/wired.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/wmnf.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/wmpoweruser.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/worldpoultry.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/worldwidewords.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/wow.joystiq.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/wpmayor.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/wtatennis.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/www1.folha.uol.com.br.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/www3.imperial.ac.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/wyborcza.pl.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/wyctim.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/wz-newsline.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/xfgjls.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/xoeb.us.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/yated.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ynet.co.il.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/yostivanich.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/yourerie.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/youtube.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/zcommunications.org.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/zdnet.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/zeit.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/zerohedge.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/zerokspot.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/zhihu.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/zingtrain.com.txt [changed mode: 0644->0755]
inc/poche/Database.class.php

old mode 100644 (file)
new mode 100755 (executable)
index 03bd195..86c9e07
@@ -1,6 +1,6 @@
-title: //div[@class='meta']/h2/a\r
-author: //div[@class='meta']/h2/following-sibling::p/a/text()\r
-date://div[@class='meta']/h2/strong\r
-body: //div[@id='article']\r
+title: //div[@class='meta']/h2/a
+author: //div[@class='meta']/h2/following-sibling::p/a/text()
+date://div[@class='meta']/h2/strong
+body: //div[@id='article']
 strip: //div[@class='domore']
 test_url: http://24ways.org/2011/composing-the-new-canon
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/36kr.com.txt b/inc/3rdparty/site_config/standard/36kr.com.txt
new file mode 100755 (executable)
index 0000000..d73d7de
--- /dev/null
@@ -0,0 +1,8 @@
+title: //h1[contains(@class, 'entry-title')]
+date: //meta[@name='weibo: article:create_at']/@content
+body: //div[contains(@class, 'mainContent')]
+strip_id_or_class: related_topics
+
+prune: no
+
+test_url: http://www.36kr.com/p/207879.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 43a10ae..531cac1
@@ -1,6 +1,6 @@
-title: //div[@class='post_header']//h2/a\r
-author: //span[@class='author']\r
-date: //span[@class='date']\r
-body: //div[@id='Content']\r
+title: //div[@class='post_header']//h2/a
+author: //span[@class='author']
+date: //span[@class='date']
+body: //div[@id='Content']
 
 test_url: http://37signals.com/svn/posts/2785-the-end-of-the-it-department
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c4e7940..80a3958
@@ -1,9 +1,9 @@
-body: //div[@class='content']\r
-date: //div[@class='content']/h2\r
-strip: //div[@class='content']/h2\r
-title: //div[@class='content']/h3\r
-\r
-strip: //div[@id='postmenu']\r
-strip: //div[@class='trackback']\r
-tidy: no\r
+body: //div[@class='content']
+date: //div[@class='content']/h2
+strip: //div[@class='content']/h2
+title: //div[@class='content']/h3
+
+strip: //div[@id='postmenu']
+strip: //div[@class='trackback']
+tidy: no
 test_url: http://www.3quarksdaily.com/3quarksdaily/2012/01/martin-luther-king-i-have-a-dream.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e8073f6..3777c66
@@ -1,4 +1,4 @@
-body: //*[@class = 'content']\r
-author: //*[@class = 'submitted']/a\r
+body: //*[@class = 'content']
+author: //*[@class = 'submitted']/a
 date: substring-after(//*[@class = 'submitted']/text(), '|')
 test_url: http://www.43folders.com/2011/04/22/cranking
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 68e6b2d..b9b7e9d
@@ -1,27 +1,27 @@
-# very loose setup for both 500px.com/photo/* and 500px.com/blog/*\r
-# photo page example: http://500px.com/photo/4181666\r
-# blog page example: http://500px.com/blog/110\r
-\r
-# avoid "no text" error\r
-tidy:no\r
-prune:no\r
-\r
-# reorganize photo page elements\r
-#body://div[contains(@class,'container')]\r
-move_into(body)://div[contains(@id,'thephoto')]\r
-move_into(body)://div[contains(@id,'description')]\r
-move_into(body)://div[contains(@id,'tags')]\r
-move_into(body)://div[contains(@id,'photo-info')]\r
-\r
-# clean photo page info\r
-strip://span[contains(@id,'copyright')]\r
-strip://*[contains(@id,'store')]\r
-strip://*[contains(@id,'user-info')]\r
-strip://*[contains(@id,'photo-stats')]\r
-strip://*[contains(@id,'voting_controls_container')]\r
-strip://*[contains(@id,'more-photos')]\r
-strip://*[contains(@id,'embed-photo')]\r
-\r
-# clean blog page side bar\r
+# very loose setup for both 500px.com/photo/* and 500px.com/blog/*
+# photo page example: http://500px.com/photo/4181666
+# blog page example: http://500px.com/blog/110
+
+# avoid "no text" error
+tidy:no
+prune:no
+
+# reorganize photo page elements
+#body://div[contains(@class,'container')]
+move_into(body)://div[contains(@id,'thephoto')]
+move_into(body)://div[contains(@id,'description')]
+move_into(body)://div[contains(@id,'tags')]
+move_into(body)://div[contains(@id,'photo-info')]
+
+# clean photo page info
+strip://span[contains(@id,'copyright')]
+strip://*[contains(@id,'store')]
+strip://*[contains(@id,'user-info')]
+strip://*[contains(@id,'photo-stats')]
+strip://*[contains(@id,'voting_controls_container')]
+strip://*[contains(@id,'more-photos')]
+strip://*[contains(@id,'embed-photo')]
+
+# clean blog page side bar
 strip://*[contains(@class,'col d3 clearafter')]
 test_url: http://500px.com/photo/3641041?from=editors
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index dce0df4..59b70a9
@@ -1,9 +1,9 @@
-body: //*[@id="episode"]\r
-prune: no\r
-tidy: no\r
-\r
-autodetect_next_page: no\r
-strip_id_or_class: player\r
-\r
+body: //*[@id="episode"]
+prune: no
+tidy: no
+
+autodetect_next_page: no
+strip_id_or_class: player
+
 strip://*[@id="header"]
 test_url: http://5by5.tv/buildanalyze/60
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/7newsbelize.com.txt b/inc/3rdparty/site_config/standard/7newsbelize.com.txt
new file mode 100755 (executable)
index 0000000..46d09f8
--- /dev/null
@@ -0,0 +1,7 @@
+title: //*[@id='sstitle']
+body: //div[@id='sstory']
+strip_id_or_class: newsoptions
+prune: no
+
+test_url: http://www.7newsbelize.com/sstory.php?nid=25654
+test_url: http://www.7newsbelize.com/7news.xml
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 84380e7..8bf6a4c
@@ -1,9 +1,9 @@
-title: //h2[@class='border']\r
-body: //div[@class='padding']\r
-\r
-convert_double_br_tags: yes\r
-\r
-strip: //div[@id='social_sharing']\r
-strip: //div[@class='socialLinks']\r
+title: //h2[@class='border']
+body: //div[@class='padding']
+
+convert_double_br_tags: yes
+
+strip: //div[@id='social_sharing']
+strip: //div[@class='socialLinks']
 
 test_url: http://www.944.com/articles/mild-obsessions-frock-la-get-to-know-victoria-tik-s-haute-sustainable-fashion-line/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/README.md b/inc/3rdparty/site_config/standard/README.md
new file mode 100755 (executable)
index 0000000..9040ba8
--- /dev/null
@@ -0,0 +1,38 @@
+Full-Text RSS site config files
+================
+
+[Full-Text RSS](http://fivefilters.org/content-only/), our article extraction tool, makes use of site-specific extraction rules to improve results. Each time a URL is processed, it checks to see if there are extraction rules for the site being processed. If there are no site patterns, it tries to detect the content block automatically.
+
+This repository contains the site config files we use in Full-Text RSS.
+
+### Contributing changes
+
+We chose GitHub for this set of files because they offer one feature which we hope will make contributing changes easier: [file editing](https://github.com/blog/844-forking-with-the-edit-button) through the web interface. 
+
+You can now make changes to any of our site config files and request that your changes be pulled into the main set we maintain. This is what GitHub calls the Fork and Pull model:
+
+> The Fork & Pull Model lets anyone fork an existing repository and push changes to their personal fork without requiring access be granted to the source repository. The changes must then be pulled into the source repository by the project maintainer. This model reduces the amount of friction for new contributors and is popular with open source projects because it allows people to work independently without upfront coordination.
+
+When we receive a pull request we'll review the changes and if everything's okay we'll update our copy.
+
+If a site is not in our set, you can create a file for it in the same way. See [Creating files on GitHub](https://github.com/blog/1327-creating-files-on-github).
+
+### How to write a site config file
+
+The quickest and simplest way is to use our [point-and-click interface](http://siteconfig.fivefilters.org). It's a simple tool only intended to create a rule to extract the correct content block. 
+
+For further refinements, e.g. selecting the title, stripping elements, dealing with multi-page articles, please see our [help page](http://help.fivefilters.org/customer/portal/articles/223153-site-patterns).
+
+### Instapaper
+
+When we introduced site patterns, we chose to adopt the [same format](http://blog.instapaper.com/post/730281947) used by Instapaper. This allows us to make use of the existing extraction rules contributed by Instapaper users. 
+
+Marco, Instapaper's creator, graciously opened up the database of contributions to everyone:
+
+> And, recognizing that your efforts could be useful to a wide range of other tools and services, I'll make the list of all of these site-specific configurations available to the public, free, with no strings attached.
+
+Most of the extraction rules in our set are borrowed from Instapaper. You can see the list maintained by Instapaper at [instapaper.com/bodytext/](http://instapaper.com/bodytext/) (login required).
+
+### Testing site config files
+
+Currently you will have to have a copy of Full-Text RSS to test changes to the site config files. In the future we will try to make this process easier.
old mode 100644 (file)
new mode 100755 (executable)
index 379592e..b60c15d
@@ -1,10 +1,10 @@
-title: //meta[@property='og:title']/@content\r
-body: //*[@class='fliesstext_detail' or @class='detail_fliesstext'] | //img[@itemprop="image" and starts-with(@src, "/sixcms/media.php/")]\r
-\r
-strip_id_or_class: socialshareprivacy1\r
-strip_id_or_class: zvaFacebookButton\r
-\r
-tidy: no\r
-prune: no\r
-\r
+title: //meta[@property='og:title']/@content
+body: //*[@class='fliesstext_detail' or @class='detail_fliesstext'] | //img[@itemprop="image" and starts-with(@src, "/sixcms/media.php/")]
+
+strip_id_or_class: socialshareprivacy1
+strip_id_or_class: zvaFacebookButton
+
+tidy: no
+prune: no
+
 test_url: http://www.aachener-nachrichten.de/lokales/aachen-detail-an/2517757
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4d76fac..013afa4
@@ -1,10 +1,10 @@
-title: //meta[@property='og:title']/@content\r
-body: //*[@class='fliesstext_detail' or @class='detail_fliesstext'] | //img[@itemprop="image" and starts-with(@src, "/sixcms/media.php/")]\r
-\r
-strip_id_or_class: socialshareprivacy1\r
-strip_id_or_class: zvaFacebookButton\r
-\r
-tidy: no\r
-prune: no\r
-\r
+title: //meta[@property='og:title']/@content
+body: //*[@class='fliesstext_detail' or @class='detail_fliesstext'] | //img[@itemprop="image" and starts-with(@src, "/sixcms/media.php/")]
+
+strip_id_or_class: socialshareprivacy1
+strip_id_or_class: zvaFacebookButton
+
+tidy: no
+prune: no
+
 test_url: http://www.aachener-zeitung.de/sixcms/detail.php?template=az_detail&id=2552718
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a99833d..43aadc4
@@ -1,7 +1,7 @@
-title: //meta[@property='og:title']/@content\r
-body: //div[@class='datosi' or @class='date' or @class='photo-alt1' or @class='text']\r
-strip_id_or_class: colB\r
-\r
-prune: no\r
+title: //meta[@property='og:title']/@content
+body: //div[@class='datosi' or @class='date' or @class='photo-alt1' or @class='text' or @itemprop='articleBody']
+strip_id_or_class: colB
+
+prune: no
 
 test_url: http://www.abc.es/20120209/tv-series/abci-house-ultima-temporada-201202090936.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 5e6269c..22b3a0f
@@ -1,10 +1,18 @@
-title: //h1\r
-author: //div[@class="byline"]/a\r
-date: //span[@class="timestamp"]\r
-\r
-strip: //p[@class="topics"]\r
-strip: //h1\r
-strip: //div[@class="byline"]\r
-strip: //p[@class="published"]\r
+title: //div[@class='article section']//h1
+author: //div[@class="byline"]/a
+date: //span[@class="timestamp"]
+body: //div[@class="page section"]
+
+strip: //a[@class="inline-caption"]
+strip: //p[@class="ticker section noprint"]
+strip: //p[@class="topics"]
+strip: //h1
+strip: //div[@class="byline"]
+strip: //p[@class="published"]
 strip: //div[contains(@class,"featured-scroller")]
-test_url: http://www.abc.net.au/news/2011-11-08/crabb-carbon-legislation-abbott-demolition/3652544
\ No newline at end of file
+strip_id_or_class: footer
+
+tidy: no
+
+test_url: http://www.abc.net.au/news/2013-03-27/open-speed-highways-change-clp-giles/4597892
+test_url: http://www.abc.net.au/news/2013-04-30/credit-growth-remains-subdued/4660054?section=business
old mode 100644 (file)
new mode 100755 (executable)
index c515d3e..8d36735
@@ -1,27 +1,27 @@
-title: //h1[@class='headline']\r
-body: //div[@id='storyText']\r
-# for video entries\r
-body: //img[@id='ff-img'] | //div[@id='meta']//div[contains(@class, 'overview')]\r
-author: //div[@class='byline']\r
-date: //div[@class='date']\r
-strip: //*[@id='date_partner']\r
-\r
-strip: //div[@class='breadcrumb']\r
-strip: //div[contains(@class,'show_tools')]\r
-strip: //div[@id='sponsoredByAd']\r
-strip: //div[contains(@class,'rel_container')]\r
-strip: //p[a[starts-with(@href, 'http://www.twitter.com')]]\r
-strip: //p[a[starts-with(@href, 'http://www.facebook.com')]]\r
-strip: //p[contains(., 'Click here to return to')]\r
-#strip_id_or_class: media\r
-strip_id_or_class: mediaplayer\r
-\r
-replace_string(<link rel="image_src" href="http): <img id="ff-img" src="http\r
-\r
-prune: no\r
-\r
-single_page_link: concat(//li[@class='pager']//a/@href, '&singlePage=true')\r
-\r
-test_url: http://abcnews.go.com/Politics/newt-gingrich-rocky-rollout-presidential-campaign-recover/story?id=13632744\r
-# multi-page\r
+title: //h1[@class='headline']
+body: //div[@id='storyText']
+# for video entries
+body: //img[@id='ff-img'] | //div[@id='meta']//div[contains(@class, 'overview')]
+author: //div[@class='byline']
+date: //div[@class='date']
+strip: //*[@id='date_partner']
+
+strip: //div[@class='breadcrumb']
+strip: //div[contains(@class,'show_tools')]
+strip: //div[@id='sponsoredByAd']
+strip: //div[contains(@class,'rel_container')]
+strip: //p[a[starts-with(@href, 'http://www.twitter.com')]]
+strip: //p[a[starts-with(@href, 'http://www.facebook.com')]]
+strip: //p[contains(., 'Click here to return to')]
+#strip_id_or_class: media
+strip_id_or_class: mediaplayer
+
+replace_string(<link rel="image_src" href="http): <img id="ff-img" src="http
+
+prune: no
+
+single_page_link: concat(//li[@class='pager']//a/@href, '&singlePage=true')
+
+test_url: http://abcnews.go.com/Politics/newt-gingrich-rocky-rollout-presidential-campaign-recover/story?id=13632744
+# multi-page
 test_url: http://abcnews.go.com/Blotter/family-freed-american-hostage-somalia-seals-obama/story?id=15439544
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b5d8507..45d6653
@@ -1,9 +1,9 @@
-title: //div[@id='H_docTitle']\r
-\r
-body: //div[@id='H_meta' or @id='H_content' or @id='F_footer']\r
-\r
-strip_id_or_class: F_toenail\r
-\r
-prune: no\r
-\r
+title: //div[@id='H_docTitle']
+
+body: //div[@id='H_meta' or @id='H_content' or @id='F_footer']
+
+strip_id_or_class: F_toenail
+
+prune: no
+
 test_url: http://www.accesstoinsight.org/lib/authors/nyanaponika/wheel026.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 60ede6a..2195865
@@ -1,3 +1,3 @@
-body: //div[starts-with(@id, 'news-id-')]\r
-\r
+body: //div[starts-with(@id, 'news-id-')]
+
 test_url: http://acidcow.com/fun/20933-acid-picdump-83-pics.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 5ddf542..2803611
@@ -1,9 +1,9 @@
-title://h1[@class="title"]\r
-author://div[@class="submitted"]/span/a\r
-date://div[@class="submitted"]/span\r
-body://div[@class="content-wrapper"]\r
-\r
-strip://div[@id="skip-link"]\r
-strip://div[@id="region-content-3-3"]\r
+title://h1[@class="title"]
+author://div[@class="submitted"]/span/a
+date://div[@class="submitted"]/span
+body://div[@class="content-wrapper"]
+
+strip://div[@id="skip-link"]
+strip://div[@id="region-content-3-3"]
 strip://div[@id="section-footer"]
 test_url: https://www.acquia.com/blog/drupals-long-warmth-toward-third-party-code
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 57d86d2..6b1d67f
@@ -1,5 +1,5 @@
-tidy:no\r
-date: //time[@class='updated']\r
-dissolve: //ul[@class='video-gallery']/li\r
+tidy:no
+date: //time[@class='updated']
+dissolve: //ul[@class='video-gallery']/li
 dissolve: //ul[@class='video-gallery']
 test_url: http://www.acroswing.fr/actualites/competition_rock/selectif_bellegarde_sur_valserine__2012-02-26.php
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/aftenposten.no.txt b/inc/3rdparty/site_config/standard/aftenposten.no.txt
new file mode 100755 (executable)
index 0000000..8a69c35
--- /dev/null
@@ -0,0 +1,5 @@
+title: //h1[@class='articleTitle ']
+body: //div[@class='bodyText widget storyContent']
+strip: //p/span[@class='quote']/..
+strip_id_or_class: 'pull1'
+test_url: https://www.aftenposten.no/meninger/spaltister/Portrett-av-scenekunstneren-som-ung-mann-7167959.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/aftonbladet.se.txt b/inc/3rdparty/site_config/standard/aftonbladet.se.txt
new file mode 100755 (executable)
index 0000000..b6c576a
--- /dev/null
@@ -0,0 +1,13 @@
+author: //article//address[contains(@class, 'author')]
+body: //article[.//div[contains(@class, 'abBodyText')]]//*[contains(@class, 'abLeadText') or contains(@class, 'abBodyText') or contains(@class, 'abImageBlock') or contains(@class, 'abIGSatellite')]
+
+strip: //address//img
+strip: //footer
+strip_id_or_class: abSticky
+
+prune: no
+
+test_url: http://www.aftonbladet.se/sportbladet/hockey/sverige/allsvenskan/article17498194.ab
+test_url: http://www.aftonbladet.se/debatt/article16207536.ab
+test_url: http://www.aftonbladet.se/debatt/debattamnen/politik/article17483377.ab
+test_url: http://www.aftonbladet.se/rss.xml
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 408e909..b2d88a0
@@ -1,15 +1,15 @@
-body: //div[@id='content']\r
-\r
-# clean up recipe pages\r
-strip: //h2[@class='fn'] | //h2[@class='double-lined'] | //h3 | //div[@id='threeColumn2'] | //div[@id='threeColumn3']\r
-\r
-#recipe pages\r
-strip_id_or_class: "recipe-feedback"\r
-strip_id_or_class: "comments"\r
-strip_id_or_class: "procedure-number"\r
-strip_id_or_class: "more-with-author"\r
-\r
-#slice\r
-strip_id_or_class: "inner"\r
+body: //div[@id='content']
+
+# clean up recipe pages
+strip: //h2[@class='fn'] | //h2[@class='double-lined'] | //h3 | //div[@id='threeColumn2'] | //div[@id='threeColumn3']
+
+#recipe pages
+strip_id_or_class: "recipe-feedback"
+strip_id_or_class: "comments"
+strip_id_or_class: "procedure-number"
+strip_id_or_class: "more-with-author"
+
+#slice
+strip_id_or_class: "inner"
 
 test_url: http://aht.seriouseats.com/archives/2009/12/the-burger-lab-salting-ground-beef.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/albayan.ae.txt b/inc/3rdparty/site_config/standard/albayan.ae.txt
new file mode 100755 (executable)
index 0000000..f6c093d
--- /dev/null
@@ -0,0 +1,6 @@
+body: //div[@id='main-column']//div[@class='content']
+
+prune: no
+
+test_url: http://www.albayan.ae/across-the-uae/education/2013-08-29-1.1949645
+test_url: http://www.albayan.ae/1.448?ot=ot.AjaxPageLayout
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/alexduner.com.txt b/inc/3rdparty/site_config/standard/alexduner.com.txt
new file mode 100755 (executable)
index 0000000..bd9de9d
--- /dev/null
@@ -0,0 +1,4 @@
+body: //section[@class='content']
+date: //span[1]
+author: //h1[@id='sitetitle']
+test_url: https://alexduner.com/blog/2013/1/something-i-learned-today
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/alexduner.squarespace.com.txt b/inc/3rdparty/site_config/standard/alexduner.squarespace.com.txt
new file mode 100755 (executable)
index 0000000..875405e
--- /dev/null
@@ -0,0 +1,4 @@
+body: //section[@class='content']
+date: //span[1]
+author: //h1[@id='sitetitle']
+test_url: https://alexduner.squarespace.com/blog/2013/1/tech-culture-from-the-outside-looking-in
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 090f7eb..7a7096e
@@ -1,12 +1,12 @@
-title: //h1[@class='title']\r
-author: //h3[@class='byline']/a\r
-date: //div[@class='ishinfo']\r
-\r
-body: //*[@id='articletext']\r
-strip_id_or_class: 'ishinfo'\r
-strip_id_or_class: 'metastuff'\r
-strip_id_or_class: 'learnmore'\r
-strip_id_or_class: 'discuss'\r
-\r
+title: //h1[@class='title']
+author: //h3[@class='byline']/a
+date: //div[@class='ishinfo']
+
+body: //*[@id='articletext']
+strip_id_or_class: 'ishinfo'
+strip_id_or_class: 'metastuff'
+strip_id_or_class: 'learnmore'
+strip_id_or_class: 'discuss'
+
 prune: no
 test_url: http://www.alistapart.com/articles/organizing-mobile/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4f0148f..d3bf401
@@ -1,8 +1,8 @@
-title: //span[@id='DetailedTitle']\r
-body: //td[@id='tdTextContent']\r
-strip_id_or_class: Skyscrapper_Body\r
-date: //span[@id='ctl00_cphBody_lblDate']\r
-author: //div[@id="dvAuthorInfo"]//a/text()\r
-strip: //table[ tbody/tr/td/object ]\r
-prune: no\r
+title: //span[@id='DetailedTitle']
+body: //td[@id='tdTextContent']
+strip_id_or_class: Skyscrapper_Body
+date: //span[@id='ctl00_cphBody_lblDate']
+author: //div[@id="dvAuthorInfo"]//a/text()
+strip: //table[ tbody/tr/td/object ]
+prune: no
 test_url: http://www.aljazeera.com/indepth/opinion/2012/01/2012114121925380575.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e9767bd..85dc2a5
@@ -1,14 +1,14 @@
-title: //h1[@id='itemTitle']\r
-body: //img[@id="ctl00_CenterColumnPlaceHolder_recipe_photoStuff_imgPhoto"] | //div[@id='ctl00_CenterColumnPlaceHolder_recipe_divSubmitter'] | //div[contains(@class, 'recipe-details-content')]\r
-strip: //div[@class='top-left' or @class='top-right' or @class='bot-left' or @class='bot-right']\r
-strip: //div[contains(@class, 'rightcoltoolsdiv')]\r
-strip: //div[contains(@class, 'servings-form')]\r
-strip: //p[@class='nutritional-information']\r
-strip: //a[contains(@class, 'nutritional-information') or contains(@class, 'nutritionanchor')]\r
-strip: //div[@id='nutri-info']/div[contains(@class, 'title')]\r
-strip: //img[@id='ctl00_CenterColumnPlaceHolder_recipe_imgSubmitter']\r
-strip_id_or_class: eshaAttribute\r
-strip_id_or_class: eshaParagraph\r
-prune: no\r
+title: //h1[@id='itemTitle']
+body: //img[@id="ctl00_CenterColumnPlaceHolder_recipe_photoStuff_imgPhoto"] | //div[@id='ctl00_CenterColumnPlaceHolder_recipe_divSubmitter'] | //div[contains(@class, 'recipe-details-content')]
+strip: //div[@class='top-left' or @class='top-right' or @class='bot-left' or @class='bot-right']
+strip: //div[contains(@class, 'rightcoltoolsdiv')]
+strip: //div[contains(@class, 'servings-form')]
+strip: //p[@class='nutritional-information']
+strip: //a[contains(@class, 'nutritional-information') or contains(@class, 'nutritionanchor')]
+strip: //div[@id='nutri-info']/div[contains(@class, 'title')]
+strip: //img[@id='ctl00_CenterColumnPlaceHolder_recipe_imgSubmitter']
+strip_id_or_class: eshaAttribute
+strip_id_or_class: eshaParagraph
+prune: no
 
 test_url: http://allrecipes.com/Recipe/Taco-Pie/Detail.aspx?src=rotd
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index cd52498..f8c67d0
@@ -1,10 +1,13 @@
-title://div[@class="article-title"]/h1[@class="title"]\r
-date: //p[@class="article-date"]\r
-body://*[@class="article-body article-text"]\r
-# Trim out related posts at bottom of article\r
-strip://blockquote[@class="memo"]\r
-\r
-# Yup, no idea why author won't work...\r
-author://div[@class="page-header article-header clearfix"]/p[@class="title"]\r
+title://div[@class="article-title"]/h1[@class="title"]
+date: //p[@class="article-date"]
+body://div[contains(@class, "article-body")]
+# Trim out related posts at bottom of article
+strip://blockquote[@class="memo"]
+
+tidy: no
+
+# Yup, no idea why author won't work...
+author://div[@class="page-header article-header clearfix"]/p[@class="title"]
 # [Marco:] Author won't work here because the page defines the "home" link under the author's name as rel="author", which always gets priority if the page has defined it.
-test_url: http://allthingsd.com/20120513/exclusive-yahoos-thompson-out-levinsohn-in-board-settlement-with-loeb-nears-completion/
\ No newline at end of file
+test_url: http://allthingsd.com/20120513/exclusive-yahoos-thompson-out-levinsohn-in-board-settlement-with-loeb-nears-completion/
+test_url: http://allthingsd.com/20131010/google-cio-ben-fried-on-how-google-works/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3c26c68..a13a725
@@ -1,8 +1,8 @@
-title: //div[@id='pageHdr']//h1\r
-body: //div[@id='pageHdr']/*[@class='dek'] | //div[@id='printArticle' or @id='slideShowPrint']\r
-strip: //div[contains(@class, 'infoBox') or @id='infoBox']\r
-single_page_link: //li[@id='print']/a\r
-\r
+title: //div[@id='pageHdr']//h1
+body: //div[@id='pageHdr']/*[@class='dek'] | //div[@id='printArticle' or @id='slideShowPrint']
+strip: //div[contains(@class, 'infoBox') or @id='infoBox']
+single_page_link: //li[@id='print']/a
+
 prune: no
-\r
+
 test_url: http://www.allyou.com/budget-home/money-shopping/freebies-online-00400000066392/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f5865f8..da1a67b
@@ -1,11 +1,11 @@
-body: //div[@class = 'entry']\r
-date: substring-after(//p[@class="date"],'بتاريخ ')\r
-strip_id_or_class: date\r
-strip_id_or_class: follow-single\r
-strip_id_or_class: ratingblock\r
-strip_id_or_class: newRatingHolder\r
-strip_id_or_class: postmetadata\r
-strip_id_or_class: addthis_toolbox\r
-strip_id_or_class: addthis_default_style\r
+body: //div[@class = 'entry']
+date: substring-after(//p[@class="date"],'بتاريخ ')
+strip_id_or_class: date
+strip_id_or_class: follow-single
+strip_id_or_class: ratingblock
+strip_id_or_class: newRatingHolder
+strip_id_or_class: postmetadata
+strip_id_or_class: addthis_toolbox
+strip_id_or_class: addthis_default_style
 strip_id_or_class: size-full
 test_url: http://alphabeta.argaam.com/?p=35657
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d006000..be7c43d
@@ -1,9 +1,9 @@
-body: //div[@id = "article-view"]\r
-body: //div[contains(@class, 'article')]//div[contains(@class, 'photo_bg')]\r
-author: //p[@class = "author"]\r
-strip: //h1\r
-strip: //h2\r
-strip_id_or_class: author\r
-prune: no\r
-test_url: http://www.alriyadh.com/2011/10/10/article674357.html\r
+body: //div[@id = "article-view"]
+body: //div[contains(@class, 'article')]//div[contains(@class, 'photo_bg')]
+author: //p[@class = "author"]
+strip: //h1
+strip: //h2
+strip_id_or_class: author
+prune: no
+test_url: http://www.alriyadh.com/2011/10/10/article674357.html
 test_url: http://www.alriyadh.com/net/article/780935
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/alternet.org.txt b/inc/3rdparty/site_config/standard/alternet.org.txt
new file mode 100755 (executable)
index 0000000..e92252e
--- /dev/null
@@ -0,0 +1,4 @@
+single_page_link: //div[contains(@class, 'story_tools')]//a[contains(@href, '/print/')]
+
+test_url: http://www.alternet.org/civil-liberties/noam-chomsky-surveillance-state-beyond-imagination-being-created-one-freest
+test_url: http://feeds.feedblitz.com/alternet
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7fd4719..a5bd03b
@@ -1,10 +1,10 @@
-title: //h1\r
-\r
-author: substring-after(//div[@class="enableBullets"]/preceding-sibling::p[1], "By ")\r
-\r
-date: //div/a[contains (@href, "issue")]\r
-\r
-move_into(//div[@class="enableBullets"]/p): (//div[@id="content"]//img)[1]\r
-\r
+title: //h1
+
+author: substring-after(//div[@class="enableBullets"]/preceding-sibling::p[1], "By ")
+
+date: //div/a[contains (@href, "issue")]
+
+move_into(//div[@class="enableBullets"]/p): (//div[@id="content"]//img)[1]
+
 body: //div[@class="enableBullets"]
 test_url: http://alumni.stanford.edu/get/page/magazine/article/?article_id=54819
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/amandala.com.bz.txt b/inc/3rdparty/site_config/standard/amandala.com.bz.txt
new file mode 100755 (executable)
index 0000000..fb0e21b
--- /dev/null
@@ -0,0 +1,6 @@
+body: //div[@id='content']//div[contains(@class, 'content')]
+strip_id_or_class: widget
+strip: //a[contains(@href, 'upm_export=')]
+
+test_url: http://amandala.com.bz/news/feed/
+test_url: http://amandala.com.bz/news/poor-pse-results-30-raise/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 1a23c4b..cd7ad15
@@ -1,19 +1,19 @@
-title: //span[@id = 'btAsinTitle']\r
-body: (//*[@id='prodImageCell']//a)[1] | //div[@id = 'ps-content'] | //span[@id='actualPriceValue'] | //h2[.='Product Details']/following-sibling::div | //div[@class='h2' and .='Product Description']/following-sibling::div\r
-#strip_id_or_class: quantityDropdownDiv\r
-#strip_id_or_class: addToCartSpan\r
-#strip_id_or_class: oneClickDiv\r
-strip_id_or_class: nocontent\r
-strip_id_or_class: masDynamicConten\r
-strip_id_or_class: dynamic-content\r
-prune: no\r
-\r
-find_string: <span id="actualPriceValue">\r
-replace_string: <span id="actualPriceValue"><br />Price: \r
-\r
-strip_id_or_class: collapsePS\r
-strip_id_or_class: expandPS\r
-strip_id_or_class: psPlaceHolde\r
-strip: //li[contains(., 'update product info') or contains(., 'give feedback on images')]\r
-\r
+title: //span[@id = 'btAsinTitle']
+body: (//*[@id='prodImageCell']//a)[1] | //div[@id = 'ps-content'] | //span[@id='actualPriceValue'] | //h2[.='Product Details']/following-sibling::div | //div[@class='h2' and .='Product Description']/following-sibling::div
+#strip_id_or_class: quantityDropdownDiv
+#strip_id_or_class: addToCartSpan
+#strip_id_or_class: oneClickDiv
+strip_id_or_class: nocontent
+strip_id_or_class: masDynamicConten
+strip_id_or_class: dynamic-content
+prune: no
+
+find_string: <span id="actualPriceValue">
+replace_string: <span id="actualPriceValue"><br />Price: 
+
+strip_id_or_class: collapsePS
+strip_id_or_class: expandPS
+strip_id_or_class: psPlaceHolde
+strip: //li[contains(., 'update product info') or contains(., 'give feedback on images')]
+
 test_url: http://www.amazon.com/Common-Sense-Forestry-Living-Mother/dp/1931498210/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index dee0e86..7145f3f
@@ -1,6 +1,6 @@
-title: //div[@class='head']/h2/a\r
-author: //div[@class='head']/a\r
-date: //div[@class='head']/p[@class='date']/a\r
-body: //div[@class='copy']\r
+title: //div[@class='head']/h2/a
+author: //div[@class='head']/a
+date: //div[@class='head']/p[@class='date']/a
+body: //div[@class='copy']
 strip: //p[@class='meta']
 test_url: http://americandrink.net/post/10567188712/free-the-hooch
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b1673b6..31723f8
@@ -1,10 +1,10 @@
-title: //div[@class="editorial-content"]/h3\r
-body: //div[@class="hero-image" or @class="editorial-content"]\r
-\r
-strip: //ul[@class="hero-caption"]\r
-strip_id_or_class: footer\r
-\r
-prune: no\r
-tidy: no\r
-\r
+title: //div[@class="editorial-content"]/h3
+body: //div[@class="hero-image" or @class="editorial-content"]
+
+strip: //ul[@class="hero-caption"]
+strip_id_or_class: footer
+
+prune: no
+tidy: no
+
 test_url: http://www.americascup.com/en/Latest/News/2012/3/Coutts-and-Peyron-tell-transformative-tale-at-Global-Sports-Forum/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8bf31ec..c2b62b5
@@ -1,5 +1,5 @@
-title: //h1[@class="post-title"]\r
-author: //span[@class="author"]/a\r
-date: //span[@class="date"]\r
+title: //h1[@class="post-title"]
+author: //span[@class="author"]/a
+date: //span[@class="date"]
 body: //div[@class="post-content main"]
 test_url: http://www.americastestkitchenfeed.com/gadgets-and-gear/2012/07/chill-out-with-tovolos-king-cube-silicone-ice-cube-tray/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/amptoons.com.txt b/inc/3rdparty/site_config/standard/amptoons.com.txt
new file mode 100755 (executable)
index 0000000..87547c6
--- /dev/null
@@ -0,0 +1,8 @@
+title: //title
+
+body: //div[@class="entry-content"]
+
+author: //span[@class="author vcard"]
+
+date: //span[@class="entry-date"]
+test_url: http://www.amptoons.com/blog/2013/03/14/open-thread-and-link-farm-i-hate-being-sick-edition/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8067e03..7d80491
@@ -1,11 +1,11 @@
-author: //a[@class='b'][1]\r
-date: substring-after(substring-before(//div, 'Posted in'), ' on ')\r
-strip_image_src: /content/images/globals/\r
-strip: //h2[. = 'Page 1']/preceding::p\r
-strip: //h2\r
-\r
-prune: no\r
-\r
-single_page_link: concat('http://www.anandtech.com/print/', substring-after(//meta[@property='og:url']/@content, '/show/'))\r
-\r
+author: //a[@class='b'][1]
+date: substring-after(substring-before(//div, 'Posted in'), ' on ')
+strip_image_src: /content/images/globals/
+strip: //h2[. = 'Page 1']/preceding::p
+strip: //h2
+
+prune: no
+
+single_page_link: concat('http://www.anandtech.com/print/', substring-after(//meta[@property='og:url']/@content, '/show/'))
+
 test_url: http://www.anandtech.com/show/5812/eurocom-monster-10-clevos-little-monster/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/androidpolice.com.txt b/inc/3rdparty/site_config/standard/androidpolice.com.txt
new file mode 100755 (executable)
index 0000000..8f9b1a2
--- /dev/null
@@ -0,0 +1,5 @@
+body: //div[@class='post_content']
+date: //div[@class='date_day'] | //div[@class='date_month']
+
+test_url: http://www.androidpolice.com/2014/03/30/music-boss-for-pebble-can-now-control-playback-and-volume-on-chromecast-content-from-your-smartwatch/
+
old mode 100644 (file)
new mode 100755 (executable)
index f9ffd3c..ce31fcf
@@ -1,9 +1,9 @@
-title: //h2\r
-author: string('Andy Rutledge')\r
-date: //div[@class='articledate']\r
-body: //div[@class='copybody']\r
-\r
-strip: //*[@class='space']\r
-strip: //*[@class='articleFoot']\r
-\r
+title: //h2
+author: string('Andy Rutledge')
+date: //div[@class='articledate']
+body: //div[@class='copybody']
+
+strip: //*[@class='space']
+strip: //*[@class='articleFoot']
+
 test_url: http://www.andyrutledge.com/hungry-for-a-better-menu.php
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a5c7c08..2d8937f
@@ -1,9 +1,9 @@
-title: //h1[@class="title"]\r
-\r
-author: ("Anna Manasova")\r
-# is ignored, unfortunately\r
-\r
-date: //p[@class="date"]\r
-\r
+title: //h1[@class="title"]
+
+author: ("Anna Manasova")
+# is ignored, unfortunately
+
+date: //p[@class="date"]
+
 body: //div[@class="entry"]
 test_url: http://annatravelling.wordpress.com/2011/11/07/a-day-of-cooking-thai/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a78a615..a820bba
@@ -1,18 +1,18 @@
-title: //h1[contains(@class, 'title')#\r
-body: //div[@id='mainContent']//div[contains(@class, 'section_content')] | //ul[@class='section_footer']\r
-date: //div[@class='date']\r
-\r
-strip_id_or_class: sharethis\r
-strip_id_or_class: stats\r
-strip_id_or_class: apply_form\r
-strip_id_or_class: job_map\r
-strip_id_or_class: respond\r
-strip: //h1//span[@class='type']\r
-strip: //li[@class='print' or @class='map']\r
-\r
-replace_string(<ul class="section_footer" style="display): <ul class="section_footer" style="display-bla\r
-\r
-prune: no\r
-tidy: no\r
-\r
+title: //h1[contains(@class, 'title')]
+body: //div[@id='mainContent']//div[contains(@class, 'section_content')] | //ul[@class='section_footer']
+date: //div[@class='date']
+
+strip_id_or_class: sharethis
+strip_id_or_class: stats
+strip_id_or_class: apply_form
+strip_id_or_class: job_map
+strip_id_or_class: respond
+strip: //h1//span[@class='type']
+strip: //li[@class='print' or @class='map']
+
+replace_string(<ul class="section_footer" style="display): <ul class="section_footer" style="display-bla
+
+prune: no
+tidy: no
+
 test_url: http://applature.com/mining-jobs/jobs/nickel-west-leinster-analytical-laboratory-technician/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4c48395..a54dccc
@@ -1,7 +1,7 @@
-strip: //p[@class='sosumi']\r
-# Aren't they witty?\r
-\r
-# I can't work out what causes the  before the title. \r
-title: //h1[@class='title']\r
-strip: //h1[@class='title']\r
+strip: //p[@class='sosumi']
+# Aren't they witty?
+
+# I can't work out what causes the  before the title. 
+title: //h1[@class='title']
+strip: //h1[@class='title']
 test_url: http://www.apple.com/pr/library/2011/02/15appstore.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/appledaily.com.tw.txt b/inc/3rdparty/site_config/standard/appledaily.com.tw.txt
new file mode 100755 (executable)
index 0000000..82d6f37
--- /dev/null
@@ -0,0 +1,4 @@
+body: //div[contains(@class, 'articulum')]
+
+test_url: http://www.appledaily.com.tw/realtimenews/article/new/20140120/330479
+test_url: http://www.appledaily.com.tw/rss/create/kind/rnews/type/new/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 279fbce..5ae1050
@@ -1,11 +1,23 @@
-title: //p[@class='title']\r
-\r
-author: //p[text() = 'By ']/a/text()\r
-strip: //p[text() = 'By ']\r
-\r
-body: //td[@class='bod']\r
-strip_id_or_class: title\r
-strip_id_or_class: minor\r
-\r
-strip_id_or_class: multipagefooter\r
-test_url: http://www.appleinsider.com/articles/12/02/29/inside_os_x_108_mountain_lion_safari_52_gets_a_simplified_user_interface_with_new_sharing_features.html
\ No newline at end of file
+title: //h1[@class="art-head"]
+
+author: //p[contains(@class, 'byline')]/a
+#author: //p[text() = 'By ']/a/text()
+#strip: //p[text() = 'By ']
+
+date: //p[contains(@class, 'date-header')]
+
+body: //div[@class="article"]
+strip_id_or_class: lazy
+#strip_id_or_class: minor
+strip_id_or_class: multipagefooter
+strip_id_or_class: date-header
+strip_id_or_class: byline
+
+find_string: <noscript>
+replace_string: <div>
+find_string: </noscript>
+replace_string: </div>
+
+test_url: http://www.appleinsider.com/articles/12/02/29/inside_os_x_108_mountain_lion_safari_52_gets_a_simplified_user_interface_with_new_sharing_features.html
+test_url: http://appleinsider.com/articles/13/10/03/goldee-companion-app-for-philips-hue-bulbs-offers-shifting-dynamic-light-scenes
+test_url: http://appleinsider.com/appleinsider.rss
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9476cf5..0178639
@@ -1,5 +1,5 @@
-date: //div[@class='post_date']\r
-\r
-body: //div[@class='post_content']\r
+date: //div[@class='post_date']
+
+body: //div[@class='post_content']
 
 test_url: http://www.archdaily.com/185325/p10-mixed-use-building-studio-up
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 50ff632..579de51
@@ -1,18 +1,22 @@
-# Description: Fix XPaths to include ALL chapters on 'view_full_work' pages.\r
-# Include: work meta, summary, chapter information, and notes which Instapaper strips out on default.\r
-# Exclude: header, footer, navigation, comments.\r
-# Notes: User is a newbie with XPaths.\r
-\r
-title: //h2[@class='title']\r
-author: //h3[@class='byline']\r
-author: //a[@class='login author']\r
-\r
-strip_id_or_class:header\r
-strip_id_or_class:navigation\r
-strip_id_or_class:feedback\r
-strip_id_or_class:kudos\r
-strip_id_or_class:add_comment_placeholder\r
-strip_id_or_class:add_comment\r
-strip_id_or_class:globalize\r
+# Description: Fix XPaths to include ALL chapters on 'view_full_work' pages.
+# Include: work meta, summary, chapter information, and notes which Instapaper strips out by default.
+# Exclude: header, footer, navigation, comments.
+# Notes: User is a newbie with XPaths.
+
+title: //h2[@class='title']
+author: //h3[@class='byline']
+author: //a[@class='login author']
+
+strip_id_or_class:header
+strip_id_or_class:navigation
+strip_id_or_class:feedback
+strip_id_or_class:kudos
+strip_id_or_class:add_comment_placeholder
+strip_id_or_class:add_comment
+strip_id_or_class:globalize
 strip_id_or_class:footer
-test_url: http://archiveofourown.org/works/229402?view_full_work=true
\ No newline at end of file
+
+single_page_link: //div[@id='main']//a[contains(@href, 'view_adult=true')]
+
+test_url: http://archiveofourown.org/works/229402?view_full_work=true
+test_url: http://archiveofourown.org/works/750111/chapters/1399929
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 49bb3db..767f680
@@ -1,16 +1,17 @@
-author: //p[@class='byline']/a\r
-body: //div[contains(@class,'article-content')]\r
-strip: //h2[@class='title']\r
-strip_id_or_class: byline\r
-prune: no\r
-\r
-date: //div[@class='byline']/span[@class='posted']//abbr/@original-title\r
-date: //div[@class='byline']/span[@class='posted']//abbr\r
-\r
-title: //div[@id='story']//h2[@class='title']\r
-\r
-strip: //div[@class='pager']\r
-next_page_link: //nav//a[span/@class='next']/@href\r
-\r
-test_url: http://arstechnica.com/tech-policy/news/2012/02/gigabit-internet-for-80-the-unlikely-success-of-californias-sonicnet.ars\r
-test_url: http://arstechnica.com/apple/2005/04/macosx-10-4/
\ No newline at end of file
+author: //p[@class='byline']/a
+body: //div[contains(@class,'article-content')]
+strip: //h2[@class='title']
+strip_id_or_class: byline
+strip_id_or_class: story-sidebar
+prune: no
+
+date: //div[@class='byline']/span[@class='posted']//abbr/@original-title
+date: //div[@class='byline']/span[@class='posted']//abbr
+
+title: //div[@id='story']//h2[@class='title']
+
+strip: //div[@class='pager']
+next_page_link: //nav//a[span/@class='next']/@href
+
+test_url: http://arstechnica.com/tech-policy/news/2012/02/gigabit-internet-for-80-the-unlikely-success-of-californias-sonicnet.ars
+test_url: http://arstechnica.com/apple/2005/04/macosx-10-4/
old mode 100644 (file)
new mode 100755 (executable)
index e54423b..73bcdb4
@@ -1,6 +1,6 @@
-title: //div[@class="mod-bostonarticleheader mod-articleheader"]/h1\r
-author: substring-after(//div[@class="mod-bostonarticlebyline mod-articlebyline"]/span[3],"By ")\r
-date: //div[@class="mod-bostonarticlebyline mod-articlebyline"]/span[@class="pubdate"]\r
-\r
+title: //div[@class="mod-bostonarticleheader mod-articleheader"]/h1
+author: substring-after(//div[@class="mod-bostonarticlebyline mod-articlebyline"]/span[3],"By ")
+date: //div[@class="mod-bostonarticlebyline mod-articlebyline"]/span[@class="pubdate"]
+
 strip_id_or_class: mod-pagination
 test_url: http://articles.boston.com/2011-10-23/news/30313691_1_bigfoot-free-speech-monadnock-state-park
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a08f204..984d81d
@@ -1,11 +1,11 @@
-title: //div[@class="mod-courantarticleheader mod-articleheader"]/h1\r
-date: //div[@class="mod-courantarticlebyline mod-articlebyline"]/span[@class="pubdate"]\r
-author: //div[@class="mod-courantarticlebyline mod-articlebyline"]/span[3]\r
-\r
-strip_id_or_class: mod-article-byline\r
-strip_id_or_class: mod-article-header\r
-strip_id_or_class: mod-article-subtitle\r
-#This leaves some crud after the article, but it's better than nothing.\r
-#It would be ideal if we could set the body to every element matching //div[contains(@class, "mod-articletext")]/p, but it seems like body only takes the first matching element.\r
+title: //div[@class="mod-courantarticleheader mod-articleheader"]/h1
+date: //div[@class="mod-courantarticlebyline mod-articlebyline"]/span[@class="pubdate"]
+author: //div[@class="mod-courantarticlebyline mod-articlebyline"]/span[3]
+
+strip_id_or_class: mod-article-byline
+strip_id_or_class: mod-article-header
+strip_id_or_class: mod-article-subtitle
+#This leaves some crud after the article, but it's better than nothing.
+#It would be ideal if we could set the body to every element matching //div[contains(@class, "mod-articletext")]/p, but it seems like body only takes the first matching element.
 
 test_url: http://articles.courant.com/2011-10-22/news/hc-green-drugsearch--1022-20111022_1_drugs-in-student-lockers-police-dogs-lockdown
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/articles.washingtonpost.com.txt b/inc/3rdparty/site_config/standard/articles.washingtonpost.com.txt
new file mode 100755 (executable)
index 0000000..a76c2d0
--- /dev/null
@@ -0,0 +1,11 @@
+body: //div[contains(@class, "article_body")]
+# print view
+body: //div[@id='print_facet']//div[@id='body']
+
+tidy: no
+prune: no
+
+single_page_link: concat(substring-before(//div[@id="echo_container_a"]/@guid, '_story.html'), '_print.html')
+
+test_url: http://articles.washingtonpost.com/2011-10-22/world/35279694_1_germany-acts-german-leaders-chancellor-angela-merkel
+test_url: http://articles.washingtonpost.com/2013-05-31/opinions/39658000_1_chemical-weapons-mass-destruction-cartels
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 2562edb..b4eec7b
@@ -1,3 +1,3 @@
-body: //div[@id='HeadLine']\r
+body: //div[@id='HeadLine']
 strip: //div[@id='utility_right']
 test_url: http://www.asahi.com/culture/update/0520/TKY201105200321.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 5236d09..0327e84
@@ -1,5 +1,5 @@
-title: //h1[@class='article_title']\r
-author: //span[@class='author']\r
-date: //h2[@class='dateline']\r
+title: //h1[@class='article_title']
+author: //span[@class='author']
+date: //h2[@class='dateline']
 body: //div[@class='article_body']
 test_url: http://ascarter.net/2012/02/20/enough-is-enough.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 33e8153..8de2227
@@ -1,7 +1,7 @@
-title: //span[@class='titel']\r
-author: //span[@class='metadaten_C']/a//span[@class='metadaten_C']\r
-date: substring-after(//span[@class='metadaten_C'],'astronews.com')\r
-strip: //span[@class='bu']\r
-strip_image_src: '/_images/'\r
+title: //span[@class='titel']
+author: //span[@class='metadaten_C']/a//span[@class='metadaten_C']
+date: substring-after(//span[@class='metadaten_C'],'astronews.com')
+strip: //span[@class='bu']
+strip_image_src: '/_images/'
 
 test_url: http://www.astronews.com/news/artikel/2011/10/1110-021.shtml
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index adad5f1..f639b04
@@ -1,8 +1,8 @@
-# Johannes Stühler\r
-\r
-title://h2\r
-author://span[@class='meta-content']\r
-date://abbr[@class='date published']/@title\r
-body://div[@class='entry-content']\r
+# Johannes Stühler
+
+title://h2
+author://span[@class='meta-content']
+date://abbr[@class='date published']/@title
+body://div[@class='entry-content']
 
 test_url: http://www.asymco.com/2011/01/14/is-android-more-efficient-than-ios-at-generating-search-revenue/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 58681bf..291db99
@@ -1,6 +1,6 @@
-prune: no\r
-body: //div[@class='post-body']\r
-author: //p[@class='byline']//a\r
-date: substring-after(//div[@class='about']/p[2], 'Posted')\r
+prune: no
+body: //div[@class='post-body']
+author: //p[@class='byline']//a
+date: substring-after(//div[@class='about']/p[2], 'Posted')
 strip: //div[@class='body']/div[@class='meta']
 test_url: http://www.autoblog.com/2012/01/17/next-gen-bmw-x5-caught-again/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 776ee10..c365a7a
@@ -1,4 +1,4 @@
-author: //*[@id="article_wrapper"]/div[1]/a[1]\r
-body: //*[@id="article_wrapper"]/div[2]\r
+author: //*[@id="article_wrapper"]/div[1]/a[1]
+body: //*[@id="article_wrapper"]/div[2]
 date: //*[@id="article_wrapper"]/div[1]/text()[2]
 test_url: http://www.avclub.com/articles/forgetmenot,70904
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 32adff8..35b6242
@@ -1,12 +1,12 @@
-single_page_link: //div[@class='toppaginate']//a[@rel='nofollow']\r
-convert_double_br_tags: yes\r
-\r
-title: //div[@class="story"]/h1\r
-body: //div[@id="story-body-text"]\r
-author: //span[@class="byline"]\r
-date: //p[@class="date"]\r
-\r
-strip: //*[@class='all']\r
-strip: //*[@class='articlerail']\r
+single_page_link: //div[@class='toppaginate']//a[@rel='nofollow']
+convert_double_br_tags: yes
+
+title: //div[@class="story"]/h1
+body: //div[@id="story-body-text"]
+author: //span[@class="byline"]
+date: //p[@class="date"]
+
+strip: //*[@class='all']
+strip: //*[@class='articlerail']
 
 test_url: http://www.baltimoresun.com/news/maryland/bs-md-omalley-budget-2-20120116,0,5340585.story
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/baseballprospectus.com.txt b/inc/3rdparty/site_config/standard/baseballprospectus.com.txt
new file mode 100755 (executable)
index 0000000..1207b34
--- /dev/null
@@ -0,0 +1,13 @@
+title: //h1[@class='title']
+author: //p[@class="author"]/a[1]
+body: //div[@class="article"]
+date: //p[@class="date"]
+
+# remove user tools
+strip: //div[@class='tools']
+strip: //h1
+strip: //h2[@class='subtitle']
+strip: //p[@class='author']
+strip: //p[@class='date']
+
+test_url: http://www.baseballprospectus.com/article.php?articleid=18463
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ab58314..f08c1f2
@@ -1,7 +1,7 @@
-title: //h2\r
-date: //span[@class='date']\r
-body: //div[@class='entry']\r
-\r
-strip: //div[@class='zusatz']\r
+title: //h2
+date: //span[@class='date']
+body: //div[@class='entry']
+
+strip: //div[@class='zusatz']
 
 test_url: http://www.basicthinking.de/blog/2011/12/13/sagt-social-networks-adieu-begrust-private-networks/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index eaafaf1..57f7fdf
@@ -1,13 +1,13 @@
-author: substring(//h3[@class='headlines']/span[@class='dates'],0,string-length(//h3[@class='headlines']/span[@class='dates'])-20)\r
-\r
-\r
-date: substring((//h3[@class='headlines']/span[@class='dates']),string-length(//h3[@class='headlines']/span[@class='dates'])-18,12)\r
-\r
-\r
-body:  //div[@class='first-article-big']\r
-strip: //table[@class='newsimagecontainer']\r
-strip: //h3[@class='headlines']\r
-strip: //iframe[@class='headlines']\r
-strip: //a[@class='newslink']\r
+author: substring(//h3[@class='headlines']/span[@class='dates'],0,string-length(//h3[@class='headlines']/span[@class='dates'])-20)
+
+
+date: substring((//h3[@class='headlines']/span[@class='dates']),string-length(//h3[@class='headlines']/span[@class='dates'])-18,12)
+
+
+body:  //div[@class='first-article-big']
+strip: //table[@class='newsimagecontainer']
+strip: //h3[@class='headlines']
+strip: //iframe[@class='headlines']
+strip: //a[@class='newslink']
 convert_double_br_tags: yes
 test_url: http://bb.is/Pages/82?NewsID=174119
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9c5c341..ef1f491
@@ -1,32 +1,42 @@
-body: //div[@class="story-body"]\r
-title: //h1[@class="story-header"]\r
-date: //span[@class="story-date"]/span[@class='date']\r
-\r
-# recipes, e.g. http://www.bbc.co.uk/food/recipes/mymincepies_71055\r
-body: //div[contains(@class, 'hrecipe')]//div[@id='subcolumn-1']\r
-\r
-#strip: //div[@class="story-feature narrow"]\r
-#strip: //div[@class="story-feature wide"]\r
-#strip: //div[@class="story-feature dslideshow-enclosure"]\r
-strip: //div[contains(@class, "story-feature")]\r
-strip: //span[@class="story-date"]\r
-#strip: //div[@class="caption body-narrow-width"]\r
-strip: //div[@class="warning"]//p\r
-strip: //div[@id='page-bookmark-links-head']\r
-strip: //object\r
-strip: //div[contains(@class, "bbccom_advert_placeholder")]\r
-strip: //div[contains(@class, "embedded-hyper")]\r
-strip: //div[contains(@class, 'market-data')]\r
-strip: //a[contains(@class, 'hidden')]\r
-strip: //div[contains(@class, 'hypertabs')]\r
-strip: //div[contains(@class, 'related')]\r
-strip: //form[@id='comment-form']\r
-strip: //div[contains(@class, 'comment-introduction')]\r
-\r
-replace_string(<noscript>): <div>\r
-replace_string(</noscript>): </div>\r
-\r
-prune: no\r
-\r
-dissolve: //h2\r
-test_url: http://www.bbc.co.uk/news/business-15060862
\ No newline at end of file
+body: //div[@class="story-body"]
+# for video entries
+body: //div[contains(@class, "videoInStory") or @id="meta-information"]
+title: //h1[@class="story-header"]
+date: //span[@class="story-date"]/span[@class='date']
+# for sport site
+date: //meta[@name='DCTERMS.created']/@content
+author: //div[@id='headline']//span[@class='byline-name']
+
+# recipes, e.g. http://www.bbc.co.uk/food/recipes/mymincepies_71055
+body: //div[contains(@class, 'hrecipe')]//div[@id='subcolumn-1']
+
+#strip: //div[@class="story-feature narrow"]
+#strip: //div[@class="story-feature wide"]
+#strip: //div[@class="story-feature dslideshow-enclosure"]
+strip: //div[contains(@class, "story-feature")]
+strip: //span[@class="story-date"]
+#strip: //div[@class="caption body-narrow-width"]
+strip: //div[@class="warning"]//p
+strip: //div[@id='page-bookmark-links-head']
+strip: //object
+strip: //div[contains(@class, "bbccom_advert_placeholder")]
+strip: //div[contains(@class, "embedded-hyper")]
+strip: //div[contains(@class, 'market-data')]
+strip: //a[contains(@class, 'hidden')]
+strip: //div[contains(@class, 'hypertabs')]
+strip: //div[contains(@class, 'related')]
+strip: //form[@id='comment-form']
+strip: //div[contains(@class, 'comment-introduction')]
+strip: //div[contains(@class, 'share-tools')]
+strip: //div[@id='also-related-links']
+
+replace_string(<noscript>): <div>
+replace_string(</noscript>): </div>
+
+prune: no
+
+dissolve: //h2
+test_url: http://www.bbc.co.uk/sport/0/football/23224017
+test_url: http://www.bbc.co.uk/news/business-15060862
+# video entry
+test_url: http://www.bbc.co.uk/news/world-asia-22056933
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bbcgoodfood.com.txt b/inc/3rdparty/site_config/standard/bbcgoodfood.com.txt
new file mode 100755 (executable)
index 0000000..1547d62
--- /dev/null
@@ -0,0 +1,16 @@
+title: //header//h1
+#body: //article[contains(@class, 'node-full')]
+body: //div[contains(@class, 'recipe-details') or contains(@class, 'tips-carousel')] | //section[@id='recipe-ingredients' or @id='recipe-method']
+
+strip_id_or_class: recipe-rating-wrapper
+strip_id_or_class: magazine-subcribe-header
+strip_id_or_class: hide
+strip_id_or_class: recipe-actions
+strip_id_or_class: buy-ingredients
+strip_id_or_class: related-content
+strip_id_or_class: recipe-magazine-ad
+strip_id_or_class: copy-right
+
+prune: no
+
+test_url: http://www.bbcgoodfood.com/recipes/1131634/minced-beef-wellington
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f341d59..72c1bae
@@ -1,16 +1,16 @@
-body: //div[@class="entry-content"]\r
-\r
-# Remove text &lsquo;Tweet&rsquo;\r
-strip: //div[@class="entry-content"]/div[last()]\r
-\r
-title: h1[@class="entry-title"]\r
-\r
-# If the Instapaper text parser worked with HTML5 tags, we would use:\r
-date: //time[@class="entry-date"]\r
-\r
-# But since it does not, use this more complicated rule:\r
-date: //div[@class="entry-meta"]/a[@rel="bookmark"]\r
-\r
-# Unfortunately, the following rule is overridden by the automatically found author.\r
+body: //div[@class="entry-content"]
+
+# Remove text &lsquo;Tweet&rsquo;
+strip: //div[@class="entry-content"]/div[last()]
+
+title: //h1[@class="entry-title"]
+
+# If the Instapaper text parser worked with HTML5 tags, we would use:
+date: //time[@class="entry-date"]
+
+# But since it does not, use this more complicated rule:
+date: //div[@class="entry-meta"]/a[@rel="bookmark"]
+
+# Unfortunately, the following rule is overridden by the automatically found author.
 author: ("Benoit Maison")
 test_url: http://www.benoitmaison.org/2011/12/06/why-siri-had-to-start-in-beta/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 607c998..9f8c41c
@@ -1,3 +1,3 @@
-title: //h1[@class='headline']\r
+title: //h1[@class='headline']
 body: //div[contains(@class, 'article-wrapper')]
 test_url: http://www.berlingske.dk/danmark/festen-er-flyttet-nordpaa
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bernama.com.txt b/inc/3rdparty/site_config/standard/bernama.com.txt
new file mode 100755 (executable)
index 0000000..fdc04b7
--- /dev/null
@@ -0,0 +1,5 @@
+body: //div[contains(@class, "NewsText")]
+prune: no
+
+test_url: http://www.bernama.com/bernama/v7/rss/english.php
+test_url: http://www.bernama.com/bernama/v7/newsindex.php?id=943513
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0eaf085..90a54a2
@@ -1,7 +1,7 @@
-# some articles at this site like this one doesn't\r
-# seem to pick up the article body via normal \r
-# processing, other articles come through fine\r
-# http://www.betanews.com/joewilcox/article\r
-#  /Google-is-a-marketing-sensation/1309708375\r
+# some articles at this site, like this one, don't
+# seem to pick up the article body via normal 
+# processing, other articles come through fine
+# http://www.betanews.com/joewilcox/article
+#  /Google-is-a-marketing-sensation/1309708375
 body: //*[@id="article"]
 test_url: http://www.betanews.com/joewilcox/article/Google-is-a-marketing-sensation/1309708375
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index dc07129..e431037
@@ -1,8 +1,8 @@
-title: //div[contains(@class, 'main-content')]//h1\r
-body: //div[@class='summary-column'] | //div[contains(@class, 'main-content')]\r
-\r
-prune: no\r
-\r
-single_page_link: //div[@id='biography-action-links']//a[contains(@href, '/print/')]\r
+title: //div[contains(@class, 'main-content')]//h1
+body: //div[@class='summary-column'] | //div[contains(@class, 'main-content')]
+
+prune: no
+
+single_page_link: //div[@id='biography-action-links']//a[contains(@href, '/print/')]
 
 test_url: http://www.biography.com/print/profile/martin-luther-9389283
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bizjournals.com.txt b/inc/3rdparty/site_config/standard/bizjournals.com.txt
new file mode 100755 (executable)
index 0000000..cfba766
--- /dev/null
@@ -0,0 +1,13 @@
+date: //meta[@name='publish-date']/@content
+body: //div[contains(@class, 'articleContentWrapper')]
+prune: no
+
+strip: //div[contains(@class, 'staff_info')]//dd[contains(., 'Twitter')]
+
+strip_id_or_class: related_content
+strip_id_or_class: enlarge
+strip_id_or_class: photoBy
+strip_id_or_class: older
+
+test_url: http://www.bizjournals.com/cincinnati/news/2013/10/03/harris-teeter-shareholders-vote-on.html
+test_url: http://feeds.bizjournals.com/industry_20?format=xml
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6cb0463..0fed552
@@ -1,7 +1,7 @@
-title: //h1[@class='articlehead']\r
-body: //div[@class='column']\r
-strip: //h1\r
-strip: //div[@class='help']\r
-\r
+title: //h1[@class='articlehead']
+body: //div[@class='column']
+strip: //h1
+strip: //div[@class='help']
+
 #no author or date/time provided in current layout
 test_url: http://bjango.com/articles/actions/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 1f43f49..7ac8cc1
@@ -1,8 +1,8 @@
-tidy: no\r
-prune: no\r
-date: //article/header/h6/time\r
-title: //article/header/h3\r
-author: //meta[@name='author']/@content\r
-body: //article//post\r
+tidy: no
+prune: no
+date: //article/header/h6/time
+title: //article/header/h3
+author: //meta[@name='author']/@content
+body: //article//post
 
 test_url: http://blog.arsln.org/aska-ayip-oluyor/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 81c3bda..78d7f51
@@ -1,7 +1,7 @@
-title: //title\r
-author: //span[@class='author vcard']/a\r
-date: //p[@class='headline_meta']/abbr[@class='published']\r
-body: //div[@class='format_text entry-content']\r
-\r
+title: //title
+author: //span[@class='author vcard']/a
+date: //p[@class='headline_meta']/abbr[@class='published']
+body: //div[@class='format_text entry-content']
+
 strip: //div[@id='dd_ajax_float']
 test_url: http://blog.asmartbear.com/how-to-get-quality-freelance-graphics-design-work-on-a-budget.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a4c5aae..db80a35
@@ -1,9 +1,9 @@
-# Instapaper gets this back to front and only gets the blog title instead of the article title.\r
-title: substring-before(//title, '-')\r
-\r
-author: //a[ contains(@href, '/people') ]\r
-\r
-body: //div[ @class='post' ]\r
-\r
+# Instapaper gets this back to front and only gets the blog title instead of the article title.
+title: substring-before(//title, '-')
+
+author: //a[ contains(@href, '/people') ]
+
+body: //div[ @class='post' ]
+
 # Date is impossible to retrieve since they use those stupid "fuzzy" dates, inserted through javascript, at posterous.
 test_url: http://blog.cloudflare.com/understanding-analytics-when-is-a-page-view-n
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 92272b7..97e48e6
@@ -1,5 +1,5 @@
-title: //h2\r
-date: //h3\r
-body: //ul\r
+title: //h2
+date: //h3
+body: //ul
 
 test_url: http://blog.fefe.de/?ts=b063bf55
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3065dd8..13d1d44
@@ -1,11 +1,11 @@
-# clean Instagram blog a little bit\r
-\r
-tidy:no\r
-prune:no\r
-\r
-body://div[contains(@id,'content')]\r
-\r
-strip_id_or_class:meta\r
-strip_id_or_class:notes\r
+# clean Instagram blog a little bit
+
+tidy:no
+prune:no
+
+body://div[contains(@id,'content')]
+
+strip_id_or_class:meta
+strip_id_or_class:notes
 strip_id_or_class:pagination
 test_url: http://blog.instagram.com/post/8757832007/fromwhereistand
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.instapaper.com.txt b/inc/3rdparty/site_config/standard/blog.instapaper.com.txt
new file mode 100755 (executable)
index 0000000..fda01b1
--- /dev/null
@@ -0,0 +1,9 @@
+author: //a[@href="http://www.marco.org/about"]
+date: //span[@class="date"]
+
+# Remove the date from article body.
+strip: //span[@class="date"]
+
+# Remove pagination links from article body.
+strip: //div[@id="pagination"]
+test_url: http://blog.instapaper.com/post/31303984531
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4e467fe..e89ad3a
@@ -1,4 +1,4 @@
-date: //span[contains(@class, 'date-links')]\r
-author: //span[contains(@class, 'author-links')]\r
+date: //span[contains(@class, 'date-links')]
+author: //span[contains(@class, 'author-links')]
 body: //div[contains(@class, 'entry-content')] 
 test_url: http://blog.jaysalvat.com/article/celui-qui-avait-refait-son-site-web
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ac18ad1..bcd3bdc
@@ -1,5 +1,5 @@
-body: //*[contains(@class, 'post_content')]\r
-author: string('Kaelig Deloumeau-Prigent')\r
-title: //h1[@class='title']\r
+body: //*[contains(@class, 'post_content')]
+author: string('Kaelig Deloumeau-Prigent')
+title: //h1[@class='title']
 date: //span[@class='date']
 test_url: http://blog.kaelig.fr/post/24877648508/preprocesseurs-css-renoncer-par-choix-ou-par
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 702789a..73c30c4
@@ -1,6 +1,6 @@
-title: //span[@class='pcol1 itemSubjectBoldfont']\r
-body: //div[@id='postListBody']\r
-date: //p[@class='date fil5 pcol2']\r
-single_page_link: /html/frameset/frame[1]/attribute::src\r
+title: //span[@class='pcol1 itemSubjectBoldfont']
+body: //div[@id='postListBody']
+date: //p[@class='date fil5 pcol2']
+single_page_link: /html/frameset/frame[1]/attribute::src
 strip: //div[@class='post-btn']
 test_url: http://blog.naver.com/how2invest/110135068757
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3089001..de81beb
@@ -1,12 +1,12 @@
-# PCHOME blog, a popular Chinese blog host\r
-# Oct 15, 2011\r
-#  \r
-\r
-title://*[contains(@class,'imp')]/h2\r
-\r
-date://*[contains(@class,'imp')]/span\r
-body://div[contains(@id,'blog_content')]\r
-\r
-\r
+# PCHOME blog, a popular Chinese blog host
+# Oct 15, 2011
+#  
+
+title://*[contains(@class,'imp')]/h2
+
+date://*[contains(@class,'imp')]/span
+body://div[contains(@id,'blog_content')]
+
+
 
 test_url: http://blog.pchome.net/article/462502.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b7afe45..40f0c56
@@ -1,6 +1,6 @@
-title: //a[@class="blog_title"]\r
-date: //p[@class="when"]/a\r
-body: //div[@class="blog_entry"]\r
-strip_id_or_class:blog_title\r
+title: //a[@class="blog_title"]
+date: //p[@class="when"]/a
+body: //div[@class="blog_entry"]
+strip_id_or_class:blog_title
 strip_id_or_class:when
 test_url: http://blog.pinboard.in/2011/11/the_social_graph_is_neither/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.renren.com.txt b/inc/3rdparty/site_config/standard/blog.renren.com.txt
new file mode 100755 (executable)
index 0000000..401d31e
--- /dev/null
@@ -0,0 +1,11 @@
+# This filter is tested on:
+# http://blog.renren.com/share/224959024/14260739544
+# http://blog.renren.com/share/231323504/14261768898
+# http://blog.renren.com/share/230305019/1502806705
+
+title://h1[contains(@class, 'title-article')]
+author://span[contains(@class, 'name')]
+body://div[contains(@class, 'content-body')]
+
+convert_double_br_tags:yes
+test_url: http://blog.renren.com/share/230305019/1502806705
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index acb9ce8..4895272
@@ -1,26 +1,26 @@
-# Sina blog, the most popular blog host in China.\r
-# Its source code is horrible.\r
-#  \r
-# Issue:\r
-# Only the first image in the article is displayed.\r
-# The rest images are replace by a 1x1 transparent gif by sina blog host.\r
-# \r
-\r
-title://*[contains(@class,'titName SG_txta')]\r
-author://*[contains(@id,'ownernick')]\r
-date://*[contains(@class,'time SG_txtc')]\r
-body://div[contains(@class,'articalContent')]\r
-\r
-# Remove redundant content which has span class start with "MASS"\r
-# Example <span class="MASSf21674ffeef7"></span>\r
-strip://span[contains(@class,'MASS')]\r
-\r
-# Remove comment\r
-strip://div[contains(@class,'allComm')]\r
-\r
-# Remove hiden text and link\r
-strip://ins\r
-\r
-tidy:no\r
-convert_double_br_tags:yes\r
+# Sina blog, the most popular blog host in China.
+# Its source code is horrible.
+#  
+# Issue:
+# Only the first image in the article is displayed.
+# The remaining images are replaced with a 1x1 transparent gif by the sina blog host.
+# 
+
+title://*[contains(@class,'titName SG_txta')]
+author://*[contains(@id,'ownernick')]
+date://*[contains(@class,'time SG_txtc')]
+body://div[contains(@class,'articalContent')]
+
+# Remove redundant spans whose class starts with "MASS"
+# Example <span class="MASSf21674ffeef7"></span>
+strip://span[contains(@class,'MASS')]
+
+# Remove comment
+strip://div[contains(@class,'allComm')]
+
+# Remove hidden text and links
+strip://ins
+
+tidy:no
+convert_double_br_tags:yes
 test_url: http://blog.sina.com.cn/s/blog_5054769e0102dtja.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8c8b383..eae6982
@@ -1,6 +1,6 @@
-title: //h2/a[@class="no-link title"]\r
-author: //h2[@id="blog_owner"]\r
-date: //time\r
-strip: //h2/a[@class="no-link title"]\r
-test_url: http://blog.wells.ee/retina\r
+title: //h2/a[@class="no-link title"]
+author: //h2[@id="blog_owner"]
+date: //time
+strip: //h2/a[@class="no-link title"]
+test_url: http://blog.wells.ee/retina
 test_url: http://blog.wells.ee/skeuomorphism
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f630127..2a66952
@@ -1,8 +1,8 @@
-# 2011-08-23 [carlo@...] Initial version.\r
-\r
-author: //div[@id="blogauthordatebox-node"]//a[@title="View user profile."]/text()\r
-\r
-# why yes, I do feel a bit dirty\r
-date: substring-before( substring-after( substring-after( //div[@id="blogauthordatebox-node"]//td[3], "on " ), ", "), " " )\r
+# 2011-08-23 [carlo@...] Initial version.
+
+author: //div[@id="blogauthordatebox-node"]//a[@title="View user profile."]/text()
+
+# why yes, I do feel a bit dirty
+date: substring-before( substring-after( substring-after( //div[@id="blogauthordatebox-node"]//td[3], "on " ), ", "), " " )
 
 test_url: http://blogs.aljazeera.net/asia/2011/08/22/peoples-hero
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3664d16..d47c352
@@ -1,4 +1,4 @@
-title: //div[@id='pageFeature']/h1\r
-body: //div[@id='articleBody']\r
-strip: //div[@class='module wide']\r
+title: //div[@id='pageFeature']/h1
+body: //div[@id='articleBody']
+strip: //div[@class='module wide']
 test_url: http://blogs.hbr.org/bregman/2011/04/the-1-killer-of-meetings-and-w.html?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+harvardbusiness+%28HBR.org%29
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3d3ec02..b2ff833
@@ -1,6 +1,6 @@
-title: //h3[@class="post-name"]\r
-author: //span[@class="user-name"]\r
-date: //div[@class="post-date"]\r
-body: //div[@class="post-content user-defined-markup"]\r
+title: //h3[@class="post-name"]
+author: //span[@class="user-name"]
+date: //div[@class="post-date"]
+body: //div[@class="post-content user-defined-markup"]
 footnotes: no
 test_url: http://blogs.msdn.com/b/b8/archive/2011/10/04/designing-the-start-screen.aspx
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6907bcb..d3eb996
@@ -1,3 +1,3 @@
-title: //div[@id='single']/h1\r
+title: //div[@id='single']/h1
 body: //div[@id='postcontent']
 test_url: http://blogs.reuters.com/felix-salmon/2010/07/16/the-value-of-a-strong-brand-apple-edition/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a7d1508..2102015
@@ -1,16 +1,16 @@
-# meta data\r
-title://h1[@class = 'postTitle']\r
-author:substring-before(substring-after(//span[@class = 'byline'],'By '),'|')\r
-date://span[@class = 'datestamp']\r
-\r
-#body content\r
-body://div[@id = 'singleBlogPost']\r
-\r
-#reclaim author info\r
-move_into(//div[@id = 'singleBlogPost'])://div[@id = 'aboutAuthorDiv']\r
-strip://p[@class = 'moreLink mobileHide']\r
-\r
-#cleanup comments, there might be some open <div> sections\r
-strip://div[@id = 'comments2']\r
+# meta data
+title://h1[@class = 'postTitle']
+author:substring-before(substring-after(//span[@class = 'byline'],'By '),'|')
+date://span[@class = 'datestamp']
+
+#body content
+body://div[@id = 'singleBlogPost']
+
+#reclaim author info
+move_into(//div[@id = 'singleBlogPost'])://div[@id = 'aboutAuthorDiv']
+strip://p[@class = 'moreLink mobileHide']
+
+#cleanup comments, there might be some open <div> sections
+strip://div[@id = 'comments2']
 strip://h3[a[@href = '#add-comment']]
 test_url: http://blogs.scientificamerican.com/a-blog-around-the-clock/2012/07/10/science-blogs-definition-and-a-history/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ba8bc6e..1bc65e7
@@ -1,15 +1,15 @@
-# metadata\r
-author://div[@class = 'post']/div[@class='meta']/a[1]\r
-date://div[@id = 'rap']/h2[1]\r
-body://div[@class = 'post']\r
-\r
-# wrapping caption and image\r
-wrap_in(fieldset)://div[contains(@class, 'wp-caption')]\r
-\r
-\r
-# clean up\r
-strip://div[@class = 'post']/h3[@class = 'storytitle']\r
-strip://div[@class = 'post']/div[@class = 'social']\r
-strip://img[@style = 'display:none;']\r
+# metadata
+author://div[@class = 'post']/div[@class='meta']/a[1]
+date://div[@id = 'rap']/h2[1]
+body://div[@class = 'post']
+
+# wrapping caption and image
+wrap_in(fieldset)://div[contains(@class, 'wp-caption')]
+
+
+# clean up
+strip://div[@class = 'post']/h3[@class = 'storytitle']
+strip://div[@class = 'post']/div[@class = 'social']
+strip://img[@style = 'display:none;']
 strip://img[@height='0' and @width='0']
 test_url: http://blogs.smithsonianmag.com/adventure/2011/10/tips-for-women-traveling-in-turkey/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a2909fd..3d0fbad
@@ -1,6 +1,9 @@
-title: //h3[@class="post-name"]\r
-author: //span[@class="user-name"]\r
-date: //div[@class="post-date"]\r
-body: //div[@class="post-content user-defined-markup"]\r
+title: //h3[@class="post-name"]
+author: //span[@class="user-name"]
+date: //div[@class="post-date"]
+body: //div[@class="post-content user-defined-markup"]
+strip_id_or_class: log-feedback-list
+tidy: no
 footnotes: no
-test_url: http://blogs.technet.com/b/dlemson/archive/2004/03/03/83304.aspx
\ No newline at end of file
+test_url: http://blogs.technet.com/b/dlemson/archive/2004/03/03/83304.aspx
+test_url: http://blogs.technet.com/b/isablog/archive/2009/01/07/a-pptp-client-might-fail-to-connect-to-a-vpn-server-on-the-internet-through-an-isa-server-2006.aspx
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index fbe7a5c..543d392
@@ -1,4 +1,4 @@
-body://div[@class='entry']\r
-date://div[@class='meta']\r
+body://div[@class='entry']
+date://div[@class='meta']
 strip://a[@class='FlattrButton']
 test_url: http://bluetouff.com/2012/03/02/polemique-google-vie-privee/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 91e48fd..3b3da99
@@ -1,8 +1,8 @@
-title: //h1[@class="entry-title"][2]\r
-author: string("Paul Boag")\r
-date: substring(//span[@class="meta"], 11)\r
-body: //article\r
-strip: //h2\r
-strip: //h1\r
+title: //h1[@class="entry-title"][2]
+author: string("Paul Boag")
+date: substring(//span[@class="meta"], 11)
+body: //article
+strip: //h2
+strip: //h1
 strip: //div[@id="callsToAction"]
 test_url: http://boagworld.com/working-in-web-design/dealing-with-the-dickheads/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9169e8f..4f39661
@@ -1,11 +1,11 @@
-# This is far from perfect, but so is BoingBoing's markup\r
-title: //h2[@class="headline"]\r
-single_page_link: //h2[@class="headline"]/a\r
-#date: //p[@class="byline"]\r
-body: //div[@class="post"]\r
-\r
-strip_id_or_class: shareMe\r
-strip_id_or_class: authorbox\r
-strip_id_or_class: byline\r
+# This is far from perfect, but so is BoingBoing's markup
+title: //h2[@class="headline"]
+single_page_link: //h2[@class="headline"]/a
+#date: //p[@class="byline"]
+body: //div[@class="post"]
+
+strip_id_or_class: shareMe
+strip_id_or_class: authorbox
+strip_id_or_class: byline
 
 test_url: http://boingboing.net/2011/10/23/understanding-the-hyperrich-through-the-lens-of-tomorrows-history.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4cc4904..3616bbf
@@ -1,3 +1,3 @@
-title: //h2[@class='entry-title']\r
+title: //h2[@class='entry-title']
 body: //div[@class='entry-content']
 test_url: http://boldizsar.palotas.eu/blog/?p=1394
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8b95856..fe2d2cb
@@ -1,6 +1,6 @@
-body: //span[@property='v:description']\r
-date: //span[@property='v:dtreviewed']\r
-author: //span[@property='v:reviewer']\r
-prune: no\r
+body: //span[@property='v:description']
+date: //span[@property='v:dtreviewed']
+author: //span[@property='v:reviewer']
+prune: no
 
 test_url: http://book.douban.com/review/2422662/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 331f415..03b6003
@@ -1,19 +1,19 @@
-#metadata\r
-title://div[@class = 'Topper']/h1\r
-author://div[@class = 'Topper']/h3\r
-date://div[@class = 'Topper']/h6\r
-body://div[@class = 'Core']\r
-\r
-\r
-\r
-# clean up\r
-strip://div[@class = 'Topper']/h1\r
-strip://div[@class = 'Topper']/h3\r
-strip://div[@class = 'Topper']/h4\r
-strip://div[@class = 'Topper']/h5\r
-strip://div[@class = 'Topper']/h6\r
-strip://br[@clear = 'all']\r
-strip://div[@class = 'adCore']\r
-strip://div[@class = 'BookR']\r
+#metadata
+title://div[@class = 'Topper']/h1
+author://div[@class = 'Topper']/h3
+date://div[@class = 'Topper']/h6
+body://div[@class = 'Core']
+
+
+
+# clean up
+strip://div[@class = 'Topper']/h1
+strip://div[@class = 'Topper']/h3
+strip://div[@class = 'Topper']/h4
+strip://div[@class = 'Topper']/h5
+strip://div[@class = 'Topper']/h6
+strip://br[@clear = 'all']
+strip://div[@class = 'adCore']
+strip://div[@class = 'BookR']
 strip://div[@class = 'InfoBox']
 test_url: http://bookforum.com/inprint/018_04/8595
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 190738d..b4e116f
@@ -1,7 +1,7 @@
-title://h1\r
-author://div[@class="meta"]/span/a\r
-date://div[@class="date"]\r
-body://div[@class="content article"]\r
-strip://div[@class="content article"]/h1\r
+title://h1
+author://div[@class="meta"]/span/a
+date://div[@class="date"]
+body://div[@class="content article"]
+strip://div[@class="content article"]/h1
 
 test_url: http://borderhouseblog.com/?p=7832
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d3e6f43..4c74a34
@@ -1,16 +1,16 @@
-# NOTE:  If testing this configuration yields bad results, including junk text like "Try BostonGlobe.com today" and "THIS STORY APPEARED IN", please replace the Test URL with a current-day headline link from bostonglobe.com.\r
-\r
-title: //div[@class="header"]/h1\r
-author: substring-after(//div[@class="byline"]/h2[@class="author"],"By ")\r
-date: //div[@class="byline"]/p[last()]\r
-body: //div[@class="article-body"]\r
-\r
-strip_id_or_class: aside\r
-strip_id_or_class: promo\r
-strip_id_or_class: skip-nav\r
-strip_id_or_class: article-more\r
-strip_id_or_class: article-bar\r
-\r
-# This removes image captions.  If the parser starts saving images from bostonglobe.com (currently, it does not), then this directive should be removed.\r
+# NOTE:  If testing this configuration yields bad results, including junk text like "Try BostonGlobe.com today" and "THIS STORY APPEARED IN", please replace the Test URL with a current-day headline link from bostonglobe.com.
+
+title: //div[@class="header"]/h1
+author: substring-after(//div[@class="byline"]/h2[@class="author"],"By ")
+date: //div[@class="byline"]/p[last()]
+body: //div[@class="article-body"]
+
+strip_id_or_class: aside
+strip_id_or_class: promo
+strip_id_or_class: skip-nav
+strip_id_or_class: article-more
+strip_id_or_class: article-bar
+
+# This removes image captions.  If the parser starts saving images from bostonglobe.com (currently, it does not), then this directive should be removed.
 strip_id_or_class: figure
 test_url: http://bostonglobe.com/news/nation/2012/03/17/illinois-primary-could-pivotal/PsDzFZqvhEYyXbOcF9FOkO/story.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6856701..64e04a1
@@ -1,15 +1,15 @@
-#basics\r
-title://h3[@class = 'article_title']\r
-date://span[@class = 'article_date']\r
-body://div[@id = 'center_column_article']\r
-#correct, but author not being picked up in preview\r
-author://span[@class = 'article_author']\r
-\r
-#strips basics from article\r
-strip_id_or_class:article_title\r
-strip_id_or_class:article_date\r
-strip_id_or_class:article_author\r
-\r
-#strips pull quotes\r
+#basics
+title://h3[@class = 'article_title']
+date://span[@class = 'article_date']
+body://div[@id = 'center_column_article']
+#correct, but author not being picked up in preview
+author://span[@class = 'article_author']
+
+#strips basics from article
+strip_id_or_class:article_title
+strip_id_or_class:article_date
+strip_id_or_class:article_author
+
+#strips pull quotes
 strip_id_or_class:pull_quote
 test_url: http://www.bostonreview.net/BR36.4/megan_pugh_agnes_de_mille_dance.php
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index bfc3f3d..a836e1e
@@ -1,5 +1,5 @@
-title: substring-before(//title, '|')\r
-body: //div[@class="entry"]\r
-# Remove the author's picture\r
+title: substring-before(//title, '|')
+body: //div[@class="entry"]
+# Remove the author's picture
 strip: //div[@class="entry"]/a[1]
 test_url: http://www.boundlessline.org/2011/06/the-nyts-on-gender-over-the-weekend.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bowdoinorient.com.txt b/inc/3rdparty/site_config/standard/bowdoinorient.com.txt
new file mode 100755 (executable)
index 0000000..932143d
--- /dev/null
@@ -0,0 +1,6 @@
+title: //*[@class='articletitle']
+body: //*[(@id='articlebody')]
+date: //*[(@class='articledate')]
+author: //*[(@class='articleauthor')]
+autodetect_next_page: no
+test_url: http://bowdoinorient.com/article/8045
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 94b0f56..9705f62
@@ -1,10 +1,10 @@
-title: //div[@class="standard"]/h1\r
-author: string("BrainFacts.org")\r
-date: //div[@class="meta"]/strong\r
-\r
-strip: //p[@class="skip"]\r
-strip: //div[@class="meta"]\r
-strip: //div[@class="standard"]/h1\r
-strip: //div[@class="modal"]\r
+title: //div[@class="standard"]/h1
+author: string("BrainFacts.org")
+date: //div[@class="meta"]/strong
+
+strip: //p[@class="skip"]
+strip: //div[@class="meta"]
+strip: //div[@class="standard"]/h1
+strip: //div[@class="modal"]
 strip: //div[@class="columnRight"]
 test_url: http://brainfacts.org/diseases-disorders/childhood-disorders/articles/2011/autism-the-pervasive-developmental-disorder/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3753ce6..36aa2ef
@@ -1,7 +1,7 @@
-# set body\r
-body: //div[@id='theContent']\r
-\r
-# set title\r
-title: //div[@id='theContent']/h3\r
+# set body
+body: //div[@id='theContent']
+
+# set title
+title: //div[@id='theContent']/h3
 strip: //div[@id='theContent']/h3
 test_url: http://www.brandeins.de/archiv/magazin/gegessen-wird-immer/artikel/hunger.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 1950484..fc02053
@@ -1,3 +1,3 @@
-date://h2[@class="date-header"]\r
+date://h2[@class="date-header"]
 body://div[@class="entry-content"]
 test_url: http://www.brandingstrategyinsider.com/2011/12/top-twelve-branding-keys-for-2012.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/brasil.elpais.com.txt b/inc/3rdparty/site_config/standard/brasil.elpais.com.txt
new file mode 100755 (executable)
index 0000000..0b8feb6
--- /dev/null
@@ -0,0 +1,23 @@
+title: //meta[@name='DC.title']/@content
+title: //div[contains(@class, 'cabecera_noticia')]//h1
+date: //meta[@name='DC.date']/@content
+date: //meta[@name='date']/@content
+body: //div[@class='columna_texto']
+body: //div[@id='cuerpo_noticia']
+body: //div[@class='estructura_2col_1zq']//div[@class='margen_n']
+
+prune: no
+
+strip_id_or_class: disposicion_vertical
+strip_id_or_class: ampliar_foto
+strip_id_or_class: utilidades
+strip_id_or_class: info_relacionada
+strip_id_or_class: m-kiosko
+strip_id_or_class: info_complementa
+
+strip: //p[@class='nota_pie']
+strip: //div[starts-with(@id, 'sumario') and contains(., 'más información')]
+strip: //div[@id='coment' or @id='foros_not']
+
+test_url: http://elpais.com/elpais/2012/02/06/gente/1328526783_491687.html
+test_url: http://www.elpais.com/articulo/cultura/mano/retrato/materia/elpepicul/20120207elpepicul_2/Tes
old mode 100644 (file)
new mode 100755 (executable)
index f6f7377..55da178
@@ -1,5 +1,5 @@
-body: //div[@class='post full']\r
-title: //h1\r
-author: substring-after(//title, '- ')\r
+body: //div[@class='post full']
+title: //h1
+author: substring-after(//title, '- ')
 date: //span[@class='date']
 test_url: http://brettterpstra.com/byword-for-ios/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9f4fc4e..17a4760
@@ -1,13 +1,13 @@
-title: //div[@id='contentheader']/h1\r
-author: //p[@class='attribution']/span[@class='author']/*\r
-# Is there a way to pull multiple authors? My XPath here is just grabbing the first\r
-\r
-date: /html/head/meta[@name="date"]/@content\r
-body: //div[@class='main-content']\r
-\r
-strip: //p[@class='byline']\r
-strip: //div[@class='img-gallery']\r
-strip: //div[@class='callout']\r
-strip: //div[@class='add-your-view']\r
+title: //div[@id='contentheader']/h1
+author: //p[@class='attribution']/span[@class='author']/*
+# Is there a way to pull multiple authors? My XPath here is just grabbing the first
+
+date: /html/head/meta[@name="date"]/@content
+body: //div[@class='main-content']
+
+strip: //p[@class='byline']
+strip: //div[@class='img-gallery']
+strip: //div[@class='callout']
+strip: //div[@class='add-your-view']
 convert_double_br_tags: yes
 test_url: http://www.brookings.edu/opinions/2011/1018_cyberattack_libya_goldsmith.aspx
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 71cafcd..d33d7d4
@@ -1,6 +1,6 @@
-title: //h1\r
-body: //div[@class='article']\r
-body: //div[@class='post']\r
-date: //*[@id='single']/span\r
-prune: no\r
+title: //h1
+body: //div[@class='article']
+body: //div[@class='post']
+date: //*[@id='single']/span
+prune: no
 test_url: http://brooksreview.net/2011/11/readability-agency/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bt.no.txt b/inc/3rdparty/site_config/standard/bt.no.txt
new file mode 100755 (executable)
index 0000000..200c2e4
--- /dev/null
@@ -0,0 +1,12 @@
+title: //h1[contains(@class,'articleTitle')]
+author: //span[@itemprop='name']
+date: //time[@class='published']
+body: //div[contains(@class,'bodyText')]
+
+strip_id_or_class: 'pull1'
+strip_id_or_class: 'relationArticle'
+strip: //span[@class='quote']
+
+# strip h2 if at end of article (typically a request for comments)
+strip: //div[contains(@class,'bodyText')]/node()[last()-1]/self::h2
+test_url: http://www.bt.no/meninger/debatt/Typisk-norsk-a-vare-god-nok-2884108.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/buffed.de.txt b/inc/3rdparty/site_config/standard/buffed.de.txt
new file mode 100755 (executable)
index 0000000..3dd36ce
--- /dev/null
@@ -0,0 +1,14 @@
+date: //meta[@itemProp='datePublished']/@content
+body: //div[@class='intro' or contains(@class, 'article_text')]
+prune: no
+strip_id_or_class: embedcode
+strip_id_or_class: EmbedSwitch
+strip_id_or_class: EmbedText
+strip_id_or_class: bildergalerie
+strip_id_or_class: subline_seohour_image
+strip_id_or_class: ova-player
+strip_id_or_class: jcarouseloutput
+strip_id_or_class: cbox_embedded
+
+test_url: http://www.buffed.de/SWTOR-Star-Wars-The-Old-Republic-PC-218697/News/SWTOR-Ab-Patch-24-Lore-Klamotten-faerben-1090051/
+test_url: http://www.buffed.de/feed.cfm?menu_alias=home
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a75fa04..f0fd08d
@@ -1,8 +1,8 @@
-title: //h1\r
-author: //h2/a\r
-date: substring-after(//h2, '|')\r
-strip_id_or_class: 'attachment'\r
-strip: //h3\r
-\r
+title: //h1
+author: //h2/a
+date: substring-after(//h2, '|')
+strip_id_or_class: 'attachment'
+strip: //h3
+
 body: //div[@class='entry']
 test_url: http://buquad.com/2012/04/09/paul-ryan/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/business2community.com.txt b/inc/3rdparty/site_config/standard/business2community.com.txt
new file mode 100755 (executable)
index 0000000..0dcc7ff
--- /dev/null
@@ -0,0 +1,5 @@
+date: substring-after(//p[@class='byline'],'Published')
+
+strip: //div[@class='article-meta']
+
+test_url: http://www.business2community.com/social-media/funky-ways-to-print-instagram-photos-0485340
old mode 100644 (file)
new mode 100755 (executable)
index c773db8..39eb742
@@ -1,12 +1,16 @@
-title://div[@class="sl-layout-post"]/h1\r
-body: //div[contains(@class, 'post-content') or contains(@class, 'KonaBody')]\r
-strip: //div[contains(@class, "post-sidebar")]\r
-strip: //div[@id='related-links']\r
-author://div[@class="byline"]/a\r
-date://div[@class="byline"]/span[@class="date"]\r
-prune: no\r
-\r
-strip://*[contains(@class,'sponsored-text')]\r
-strip: //div[@id='post_footer']\r
-\r
-test_url: http://www.businessinsider.com/microsoft-just-put-one-of-its-hardcore-technical-geniuses-on-xbox-2012-1
\ No newline at end of file
+title://div[@class="sl-layout-post"]/h1
+body: //div[contains(@class, 'post-content') or contains(@class, 'slide-module') or contains(@class, 'KonaBody')]
+strip: //div[contains(@class, "post-sidebar")]
+strip: //div[@id='related-links']
+strip: //div[@class='related-links-container']
+strip: //p[@class='source']
+author://div[@class="byline"]/a
+date://div[@class="byline"]/span[@class="date"]
+prune: no
+
+single_page_link: //a[contains(text(), 'View as one page')]
+
+strip://*[contains(@class,'sponsored-text')]
+strip: //div[@id='post_footer']
+
+test_url: http://www.businessinsider.com/microsoft-just-put-one-of-its-hardcore-technical-geniuses-on-xbox-2012-1
old mode 100644 (file)
new mode 100755 (executable)
index 714cfc9..6502b8e
@@ -1,12 +1,12 @@
-body: //div[@id='article_detail']\r
-title: //meta[@property='og:title']/@content\r
-date: //div[@id='date_com_art']//a[@class='date']\r
-author: //div[@id='article_detail']//font[@class='auteur']\r
-\r
-strip_id_or_class: porte_titre_theme\r
-strip_id_or_class: cont_param\r
-strip_id_or_class: date_com_art\r
-\r
-prune: no\r
-\r
+body: //div[@id='article_detail']
+title: //meta[@property='og:title']/@content
+date: //div[@id='date_com_art']//a[@class='date']
+author: //div[@id='article_detail']//font[@class='auteur']
+
+strip_id_or_class: porte_titre_theme
+strip_id_or_class: cont_param
+strip_id_or_class: date_com_art
+
+prune: no
+
 test_url: http://www.businessnews.com.tn/details_article.php?a=31073&t=522&lang=fr&temp=1
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7b3d063..0308559
@@ -1,30 +1,30 @@
-# story has several pages, should be detected\r
-body: //div[@id='storyBody']\r
-body: //div[@id='article_body']\r
-body: //div[@id='story_body']\r
-\r
-title://h1[@id='article_headline']\r
-\r
-# article author\r
-author: //p[@class='author']/a\r
-# story author(s)\r
-author: substring-after(//p[@class='byline'], 'By ')\r
-\r
-# article date\r
-date: //span[@class='published_date']\r
-# story date\r
-date: //span[@class='date']\r
-\r
-date: substring-after(//div[contains(@class,'attributor')],'on')\r
-strip_id_or_class: inset\r
-strip: //p/span[@class='photoCredit']\r
-strip: //h1\r
-\r
-strip_id_or_class: page_count\r
-strip_id_or_class: tools\r
-strip_id_or_class: pagination\r
-\r
-single_page_link: //li[@id='stPrint']/a\r
-\r
-test_url: http://www.businessweek.com/magazine/buyback-insurance-a-good-deal-for-retailers-07282011.html\r
+# story has several pages, should be detected
+body: //div[@id='storyBody']
+body: //div[@id='article_body']
+body: //div[@id='story_body']
+
+title://h1[@id='article_headline']
+
+# article author
+author: //p[@class='author']/a
+# story author(s)
+author: substring-after(//p[@class='byline'], 'By ')
+
+# article date
+date: //span[@class='published_date']
+# story date
+date: //span[@class='date']
+
+date: substring-after(//div[contains(@class,'attributor')],'on')
+strip_id_or_class: inset
+strip: //p/span[@class='photoCredit']
+strip: //h1
+
+strip_id_or_class: page_count
+strip_id_or_class: tools
+strip_id_or_class: pagination
+
+single_page_link: //li[@id='stPrint']/a
+
+test_url: http://www.businessweek.com/magazine/buyback-insurance-a-good-deal-for-retailers-07282011.html
 test_url: http://www.businessweek.com/articles/2012-06-06/american-pain-the-largest-u-dot-s-dot-pill-mills-rise-and-fall
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6df8bc4..97dddae
@@ -1,15 +1,15 @@
-# Creator: Greg Leuch <greg@...>\r
-\r
-# It can be messy.\r
-tidy:no\r
-\r
-# The basic template.\r
-title: //h1[@data-print='title']\r
-author: //a[@data-print='author']\r
-date: //time[@data-print='date']\r
-body: //div[@data-print='body']\r
-body: //section[@data-print='body']\r
-\r
-# For various things...\r
+# Creator: Greg Leuch <greg@...>
+
+# It can be messy.
+tidy:no
+
+# The basic template.
+title: //h1[@data-print='title']
+author: //a[@data-print='author']
+date: //time[@data-print='date']
+body: //div[@data-print='body']
+body: //section[@data-print='body']
+
+# For various things...
 strip: *[@data-print="ignore"]
 test_url: http://www.buzzfeed.com/hgrant/35-reasons-why-dogs-hate-the-holidays
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0abb643..63c8213
@@ -1,6 +1,6 @@
-title: //h1\r
-author: //a[contains(@href, '/author/')]\r
-date: //*[@class='post-date']\r
-strip: //*[@class='post-date']\r
+title: //h1
+author: //a[contains(@href, '/author/')]
+date: //*[@class='post-date']
+strip: //*[@class='post-date']
 strip: //h1
 test_url: http://bygonebureau.com/2011/06/20/an-existential-psychoanalysis/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cable.co.uk.txt b/inc/3rdparty/site_config/standard/cable.co.uk.txt
new file mode 100755 (executable)
index 0000000..435bf3b
--- /dev/null
@@ -0,0 +1,11 @@
+title: //div[@class='page-content']//h1
+body: //div[@class='page-content']
+strip_id_or_class: editorial-bar-top
+strip_id_or_class: social-bottom
+strip_id_or_class: comment-form
+strip_id_or_class: pc-why
+
+prune: no
+tidy: no
+
+test_url: http://www.cable.co.uk/news/bt-vision-unveils-interactive-guide-application-800734218/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3adc7a3..49f3430
@@ -1,8 +1,8 @@
-title: //h1[@class='producttabbed-title']\r
-body: //div[@class='postTabs_divs postTabs_curr_div']\r
-strip: //div[@class='ratingblock2']\r
-strip: //p[@id='breadcrumbs']\r
-strip: //div[@style='display: none']\r
-\r
+title: //h1[@class='producttabbed-title']
+body: //div[@class='postTabs_divs postTabs_curr_div']
+strip: //div[@class='ratingblock2']
+strip: //p[@id='breadcrumbs']
+strip: //div[@style='display: none']
+
 
 test_url: http://www.cardboardconnection.com/2012-topps-archives-baseball-cards
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7ba1ed7..5ea302e
@@ -1,6 +1,6 @@
-title: //h2\r
-body: //div[@class='entry']\r
-\r
-prune: no\r
+title: //h2
+body: //div[@class='entry']
+
+prune: no
 # otherwise the footnotes are removed
 test_url: http://carpeaqua.com/2011/03/27/the-intersection-of-power-and-portability/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cars.com.txt b/inc/3rdparty/site_config/standard/cars.com.txt
new file mode 100755 (executable)
index 0000000..71c5c05
--- /dev/null
@@ -0,0 +1,7 @@
+title: //div[contains(@class, 'basicInfo')]//h1
+
+body: //img[@id='chosenPhotoIMG'] | //div[@id='aboutThisVehicleBox']
+
+prune: no
+
+test_url: http://www.cars.com/go/search/detail.jsp?listingId=115364779
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8908292..2cd197f
@@ -1,7 +1,7 @@
-body: //div[@class='article']\r
-strip: //div[@class='revhistory']\r
-strip: //div[@class='toc']\r
-tidy: no\r
-prune: no\r
+body: //div[@class='article']
+strip: //div[@class='revhistory']
+strip: //div[@class='toc']
+tidy: no
+prune: no
 
 test_url: http://catb.org/~esr/faqs/smart-questions.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 2530510..ba5faf3
@@ -1,5 +1,5 @@
-title: //div[contains(@class, 'headline')]/h1\r
-author: //h5[contains(@class, 'byline')]\r
-date: substring-after(//h4[contains(@class, 'posted')], 'Posted: ')\r
+title: //div[contains(@class, 'headline')]/h1
+author: //h5[contains(@class, 'byline')]
+date: substring-after(//h4[contains(@class, 'posted')], 'Posted: ')
 body: //div[@id="storyboard"]
 test_url: http://www.cbc.ca/news/world/story/2012/01/16/cruise-ship-monday.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cbn.com.txt b/inc/3rdparty/site_config/standard/cbn.com.txt
new file mode 100755 (executable)
index 0000000..de8d883
--- /dev/null
@@ -0,0 +1,8 @@
+body: //div[contains(@class, 'articleText')]
+date: //div[contains(@class, 'articleDate')]
+author: //a[contains(@id, 'articleDetails_lnkByLine')]
+prune: no
+
+test_url: http://www.cbn.com/cbnnews/world/2013/June/Chilly-G-8-Obama-Putin-Agree-to-Disagree-on-Syria/
+test_url: http://www.cbn.com/cbnnews/world/2013/June/UK-Agency-Accused-of-Hacking-Foreign-Diplomats/
+test_url: http://www.cbn.com/cbnnews/feed/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4ba3da1..04d2023
@@ -1,14 +1,15 @@
-date: //meta[@name="published"]/@content\r
-date: //div[@class="timeLine"]\r
-title: //div[@id='contentBody']//h1\r
-author: //dl[@class="storyBlogByline"]/dd/a\r
-body: //div[@id='storyMediaBox'] | //div[contains(@class, 'storyText')]\r
-\r
-# Content Pruning\r
-strip: //div[@class="scrollingArrows"]\r
-strip: //div[@class="timeLine"]\r
-strip: //dl[@class="storyBlogByline"]\r
-\r
-prune: no\r
-\r
-test_url: http://www.cbsnews.com/8301-201_162-57366361/rescued-americans-dad-proud-of-the-u.s/
\ No newline at end of file
+date: //meta[@name="published"]/@content
+date: //div[@class="timeLine"]
+title: //div[@id='contentBody']//h1
+author: //dl[@class="storyBlogByline"]/dd/a
+body: //div[@id='storyMediaBox'] | //div[contains(@class, 'storyText')]
+
+# Content Pruning
+strip: //div[@class="scrollingArrows"]
+strip: //div[@class="timeLine"]
+strip: //dl[@class="storyBlogByline"]
+strip: //span[@class='image-credit']
+
+prune: no
+
+test_url: http://www.cbsnews.com/8301-201_162-57366361/rescued-americans-dad-proud-of-the-u.s/
diff --git a/inc/3rdparty/site_config/standard/cedarrepublican.com.txt b/inc/3rdparty/site_config/standard/cedarrepublican.com.txt
new file mode 100755 (executable)
index 0000000..42faa52
--- /dev/null
@@ -0,0 +1,2 @@
+body: //div[@class='frame']//img[@class='horizontal'] | //div[@class='content']
+test_url: http://cedarrepublican.com/online_features/gift_ideas/sending-mother-s-day-flowers-how-to-be-sure-they/article_b69af9b8-1f05-5352-8621-16ce007e5623.html
old mode 100644 (file)
new mode 100755 (executable)
index ea0df2a..d00d65d
@@ -1,10 +1,10 @@
-title: //*[@id='Content']/span[1]\r
-author: substring-after(substring-before(//*[@id='Content']/span[2], ')'), '(')\r
-date: substring-before(substring-after(//*[@id='Content']/span[2], 'Updated: '), 'Counter')\r
-\r
-strip: //*[@id='Content']/span[1]\r
-strip: //*[@id='Content']/span[2]\r
-\r
-body: //*[@id='Content']\r
+title: //*[@id='Content']/span[1]
+author: substring-after(substring-before(//*[@id='Content']/span[2], ')'), '(')
+date: substring-before(substring-after(//*[@id='Content']/span[2], 'Updated: '), 'Counter')
+
+strip: //*[@id='Content']/span[1]
+strip: //*[@id='Content']/span[2]
+
+body: //*[@id='Content']
 
 test_url: http://www.chinamining.org/News/2011-07-22/1311319069d48087.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 1d29410..3144053
@@ -1,5 +1,5 @@
-title: //div[@class='title']\r
-author: //div[@class='author']\r
-prune: no\r
-\r
+title: //div[@class='title']
+author: //div[@class='author']
+prune: no
+
 test_url: http://www.chomsky.info/onchomsky/2002----.htm
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/chrisltd.com.txt b/inc/3rdparty/site_config/standard/chrisltd.com.txt
new file mode 100755 (executable)
index 0000000..86d0f5d
--- /dev/null
@@ -0,0 +1,6 @@
+title: //header/h1/b[contains(@class, 'title')]
+author: substring-after(//article/header/div, 'By ')
+date: //header/h1/span[contains(@class, 'date')]
+body: //div[@id='main']/article
+strip: //header
+test_url: http://chrisltd.com/blog/2012/03/fix-widows-indesign/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 44288a4..86be14c
@@ -1,13 +1,13 @@
-title://div[@class='title']\r
-author://div[@class='byline']/b\r
-date:substring-after(//div[@class='byline'], 'posted')\r
-body://div[@id='body']\r
-wrap_in(h2)://span[@class='subhead']\r
-wrap_in(i)://p[@class='bio']\r
-wrap_in(i)://p[@class='copyright']\r
-strip://div[@class='title']\r
-strip://div[@class='deck']\r
-strip://div[@class='byline']\r
-strip://div[@class='copyright']\r
+title://div[@class='title']
+author://div[@class='byline']/b
+date:substring-after(//div[@class='byline'], 'posted')
+body://div[@id='body']
+wrap_in(h2)://span[@class='subhead']
+wrap_in(i)://p[@class='bio']
+wrap_in(i)://p[@class='copyright']
+strip://div[@class='title']
+strip://div[@class='deck']
+strip://div[@class='byline']
+strip://div[@class='copyright']
 strip://br
 test_url: http://www.christianitytoday.com/ct/2012/aprilweb-only/my-god-forsaken-me.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7f089c5..fb5f342
@@ -1,5 +1,5 @@
-title: //h1[@class="entry-title"]\r
-author: //*[@class="author vcard fn"]\r
-date: //*[@class="published"]\r
+title: //h1[@class="entry-title"]
+author: //*[@class="author vcard fn"]
+date: //*[@class="published"]
 body: //div[(@class = "dd_content_wrap")]
 test_url: http://christianpf.com/do-ibuys-lead-to-more-buying/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 5c5889a..b3c7651
@@ -1,6 +1,6 @@
-tidy: no\r
-prune: no\r
-date: //article//time[@pubdate]\r
-title: //article/header/h2\r
+tidy: no
+prune: no
+date: //article//time[@pubdate]
+title: //article/header/h2
 body: //article
 test_url: http://www.christies.com/LotFinder/custom/lot_details_MultiLanguage.aspx?from=salesummary&intObjectID=5556662&sid=e536ed1a-b763-41c4-afcf-c94815ec6eee&LID=3
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d4cc858..5a1d043
@@ -1,9 +1,9 @@
-body: //pre[@id='cx-desc-text']\r
-body: //div[contains(@class, 'overview-tab-right-bar-info')]\r
-title: //h1[contains(@class, 'detail-dialog-title')]\r
-tidy: no\r
-prune: no\r
-replace_string(<noscript>): <div>\r
-replace_string(</noscript>): </div>\r
+body: //pre[@id='cx-desc-text']
+body: //div[contains(@class, 'overview-tab-right-bar-info')]
+title: //h1[contains(@class, 'detail-dialog-title')]
+tidy: no
+prune: no
+replace_string(<noscript>): <div>
+replace_string(</noscript>): </div>
 
 test_url: https://chrome.google.com/webstore/detail/pnaiinchjaonopoejhknmgjingcnaloc
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0c6c11e..e86d3ec
@@ -1,17 +1,17 @@
-title: //h1[contains(@class, "entry-title")]\r
-author: //p[contains(@class, "byline")]\r
-\r
-# blog articles (chronicle.com/blogs/*)\r
-body: //div[contains(@class, "abstract")]\r
-date: //p[contains(@class, "time")]\r
-\r
-# all (?) other articles\r
-body: //div[@id="article-body"]\r
-date: //p[contains(@class, "dateline")]\r
-\r
-# remove sidebars containing images (I assume this is desired for Instapaper)\r
-strip: //div[@id="related"]\r
-strip: //div[contains(@class, "image")]\r
-\r
+title: //h1[contains(@class, "entry-title")]
+author: //p[contains(@class, "byline")]
+
+# blog articles (chronicle.com/blogs/*)
+body: //div[contains(@class, "abstract")]
+date: //p[contains(@class, "time")]
+
+# all (?) other articles
+body: //div[@id="article-body"]
+date: //p[contains(@class, "dateline")]
+
+# remove sidebars containing images (I assume this is desired for Instapaper)
+strip: //div[@id="related"]
+strip: //div[contains(@class, "image")]
+
 # note that if you're not a Chronicle subscriber (personally or institutionally), you'll only see the first couple of paragraphs of the article, and Instapaper will display that with some crap above and below. thank goodness for that bookmarklet
 test_url: http://chronicle.com/article/In-a-Land-of-Second-Chances/128375/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ciaosamin.com.txt b/inc/3rdparty/site_config/standard/ciaosamin.com.txt
new file mode 100755 (executable)
index 0000000..02fd343
--- /dev/null
@@ -0,0 +1,4 @@
+body://div[contains(@class, 'entry-content')]
+date://h2[contains(@class, 'date-header')]
+title://h3[contains(@class, 'post-title')]
+test_url: http://www.ciaosamin.com/2013/04/how-this-happened.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b9f9a12..b891363
@@ -1,33 +1,33 @@
-# fforst@...\r
-\r
-# Use link to print article for single page view\r
-single_page_link: //a[@class="print"]\r
-\r
-# set body\r
-tidy: no\r
-body: //div[@class='artikel-content']\r
-\r
-# strip title and subtitle since we got it already\r
-strip: //div[@class='issue']\r
-strip: //div[@class='artikel-content']/h2\r
-\r
-# some authors are known and have a link, others don't\r
-author: //a[contains(@href, 'autor?')]\r
-\r
-#date\r
-date: //span[@class='article-date']\r
-\r
-# Strip author since we got him\r
-strip_id_or_class: author\r
-\r
-#strip captions\r
-strip_id_or_class: field-name-field-image-credit\r
-strip_id_or_class: field-name-field-article-image-subtitle\r
-\r
-# remove community functions\r
-strip: //div[@class='meta']\r
-strip: //div[@id='comments']\r
-\r
-# remove "continue on the next page" text\r
+# fforst@...
+
+# Use link to print article for single page view
+single_page_link: //a[@class="print"]
+
+# set body
+tidy: no
+body: //div[@class='artikel-content']
+
+# strip title and subtitle since we got it already
+strip: //div[@class='issue']
+strip: //div[@class='artikel-content']/h2
+
+# some authors are known and have a link, others don't
+author: //a[contains(@href, 'autor?')]
+
+#date
+date: //span[@class='article-date']
+
+# Strip author since we got him
+strip_id_or_class: author
+
+#strip captions
+strip_id_or_class: field-name-field-image-credit
+strip_id_or_class: field-name-field-article-image-subtitle
+
+# remove community functions
+strip: //div[@class='meta']
+strip: //div[@id='comments']
+
+# remove "continue on the next page" text
 strip: //p[text()="[SEITE]"]
 test_url: http://www.cicero.de/weltbuehne/ihre-wut-ist-global-krise-jugend-revolten-aufstaende-zelte/43049
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4d3ac80..d7e9b76
@@ -1,4 +1,4 @@
-body: //*[(@id = "articlebody")]\r
-strip_id_or_class: rotulo\r
+body: //*[(@id = "articlebody")]
+strip_id_or_class: rotulo
 
 test_url: http://ciperchile.cl/2011/04/18/las-operaciones-secretas-que-ordenaba-karadima-para-aniquilar-a-su-competencia/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a0c3ea5..df4c7cc
@@ -1,6 +1,6 @@
-body: //p[@class='subhead' or @class='attribution'] | //div[@class='article-body']\r
-prune: no\r
-\r
-single_page_link: //li[@class='print']/a\r
-\r
+body: //p[@class='subhead' or @class='attribution'] | //div[@class='article-body']
+prune: no
+
+single_page_link: //li[@class='print']/a
+
 test_url: http://www.cjr.org/behind_the_news/from_breaking_news_to_baseless.php
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/classyllama.com.txt b/inc/3rdparty/site_config/standard/classyllama.com.txt
new file mode 100755 (executable)
index 0000000..1864eee
--- /dev/null
@@ -0,0 +1,6 @@
+date: //div[@id='content']//p[contains(@class, 'date')]/span
+author: substring-after(//div[@id='content']//div[contains(@class, 'over-under-bars')]/p[last()]/text(), 'Posted by ')
+body: //div[@id='content']//div[@class='pane-content']
+strip_id_or_class: trackback-url
+strip_id_or_class: over-under-bars
+test_url: http://www.classyllama.com/content/layout-caching
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 369e88a..d5a22cc
@@ -1,6 +1,6 @@
-title://div[@class="entrytitle"]/a\r
-author:substring-after(substring-before(//div[@class="entrytime"], "|"), "By ")\r
-date:substring-before(substring-after(//div[@class="entrytime"], "|"), "- Posted")\r
-body://div[@class="entrybody"]\r
+title://div[@class="entrytitle"]/a
+author:substring-after(substring-before(//div[@class="entrytime"], "|"), "By ")
+date:substring-before(substring-after(//div[@class="entrytime"], "|"), "- Posted")
+body://div[@class="entrybody"]
 strip://div[@class="entrybody"]//p[@class="singleinfo"]
 test_url: http://clientk.com/2011/12/19/the-impact-of-more/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b356bbd..0148e54
@@ -1,11 +1,11 @@
-title: //h1\r
-author: //a[@class='auteur']\r
-body: //div[@class='editorial']\r
-next_page_link: //a[contains(text(),'Page suivante')]\r
-strip: //a[contains(text(),'Page suivante')]\r
-strip: //a[contains(text(),'Page précédente')]\r
-strip_id_or_class: slideshow\r
-\r
-prune: no\r
-\r
+title: //h1
+author: //a[@class='auteur']
+body: //div[@class='editorial']
+next_page_link: //a[contains(text(),'Page suivante')]
+strip: //a[contains(text(),'Page suivante')]
+strip: //a[contains(text(),'Page précédente')]
+strip_id_or_class: slideshow
+
+prune: no
+
 test_url: http://www.clubic.com/carte-graphique/carte-graphique-amd/radeon-hd-7770/article-478936-1-radeon-hd-7750-7770.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 2bc96d2..0b76377
@@ -1,6 +1,6 @@
-body: //div[contains(@id,'article-body')]\r
-strip://div[contains(@id,'disqus_count_block')]\r
-strip://div[contains(@id,'col-left')]\r
-strip://div[contains(@id,'col-right')]\r
+body: //div[contains(@id,'article-body')]
+strip://div[contains(@id,'disqus_count_block')]
+strip://div[contains(@id,'col-left')]
+strip://div[contains(@id,'col-right')]
 
 test_url: http://www.cmswire.com/cms/customer-experience/for-apps-and-appstores-the-singularity-is-approaching-014888.php
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cn.engadget.com.txt b/inc/3rdparty/site_config/standard/cn.engadget.com.txt
new file mode 100755 (executable)
index 0000000..63f6f7e
--- /dev/null
@@ -0,0 +1,5 @@
+title: //h2[@class="posttitle"]
+body: //div[@class="postbody"]
+prune: no
+
+test_url: http://cn.engadget.com/2013/06/29/google-play-music-all-access/
diff --git a/inc/3rdparty/site_config/standard/cn.reuters.com.txt b/inc/3rdparty/site_config/standard/cn.reuters.com.txt
new file mode 100755 (executable)
index 0000000..b387866
--- /dev/null
@@ -0,0 +1,5 @@
+title: //div[@id='maincontent']//h1
+body: //div[@id='resizeableText']
+
+test_url: http://cn.reuters.com/article/CNAnalysesNews/idCNKBS0FF0NM20140710
+test_url: http://cn.reuters.feedsportal.com/CNAnalysesNews
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 74f46ba..eac08aa
@@ -1,16 +1,16 @@
-title: //meta[@property="og:title"]/@content\r
-body: //div[contains(@class, 'postBody')]\r
-date: //div[@id='nameAndTime']/time\r
-author: //div[@id='nameAndTime']/span[@class='author']\r
-\r
-strip_id_or_class: image-credit\r
-strip_id_or_class: noAutolink\r
-strip_id_or_class: related\r
-\r
-prune: no\r
-tidy: no\r
-\r
-# early end\r
-replace_string(Download today's podcast</a>): Download today's podcast</a></div></body></html>\r
-\r
+title: //meta[@property="og:title"]/@content
+body: //div[contains(@class, 'postBody')]
+date: //div[@id='nameAndTime']/time
+author: //div[@id='nameAndTime']/span[@class='author']
+
+strip_id_or_class: image-credit
+strip_id_or_class: noAutolink
+strip_id_or_class: related
+
+prune: no
+tidy: no
+
+# early end
+replace_string(Download today's podcast</a>): Download today's podcast</a></div></body></html>
+
 test_url: http://www.cnet.com/8301-13952_1-57367607-81/the-404-981-where-the-world-is-a-vampire-podcast/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 995e2c7..6f69e4e
@@ -1,19 +1,23 @@
-title: //div[@class="cnn_storyarea"]/h1\r
-author: //div[@class="cnnByline"]/strong\r
-date: substring-after(//div[@class="cnn_strytmstmp"], 'Sun')\r
-date: substring-after(//div[@class="cnn_strytmstmp"], 'Mon')\r
-date: substring-after(//div[@class="cnn_strytmstmp"], 'Tue')\r
-date: substring-after(//div[@class="cnn_strytmstmp"], 'Wed')\r
-date: substring-after(//div[@class="cnn_strytmstmp"], 'Thu')\r
-date: substring-after(//div[@class="cnn_strytmstmp"], 'Fri')\r
-date: substring-after(//div[@class="cnn_strytmstmp"], 'Sat')\r
-strip: //div[@class="cnn_storyarea"]/h1\r
-strip_id_or_class: cnnByline\r
-strip_id_or_class: cnn_strytmstmp\r
-strip_id_or_class: cnn_strycaptiontxt\r
-strip_id_or_class: cnn_strybtntoolsbttm\r
-strip_id_or_class: cnn_strybtntools\r
-strip_id_or_class: cnn_strybtmcntnt\r
-strip_id_or_class: cnn_containerwht\r
-strip_id_or_class: cnn_stryathrtmp\r
-test_url: http://www.cnn.com/2012/05/13/us/new-york-police-policy/index.html?eref=rss_topstories
\ No newline at end of file
+body: //div[@id='cnnContentContainer']//div[contains(@class, 'cnn_strycntntlft')]
+title: //div[@class="cnn_storyarea"]/h1
+author: //div[@class="cnnByline"]/strong
+date: substring-after(//div[@class="cnn_strytmstmp"], 'Sun')
+date: substring-after(//div[@class="cnn_strytmstmp"], 'Mon')
+date: substring-after(//div[@class="cnn_strytmstmp"], 'Tue')
+date: substring-after(//div[@class="cnn_strytmstmp"], 'Wed')
+date: substring-after(//div[@class="cnn_strytmstmp"], 'Thu')
+date: substring-after(//div[@class="cnn_strytmstmp"], 'Fri')
+date: substring-after(//div[@class="cnn_strytmstmp"], 'Sat')
+strip: //div[@class="cnn_storyarea"]/h1
+strip_id_or_class: cnnByline
+strip_id_or_class: cnn_strytmstmp
+strip_id_or_class: cnn_strycaptiontxt
+strip_id_or_class: cnn_strybtntoolsbttm
+strip_id_or_class: cnn_strybtntools
+strip_id_or_class: cnn_strybtmcntnt
+strip_id_or_class: sharebar
+#strip_id_or_class: cnn_containerwht
+strip_id_or_class: cnn_stryathrtmp
+replace_string(<a name="em0"></a>): <!-- a name -->
+test_url: http://www.cnn.com/2012/05/13/us/new-york-police-policy/index.html?eref=rss_topstories
+test_url: http://rss.cnn.com/rss/edition.rss
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6a2c2b8..ac49aef
@@ -1,26 +1,26 @@
-# main sportsillustrated.com articles\r
-\r
-body: //div[@id="cnnStoryContent"]\r
-title: //div[@id="cnnStoryHeadline"]//h1\r
-author: //div[@id="cnnSubBanner"]//strong\r
-date: substring-after(//div[@id="cnnTimeStamp"], "Updated: ")\r
-date: substring-after(//div[@id="cnnTimeStamp"], "Posted: ")\r
-\r
-# kill ugly font buttons\r
-strip: //div[@id="cnnSCFontButtons"]\r
-\r
-# kill misc filler videos & etc\r
-strip: //div[@class="cnnDivideContent"]\r
-strip: //*[@class="cnnTMbox"]\r
-\r
-# si vault articles\r
-# -------------\r
-body: //div[@class="siv_artPara"]\r
-title: //div[@class="siv_artHeader"]//h1\r
-author: //div[@class="byline"]\r
-date: //div[@class="date"]\r
-\r
-next_page_link: //div[@id='cnnStoryContinue']/a\r
-strip_id_or_class: cnnstorypagination\r
-\r
+# main sportsillustrated.com articles
+
+body: //div[@id="cnnStoryContent"]
+title: //div[@id="cnnStoryHeadline"]//h1
+author: //div[@id="cnnSubBanner"]//strong
+date: substring-after(//div[@id="cnnTimeStamp"], "Updated: ")
+date: substring-after(//div[@id="cnnTimeStamp"], "Posted: ")
+
+# kill ugly font buttons
+strip: //div[@id="cnnSCFontButtons"]
+
+# kill misc filler videos & etc
+strip: //div[@class="cnnDivideContent"]
+strip: //*[@class="cnnTMbox"]
+
+# si vault articles
+# -------------
+body: //div[@class="siv_artPara"]
+title: //div[@class="siv_artHeader"]//h1
+author: //div[@class="byline"]
+date: //div[@class="date"]
+
+next_page_link: //div[@id='cnnStoryContinue']/a
+strip_id_or_class: cnnstorypagination
+
 test_url: http://cnnsi.com/2012/writers/peter_king/01/08/wild.card.round/index.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6cf72e2..83a21e1
@@ -1,10 +1,10 @@
-body: //div[@id='content']\r
-title: //div[@id='page_header']/h1\r
-\r
-strip_id_or_class: 'lineno'\r
-strip_id_or_class: 'block-toolbar-button'\r
-strip_id_or_class: 'recipe_score'\r
-strip: //div[@id='recipe_tools']\r
-strip: //div[@id='addcomment']\r
-\r
+body: //div[@id='content']
+title: //div[@id='page_header']/h1
+
+strip_id_or_class: 'lineno'
+strip_id_or_class: 'block-toolbar-button'
+strip_id_or_class: 'recipe_score'
+strip: //div[@id='recipe_tools']
+strip: //div[@id='addcomment']
+
 test_url: http://code.activestate.com/recipes/500261-named-tuples/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/code.fivefilters.org.txt b/inc/3rdparty/site_config/standard/code.fivefilters.org.txt
new file mode 100755 (executable)
index 0000000..269fb54
--- /dev/null
@@ -0,0 +1 @@
+body: //div[@id='content']
old mode 100644 (file)
new mode 100755 (executable)
index 40a1620..6e9c00a
@@ -1,5 +1,5 @@
-body: //div[@id="gc-pagecontent"]\r
-strip: //a[@class="backtotop"]\r
-prune: no\r
-\r
+body: //div[@id="gc-pagecontent"]
+strip: //a[@class="backtotop"]
+prune: no
+
 test_url: http://code.google.com/apis/analytics/docs/tracking/gaTrackingEcommerce.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/codeproject.com.txt b/inc/3rdparty/site_config/standard/codeproject.com.txt
new file mode 100755 (executable)
index 0000000..d1191ac
--- /dev/null
@@ -0,0 +1,3 @@
+body: //div[@id="contentdiv"]
+date: //span[@class="date"]
+test_url: http://www.codeproject.com/Articles/499902/Profiling-Entity-Framework-5-in-code
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9c95f10..adf6e5a
@@ -1,15 +1,15 @@
-body: //div[@class='blogbody']\r
-strip: //h3[@class='title']\r
-date: //h2[@class='date']\r
-#Should Atwood just be a literal?\r
-author: substring-before( substring-after(//div[@class='posted'], 'y'), 'V')\r
-\r
-# tim.kingman@... 2011-07-26\r
-# Prune:no to retain all-link ULs that are part of the body content like\r
-# http://www.codinghorror.com/blog/2011/07/building-a-pc-part-vii-rebooting.html\r
-# Then explicitly strip the "Posted By" and prev/next links that Prune:yes would have removed.\r
-\r
-prune: no\r
-strip: //div[@class='posted']/following-sibling::*\r
+body: //div[@class='blogbody']
+strip: //h3[@class='title']
+date: //h2[@class='date']
+#Should Atwood just be a literal?
+author: substring-before( substring-after(//div[@class='posted'], 'y'), 'V')
+
+# tim.kingman@... 2011-07-26
+# Prune:no to retain all-link ULs that are part of the body content like
+# http://www.codinghorror.com/blog/2011/07/building-a-pc-part-vii-rebooting.html
+# Then explicitly strip the "Posted By" and prev/next links that Prune:yes would have removed.
+
+prune: no
+strip: //div[@class='posted']/following-sibling::*
 strip: //div[@class='posted']
 test_url: http://www.codinghorror.com/blog/2011/07/building-a-pc-part-vii-rebooting.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9d75d64..318e6ff
@@ -1,14 +1,14 @@
-title: //h1[@class='title']\r
-author: //p[@class='byline']/a[1]\r
-date: //*[@class='date']\r
-\r
-body: //div[@class='article_body']\r
-strip: //p[@class='ca_intro']\r
-strip: //div[@id='action_bar']\r
-strip: //div[@class='below_content']\r
-strip: //div[@id='announcement']\r
-strip: //div[@id='leftovers']\r
-strip: //div[@class='form']\r
-strip: //div[@id='email_overlay']\r
+title: //h1[@class='title']
+author: //p[@class='byline']/a[1]
+date: //*[@class='date']
+
+body: //div[@class='article_body']
+strip: //p[@class='ca_intro']
+strip: //div[@id='action_bar']
+strip: //div[@class='below_content']
+strip: //div[@id='announcement']
+strip: //div[@id='leftovers']
+strip: //div[@class='form']
+strip: //div[@id='email_overlay']
 strip: //a[@class='close']
 test_url: http://www.collegehumor.com/article/6599562/how-it-happened-the-necktie
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 10fd251..c9854b4
@@ -1,8 +1,8 @@
-body: //div[@id="center"]//div[@class="node"]\r
-title: //div[@id="center"]//h2\r
-author: substring-after(//div[@id="center"]//div[@class="node"]//span[@class="submitted"], "&mdash;")\r
-date: substring-before(//div[@id="center"]//div[@class="node"]//span[@class="submitted"], "&mdash;")\r
-strip: //div[@id="center"]//h2[1]\r
-strip: //span[@class="submitted"][1]\r
+body: //div[@id="center"]//div[@class="node"]
+title: //div[@id="center"]//h2
+author: substring-after(//div[@id="center"]//div[@class="node"]//span[@class="submitted"], "&mdash;")
+date: substring-before(//div[@id="center"]//div[@class="node"]//span[@class="submitted"], "&mdash;")
+strip: //div[@id="center"]//h2[1]
+strip: //span[@class="submitted"][1]
 move_into(//div[@class="node"])://div[@class="breadcrumb"]
 test_url: http://community.service-now.com/blog/lawrenceeng/seasons-greetings-servicenow-team
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 00e6fdd..8345cf5
@@ -1,5 +1,5 @@
-strip_id_or_class:column-3\r
-strip_id_or_class:portlet-boundary\r
-strip_id_or_class:banner\r
+strip_id_or_class:column-3
+strip_id_or_class:portlet-boundary
+strip_id_or_class:banner
 
 test_url: http://www.computer.org/portal/web/buildyourcareer/careerwatch/jt19
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 2919924..5973c50
@@ -1,18 +1,18 @@
-title://h1\r
-\r
-author://div[@id="news-meta"]/a\r
-\r
-body://*[@id="main"]/div[1]\r
-\r
-strip://*[@id="main"]/div[2]\r
-strip://*[@id="main"]/div[3]\r
-strip://*[@id="page"]//footer\r
-\r
-#date: didn't manage to parse it\r
-\r
-#Images have to be stripped because the page does it with overlay\r
-strip://img\r
-\r
-#figures are not displayed in instapaper...\r
-strip://figure | //figcaption\r
+title://h1
+
+author://div[@id="news-meta"]/a
+
+body://*[@id="main"]/div[1]
+
+strip://*[@id="main"]/div[2]
+strip://*[@id="main"]/div[3]
+strip://*[@id="page"]//footer
+
+#date: didn't manage to parse it
+
+#Images have to be stripped because the page does it with overlay
+strip://img
+
+#figures are not displayed in instapaper...
+strip://figure | //figcaption
 test_url: http://www.computerbase.de/news/2012-06/verbraucherzentrale-mahnt-blizzard-fuer-diablo-3-ab/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8e1f3e1..7f20a4d
@@ -1,22 +1,22 @@
-title: //meta[@name='headline']/@content\r
-date: //meta[@name='date']/@content\r
-author: //meta[@name='author']/@content\r
-body: //div[contains(@class, 'article')]\r
-body://div[@id="article_body"]\r
-\r
-strip_id_or_class: banner\r
-strip: //noscript\r
-strip: //div[@style='width:1px;height:130px;float:right;']\r
-strip: //div[@class='storyby']\r
-strip_image_src: twitter_icon\r
-strip_image_src: rss_bug\r
-\r
-tidy: no\r
-prune: no\r
-\r
-next_page_link://div[@id="next_page"]/a\r
-\r
-single_page_link: concat('http://www.computerworld.com/s/article/print/', substring-after(//link[@rel='canonical']/@href, '/s/article/'))\r
-\r
-test_url: http://www.computerworld.com/s/article/9224348/Apple_s_new_OS_X_tightens_screws_on_some_malware\r
+title: //meta[@name='headline']/@content
+date: //meta[@name='date']/@content
+author: //meta[@name='author']/@content
+body: //div[contains(@class, 'article')]
+body://div[@id="article_body"]
+
+strip_id_or_class: banner
+strip: //noscript
+strip: //div[@style='width:1px;height:130px;float:right;']
+strip: //div[@class='storyby']
+strip_image_src: twitter_icon
+strip_image_src: rss_bug
+
+tidy: no
+prune: no
+
+next_page_link://div[@id="next_page"]/a
+
+single_page_link: concat('http://www.computerworld.com/s/article/print/', substring-after(//link[@rel='canonical']/@href, '/s/article/'))
+
+test_url: http://www.computerworld.com/s/article/9224348/Apple_s_new_OS_X_tightens_screws_on_some_malware
 test_url: http://www.computerworld.com/s/article/9227679/Windows_8_Release_Preview_Updated_but_still_uneasy
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a83f366..d819109
@@ -1,5 +1,5 @@
-strip: //div[contains(@class, 'articleAdtechAd')]\r
-title: //div[@id='article']/h1\r
-title: //div[contains(@class, 'article')]/h1\r
-body: //div[@id='articleText']\r
+strip: //div[contains(@class, 'articleAdtechAd')]
+title: //div[@id='article']/h1
+title: //div[contains(@class, 'article')]/h1
+body: //div[@id='articleText']
 test_url: http://www.computerworld.dk/art/56748/test-din-viden-med-computerworlds-store-sommerquiz?a=fp_1&i=0
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d2b289a..c3120fe
@@ -1,9 +1,9 @@
-# get author from string like "Posted by <author> on <date>"\r
-author: substring-before(substring-after(//div[@class='post']/p[@class='post-meta'], 'by'), 'on')\r
-\r
-# get date from string like "Posted by <author> on <date>"\r
-date: substring-after(//div[@class='post']/p[@class='post-meta'], 'on')\r
-\r
-# this keeps thumbnail images\r
+# get author from string like "Posted by <author> on <date>"
+author: substring-before(substring-after(//div[@class='post']/p[@class='post-meta'], 'by'), 'on')
+
+# get date from string like "Posted by <author> on <date>"
+date: substring-after(//div[@class='post']/p[@class='post-meta'], 'on')
+
+# this keeps thumbnail images
 prune: no
 test_url: http://www.contemporist.com/2011/11/02/landing-200-lamp-by-kim-hyunjoo
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9bad2c8..966cc86
@@ -1,7 +1,7 @@
-title: //div[@class='article_header']/h1\r
-body: //div[@class='article_header']/p | //div[@class='article_body']\r
-strip_id_or_class: share_this\r
-strip_id_or_class: sociable\r
-prune: no\r
-\r
+title: //div[@class='article_header']/h1
+body: //div[@class='article_header']/p | //div[@class='article_body']
+strip_id_or_class: share_this
+strip_id_or_class: sociable
+prune: no
+
 test_url: http://conversaciones.nokia.com/2011/10/07/cinco-atajos-en-el-nokia-n8/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cooper.com.txt b/inc/3rdparty/site_config/standard/cooper.com.txt
new file mode 100755 (executable)
index 0000000..a424409
--- /dev/null
@@ -0,0 +1,4 @@
+body: //*[contains(@class,'body')]
+date: //abbr[@class='published']
+
+test_url: http://www.cooper.com/journal/2012/08/2-weeks-left-to-win-your-way-to-the-woodstock-of-ux-coopers-ux-boot-camp.html/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a24374d..cf1fa93
@@ -1,7 +1,7 @@
-body: //div[@id="permalink"]/div[@class="post"]\r
-\r
-strip: //div[@id='backArrow']\r
-strip: //div[@id='fwdArrow']\r
-strip: //div[@class="post-title"]\r
+body: //div[@id="permalink"]/div[@class="post"]
+
+strip: //div[@id='backArrow']
+strip: //div[@id='fwdArrow']
+strip: //div[@class="post-title"]
 strip: //div[@class="sharing"]
 test_url: http://www.core77.com/blog/columns/why_design_education_must_change_17993.asp
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c9e9228..b6bd8be
@@ -1,6 +1,6 @@
-title: //div[@class='main']//h1[contains(@class, 'article-title')]\r
-author: //div[@class='mainauthorstyle']\r
-body: //div[@class='main']//div[@class='main-text']\r
-strip: //td[@width='140']\r
-\r
+title: //div[@class='main']//h1[contains(@class, 'article-title')]
+author: //div[@class='mainauthorstyle']
+body: //div[@class='main']//div[@class='main-text']
+strip: //td[@width='140']
+
 test_url: http://www.counterpunch.org/johnstone05172011.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d25cd05..037cd17
@@ -1,3 +1,3 @@
-title://h2\r
+title://h2
 body://div[contains(@class, 'entrytext')]
 test_url: http://www.crazybutable.com/weblog/archives/2010/07/01/house-ideas-that-worked/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 74bc6db..b5a8018
@@ -1,3 +1,3 @@
-body: //div[@class="readingtext"]\r
+body: //div[@class="readingtext"]
 title: substring-after(substring-after(//title, ':'), ':')
 test_url: http://www.crimethinc.com/texts/recentfeatures/nightmares.php
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7fa950a..61d5d6a
@@ -1,3 +1,3 @@
-author: //p[contains(@class,'author')]/a\r
+author: //p[contains(@class,'author')]/a
 date: //div[contains(@class,'date')]
 test_url: http://www.crn.de/netzwerke-tk/artikel-93103.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d4dbc5c..b482e34
@@ -1,18 +1,18 @@
-title: //h1[contains(@class, 'head')]\r
-\r
-# standard page\r
-body: //div[@id='mainColumn']//div[contains(@class, 'list-article-full')]\r
-# print page\r
-body: //div[@id='mainColumn']\r
-\r
-author: //a[contains(@class, 'ui-author')]\r
-\r
-single_page_link: //div[@class='storyToolbar']//a[contains(@href, '/print/')]\r
-\r
-strip_id_or_class: storyToolbar\r
-strip_id_or_class: promotion-tag\r
-\r
-tidy: no\r
-prune: no\r
+title: //h1[contains(@class, 'head')]
+
+# standard page
+body: //div[@id='mainColumn']//div[contains(@class, 'list-article-full')]
+# print page
+body: //div[@id='mainColumn']
+
+author: //a[contains(@class, 'ui-author')]
+
+single_page_link: //div[@class='storyToolbar']//a[contains(@href, '/print/')]
+
+strip_id_or_class: storyToolbar
+strip_id_or_class: promotion-tag
+
+tidy: no
+prune: no
 
 test_url: www.csmonitor.com/World/Middle-East/2011/1108/Imminent-Iran-nuclear-threat-A-timeline-of-warnings-since-1979/Earliest-warnings-1979-84
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 131a923..1da60b4
@@ -1,7 +1,7 @@
-title: //div[@id='csn_blogST_headline']/h1\r
-\r
-body: //div[@id='csn_blogST_main']\r
-strip_id_or_class: ipfootnotes\r
-strip: //div[@id='csn_blogST_main']/p[1]/img\r
+title: //div[@id='csn_blogST_headline']/h1
+
+body: //div[@id='csn_blogST_main']
+strip_id_or_class: ipfootnotes
+strip: //div[@id='csn_blogST_main']/p[1]/img
 strip: //div[@id='csn_blogST_sidebar']
 test_url: http://www.csnbayarea.com/blog/giants-talk/post/-?blog%2Fgiants-talk%2Fpost%2F-=&blockID=578902&feedID=5987
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0df72c3..c14a934
@@ -1,22 +1,22 @@
-# author's name is not isolated as a tag.... ugh\r
-convert_double_br_tags: yes\r
-body: //csn_blogST_main\r
-\r
-#junk above and around the article\r
-strip: /html/body/div[4]/div[3]/div/div/div/section/div/div/div/div/div/div\r
-strip: /html/body/div[4]/header\r
-strip_id_or_class: article-right-sidebar\r
-strip_id_or_class: rsn-gigya-sharebar-container\r
-strip_id_or_class: article-bottom\r
-strip_id_or_class: hider\r
-strip_id_or_class: footer\r
-strip_id_or_class: masthead\r
-strip_id_or_class: block-menu-menu-rsn-login-or-register\r
-strip_id_or_class: block-menu-menu-header-links\r
-strip_id_or_class: block-rsn-follow-bar-follow-bar\r
-strip_id_or_class: block-rsn-weather-rsn-weather-scoreboard\r
-strip_id_or_class: logo\r
-strip_id_or_class: element-invisible\r
-strip_id_or_class: site-name\r
-strip: //div[contains(@style, 'none')]\r
+# author's name is not isolated as a tag.... ugh
+convert_double_br_tags: yes
+body: //csn_blogST_main
+
+#junk above and around the article
+strip: /html/body/div[4]/div[3]/div/div/div/section/div/div/div/div/div/div
+strip: /html/body/div[4]/header
+strip_id_or_class: article-right-sidebar
+strip_id_or_class: rsn-gigya-sharebar-container
+strip_id_or_class: article-bottom
+strip_id_or_class: hider
+strip_id_or_class: footer
+strip_id_or_class: masthead
+strip_id_or_class: block-menu-menu-rsn-login-or-register
+strip_id_or_class: block-menu-menu-header-links
+strip_id_or_class: block-rsn-follow-bar-follow-bar
+strip_id_or_class: block-rsn-weather-rsn-weather-scoreboard
+strip_id_or_class: logo
+strip_id_or_class: element-invisible
+strip_id_or_class: site-name
+strip: //div[contains(@style, 'none')]
 test_url: http://www.csnphilly.com/eagles/can-stoutland-save-danny-watkins-career
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/css-tricks.com.txt b/inc/3rdparty/site_config/standard/css-tricks.com.txt
new file mode 100755 (executable)
index 0000000..3d8174a
--- /dev/null
@@ -0,0 +1,6 @@
+title://article[contains(@id, "post-")]/h1
+date://article[contains(@id, "post-")]/p[@class="time"]/time
+body://article[contains(@id, "post-")]
+strip://article[contains(@id, "post-")]/p[@class="time"]/time
+prune:yes
+test_url: http://css-tricks.com/off-canvas-menu-with-css-target/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cw.com.tw.txt b/inc/3rdparty/site_config/standard/cw.com.tw.txt
new file mode 100755 (executable)
index 0000000..6e3a91e
--- /dev/null
@@ -0,0 +1,14 @@
+author://span[contains(@class,'reporter')]
+
+date://span[contains(@class,'date')]
+
+body://div[contains(@class,'mainContaner')]
+
+strip://div[contains(@class,'mainHeaer')]
+strip://div[contains(@class,'keyW')]
+strip://div[contains(@class,'wonderful')]
+strip://div[contains(@class,'pages')]
+strip://div[contains(@class,'Topics TopicsW3')]
+
+next_page_link://li[@class='pageNext']/a[contains(.,'下一頁')]
+test_url: http://www.cw.com.tw/article/article.action?id=5032848
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4a00ef4..381446e
@@ -1,5 +1,5 @@
-single_page_link: //a\r
-tidy: no\r
-prune: no\r
+single_page_link: //a
+tidy: no
+prune: no
 
 test_url: da.feedsportal.com/c/585/f/413794/s/17037b5a/l/0L0Stelegraaf0Bnl0Cbinnenland0C10A2757860C0I0IKlacht0Itegen0Idr0B0IFrank0Iniet0I0Eontvankelijk0I0I0Bhtml0Dcid0Frss/ia1.htm
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dagogtid.no.txt b/inc/3rdparty/site_config/standard/dagogtid.no.txt
new file mode 100755 (executable)
index 0000000..1531472
--- /dev/null
@@ -0,0 +1,4 @@
+title: //span[@class = 'overskriftEkstrastor']
+author: //em/a
+
+test_url: http://dagogtid.no/nyhet.cfm?nyhetid=2414
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6101399..978ed1c
@@ -1,4 +1,4 @@
-tidy: no\r
-body: //article\r
+tidy: no
+body: //article
 
 test_url: http://www.dailydot.com/entertainment/tumblr-christopher-price-topherchris/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 124675c..6d4cb82
@@ -1,10 +1,10 @@
-body: //div[@id='article-1']//div[contains(@class, 'article-body')]\r
-title: //div[@class='meta']//a[@id='titleHref']\r
-date: //div[@class='meta']//p[@class='date']\r
-\r
-strip_id_or_class: invisible\r
-strip_id_or_class: divider-doodle\r
-\r
-prune: no\r
-\r
-test_url: http://www.dailykos.com/story/2012/01/26/1058790/-Newt-Gingrichs-campaign-admits-he-lied-during-debate-about-ABC-News-interview-with-his ex-wife
\ No newline at end of file
+body: //div[@id='article-1']//div[contains(@class, 'article-body')]
+title: //div[@class='meta']//a[@id='titleHref']
+date: //div[@class='meta']//p[@class='date']
+
+strip_id_or_class: invisible
+strip_id_or_class: divider-doodle
+
+prune: no
+
+test_url: http://www.dailykos.com/story/2012/01/26/1058790/-Newt-Gingrich-s-campaign-admits-he-lied-during-debate-about-ABC-News-interview-with-his-ex-wife
old mode 100644 (file)
new mode 100755 (executable)
index c83dbdb..cd29a4d
@@ -1,12 +1,12 @@
-body: //div[@id='js-article-text']\r
-strip: //div[@class='explore-links']\r
-strip: //div[@id='js-article-text']/br[position()=1]\r
-strip_id_or_class: print-or-mail-links\r
-strip_id_or_class: shareArticles\r
-strip_id_or_class: googleAds\r
-strip_id_or_class: digg-button\r
-strip_id_or_class: article-icon-links-container\r
-strip_id_or_class: clickToEnlarge\r
-tidy: no\r
-\r
+body: //div[@id='js-article-text']
+strip: //div[@class='explore-links']
+strip: //div[@id='js-article-text']/br[position()=1]
+strip_id_or_class: print-or-mail-links
+strip_id_or_class: shareArticles
+strip_id_or_class: googleAds
+strip_id_or_class: digg-button
+strip_id_or_class: article-icon-links-container
+strip_id_or_class: clickToEnlarge
+tidy: no
+
 test_url: http://www.dailymail.co.uk/news/article-1375423/Royal-wedding-Texan-billionaire-Joe-Albritton-invited-Prince-Charles.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dailystar.com.lb.txt b/inc/3rdparty/site_config/standard/dailystar.com.lb.txt
new file mode 100755 (executable)
index 0000000..3b15304
--- /dev/null
@@ -0,0 +1,6 @@
+title: //div[@class='ec-blog-headline']
+body: //*[@id="divDetails"]
+date: //*[@id="ctl00_ContentPlaceHolder1_tdDate"]
+author: //*[@id="ctl00_ContentPlaceHolder1_anchorAuthor"]/a
+autodetect_next_page: no
+test_url: http://dailystar.com.lb/Opinion/Columnist/2012/Oct-10/190803-americas-new-modesty-in-the-mideast.ashx#axzz2928JP5xE
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/danleech.com.txt b/inc/3rdparty/site_config/standard/danleech.com.txt
new file mode 100755 (executable)
index 0000000..1d4cec7
--- /dev/null
@@ -0,0 +1,6 @@
+tidy: no
+prune: no
+date: //article//time[@pubdate]
+title: //article/h1//span[contains(@class, 'entry-title')]
+body: //article/div[contains(@class, 'entry-content')]
+test_url: http://danleech.com/post/36822126876/simple-icons
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 96a2bc4..6066948
@@ -1,5 +1,5 @@
-autodetect_next_page: no\r
-tidy: no\r
-prune: no\r
+autodetect_next_page: no
+tidy: no
+prune: no
 body: //div[@class='NoOverflow']
 test_url: http://www.dansdata.com/gz129.htm
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dantri.com.vn.txt b/inc/3rdparty/site_config/standard/dantri.com.vn.txt
new file mode 100755 (executable)
index 0000000..f19fee7
--- /dev/null
@@ -0,0 +1,7 @@
+title: //h1[contains(@class, 'fon31 mt2')]
+body: //h2[contains(@class, 'fon33 mt1')] | //div[contains(@class, 'fon34 mt3')]
+
+prune: no
+
+test_url: http://dantri.com.vn/su-kien/chang-trai-mot-minh-dap-xe-vuot-450km-de-vieng-mo-dai-tuong-869763.htm
+test_url: http://dantri.com.vn/trangchu.rss
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index dca8ade..251cc67
@@ -1,7 +1,7 @@
-title: //div[@class="article"]/h1\r
-author: //div[@id="Sidebar"]/p/strong\r
-date: //h6[@class="dateline"]\r
-body: //div[@class="article"]\r
-strip: //h6[@class="dateline"]\r
-strip: //div[@class="article"]/h1\r
+title: //div[@class="article"]/h1
+author: //div[@id="Sidebar"]/p/strong
+date: //h6[@class="dateline"]
+body: //div[@class="article"]
+strip: //h6[@class="dateline"]
+strip: //div[@class="article"]/h1
 test_url: http://daringfireball.net/2011/10/apps_are_the_new_channels
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3534002..e9111a4
@@ -1,4 +1,4 @@
-body: //div[@id="article"]\r
-date: //p[@class="date"]\r
+body: //div[@id="article"]
+date: //p[@class="date"]
 author: //p[@class="byline"]
 test_url: http://www.datanami.com/datanami/2011-12-07/new_path_for_sap:_in_memory_computing,_predictive_analysis_converge.html?featured=top
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7d11c6e..524c4bf
@@ -1,8 +1,8 @@
-title: (//article//h2)[1]\r
-body: //article[contains(@class, 'post')]\r
-date: //time[@id='top_time']/@datetime\r
-\r
-prune: no\r
-tidy: no\r
-\r
+title: (//article//h2)[1]
+body: //article[contains(@class, 'post')]
+date: //time[@id='top_time']/@datetime
+
+prune: no
+tidy: no
+
 test_url: http://dcurt.is/predictions-txt
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/defomicron.net.txt b/inc/3rdparty/site_config/standard/defomicron.net.txt
new file mode 100755 (executable)
index 0000000..9f11258
--- /dev/null
@@ -0,0 +1,9 @@
+title: //article/h1
+author: //hgroup/h3/a
+date: //time
+body: //article
+strip: //aside
+footnotes: yes
+prune: no
+tidy: no
+test_url: https://defomicron.net/2012/09/ios-6/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 84fd4f7..c4b922e
@@ -1,4 +1,4 @@
-strip_id_or_class: banner\r
-strip_id_or_class: gamma\r
+strip_id_or_class: banner
+strip_id_or_class: gamma
 strip_id_or_class: module-list
 test_url: http://delong.typepad.com/sdj/2011/02/in-which-suresh-naidu-visits-the-new-jerusalem.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/democracynow.org.txt b/inc/3rdparty/site_config/standard/democracynow.org.txt
new file mode 100755 (executable)
index 0000000..b0050b4
--- /dev/null
@@ -0,0 +1,5 @@
+body: //div[contains(@class, 'blog_body')]
+
+prune: no
+
+test_url: http://www.democracynow.org/blog/2014/1/9/the_fbi_the_nsa_and_a
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 48722eb..07db352
@@ -1,13 +1,13 @@
-title: //div[@id='artikelHeader']/h1\r
-author: //span[@class='author']\r
-date: //span[@class='date']\r
-body: //div[@class='copytext']\r
-strip: //ul[@class='lookupLinksArtikel']\r
-\r
-strip: //div[@id='pageTop']\r
-strip: //div[@id='toolbar']\r
-strip: //div[@id='articleTools']\r
-strip: //div[@id='weiterlesen']\r
-strip: //div[@id='communityCanvas']\r
+title: //div[@id='artikelHeader']/h1
+author: //span[@class='author']
+date: //span[@class='date']
+body: //div[@class='copytext']
+strip: //ul[@class='lookupLinksArtikel']
+
+strip: //div[@id='pageTop']
+strip: //div[@id='toolbar']
+strip: //div[@id='articleTools']
+strip: //div[@id='weiterlesen']
+strip: //div[@id='communityCanvas']
 
 test_url: http://derstandard.at/1318726018343/Breitband-LTE-Was-bringt-die-neue-Mobilfunk-Generation
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6096db0..9020847
@@ -1,11 +1,11 @@
-tidy: no\r
-body: //div[@class='main']\r
-\r
-author: substring-before(substring-after(//div[@class='meta-single'], 'erstellt von '), ' am')\r
-date: substring-before(substring-after(//div[@class='meta-single'], ' am '), ' | ')\r
-\r
-strip_id_or_class: pagelink\r
-strip_id_or_class: wp-polls \r
-\r
+tidy: no
+body: //div[@class='main']
+
+author: substring-before(substring-after(//div[@class='meta-single'], 'erstellt von '), ' am')
+date: substring-before(substring-after(//div[@class='meta-single'], ' am '), ' | ')
+
+strip_id_or_class: pagelink
+strip_id_or_class: wp-polls 
+
 next_page_link: //div[@class='post-page-next']/a
 test_url: http://www.designtagebuch.de/die-gefuehlte-lesbarkeit/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a6dac5f..efa85f7
@@ -1,5 +1,5 @@
-body: (//blockquote[contains(@class, 'postcontent')])[1]\r
-body: (//div[starts-with(@id, 'post_message')])[1]\r
-\r
-prune: no\r
+body: (//blockquote[contains(@class, 'postcontent')])[1]
+body: (//div[starts-with(@id, 'post_message')])[1]
+
+prune: no
 tidy: no
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 548caba..d1d8a29
@@ -1,8 +1,8 @@
-title: //h1[@class="content-headline"]\r
-body: //div[@class="headers-container"] | //div[@class="content-container"]\r
-prune: no\r
-tidy: no\r
-\r
-single_page_link: //li[@class='utility-print']/a\r
-\r
+title: //h1[@class="content-headline"]
+body: //div[@class="headers-container"] | //div[@class="content-container"]
+prune: no
+tidy: no
+
+single_page_link: //li[@class='utility-print']/a
+
 test_url: http://www.details.com/culture-trends/critical-eye/201108/best-new-designers-innovations
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 43a8f0a..7609b72
@@ -1,3 +1,3 @@
-title: //div[@class="bodyText"]/h1\r
+title: //div[@class="bodyText"]/h1
 author: //div[@class="picture"]/a/img/@alt
 test_url: https://developers.facebook.com/blog/post/2012/03/22/developer-spotlight--foodspotting/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b960b37..6f1d4e2
@@ -1,6 +1,6 @@
-date: //h2[@class='date-header']\r
-body: //div[@class='post hentry']\r
-title: //h3\r
-strip: //div[@class='post-footer']\r
+date: //h2[@class='date-header']
+body: //div[@class='post hentry']
+title: //h3
+strip: //div[@class='post-footer']
 
 test_url: http://devlinsangle.blogspot.co.at/2012/03/difference-between-teaching-and_01.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a117202..f8b79c8
@@ -1,8 +1,8 @@
-title: //h1[@id='query_h1']\r
-body: //div[contains(@class, 'lunatext results_content')]\r
-strip_id_or_class: spl_unshd\r
-#replace_string(<div class="dicTl">): <div class="dicTl">------------------<br />\r
-\r
-prune: no\r
+title: //h1[@id='query_h1']
+body: //div[contains(@class, 'lunatext results_content')]
+strip_id_or_class: spl_unshd
+#replace_string(<div class="dicTl">): <div class="dicTl">------------------<br />
+
+prune: no
 
 test_url: http://www.wired.com/cloudline/2011/10/meet-arms-cortex-a15-the-future-of-the-ipad-and-possibly-the-macbook-air/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7e825a9..ced189c
@@ -1,6 +1,6 @@
-title: //div[@class='article']/h1\r
-date: substring-before(//p[@class='articletime'],'|')\r
-body: //div[@id='articletext']\r
-strip: //div[@class='inlineDiashow']\r
+title: //div[@class='article']/h1
+date: substring-before(//p[@class='articletime'],'|')
+body: //div[@id='articletext']
+strip: //div[@class='inlineDiashow']
 
 test_url: http://diepresse.com/home/politik/aussenpolitik/701905/TibeterProteste_Nonne-verbrennt-sich-selbst?_vl_backlink=/home/politik/index.do
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 2d2ae2c..80ce5ff
@@ -1,8 +1,8 @@
-# default parser works great\r
-# only add "author" and "next page link" reference\r
-# 2012-04-13\r
-\r
-next_page_link: //div[@class = 'pagination']/a[@class = 'next_page']\r
-\r
+# default parser works great
+# only add "author" and "next page link" reference
+# 2012-04-13
+
+next_page_link: //div[@class = 'pagination']/a[@class = 'next_page']
+
 author: //*[@class = 'author metadata']/a
 test_url: http://digiphoto.techbang.com/posts/2433--commercial-photography-communication-is-the-key-to-a-good-work
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 37192ac..18ce370
@@ -1,6 +1,6 @@
-title: //div[@class='post-title']/h1\r
-author: //a[@href='#author']\r
-body: //div[@class='post-content']\r
-strip: //div[@class='post-meta']\r
-\r
+title: //div[@class='post-title']/h1
+author: //a[@href='#author']
+body: //div[@class='post-content']
+strip: //div[@class='post-meta']
+
 test_url: http://www.digital-photography-school.com/10-ways-to-develop-yourself-photographically
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b21431d..f48bdfd
@@ -1,5 +1,5 @@
-title: //div[@class="article_header"]/h1\r
-date: //div[@class="article_pub"]/span[@class="time"]\r
-author: //div[@class="article_pub"]/span[@class="editors"]/a/text()\r
+title: //div[@class="article_header"]/h1
+date: //div[@class="article_pub"]/span[@class="time"]
+author: //div[@class="article_pub"]/span[@class="editors"]/a/text()
 body: //div[@class="article_body clear_left"]
 test_url: http://www.digitalspy.co.uk/movies/at-the-movies/a364066/top-5-super-bowl-movie-trailers-the-avengers-battleship-more.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 413e550..85cc78e
@@ -1,8 +1,11 @@
-convert_double_br_tags: yes\r
-\r
-title: substring(substring-after(//title, ':'), 1, string-length(substring-after(//title, ':')) - 10)\r
-body: //*[contains(@class, 'SB_Content')]\r
-author: string('Scott Adams')\r
-date: //*[contains(@class, 'SB_Detail')]/text()[1]\r
+#title: substring(substring-after(//title, ':'), 1, string-length(substring-after(//title, ':')) - 10)
+title: //div[contains(@class, 'SB_Title')]//a
+body: //div[contains(@class, 'STR_Image')]
+body: //*[contains(@class, 'SB_Content')]
+author: string('Scott Adams')
+date: //*[contains(@class, 'SB_Detail')]/text()[1]
 
-test_url: http://dilbert.com/blog/entry/death_by_hypnosis_or_not/
\ No newline at end of file
+
+test_url: http://dilbert.com/blog/entry/death_by_hypnosis_or_not/
+test_url: http://dilbert.com/strips/comic/2013-10-22
+test_url: http://feed.dilbert.com/dilbert/daily_strip
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9ef198c..bc315cf
@@ -1,19 +1,19 @@
-title: //div[@class='newsdetbd']\r
-body: //div[@id='innerleft'] \r
-#//p[@class = 'plnht']\r
-strip_image_src: /albums/\r
-strip: //div[@class='mrrt']\r
-prune: yes\r
-strip_id_or_class: 'fdpd'\r
-strip_id_or_class: 'epapt' \r
-strip_id_or_class: 'newsrtwd'\r
-strip_id_or_class: 'padtp'\r
-strip_id_or_class: 'newdt'\r
-strip_id_or_class: 'newdlt'\r
-strip: //div[@id='selNotes']\r
-strip_id_or_class: 'clsNotes'\r
-strip_id_or_class: 'clear'\r
-strip_id_or_class: 'cmtwrap'\r
-strip_id_or_class: 'sess'\r
+title: //div[@class='newsdetbd']
+body: //div[@id='innerleft'] 
+#//p[@class = 'plnht']
+strip_image_src: /albums/
+strip: //div[@class='mrrt']
+prune: yes
+strip_id_or_class: 'fdpd'
+strip_id_or_class: 'epapt' 
+strip_id_or_class: 'newsrtwd'
+strip_id_or_class: 'padtp'
+strip_id_or_class: 'newdt'
+strip_id_or_class: 'newdlt'
+strip: //div[@id='selNotes']
+strip_id_or_class: 'clsNotes'
+strip_id_or_class: 'clear'
+strip_id_or_class: 'cmtwrap'
+strip_id_or_class: 'sess'
 strip_id_or_class: 'parents'
 test_url: http://www.dinamalar.com/News_Detail.asp?Id=295725
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 86bb3b8..5283a0c
@@ -1,26 +1,28 @@
-# Since this element has class="clear", the Instapaper stylesheets (at least this text parser preview), will render it unreadable, with a 1px font size and line height.\r
-\r
-body: //div[@id="article-content"]\r
-\r
-\r
-# Ads\r
-strip_id_or_class: advert-space\r
-\r
-# Read more, recommend, comments etc\r
-strip_id_or_class: fbc-recommend\r
-strip_id_or_class: recommend\r
-strip_id_or_class: article-readers\r
-strip_id_or_class: article-addons\r
-strip_id_or_class: hook\r
-strip_id_or_class: right\r
-strip_id_or_class: footer\r
-\r
-# Other news\r
-strip: //div[@id="mirrors"]\r
-\r
-# Author\r
-author: //div[@id="byline"]/div/p/strong\r
-\r
-# Date\r
-date: substring(substring-after(//p[@class="published"], 'Publicerad '), 0, 11)\r
-test_url: http://www.dn.se/nyheter/varlden/landade-flygplan-mitt-i-villaomrade
\ No newline at end of file
+# Since this element has class="clear", the Instapaper stylesheets (at least this text parser preview) will render it unreadable, with a 1px font size and line height.
+
+body: //div[@id="article-content"]
+
+
+# Ads
+strip_id_or_class: advert-space
+
+# Read more, recommend, comments etc
+strip_id_or_class: fbc-recommend
+strip_id_or_class: recommend
+strip_id_or_class: article-readers
+strip_id_or_class: article-addons
+strip_id_or_class: hook
+strip_id_or_class: right
+strip_id_or_class: footer
+
+# Other news
+strip: //div[@id="mirrors"]
+
+# Author
+author: //div[@id="byline"]/div/p/strong
+
+# Date
+date: substring(substring-after(//p[@class="published"], 'Publicerad '), 0, 11)
+
+test_url: http://www.dn.se/nyheter/varlden/landade-flygplan-mitt-i-villaomrade
+test_url: http://www.dn.se/m/rss/senaste-nytt
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dobreprogramy.pl.txt b/inc/3rdparty/site_config/standard/dobreprogramy.pl.txt
new file mode 100755 (executable)
index 0000000..972293b
--- /dev/null
@@ -0,0 +1,6 @@
+title: //*[@class="news"]//h1[@class="title"]
+author: //*[@class="news"]//*[@class="newsInfo"]/a
+date: substring-before(//*[@class="news"]//*[@class="newsInfo"]/text(), ',')
+body: //*[@class="news"]//*[@class="newsContent"]
+footnotes: no
+test_url: http://www.dobreprogramy.pl/Sony-konczy-z-Foldinghome-na-PS3,Aktualnosc,36899.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9f65ea9..1c518a9
@@ -1,8 +1,8 @@
-strip: //*[(@id = "featured")]\r
-\r
-author:substring-after( //div[@class='posttitle']/h2[@class='author'],'by ')\r
-\r
-date: concat(//div[@class='month'],' ',//div[@class='day'])\r
-\r
+strip: //*[(@id = "featured")]
+
+author:substring-after( //div[@class='posttitle']/h2[@class='author'],'by ')
+
+date: concat(//div[@class='month'],' ',//div[@class='day'])
+
 #doctac doesn't provide a year, but month/day is better than nothing
 test_url: http://www.doctac.com/mac/iphone/instapaper-update-app/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 81683f0..20566ee
@@ -1,21 +1,21 @@
-# TODO: clean up the extra junk at the end of articles\r
-\r
-# general text formatting\r
-prune: no\r
-convert_double_br_tags:yes\r
-\r
-# where to find the basic metadata\r
-author://a[@class='articleauthor']\r
-date://a[starts-with(@href,'/en/search/published/')]\r
-title:substring-before(//h2[@class='title'],'&mdash;')\r
-body://div[@id='maincontainer']\r
-\r
-dissolve://div[starts-with(@id,'commentableblock')]\r
-\r
-# clean up the crap\r
-strip://div[contains(@class,'domusnetwork')]\r
-strip://div[contains(@class,'relative_wrapper')]\r
-\r
-strip://div[contains(@class,'captionsubimage')]/img[contains(@class,'arrow')]\r
+# TODO: clean up the extra junk at the end of articles
+
+# general text formatting
+prune: no
+convert_double_br_tags:yes
+
+# where to find the basic metadata
+author://a[@class='articleauthor']
+date://a[starts-with(@href,'/en/search/published/')]
+title:substring-before(//h2[@class='title'],'&mdash;')
+body://div[@id='maincontainer']
+
+dissolve://div[starts-with(@id,'commentableblock')]
+
+# clean up the crap
+strip://div[contains(@class,'domusnetwork')]
+strip://div[contains(@class,'relative_wrapper')]
+
+strip://div[contains(@class,'captionsubimage')]/img[contains(@class,'arrow')]
 wrap_in(em): //div[contains(@class,'captionsubimage')]/span
 test_url: http://www.domusweb.it/en/design/in-praise-of-lost-time/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 22907c2..0f98311
@@ -1,8 +1,8 @@
-title: //h1[@itemprop="name"]\r
-\r
-author: //div[contains(@class, 'author')]//div[contains(@class, 'name')]/a\r
-\r
-date: //div[contains(@class, 'b-info')]//span[contains(@class, 'date')]\r
-\r
+title: //h1[@itemprop="name"]
+
+author: //div[contains(@class, 'author')]//div[contains(@class, 'name')]/a
+
+date: //div[contains(@class, 'b-info')]//span[contains(@class, 'date')]
+
 body: //div[contains(@class, 'b-typo')]
 test_url: http://dou.ua/lenta/interviews/andrej-havryuchenko/?from=sb_mostcomm
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 99d7e5d..d72a222
@@ -1,21 +1,21 @@
-# This filter is tested on:\r
-# http://www.douban.com/note/215003067/\r
-# http://www.douban.com/note/213540049/\r
-# http://www.douban.com/group/topic/31140104/\r
-\r
-title: //div[@class='note-header']/h1\r
-title: //div[@id='content']/h1\r
-\r
-author: //div[@class='info']/ul/li/a\r
-author: //h3/span/a\r
-\r
-date://div[@class='note-header']/div/span\r
-date://h3/span[contains(@class, 'color-green')]\r
-\r
-body://div[contains(@class, 'note')]\r
-body://div[contains(@class, 'topic-content')]\r
-\r
-strip://h3\r
-\r
-convert_double_br_tags: yes\r
+# This filter is tested on:
+# http://www.douban.com/note/215003067/
+# http://www.douban.com/note/213540049/
+# http://www.douban.com/group/topic/31140104/
+
+title: //div[@class='note-header']/h1
+title: //div[@id='content']/h1
+
+author: //div[@class='info']/ul/li/a
+author: //h3/span/a
+
+date://div[@class='note-header']/div/span
+date://h3/span[contains(@class, 'color-green')]
+
+body://div[contains(@class, 'note')]
+body://div[contains(@class, 'topic-content')]
+
+strip://h3
+
+convert_double_br_tags: yes
 test_url: http://www.douban.com/group/topic/31140104/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 30179a3..001c810
@@ -1,9 +1,9 @@
-# next_page_link for product review\r
-# example: http://www.dpreview.com/reviews/lytro/\r
-next_page_link: //img[@alt = 'Next page']/../@href\r
-\r
-# next_page_link for other articles\r
-# example: http://www.dpreview.com/articles/6126592906/first-impressions-using-the-fujifilm-x-pro1\r
-next_page_link: //*[@class = 'pages']/*/td[@class = 'next enabled']/a\r
+# next_page_link for product review
+# example: http://www.dpreview.com/reviews/lytro/
+next_page_link: //img[@alt = 'Next page']/../@href
+
+# next_page_link for other articles
+# example: http://www.dpreview.com/articles/6126592906/first-impressions-using-the-fujifilm-x-pro1
+next_page_link: //*[@class = 'pages']/*/td[@class = 'next enabled']/a
 single_page_link: //a[contains(.,'Print view')]
 test_url: http://www.dpreview.com/articles/6126592906/first-impressions-using-the-fujifilm-x-pro1
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7e46b0d..d8ec1ac
@@ -1,9 +1,9 @@
-title: //meta[@property='og:title']/@content\r
-author: //div[@class='articleFunctions']//a\r
-date: //meta[@name='pubdate']/@content\r
-\r
-# Can you strip elements from the body only? It is required here (`//div[@class='articleContent']/p` breaks for some reason)\r
-body: //div[@class='articleContent']\r
-\r
+title: //meta[@property='og:title']/@content
+author: //div[@class='articleFunctions']//a
+date: //meta[@name='pubdate']/@content
+
+# Can you strip elements from the body only? It is required here (`//div[@class='articleContent']/p` breaks for some reason)
+body: //div[@class='articleContent']
+
 tidy: no
 test_url: http://www.dr.dk/Nyheder/Udland/2011/10/24/150115.htm
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 659d044..4898353
@@ -1,10 +1,10 @@
-body: //div[@class='postext']\r
-\r
-strip_id_or_class: ratingblock\r
-strip_id_or_class: hreview-aggregate\r
-strip: //div[contains(@style, 'display: none;')]\r
-\r
-tidy: no\r
-prune: no\r
-\r
+body: //div[@class='postext']
+
+strip_id_or_class: ratingblock
+strip_id_or_class: hreview-aggregate
+strip: //div[contains(@style, 'display: none;')]
+
+tidy: no
+prune: no
+
 test_url: http://www.dramasonline.com/jago-pakistan-jago-7th-december-2012-ali-gul-pir/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6125ce7..d500cb8
@@ -1,12 +1,12 @@
-body: //div[@class = "description"]\r
-body: //div[@id = "post"]\r
-\r
-strip_id_or_class: vcard\r
-strip_id_or_class: journallist\r
-strip_id_or_class: infobox\r
-strip_id_or_class: terms\r
-strip_id_or_class: replieslist\r
-strip_id_or_class: communityside\r
-\r
+body: //div[@class = "description"]
+body: //div[@id = "post"]
+
+strip_id_or_class: vcard
+strip_id_or_class: journallist
+strip_id_or_class: infobox
+strip_id_or_class: terms
+strip_id_or_class: replieslist
+strip_id_or_class: communityside
+
 
 test_url: http://www.drive2.ru/cars/audi/a6/a6_c5/elysey/journal/288230376151836654/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dropbox.com.txt b/inc/3rdparty/site_config/standard/dropbox.com.txt
new file mode 100755 (executable)
index 0000000..92ae31b
--- /dev/null
@@ -0,0 +1 @@
+single_page_link: //a[@id='download_button_link']
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ffb77e4..2da3eb1
@@ -1,8 +1,8 @@
-title://h1\r
-author://div[@class="submitted"]/a\r
-date:substring-after(//div[@class="meta"],'modified: ')\r
-date:substring-after(//div[@class="submitted"],'on ')\r
-body://div[@class="node-content"]\r
-strip://div[@class="meta"]\r
+title://h1
+author://div[@class="submitted"]/a
+date:substring-after(//div[@class="meta"],'modified: ')
+date:substring-after(//div[@class="submitted"],'on ')
+body://div[@class="node-content"]
+strip://div[@class="meta"]
 strip_id_or_class:book-navigation
 test_url: http://drupal.org/node/1327354
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 418c9f6..2978797
@@ -1,11 +1,11 @@
-title: //h2/a\r
-author: substring-before(substring-after(//span[@class='byline'], 'by'), ',')\r
-date: substring-before(substring-after(//span[@class='byline'], ','), '|')\r
-body: //div[@class='entry']\r
-\r
-\r
-# strip out auction stuff at the end of posts\r
-# tidy kills the center tag, so disable it\r
-tidy: no\r
+title: //h2/a
+author: substring-before(substring-after(//span[@class='byline'], 'by'), ',')
+date: substring-before(substring-after(//span[@class='byline'], ','), '|')
+body: //div[@class='entry']
+
+
+# strip out auction stuff at the end of posts
+# tidy kills the center tag, so disable it
+tidy: no
 strip: //center//table
 test_url: http://www.dukebasketballreport.com/articles/?p=42660
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dushumashang.com.txt b/inc/3rdparty/site_config/standard/dushumashang.com.txt
new file mode 100755 (executable)
index 0000000..6a50a77
--- /dev/null
@@ -0,0 +1,17 @@
+# This filter is tested on:
+# http://www.dushumashang.com/2389
+# http://www.dushumashang.com/2415
+# http://www.dushumashang.com/2355
+
+body://div[@class='main_content']
+#body://section[@class='entry_content fl']
+title://h2
+author://span[@class='article_author']/a
+date://span[@class='pub_date']/time
+
+strip://span[@class='article_author']
+strip://span[@class='pub_date']
+strip://div[@class='page_turn']
+strip://span[@class='source_link']/em
+wrap_in(strong)://span[@class='source_link']/a
+test_url: http://www.dushumashang.com/2355
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c816368..1a1990e
@@ -1,9 +1,9 @@
-strip://*[@id = 'blog_top_stories']\r
-strip://*[@id = 'takeover_off']\r
-strip://*[@id = 'right_gray_box']\r
-strip://*[@class = 'blog_topics']\r
-strip://*[@class = 'section_titles']\r
-\r
-author://div[@class = 'post_author_info']/a\r
+strip://*[@id = 'blog_top_stories']
+strip://*[@id = 'takeover_off']
+strip://*[@id = 'right_gray_box']
+strip://*[@class = 'blog_topics']
+strip://*[@class = 'section_titles']
+
+author://div[@class = 'post_author_info']/a
 date://div[@class = 'post_date_info']
 test_url: http://dvice.com/archives/2012/05/is-nfc-and-smar.php
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 908a1b5..89a68bc
@@ -1,8 +1,8 @@
-title: //div [@class="post contain"]/h1\r
-strip: //div [@class="post contain"]/h1\r
-body: //div [@class="post contain"]\r
-author: substring-before(//title, ':')\r
-author: substring-before(//title, ' ')\r
-\r
+title: //div [@class="post contain"]/h1
+strip: //div [@class="post contain"]/h1
+body: //div [@class="post contain"]
+author: substring-before(//title, ':')
+author: substring-before(//title, ' ')
+
 
 test_url: http://eamesinerudition.com/2012/03/hospital-numbers-are-bad-for-you
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c4c38f2..ba9d312
@@ -1,8 +1,8 @@
-title: //h1\r
-date: //div[@class="et_dateUnderTitle"]\r
-author: substring-after(//div[@class="et_authorUnderTitle"], 'By ')\r
-body: //div[@id="et_leftCol640split"]\r
-\r
-strip: //div[@id="et_leftCol640splitRight"]\r
+title: //h1
+date: //div[@class="et_dateUnderTitle"]
+author: substring-after(//div[@class="et_authorUnderTitle"], 'By ')
+body: //div[@id="et_leftCol640split"]
+
+strip: //div[@id="et_leftCol640splitRight"]
 strip: //div[@class="et_light_greybgboxlower"]
 test_url: http://eandt.theiet.org/magazine/2011/12/this-festive-waste.cfm
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d762091..36708da
@@ -1,18 +1,18 @@
-title: //div[@class='title_text']\r
-\r
-author: //div[@class='author_text']\r
-\r
-body: //div[@class='story_text']/..\r
-\r
-strip: //b\r
-\r
-strip_id_or_class: back_to_top\r
-strip_id_or_class: author_text\r
-strip_id_or_class: title_text\r
-\r
-wrap_in(center): //a\r
-\r
-dissolve: //a\r
\r
+title: //div[@class='title_text']
+
+author: //div[@class='author_text']
+
+body: //div[@class='story_text']/..
+
+strip: //b
+
+strip_id_or_class: back_to_top
+strip_id_or_class: author_text
+strip_id_or_class: title_text
+
+wrap_in(center): //a
+
+dissolve: //a
 footnotes: no
 test_url: http://www.eastoftheweb.com/short-stories/UBooks/Horl.shtml
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 5fa18ff..f17e1f7
@@ -1,5 +1,5 @@
-body: //h1[@class='it-ttl'] | //div[@id='mainImgHldr'] | //span[@id='prcIsum']\r
-\r
-strip_image_src: imgLoading_30x30.gif\r
-\r
+body: //h1[@class='it-ttl'] | //div[@id='mainImgHldr'] | //span[@id='prcIsum']
+
+strip_image_src: imgLoading_30x30.gif
+
 test_url: http://www.ebay.com/itm/BRAND-NEW-FM-Transmitter-Ca-r-Charger-iPhone-4S-4-4G-3GS-3G-2G-iPod-Touch-/190657497204
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ebafc19..729affd
@@ -1,6 +1,6 @@
-title: //h1[@class="title"]\r
-author: //div[@class="hosted"]/a\r
-date: substring-after(//div[@class="dateline"]/text(), '|')\r
-\r
+title: //h1[@class="title"]
+author: //div[@class="hosted"]/a
+date: substring-after(//div[@class="dateline"]/text(), '|')
+
 strip: //a[@class="top" and @href="#"]
 test_url: http://econlog.econlib.org/archives/2012/04/blinder_on_heal.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b59f554..936a191
@@ -1,7 +1,7 @@
-date: //div[@class="bb-md-noticia-fecha"]\r
-body: //div[@class="corpo"]\r
-dissolve: //div[@class="bb-md-noticia-extras"]\r
-strip: //strong\r
-strip_id_or_class: bb-md-noticia-foto-autor\r
+date: //div[@class="bb-md-noticia-fecha"]
+body: //div[@class="corpo"]
+dissolve: //div[@class="bb-md-noticia-extras"]
+strip: //strong
+strip_id_or_class: bb-md-noticia-foto-autor
 strip_id_or_class: bb-md-noticia-foto-bajada
 test_url: http://economia.estadao.com.br/noticias/economia,cmn-aprova-r-67-bi-em-credito-para-20-setores-da-economia,118501,0.htm
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 71dd62f..16c9ed6
@@ -1,10 +1,8 @@
-title: //div[@class='ec-blog-headline']\r
-body: //div[@class='ec-blog-body']\r
-body: //div[@class='ec-article-content clear']\r
-strip: //div[@class='related-items']\r
-date: substring-before(//p[@class='ec-article-info'], '|')\r
-prune: no\r
-\r
-autodetect_next_page: no\r
-\r
+body: //div[@class='main-content']
+date: //time[@class='date-created']
+strip: //aside
+prune: no
+
+autodetect_next_page: no
+
 test_url: http://www.economist.com/node/21528429
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 461d909..cf58581
@@ -1,13 +1,13 @@
-title: //meta[@property="og:title"]/@content\r
-body: //h2[@class='strapline'] | //article[contains(@class, 'node-article')]\r
-date: //time[@pubdate]/@datetime\r
-author: //span[@class='author-name']\r
-prune: no\r
-tidy: no\r
-strip: //footer\r
-\r
-replace_string(<p>[ pagebreak ]</p>): <!-- pagebreak -->\r
-\r
-single_page_link: //a[contains(@href, '?page=show')]\r
-\r
+title: //meta[@property="og:title"]/@content
+body: //h2[@class='strapline'] | //article[contains(@class, 'node-article')]
+date: //time[@pubdate]/@datetime
+author: //span[@class='author-name']
+prune: no
+tidy: no
+strip: //footer
+
+replace_string(<p>[ pagebreak ]</p>): <!-- pagebreak -->
+
+single_page_link: //a[contains(@href, '?page=show')]
+
 test_url: http://www.edge-online.com/features/telling-modern-warfares-story
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9980000..95805f6
@@ -1,5 +1,5 @@
-title: //div[@class='HomeLeftPannel IMGCTRL']/h2\r
-body: //div[@class='HomeLeftPannel IMGCTRL']//div[@class='Brownalink' or @id='shortdesc']\r
-tidy: no\r
-\r
+title: //div[@class='HomeLeftPannel IMGCTRL']/h2
+body: //div[@class='HomeLeftPannel IMGCTRL']//div[@class='Brownalink' or @id='shortdesc']
+tidy: no
+
 test_url: http://edge.org/print/conversation.php?cid=the-argumentative-theory
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/edition.channel5belize.com.txt b/inc/3rdparty/site_config/standard/edition.channel5belize.com.txt
new file mode 100755 (executable)
index 0000000..6d5f170
--- /dev/null
@@ -0,0 +1,9 @@
+title: //div[@id='singlePage']//h2
+body: //div[@id='singlePage']//div[contains(@class, 'post')]
+strip: //a[@title='Email This Story']
+strip_id_or_class: sociable
+
+prune: no
+
+test_url: http://edition.channel5belize.com/archives/86016
+test_url: http://edition.channel5belize.com/feed
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index dc8ebe1..6fc82d2
@@ -1,9 +1,18 @@
-body: //div[@id='cnnContentContainer']//div[contains(@class, 'cnn_strycntntlft')]\r
-strip: //div[@id='cnnCVP2']\r
-strip_id_or_class: cnn_strylftcexpbx\r
-strip_id_or_class: cnn_strylctcqrelt\r
-strip_id_or_class: cnn_strybtntoolsbttm\r
-strip_id_or_class: cnn_stryftsbttm\r
-strip_id_or_class: cnn_strybtmcntnt\r
+body: //div[@id='cnnContentContainer']//div[contains(@class, 'cnn_strycntntlft')]
+strip: //a[starts-with(@name, 'em')]
+strip: //div[@id='cnnCVP2']
+strip_id_or_class: cnn_strylftcexpbx
+strip_id_or_class: cnn_strylctcqrelt
+strip_id_or_class: cnn_strybtntoolsbttm
+strip_id_or_class: cnn_stryftsbttm
+strip_id_or_class: cnn_strybtmcntnt
+strip_id_or_class: cnn_stryshrwdgtbtm
+strip_id_or_class: cnnGalleryContainer
+strip_id_or_class: cnn_strycrcntr
+strip_id_or_class: cnn_html_slideshow
 prune: no
-test_url: http://edition.cnn.com/2011/US/04/29/severe.weather/index.html
\ No newline at end of file
+
+test_url: http://edition.cnn.com/2011/US/04/29/severe.weather/index.html
+test_url: http://edition.cnn.com/2013/08/15/world/africa/nigeria-boko-haram-commander-killed/index.html?eref=edition
+test_url: http://rss.cnn.com/rss/edition.rss
+test_url: http://rss.cnn.com/rss/edition_technology.rss
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/eetimes.com.txt b/inc/3rdparty/site_config/standard/eetimes.com.txt
new file mode 100755 (executable)
index 0000000..300db30
--- /dev/null
@@ -0,0 +1,8 @@
+body: //div[contains(@class, 'grayshowlinks')]
+
+next_page_link: //div[@id='sitecontentcol']//a[.='Next >']
+# Doesn't work (site doesn't always load full content in print view)
+#single_page_link: //div[@id='sitecontentcol']//a[contains(@href, 'print=yes')]
+
+test_url: http://www.eetimes.com/document.asp?doc_id=1319966&
+test_url: http://www.eetimes.com/rss_simple.asp
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 59f6a71..3756027
@@ -1,11 +1,11 @@
-title: //h1[@class='style6 nevek']\r
-\r
-body: //div[@class='bal3']\r
-\r
-\r
-prune: yes\r
-\r
-tidy: yes\r
-convert_double_br_tags: yes\r
+title: //h1[@class='style6 nevek']
+
+body: //div[@class='bal3']
+
+
+prune: yes
+
+tidy: yes
+convert_double_br_tags: yes
 
 test_url: http://ekultura.hu/olvasnivalo/egyeb/cikk/2010-12-15/interju-galvolgyi-judit-2010-december
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 52ffe2d..d4b0a9b
@@ -1,3 +1,3 @@
-body: //div[@id='jobDesc-bd']/p\r
+body: //div[@id='jobDesc-bd']/p
 
 test_url: http://www.elance.com/j/xml-technical-intergration/23687172/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/elderscrollsonline.com.txt b/inc/3rdparty/site_config/standard/elderscrollsonline.com.txt
new file mode 100755 (executable)
index 0000000..fa3892c
--- /dev/null
@@ -0,0 +1,22 @@
+date: //time
+title: //h1[contains(@class, "alpha")]
+body: //article[contains(@class, "news-post")]
+
+# fix dates - dates as they are won't work as strtotime doesn't understand format (03.28.2013)
+replace_string(<time class="gamma">01.): <time class="gamma">January.
+replace_string(<time class="gamma">02.): <time class="gamma">February.
+replace_string(<time class="gamma">03.): <time class="gamma">March.
+replace_string(<time class="gamma">04.): <time class="gamma">April.
+replace_string(<time class="gamma">05.): <time class="gamma">May.
+replace_string(<time class="gamma">06.): <time class="gamma">June.
+replace_string(<time class="gamma">07.): <time class="gamma">July.
+replace_string(<time class="gamma">08.): <time class="gamma">August.
+replace_string(<time class="gamma">09.): <time class="gamma">September.
+replace_string(<time class="gamma">10.): <time class="gamma">October.
+replace_string(<time class="gamma">11.): <time class="gamma">November.
+replace_string(<time class="gamma">12.): <time class="gamma">December.
+
+prune: no
+
+test_url: http://elderscrollsonline.com/en/rss
+test_url: http://elderscrollsonline.com/en/news/post/2013/03/27/developer-question-of-the-week-17
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0766471..56fba5f
@@ -1,27 +1,27 @@
-title: //h1\r
-date: //div[@class='datum']\r
-single_page_link: //a[contains(@href, '?type=99')]\r
-\r
-# this hack preserves the intro text, because it would be striped otherwise if the title is set to //h1\r
-dissolve: //div[@class='artikelMeldung']\r
-\r
-\r
-strip_id_or_class: anzeige\r
-strip_id_or_class: top_page_navigation\r
-strip_id_or_class: cr_image_container\r
-strip_id_or_class: cr_image_reference\r
-strip_id_or_class: cr_image_icon\r
-strip_id_or_class: _close_txt\r
-strip_id_or_class: _close_ico\r
-strip_id_or_class: clearer\r
-\r
-strip://h1\r
-strip://h6\r
-strip://div[contains(@id, 'plista')]\r
-strip://img[contains(@id,'tiny')]\r
-strip://img[@class='cr_image']\r
-\r
-# strip url at the top\r
-strip: //p[@style='font-size: 10px;']\r
+title: //h1
+date: //div[@class='datum']
+single_page_link: //a[contains(@href, '?type=99')]
+
+# this hack preserves the intro text, because it would be stripped otherwise if the title is set to //h1
+dissolve: //div[@class='artikelMeldung']
+
+
+strip_id_or_class: anzeige
+strip_id_or_class: top_page_navigation
+strip_id_or_class: cr_image_container
+strip_id_or_class: cr_image_reference
+strip_id_or_class: cr_image_icon
+strip_id_or_class: _close_txt
+strip_id_or_class: _close_ico
+strip_id_or_class: clearer
+
+strip://h1
+strip://h6
+strip://div[contains(@id, 'plista')]
+strip://img[contains(@id,'tiny')]
+strip://img[@class='cr_image']
+
+# strip url at the top
+strip: //p[@style='font-size: 10px;']
 
 test_url: http://www.elektroniknet.de/automotive/technik-know-how/sicherheitselektronik/article/87717/0/Besser_als_die_Wirklichkeit/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9fecd66..435c6c2
@@ -1,4 +1,4 @@
-single_page_link: //a[contains(@href, 'print_contenido')]\r
-title: //h2\r
+single_page_link: //a[contains(@href, 'print_contenido')]
+title: //h2
 author: //div[@class="autor"]
 test_url: http://www.elmalpensante.com/index.php?doc=display_contenido&id=668
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 32f9fc3..c6f9787
@@ -1,22 +1,22 @@
-title: //meta[@name='DC.title']/@content\r
-title: //div[contains(@class, 'cabecera_noticia')]//h1\r
-date: //meta[@name='DC.date']/@content\r
-date: //meta[@name='date']/@content\r
-body: //div[@class='columna_texto']\r
-body: //div[@id='cuerpo_noticia']\r
-body: //div[@class='estructura_2col_1zq']//div[@class='margen_n']\r
-\r
-prune: no\r
-\r
-strip_id_or_class: disposicion_vertical\r
-strip_id_or_class: ampliar_foto\r
-strip_id_or_class: utilidades\r
-strip_id_or_class: info_relacionada\r
-strip_id_or_class: m-kiosko\r
-strip_id_or_class: info_complementa\r
-\r
-strip: //div[starts-with(@id, 'sumario') and contains(., 'más información')]\r
-strip: //div[@id='coment' or @id='foros_not']\r
+title: //meta[@name='DC.title']/@content
+title: //div[contains(@class, 'cabecera_noticia')]//h1
+date: //meta[@name='DC.date']/@content
+date: //meta[@name='date']/@content
+body: //div[@class='columna_texto']
+body: //div[@id='cuerpo_noticia']
+body: //div[@class='estructura_2col_1zq']//div[@class='margen_n']
 
-test_url: http://elpais.com/elpais/2012/02/06/gente/1328526783_491687.html\r
+prune: no
+
+strip_id_or_class: disposicion_vertical
+strip_id_or_class: ampliar_foto
+strip_id_or_class: utilidades
+strip_id_or_class: info_relacionada
+strip_id_or_class: m-kiosko
+strip_id_or_class: info_complementa
+
+strip: //div[starts-with(@id, 'sumario') and contains(., 'más información')]
+strip: //div[@id='coment' or @id='foros_not']
+
+test_url: http://elpais.com/elpais/2012/02/06/gente/1328526783_491687.html
 test_url: http://www.elpais.com/articulo/cultura/mano/retrato/materia/elpepicul/20120207elpepicul_2/Tes
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/emaratalyoum.com.txt b/inc/3rdparty/site_config/standard/emaratalyoum.com.txt
new file mode 100755 (executable)
index 0000000..3d1313e
--- /dev/null
@@ -0,0 +1,7 @@
+body: //div[@id='main-column']//div[@class='content']
+
+prune: no
+
+test_url: http://www.emaratalyoum.com/sports/arab-and-international/2013-08-29-1.601844
+test_url: http://www.emaratalyoum.com/sports/arab-and-international/2013-08-29-1.601842
+test_url: http://www.emaratalyoum.com/public-sports-1.533088?ot=ot.AjaxPageLayout
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c1a9106..2ca0216
@@ -1,10 +1,10 @@
-body: //div[@id='content']\r
-strip: //div[@class='rl'] \r
-strip: //p[@class='authdesc']\r
-strip: //p[@class='strybtm']\r
-strip: //div[@id='stryFtrLft']\r
-strip: //div[@id='f1Conversation']\r
-strip: //div[@id='cmtSpncrRuler']\r
-strip: //div[@id='stryComments']\r
+body: //div[@id='content']
+strip: //div[@class='rl'] 
+strip: //p[@class='authdesc']
+strip: //p[@class='strybtm']
+strip: //div[@id='stryFtrLft']
+strip: //div[@id='f1Conversation']
+strip: //div[@id='cmtSpncrRuler']
+strip: //div[@id='stryComments']
 strip: //div[@id='athrData']
 test_url: http://en.espnf1.com/monaco/motorsport/story/50529.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6cc6b14..52acddb
@@ -1,7 +1,7 @@
-title: //meta[@property="og:title"]/@content\r
-body: //div[@class='post_body']\r
-date: //*[@class='post_time']\r
-\r
-prune: no\r
-\r
+title: //meta[@property="og:title"]/@content
+body: //div[@class='post_body']
+date: //*[@class='post_time']
+
+prune: no
+
 test_url: http://www.engadget.com/2011/05/20/screen-grabs-the-mentalist-takes-the-ipad-to-new-heights/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 35ace46..48f301f
@@ -1,7 +1,7 @@
-title: //h2\r
-body: //div[@class="post_content"]\r
-author: //p[@class="author"]/a\r
-date: //p[@class="date"]\r
-strip: //h2\r
+title: //h2
+body: //div[@class="post_content"]
+author: //p[@class="author"]/a
+date: //p[@class="date"]
+strip: //h2
 strip: //header
 test_url: http://engineering.tumblr.com/post/21276808338/tumblr-firehose
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index aed3a5f..9736599
@@ -1,7 +1,7 @@
-title: //span[@id='DetailedTitle']\r
-body: //div[@id='ctl00_cphBody_dvArticleInfoBlock'] | //td[@class='DetailedSummary']\r
-strip_id_or_class: sidebar\r
-strip_id_or_class: Skyscrapper_Body\r
-strip: //td[@class='DetailedSummary']/table[position() != 1]\r
-prune: no\r
+title: //span[@id='DetailedTitle']
+body: //div[@id='ctl00_cphBody_dvArticleInfoBlock'] | //td[@class='DetailedSummary']
+strip_id_or_class: sidebar
+strip_id_or_class: Skyscrapper_Body
+strip: //td[@class='DetailedSummary']/table[position() != 1]
+prune: no
 test_url: http://english.aljazeera.net//news/middleeast/2011/04/20114681444376835.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e2b99bf..ddd51c4
@@ -1,9 +1,9 @@
-body: //div[@id='article']//div[contains(@class, 'inside')]\r
-\r
-strip_id_or_class: tags\r
-strip_id_or_class: actions\r
-strip_id_or_class: google-ads\r
-\r
-prune: no\r
-\r
+body: //div[@id='article']//div[contains(@class, 'inside')]
+
+strip_id_or_class: tags
+strip_id_or_class: actions
+strip_id_or_class: google-ads
+
+prune: no
+
 test_url: http://www.enikos.gr/politics/98606,To_oxi_toy_Agorastoy_stoys_Germanoys.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3e7fba0..a756c45
@@ -1,10 +1,10 @@
-author://div[@class = 'article-author']/span[@class = 'byline']\r
-title://h1[@class = 'heading']\r
-body://div[@id = 'related-article-links']\r
-strip://div[@id = 'comment-sort-order']\r
-strip://div[@id = 'my-profile']\r
-strip://div[@class = 'article-author']\r
-strip://div[@class = 'bg-f8f1d8 width-385 text-left']\r
-strip://div[@id = 'login-status']\r
+author://div[@class = 'article-author']/span[@class = 'byline']
+title://h1[@class = 'heading']
+body://div[@id = 'related-article-links']
+strip://div[@id = 'comment-sort-order']
+strip://div[@id = 'my-profile']
+strip://div[@class = 'article-author']
+strip://div[@class = 'bg-f8f1d8 width-385 text-left']
+strip://div[@id = 'login-status']
 strip://div[@class = 'puff-padding']
 test_url: http://entertainment.timesonline.co.uk/tol/arts_and_entertainment/the_tls/article7177738.ece
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ericsuh.com.txt b/inc/3rdparty/site_config/standard/ericsuh.com.txt
new file mode 100755 (executable)
index 0000000..d25140c
--- /dev/null
@@ -0,0 +1,4 @@
+date: //h6[@class='datetime']/child::text()
+author: string("Eric J. Suh")
+footnotes: yes
+test_url: http://www.ericsuh.com/blog/posts/2012/8/strange-numbers.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 19a1e9d..21691a5
@@ -1,11 +1,11 @@
-title: concat(//div[@class='doc_author'], ' - ', upper-case(//div[@class='doc_title']))\r
-\r
-body: //div[@class='doc']\r
-\r
-prune: yes\r
-\r
-tidy: yes\r
-convert_double_br_tags: yes\r
-\r
+title: concat(//div[@class='doc_author'], ' - ', upper-case(//div[@class='doc_title']))
+
+body: //div[@class='doc']
+
+prune: yes
+
+tidy: yes
+convert_double_br_tags: yes
+
 strip: //a[contains(@href, 'www.facebook.com/pages/Elet-es-Irodalom/')]
 test_url: http://www.es.hu/2010-12-08_vissza-a-partpenzt
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7e17a04..fd453a1
@@ -1,2 +1,8 @@
+title: //h1[@class='headline']/div[@class='name']
+
+strip_image_src: 'http://cdn.themis-media.com/media/global/images/library/deriv/115/115825.png'
+
+next_page_link: //a[@class='next_page']
+
 strip_comments: no
-test_url: http://www.escapistmagazine.com/articles/view/columns/extraconsideration/8717-Extra-Consideration-The-Story
\ No newline at end of file
+test_url: http://www.escapistmagazine.com/articles/view/columns/criticalintel/10302-I-Hate-Magic
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 319d352..0647629
@@ -1,12 +1,12 @@
-title: //div[@class='headline'] | //div[@class='mod-header']/h3\r
-body: //div[contains(@class, 'article')]\r
-strip: //div[contains(@class, 'mod-inline')]\r
-strip: //*/span[@class='page-actions']\r
-strip: //div[@class='page-actions']/*\r
-strip: //div[@class='headline'] | //div[@class='mod-header']/h3\r
-strip: //div[@class='mod-blog-navigation']\r
-strip: //div[@class='monthday']\r
-strip: //div[@class='time']\r
-strip: //div[@class='timeofday']\r
+title: //div[@class='headline'] | //div[@class='mod-header']/h3
+body: //div[contains(@class, 'article')]
+strip: //div[contains(@class, 'mod-inline')]
+strip: //*/span[@class='page-actions']
+strip: //div[@class='page-actions']/*
+strip: //div[@class='headline'] | //div[@class='mod-header']/h3
+strip: //div[@class='mod-blog-navigation']
+strip: //div[@class='monthday']
+strip: //div[@class='time']
+strip: //div[@class='timeofday']
 strip: //div[contains(@class, 'mod-conversations')]
 test_url: http://espn.go.com/boston/mlb/story/_/id/7092528/terry-francona-victim-latest-red-sox-smear-campaign
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7566e8c..b9cb1e5
@@ -1,10 +1,11 @@
-title: //h1\r
-author: //div[@id='byline']\r
-\r
-body: //div[@id='printBody']\r
-\r
-single_page_link: concat('http://www.esquire.com/print-this/', substring-after(//link[@rel='canonical']/@href, 'esquire.com/'))\r
-\r
-prune: no\r
-\r
-test_url: http://www.esquire.com/features/impossible/price-is-right-perfect-bid-0810
\ No newline at end of file
+title: //h1
+author: //div[@id='byline']
+
+body: //div[@id='printBody']
+
+single_page_link: concat('http://www.esquire.com/print-this/', substring-after(//link[@rel='canonical']/@href, 'esquire.com/'))
+
+prune: no
+
+test_url: http://www.esquire.com/features/impossible/price-is-right-perfect-bid-0810
+test_url: http://www.esquire.com/blogs/politics/police-getting-leftover-armoured-iraq-trucks-112513
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 88c8c56..9a92239
@@ -1,6 +1,6 @@
-title: //*[@itemprop='headline']\r
-author: //*[@itemprop='author']\r
-date: //*[@itemprop='datePublished']\r
-body: //*[@itemprop='articleBody']\r
+title: //*[@itemprop='headline']
+author: //*[@itemprop='author']
+date: //*[@itemprop='datePublished']
+body: //*[@itemprop='articleBody']
 strip: //*[contains(@class, 'instapaper_ignore')]
 test_url: http://www.essentialpublicradio.org/story/2011-11-14/volunteers-sought-federal-tax-assistance-program-pennsylvania-9421
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 58da5ef..95f8cf7
@@ -1,6 +1,6 @@
-strip_id_or_class: 'left'\r
-strip_id_or_class: 'right'\r
-strip_id_or_class: 'block-belowcontent'\r
-author: //span[@class = 'name']/a\r
-date: //div[@class= 'datum']\r
+strip_id_or_class: 'left'
+strip_id_or_class: 'right'
+strip_id_or_class: 'block-belowcontent'
+author: //span[@class = 'name']/a
+date: //div[@class= 'datum']
 test_url: http://www.etc.se/intervju/lonsamt-att-radda-jorden
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6ecdf6b..8a35166
@@ -1,8 +1,8 @@
-body: //div[ @class='content' ]  |  //div[ @class='blog-entry' ]\r
-\r
-strip: //h2/abbr  |  //div[ @class='lowleader' ]  |  //*[ @class='discussion' ]  |  //img[ @class='play-button' ]  |  //div[ @class='boxout' ] | //h2/a | //h2 | //h2/div | //p[ @class='timestamp' ] | //a[ @class='eurogamer-author' ] | //p[ @class='aPager' ] | //h1 | //div[ @id='lowleader' ] | //a[ @class='next' ]  |  //div[contains(concat(' ', normalize-space(@class), ' '), ' pullquote ')]\r
-\r
-date://p[ @class='timestamp' ]\r
-\r
-author://a[ @class='eurogamer-author' ]\r
+body: //div[ @class='content' ]  |  //div[ @class='blog-entry' ]
+
+strip: //h2/abbr  |  //div[ @class='lowleader' ]  |  //*[ @class='discussion' ]  |  //img[ @class='play-button' ]  |  //div[ @class='boxout' ] | //h2/a | //h2 | //h2/div | //p[ @class='timestamp' ] | //a[ @class='eurogamer-author' ] | //p[ @class='aPager' ] | //h1 | //div[ @id='lowleader' ] | //a[ @class='next' ]  |  //div[contains(concat(' ', normalize-space(@class), ' '), ' pullquote ')]
+
+date://p[ @class='timestamp' ]
+
+author://a[ @class='eurogamer-author' ]
 test_url: http://www.eurogamer.net/articles/digitalfoundry-vs-unreal-engine-4
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0716251..ccb4f87
@@ -1,11 +1,11 @@
-author: substring-after(//div[@class='articleauthor'],'By ')\r
-\r
-# Blog posts\r
-date: //div[@class='articledate']\r
-# News\r
-date: //div[@class='articledate_b']\r
-\r
-body: //div[@class='articletext']\r
-\r
+author: substring-after(//div[@class='articleauthor'],'By ')
+
+# Blog posts
+date: //div[@class='articledate']
+# News
+date: //div[@class='articledate_b']
+
+body: //div[@class='articletext']
+
 convert_double_br_tags: yes
 test_url: http://www.evo.co.uk/carreviews/evolongtermtests/280072/bmw_330d_sport_touring.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d0cb283..d81d325
@@ -1,9 +1,10 @@
-title: //div[@id='article']/div[contains(@class, 'content')]/h1\r
-body: //div[@id='article']/div[contains(@class, 'content')]\r
-date: //div[contains(@class, 'article-slot')]/descendant::div[contains(@id, 'articledates')]\r
-\r
-strip: //img[contains(@src, 'img/px.gif')]\r
-prune: no\r
-# remove Facebook banner and obtrusive ad\r
-strip: //div[@id='article']/div[contains(@class, 'content')]/div[contains(@class, 'art-right')]\r
-test_url: http://www.expressen.se/kultur/1.2683904/medan-natet-dras-at
\ No newline at end of file
+title: //h1[contains(@class, 'b-headline_article')]
+body: //div[contains(@class, 'b-article_print')]
+
+single_page_link: //div[contains(@class, 'b-page__footer__actions')]//a[contains(@href, 'print=true')]
+
+prune: no
+
+test_url: http://www.expressen.se/kultur/1.2683904/medan-natet-dras-at
+test_url: http://www.expressen.se/gt/polis-om-styckmordet-extremt-markligt-fall/
+test_url: http://www.expressen.se/Pages/OutboundFeedsPage.aspx?id=3642159&viewstyle=rss
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/facebook.com.txt b/inc/3rdparty/site_config/standard/facebook.com.txt
new file mode 100755 (executable)
index 0000000..6a49276
--- /dev/null
@@ -0,0 +1,5 @@
+body: //div[@id='imagestage']
+prune: no
+tidy: no
+
+test_url: https://www.facebook.com/feeds/page.php?id=338077742912613&format=rss20
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c17e0b8..4c96a1a
@@ -1,3 +1,3 @@
-bosdy: //div[@class='content']\r
+body: //div[@class='content']
 
 test_url: http://facta.co.jp/blog/archives/20111026001026.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b941b74..2bfcc9b
@@ -1,18 +1,14 @@
-title: //h2[@class='related relatedTitle']\r
-author: //a[contains(@href, 'liste.php?author_id')]\r
-\r
-# can't think of a better way unfortunately, really bad markup on this site\r
-date: substring-after(//td[@style='width:85%;'], 'vom')\r
-\r
-# not sure why, but instapaper seems to suck up the teaser paragraph\r
-# not solved!\r
-body: //div[contains(@class, 'teaser')]\r
-body: //div[@id='content']\r
-\r
-# cleanup\r
-strip: //img[@src='http://www.falter.at/web/_pics/falterlogo_dblau.gif']\r
-strip: //div[@class='servicebox']\r
-strip: //h1\r
-strip: //br\r
-strip: //td[@id='adcol']
-test_url: http://www.falter.at/web/print/detail.php?id=1634
\ No newline at end of file
+title: //h1
+author: //a[contains(@href, '/kategorie/autoren')]
+date: //a[contains(@href, '/falter/ausgabe')]
+body: //article[@class='spanMain']
+
+# cleanup
+strip_id_or_class: 'respond'
+strip: //img[@src='http://www.falter.at/web/_pics/falterlogo_dblau.gif']
+strip_id_or_class: 'meta'
+strip_id_or_class: 'servicebox'
+strip_id_or_class: 'related'
+strip_id_or_class: 'twitter-share-button'
+strip: //br
+test_url: http://www.falter.at/falter/2013/03/26/der-dandy-auf-der-sinkenden-galeere/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8d0c4da..e7cab4d
@@ -1,6 +1,6 @@
-body: //*[@id = 'story text']\r
-author: //a[starts-with(@href, '/u/')]\r
-next_page_link: substring-after(//input[contains(@value, 'Next')]/@onclick, "self.location='")\r
-autodetect_next_page:yes\r
+body: //*[@id = 'story text']
+author: //a[starts-with(@href, '/u/')]
+next_page_link: substring-after(//input[contains(@value, 'Next')]/@onclick, "self.location='")
+autodetect_next_page:yes
 strip_id_or_class: 'a2a_kit'
 test_url: http://www.fanfiction.net/s/6497403/1/Spartan_Love
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 5547a76..a641723
@@ -1,16 +1,16 @@
-title: //h1\r
-author: //h5[@class='byline']//a\r
-date: //h5[@class='date']\r
-body: //figure[@class='node-poster'] | //div[contains(@class, "node-content")]\r
-strip_id_or_class: article-top-wrapper\r
-strip_id_or_class: footer-message\r
-strip_id_or_class: print-logo\r
-strip: //cite\r
-strip://*[@class='timestamp']\r
-strip://div[@id='page_right']\r
-strip://section[@id='header_region']\r
-strip://h1[@class='node-title']\r
-strip://div[@class='node-submitted']\r
-strip_id_or_class: skipnav\r
-test_url: http://www.fastcompany.com/3000226/link-between-quietness-and-productivity\r
+title: //h1
+author: //h5[@class='byline']//a
+date: //h5[@class='date']
+body: //figure[@class='node-poster'] | //div[contains(@class, "node-content")]
+strip_id_or_class: article-top-wrapper
+strip_id_or_class: footer-message
+strip_id_or_class: print-logo
+strip: //cite
+strip://*[@class='timestamp']
+strip://div[@id='page_right']
+strip://section[@id='header_region']
+strip://h1[@class='node-title']
+strip://div[@class='node-submitted']
+strip_id_or_class: skipnav
+test_url: http://www.fastcompany.com/3000226/link-between-quietness-and-productivity
 test_url: http://www.fastcompany.com/3003586/6-simple-rituals-reach-your-potential-every-day
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4fe5968..d087d2a
@@ -1,30 +1,36 @@
-# Title\r
-title: //p[@class='Content HeadlineShort']\r
-\r
-# Authors\r
-# some are known and have a link, others don't\r
-author: substring-after(//span[@class='Autor'], 'Von')\r
-\r
-# Date\r
-date: //span[@class='Datum']\r
-\r
-# Body\r
-body: //div[@class='Artikel']\r
-\r
-# Removements before body text\r
-strip: //div[@class='Breadcrumbs']\r
-strip: //div[@class='QuickSearchBox']\r
-strip: //div[@class='FAZArtikelEinleitung']\r
-strip: //div[@class='FAZArtikelReiter']\r
-strip: //div[@class='clear']\r
-\r
-# General removements\r
-strip: //span[@class='Bildnachweis']\r
-\r
-# Removements after body text\r
-strip: //div[@class='ArtikelAbbinder']\r
-strip: //div[@class='ArtikelKommentieren Artikelfuss GETS;tk;boxen.top-lesermeinungen;tp;content']\r
-strip: //div[@class='FAZArtikelKommentare FAZArtikelContent']\r
-strip: //div[@class='FAZArtikelFunktionen']\r
-strip: //div[@id='FAZContentRight']\r
-test_url: http://www.faz.net/aktuell/gesellschaft/ehe-haltbarkeitsformel-verliebe-dich-oft-verlobe-dich-selten-heirate-vielleicht-11685306.html
\ No newline at end of file
+# Title
+title: //p[@class='Content HeadlineShort']
+
+# Authors
+# some are known and have a link, others don't
+author: substring-after(//span[@class='Autor'], 'Von')
+
+# Date
+date: //span[@class='Datum']
+
+# Body
+body: //div[@class='Artikel']
+
+# Removements before body text
+strip: //div[@class='Breadcrumbs']
+strip: //div[@class='QuickSearchBox']
+strip: //div[@class='FAZArtikelEinleitung']
+strip: //div[@class='FAZArtikelReiter']
+strip: //div[@class='clear']
+
+# General removements
+strip: //span[@class='Bildnachweis']
+strip: //img[@class='MediaIcon']
+strip: //div[@class='ArtikelMediaLink']
+dissolve: //a[img]
+
+# Removements after body text
+strip: //div[@class='ArtikelAbbinder']
+strip: //div[@class='ArtikelKommentieren Artikelfuss GETS;tk;boxen.top-lesermeinungen;tp;content']
+strip: //div[@class='FAZArtikelKommentare FAZArtikelContent']
+strip: //div[@class='FAZArtikelFunktionen']
+strip: //div[@id='FAZContentRight']
+
+# Fix picture captions
+wrap_in(small): //span[@class='Bildunterschrift']/text()
+test_url: http://www.faz.net/aktuell/feuilleton/zum-tod-von-margaret-thatcher-die-reizfigur-12141919.html#Drucken
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/fertigung.de.txt b/inc/3rdparty/site_config/standard/fertigung.de.txt
new file mode 100755 (executable)
index 0000000..90145e5
--- /dev/null
@@ -0,0 +1,23 @@
+title: //title
+
+body: //div[@id='content']
+
+strip: (//div[@id='content']/h2)[1]
+
+strip: //h2[contains(., 'mehr News')]/following::*
+strip: //h2[contains(., 'mehr News')]
+
+strip: //div[contains(@class, 'indizar')]/following::*
+strip: //div[contains(@class, 'indizar')]
+
+strip: //h1[contains(@class, 'single')]/preceding::*
+strip: //h1[contains(@class, 'single')]
+
+strip_id_or_class: plista_widget
+
+prune: no
+
+next_page_link: //a[contains(., 'Weiter')]
+
+test_url: http://www.fertigung.de/2013/04/igus-neuer-energiekettenkatalog/
+test_url: http://www.fertigung.de/2013/04/dynamisch-und-hochpraezise/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4a04e83..19ec16b
@@ -1,5 +1,5 @@
-body: id('storytext')\r
-author: //a[starts-with(@href, '/u/')]\r
-#next_page_link: substring-after(//input[contains(@value, 'Next')]/@onclick, "self.location='")\r
+body: id('storytext')
+author: //a[starts-with(@href, '/u/')]
+#next_page_link: substring-after(//input[contains(@value, 'Next')]/@onclick, "self.location='")
 strip_id_or_class: 'a2a_kit'
 test_url: http://www.fictionpress.com/s/2897964/1/All_We_Knew
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3dbfe76..081f0bb
@@ -1,12 +1,12 @@
-title: //h4\r
-author: //span[@class="author"]\r
-body: //div[@id="story"]\r
-strip_id_or_class: summary\r
-strip_id_or_class: meta\r
-strip_id_or_class: storyfoot\r
-convert_double_br_tags: yes\r
-prune: no\r
-\r
-# Note: this site still has trouble because single <br> tags are stripped, but I don't see a way to fix that with this interface.\r
+title: //h4
+author: //span[@class="author"]
+body: //div[@id="story"]
+strip_id_or_class: summary
+strip_id_or_class: meta
+strip_id_or_class: storyfoot
+convert_double_br_tags: yes
+prune: no
+
+# Note: this site still has trouble because single <br> tags are stripped, but I don't see a way to fix that with this interface.
 
 test_url: http://www.ficwad.com/story/158977
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 81c18fd..248522c
@@ -1,12 +1,12 @@
-title: //meta[@property='og:title']/@content\r
-body: //div[@id='y-article-bd']\r
-body: //div[contains(@class, 'yom-art-content')]\r
-strip: //div[contains(@class, 'related-companies')]\r
-strip: //div[@id='y-article-related']\r
-strip: //div[@id='ypf-article-related']\r
-prune: no\r
-\r
-single_page_link: //div[@class='ft']//a[contains(@href, 'page=all')]\r
-\r
-test_url: http://sg.finance.yahoo.com/news/Motorola-takes-wraps-249-rsg-3508842732.html?x=0&.v=1\r
+title: //meta[@property='og:title']/@content
+body: //div[@id='y-article-bd']
+body: //div[contains(@class, 'yom-art-content')]
+strip: //div[contains(@class, 'related-companies')]
+strip: //div[@id='y-article-related']
+strip: //div[@id='ypf-article-related']
+prune: no
+
+single_page_link: //div[@class='ft']//a[contains(@href, 'page=all')]
+
+test_url: http://sg.finance.yahoo.com/news/Motorola-takes-wraps-249-rsg-3508842732.html?x=0&.v=1
 test_url: http://finance.yahoo.com/news/super-young-retirement-savers.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 1a5cd2e..43aef75
@@ -1,10 +1,10 @@
-date: //div[@class='notes']/a\r
-body: //div[@id='content']\r
-\r
-strip_id_or_class: tags\r
-strip_id_or_class: permalink\r
-strip_id_or_class: notes\r
-strip_id_or_class: post_nav\r
-strip: //div[@id='content']//h2\r
+date: //div[@class='notes']/a
+body: //div[@id='content']
+
+strip_id_or_class: tags
+strip_id_or_class: permalink
+strip_id_or_class: notes
+strip_id_or_class: post_nav
+strip: //div[@id='content']//h2
 strip_id_or_class: right_column
 test_url: http://findtheswagger.tumblr.com/post/11589145141/moe-resners-end-of-an-era-1957-giants-final
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index dd56da2..ce972ba
@@ -1,7 +1,7 @@
-title: //div[@class='articleTitle']\r
-author: //div[@class='articleAuthor']\r
-body: //div[@class='articleContent']\r
-prune: no\r
-convert_double_br_tags: yes\r
-\r
+title: //div[@class='articleTitle']
+author: //div[@class='articleAuthor']
+body: //div[@class='articleContent']
+prune: no
+convert_double_br_tags: yes
+
 test_url: http://www.firstthings.com/article/2011/05/the-trouble-with-ayn-rand
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3d7b45a..d0a0a77
@@ -1,7 +1,7 @@
-title: substring-after(//title, 'Right:')\r
-body: //div[@class = 'post-body']\r
-author: substring-after(//*[@class='post-author'], 'by')\r
-date: concat(//*[@class='date-header'], ' ', //*[@class='post-timestamp']/a)\r
-convert_double_br_tags: yes\r
+title: substring-after(//title, 'Right:')
+body: //div[@class = 'post-body']
+author: substring-after(//*[@class='post-author'], 'by')
+date: concat(//*[@class='date-header'], ' ', //*[@class='post-timestamp']/a)
+convert_double_br_tags: yes
 
 test_url: http://www.fivethirtyeight.com/2010/07/does-rnc-have-structural-problems.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/flyingmachinestudios.com.txt b/inc/3rdparty/site_config/standard/flyingmachinestudios.com.txt
new file mode 100755 (executable)
index 0000000..2053f80
--- /dev/null
@@ -0,0 +1,2 @@
+strip_id_or_class: linenos
+test_url: http://www.flyingmachinestudios.com/programming/whoops-dci-refactoring/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 32d44c8..5db3e58
@@ -1,7 +1,7 @@
-author: //div[@class='authorDescription']/h2\r
-body: //div[@id='story']\r
-date: substring-before(substring-after(//p[@class='date'],'Erstellt am:'), '-')\r
-title: //h1[@class='detail']\r
-strip: //div[@class='fact']\r
+author: //div[@class='authorDescription']/h2
+body: //div[@id='story']
+date: substring-before(substring-after(//p[@class='date'],'Erstellt am:'), '-')
+title: //h1[@class='detail']
+strip: //div[@class='fact']
 
 test_url: http://fm4.orf.at/stories/1689156/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7faa6bf..e404ccb
@@ -1,15 +1,15 @@
-title: normalize(//h1)\r
-\r
-author: //td/p[position()=last()]/em\r
-\r
-# I swear, this is really the best way to do this\r
-date: normalize(//td[contains(@style, "color: #ffffff")])\r
-\r
-# my god, it's full of tables\r
-body: /table/tbody/tr[5]//table/tbody//table/tbody/tr/td\r
-strip: //h1\r
-\r
-# the following two lines strip the byline at the end of the article (the byline is a <p> that consists of an em dash and then some text in an <em>). I have no idea why I can't just strip //p[position()=last()], but trying to do so includes a bunch of other crap in the output.\r
-strip: //p[position()=last()]/em\r
+title: normalize-space(//h1)
+
+author: //td/p[position()=last()]/em
+
+# I swear, this is really the best way to do this
+date: normalize-space(//td[contains(@style, "color: #ffffff")])
+
+# my god, it's full of tables
+body: /table/tbody/tr[5]//table/tbody//table/tbody/tr/td
+strip: //h1
+
+# the following two lines strip the byline at the end of the article (the byline is a <p> that consists of an em dash and then some text in an <em>). I have no idea why I can't just strip //p[position()=last()], but trying to do so includes a bunch of other crap in the output.
+strip: //p[position()=last()]/em
 strip: //p[position()=last()]/child::text()
 test_url: http://www.fnal.gov/pub/today/archive_2011/today11-11-09_MuonDepartmentReadMore.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3ad5cab..6da3687
@@ -1,19 +1,19 @@
-title: //h1\r
-\r
-author: //div[@class='articleContent small']/div[@class='textBlock']//span[@class='created']\r
-\r
-date: //div[@class='articleHead']/span[@class='created']\r
-\r
-body: //div[@id='article']\r
-\r
-strip: //span[@class='markerText']\r
-strip: //div[@class='articleContent small']/div[@class='textBlock']//span[@class='created']\r
-strip: //div[@class='sidebar']\r
-strip: //div[@class='starbar']\r
-strip: //div[@class='actions clearfix']\r
-strip: //div[@id='commentForm']\r
-strip: //div[@id='commentSent']\r
-strip: //div[@id='comments']\r
-strip: //div[@class='similarityBlock']\r
+title: //h1
+
+author: //div[@class='articleContent small']/div[@class='textBlock']//span[@class='created']
+
+date: //div[@class='articleHead']/span[@class='created']
+
+body: //div[@id='article']
+
+strip: //span[@class='markerText']
+strip: //div[@class='articleContent small']/div[@class='textBlock']//span[@class='created']
+strip: //div[@class='sidebar']
+strip: //div[@class='starbar']
+strip: //div[@class='actions clearfix']
+strip: //div[@id='commentForm']
+strip: //div[@id='commentSent']
+strip: //div[@id='comments']
+strip: //div[@class='similarityBlock']
 
 test_url: http://www.focus.de/politik/ausland/ein-jahr-nach-bombenanschlag-u-bahn-attentaeter-von-minsk-hingerichtet_aid_724958.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/folklore.org.txt b/inc/3rdparty/site_config/standard/folklore.org.txt
new file mode 100755 (executable)
index 0000000..ed23a0b
--- /dev/null
@@ -0,0 +1,4 @@
+author: /html/body/table[3]/tbody/tr/td[1]/table[2]/tbody/tr[1]/td[2]
+date: /html/body/table[3]/tbody/tr/td[1]/table[2]/tbody/tr[2]/td[2]
+body: //div[@class='main']
+test_url: http://www.folklore.org/StoryView.py?story=Calculator_Construction_Set.txt
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/food.com.txt b/inc/3rdparty/site_config/standard/food.com.txt
new file mode 100755 (executable)
index 0000000..a70da76
--- /dev/null
@@ -0,0 +1,11 @@
+body: //div[@id='print-area']
+title: //h1[contains(@class, 'section-title')]
+single_page_link: //a[@id='prntrec']
+strip_image_src: food-logo-small
+strip_id_or_class: timer
+strip_id_or_class: photo-sm
+strip_id_or_class: page-header
+
+prune: no
+
+test_url: http://www.food.com/recipe/couldnt-be-easier-bbq-pork-tenderloin-crock-pot-317152
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 69867cc..89cb8b9
@@ -1,11 +1,11 @@
-body: //div[@class='entry-content']\r
-date: //meta[@name="date"]/@content\r
-author: //meta[@name="author"]/@content\r
-\r
-strip_id_or_class: ecapShell\r
-strip_id_or_class: noindent\r
-strip_id_or_class: targetedPromotion\r
-\r
-prune: no\r
-\r
+body: //div[@class='entry-content']
+date: //meta[@name="date"]/@content
+author: //meta[@name="author"]/@content
+
+strip_id_or_class: ecapShell
+strip_id_or_class: noindent
+strip_id_or_class: targetedPromotion
+
+prune: no
+
 test_url: http://www.fool.com/investing/general/2012/01/27/dfc-global-beats-up-on-analysts-yet-again.aspx
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 2381b56..9e1d04c
@@ -1,16 +1,27 @@
-title: //hgroup//h1\r
-title: //span[@class='mainarttitle']\r
-\r
-body: //div[@id='leftRail']//div[contains(@class, 'body')]\r
-\r
-author: //meta[@name="author"]/@content\r
-author: //span[@class='mainartauthor']\r
-\r
-date: substring-before(//hgroup//h6, '@')\r
-date: //span[@class='mainartdate']\r
-\r
-prune: no\r
-\r
-single_page_link: //a[contains(@href, '/print/')]\r
-\r
-test_url: http://www.forbes.com/forbes/2011/0509/technology-frog-design-jan-chipchase-ethnographer-birth-cool_print.html
\ No newline at end of file
+title: //hgroup//h1
+title: //span[@class='mainarttitle']
+
+body: //div[@id='leftRail']//div[contains(@class, 'body')]
+
+author: //meta[@name="author"]/@content
+author: //span[@class='mainartauthor']
+
+date: substring-before(//hgroup//h6, '@')
+date: //span[@class='mainartdate']
+
+prune: no
+strip: //aside
+strip_id_or_class: sticky_sharing
+strip_id_or_class: pagination
+strip_id_or_class: controlsbox
+strip_id_or_class: storyboxes
+strip_id_or_class: sponsoredlinks
+strip_id_or_class: nextpage
+strip_id_or_class: contextuallinks
+strip_id_or_class: article_actions
+strip_id_or_class: engagement_block
+
+single_page_link: //a[contains(@href, '/print/')]
+
+test_url: http://www.forbes.com/forbes/2011/0509/technology-frog-design-jan-chipchase-ethnographer-birth-cool_print.html
+test_url: http://www.forbes.com/sites/bruceupbin/2012/09/11/the-iphone-5-winners-and-losers/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/foreignaffairs.com.txt b/inc/3rdparty/site_config/standard/foreignaffairs.com.txt
new file mode 100755 (executable)
index 0000000..cf8b742
--- /dev/null
@@ -0,0 +1,34 @@
+# TIDY
+#tidy: no
+# PRUNE
+#prune: no
+
+# SINGLE PAGE
+single_page_link: //div[@class='showlinks']/a
+
+# TITLE
+title: //h1[@class="title"]
+
+# AUTHOR
+author: //div[contains(@class,"field-field-article-display-authors")]/div/div/a/text()
+
+# DATE
+date: //div[contains(@class,"field-field-article-issue")]/div/div/a/text() | //span[@class="date-display-single"]
+
+# BODY
+body: //div[contains(@class,"content-resize")]
+
+# Remove clutter
+strip: //div[@class="article-sidebar"]
+strip: //div[@class="showlinks"]
+strip: //div[contains(@class,"premium-box")]
+strip: //div[contains(@class,"premium-box")]
+strip: //table[contains(@border,"2")]
+
+# Fix picture captions
+wrap_in(small): //p/img/following-sibling::em
+wrap_in(small): //p[img]/text()
+
+# Fix sub-headlines
+wrap_in(h3): //div[contains(@class,"field-field-article-subtitle")]/div/div/text()
+test_url: http://www.foreignaffairs.com/articles/138810/pierre-n-leval/the-long-arm-of-international-law
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6ab7a09..4e84b98
@@ -1,11 +1,15 @@
-title: //div[@id='art-mast']//h1\r
-author: substring-after(//span[@id='by-line'], 'BY ')\r
-date: //span[@id='pub-date']\r
-body: //div[@id='art-mast']//h2 | //div[@id='art-mast']/h3 | //div[@id='art-body']//div[@class='translateBody']\r
-strip: //div[@id='share-box']\r
-prune: no\r
-\r
-single_page_link: //span[@id='controls']/a[contains(@href, 'print=yes')]\r
-\r
-test_url: http://www.foreignpolicy.com/articles/2011/08/01/a_murderers_manifesto_and_me\r
-test_url: test_url: http://www.foreignpolicy.com/articles/2012/02/29/five_years_in_damascus
\ No newline at end of file
+title: //div[@class='translateHead']//h1 | //div[@id='art-mast']//h1
+author: substring-after(//span[@id='by-line'], 'BY ')
+date: //span[@id='pub-date']
+body: //div[@id='art-mast']/h2 | //div[@class='translateBody'] | //div[@id='art-body']
+#Strip inside article content
+strip: //div[@id='share-box']
+strip: //div[@id='special-box']
+
+prune: no
+
+single_page_link: //span[@id='controls']/a[contains(@href, 'print=yes')]
+single_page_link: //a[text()='SINGLE PAGE']
+
+test_url: http://www.foreignpolicy.com/articles/2011/08/01/a_murderers_manifesto_and_me
+test_url: http://www.foreignpolicy.com/articles/2012/02/29/five_years_in_damascus
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3085c8f..c1bd2ba
@@ -1,9 +1,9 @@
-title: //div[@class="articleHeader"]/h1\r
-author: //p[@class="byline"]\r
-date: //p[contains(@class,"publishedDate")]/span\r
-# remove the right menu\r
-strip: //div[contains(@class,"aside")]\r
-# remove some SharePoint webpart label junk\r
-strip: //div[@id="ctl00_PlaceHolderMain_ArticleLeadField_label"]\r
+title: //div[@class="articleHeader"]/h1
+author: //p[@class="byline"]
+date: //p[contains(@class,"publishedDate")]/span
+# remove the right menu
+strip: //div[contains(@class,"aside")]
+# remove some SharePoint webpart label junk
+strip: //div[@id="ctl00_PlaceHolderMain_ArticleLeadField_label"]
 strip: //div[@id="ctl00_PlaceHolderMain_PublishingPageContentField_label"]
 test_url: http://forsvaret.no/aktuelt/publisert/nyheter/Sider/F5-fly-til-Skedsmo.aspx
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f1ee485..e19c77d
@@ -1,9 +1,9 @@
-prune: no\r
-\r
-author: //meta[@name="dc.publisher"]/@content\r
-date: //meta[@name="dc.date"]/@content\r
-strip: //p[contains(@class, 'contributor vcard')]\r
-replace_string(<ul><li><div class="photo">): <div class="photo">\r
-strip: //p[a[contains(., 'Click here to read more on this story ')]]\r
-\r
+prune: no
+
+author: //meta[@name="dc.publisher"]/@content
+date: //meta[@name="dc.date"]/@content
+strip: //p[contains(@class, 'contributor vcard')]
+replace_string(<ul><li><div class="photo">): <div class="photo">
+strip: //p[a[contains(., 'Click here to read more on this story ')]]
+
 test_url: http://www.foxnews.com/entertainment/2011/05/04/dwayne-johnson-guys-grow-pair-driving-hybrid/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f3d5425..78d3772
@@ -1,3 +1,3 @@
-body: //div[@id="projectDetailsContent"]//td\r
+body: //div[@id="projectDetailsContent"]//td
 
 test_url: http://www.freelancer.com/projects/PHP-Website-Design/debug-Forum-website-code.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8dc0dab..c83f830
@@ -1,5 +1,5 @@
-body: //div[@class = 'instapaperbody']\r
-convert_double_br_tags: no\r
-date: //div[@class='instadate']\r
+body: //div[@class = 'instapaperbody']
+convert_double_br_tags: no
+date: //div[@class='instadate']
 title: //h2[@class = 'instatitle']
 test_url: http://freytag-film.com/blog/artikel/shooting_a_feature_film_in_10_days
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/fria.nu.txt b/inc/3rdparty/site_config/standard/fria.nu.txt
new file mode 100755 (executable)
index 0000000..9d8eff9
--- /dev/null
@@ -0,0 +1,8 @@
+body: //div[contains(@class, 'layout__inner')]//div[contains(@class, 'file-image') or contains(@class, 'node__content')]
+author: //article//div[contains(@class, 'field-byline')]
+strip_id_or_class: rekommenderade
+strip_id_or_class: disqus
+strip_id_or_class: annonser
+
+test_url: http://www.fria.nu/artikel/112079
+test_url: http://www.fria.nu/taxonomy/term/1928/all/feed
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/friatidningen.se.txt b/inc/3rdparty/site_config/standard/friatidningen.se.txt
new file mode 100755 (executable)
index 0000000..1e4abc5
--- /dev/null
@@ -0,0 +1,7 @@
+body: //div[contains(@class, 'layout__inner')]//div[contains(@class, 'file-image') or contains(@class, 'node__content')]
+author: //article//div[contains(@class, 'field-byline')]
+strip_id_or_class: rekommenderade
+strip_id_or_class: disqus
+strip_id_or_class: annonser
+
+test_url: http://www.friatidningen.se/artikel/112074
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 39a9973..b067d88
@@ -1,11 +1,11 @@
-#body: (//div[@class='ftr-yt-vid'])[1]\r
-body: (//blockquote[contains(@class, 'postcontent')])[1]\r
-body: (//div[starts-with(@id, 'post_message')])[1]\r
-\r
-prune: no\r
-tidy: no\r
-\r
-#replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player"\r
-#replace_string(</iframe>): </iframe>&nbsp;</div>\r
-\r
+#body: (//div[@class='ftr-yt-vid'])[1]
+body: (//blockquote[contains(@class, 'postcontent')])[1]
+body: (//div[starts-with(@id, 'post_message')])[1]
+
+prune: no
+tidy: no
+
+#replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player"
+#replace_string(</iframe>): </iframe>&nbsp;</div>
+
 test_url: http://www.friendskorner.com/forum/f137/debate-personal-lives-leaders-west-vs-pakistan-must-read-297989/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 38d9d32..e66b960
@@ -1,5 +1,5 @@
-body: //div[contains(@class, 'ft-story-body')]\r
-\r
-author: substring-after(//div[contains(@class, 'ft-story-header')]/p[1], 'By ')\r
+body: //div[contains(@class, 'ft-story-body')]
+
+author: substring-after(//div[contains(@class, 'ft-story-header')]/p[1], 'By ')
 date: substring-before(substring-after(//div[contains(@class, 'ft-story-header')]/p[2], 'Published:'), '|')
 test_url: http://www.ft.com/cms/s/2/e1be4b5a-620c-11e0-8ee4-00144feab49a.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ftchinese.com.txt b/inc/3rdparty/site_config/standard/ftchinese.com.txt
new file mode 100755 (executable)
index 0000000..5c94d9b
--- /dev/null
@@ -0,0 +1,18 @@
+# Modified to define the single_page_link
+# This filter is tested on:
+# http://www.ftchinese.com/story/001047373
+# http://www.ftchinese.com/story/001047631
+# http://www.ftchinese.com/story/001047622/?print=y
+# http://www.ftchinese.com/story/001049052
+# http://www.ftchinese.com/story/001049088
+
+title:substring-before(//title, '-')
+author: //div[@class='byline']/a
+date: //a[@class='storytime']
+#Set date in print view
+#date: //div[@class='byline']/a/following-sibling::a
+body: //div[@id="bodytext"]
+strip://div[@class='pagination']
+single_page_link://div[@class='pagination']/a[.='全文']
+#next_page_link: //div[@class='pagination']//a[.='下一页']
+test_url: http://www.ftchinese.com/story/001049088
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a58765b..7d76af0
@@ -1,5 +1,5 @@
-body: //div[@class='boxIntroHead']/span[@class='h3'] | //div[@class='section']/div[@class='paragraph' or @class='embObjLeft']\r
-single_page_link: //a[@class='icon print']\r
+body: //div[@class='boxIntroHead']/span[@class='h3'] | //div[@class='section']/div[@class='paragraph' or @class='embObjLeft']
+single_page_link: //a[@class='icon print']
 
-test_url: http://www.ftd.de/it-medien/it-telekommunikation/:mobilfunk-vivendi-und-vodafone-trennen-sich-in-frankreich/60034691.html\r
+test_url: http://www.ftd.de/it-medien/it-telekommunikation/:mobilfunk-vivendi-und-vodafone-trennen-sich-in-frankreich/60034691.html
 test_url: http://www.ftd.de/it-medien/medien-internet/:verkauf-von-warner-music-musikbranche-auf-dem-sprung/60048185.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8e6356b..0dc3047
@@ -1,3 +1,3 @@
-body: //div[@class = 'entry']\r
+body: //div[@class = 'entry']
 
 test_url: http://www.fubiz.net/2011/05/31/world-press-photo-2011/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 50fc144..808c1f1
@@ -1,11 +1,11 @@
-date: //span[@class='date']\r
-strip: //div[@class='postsidebar']\r
-body: //div[@class='singlepost']\r
-title: //div[@class='singlepost']/h1\r
-move_into(//div[@class='singlepost']): //div[@class='info']\r
-strip: //div[@class='gallery']\r
-strip: //div[@class='biggallery']\r
-strip: //ul[@class='social']\r
-strip: //ul[@class='social_mail']\r
+date: //span[@class='date']
+strip: //div[@class='postsidebar']
+body: //div[@class='singlepost']
+title: //div[@class='singlepost']/h1
+move_into(//div[@class='singlepost']): //div[@class='info']
+strip: //div[@class='gallery']
+strip: //div[@class='biggallery']
+strip: //ul[@class='social']
+strip: //ul[@class='social_mail']
 
 test_url: http://futurezone.at/future/5502-erste-galileo-satelliten-starten-ins-all.php
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 35a8762..7c808cf
@@ -1,20 +1,20 @@
-# default view title\r
-title: //span[@class='newsTitle']\r
-# print view title\r
-title: //h3[@class='title']\r
-\r
-# default view author\r
-author: //span[@class='newsAuth']/a\r
-author: substring-after(//span[@class='newsAuth'], 'by ')\r
-\r
-# default view date\r
-date: //td[@class='newsDate']\r
-\r
-# default view body\r
-body: //td[@class='featureText']\r
-body: //td[@class='newsText']\r
-\r
-strip: //h3[@class='title']\r
-\r
+# default view title
+title: //span[@class='newsTitle']
+# print view title
+title: //h3[@class='title']
+
+# default view author
+author: //span[@class='newsAuth']/a
+author: substring-after(//span[@class='newsAuth'], 'by ')
+
+# default view date
+date: //td[@class='newsDate']
+
+# default view body
+body: //td[@class='featureText']
+body: //td[@class='newsText']
+
+strip: //h3[@class='title']
+
 single_page_link: //a[contains(@href, '?print=1')]
 test_url: http://www.gamasutra.com/view/feature/132559/staying_power_rethinking_feedback_.php
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 2cc4b37..73f8342
@@ -1,10 +1,10 @@
-title: //meta[@property="og:title"]/@content\r
-body: //div[@id='GBTVPlayer'] | //div[contains(@class, 'col490')]\r
-\r
-prune: no\r
-\r
-strip_id_or_class: noprint\r
-strip: //div[@id='gbNewsTextContent']/following-sibling::*\r
-\r
-test_url: http://www.gameblog.fr/news/26330-les-sims-3-showtime-s-annonce-en-video\r
+title: //meta[@property="og:title"]/@content
+body: //div[@id='GBTVPlayer'] | //div[contains(@class, 'col490')]
+
+prune: no
+
+strip_id_or_class: noprint
+strip: //div[@id='gbNewsTextContent']/following-sibling::*
+
+test_url: http://www.gameblog.fr/news/26330-les-sims-3-showtime-s-annonce-en-video
 test_url: http://www.gameblog.fr/news/26306-mise-a-jour-du-dashboard-de-la-xbox-360-disponible
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gamechurch.com.txt b/inc/3rdparty/site_config/standard/gamechurch.com.txt
new file mode 100755 (executable)
index 0000000..c9eea5f
--- /dev/null
@@ -0,0 +1,10 @@
+title: //h1[@class='title']
+
+date: substring-before(substring-after(//div[@class='comment-bubble']/.., 'Posted'), 'by')
+
+body: //div[@class='the-content']
+
+strip: //div[@class='article-image responsive']
+
+strip_id_or_class: 'pullquote'
+test_url: http://gamechurch.com/virtual-gun-control-the-best-amendment/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gamer.no.txt b/inc/3rdparty/site_config/standard/gamer.no.txt
new file mode 100755 (executable)
index 0000000..e76a59d
--- /dev/null
@@ -0,0 +1,11 @@
+body: //div[@class='pageContent description']
+date: //div[@class='authorsAndDateTime']/span[@title]
+single_page_link: //div[@class='pages']/a[last()-1]
+
+# fix images and captions
+wrap_in(figure): //div[contains(concat(' ', @class, ' '), ' image')]
+wrap_in(figcaption): //div[contains(concat(' ', @class, ' '), ' image')]/div[@class='text']/text()
+
+# get rid of videos
+strip_id_or_class: 'video full'
+test_url: http://www.gamer.no/artikler/142455/slik-blei-ambisiose-dragons-dogma-skapt/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gamereactor.no.txt b/inc/3rdparty/site_config/standard/gamereactor.no.txt
new file mode 100755 (executable)
index 0000000..6f7c1b9
--- /dev/null
@@ -0,0 +1,11 @@
+title: //div[@id='content']/div/h1
+
+author: //a[@itemprop='reviewer']
+
+date: //time[@itemprop='dtreviewed']/@datetime
+
+body: //div[@id='breadtext']
+
+# fix for NOT magically removing anchors with text identical to title
+dissolve: //a[text()=//div[@id='content']/div/h1/text()]
+test_url: http://www.gamereactor.no/previews/177481/The+Evil+Within/?sid=38b5bd30f56f1b7214de4ff5bed4b76f
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 1791e81..327ac55
@@ -1,3 +1,3 @@
-tidy: no\r
-\r
+tidy: no
+
 test_url: http://www.garythink.com/eft/testing.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ef68082..8eda0c3
@@ -1,4 +1,4 @@
-# These should work, but don't. They were given by Firefox XPather extension\r
-title: //article//header//a//h1\r
+# These should work, but don't. They were given by Firefox XPather extension
+title: //article//header//a//h1
 body: //article//section
 test_url: http://gasteroprod.com/blog/faut-il-continuer-a-supporter-internet-explorer-6.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7434632..2ab144f
@@ -1,8 +1,8 @@
-body: //div[@class='panel']\r
-strip: //div[@style='float:right']\r
-strip: //span[@class='titulosHomePublicidad']\r
-strip: //div[@id='TitTop5Der']\r
-strip: //img[@src='/ImagesGatoPardo/LogoGatopardo.png']\r
-\r
+body: //div[@class='panel']
+strip: //div[@style='float:right']
+strip: //span[@class='titulosHomePublicidad']
+strip: //div[@id='TitTop5Der']
+strip: //img[@src='/ImagesGatoPardo/LogoGatopardo.png']
+
 prune: yes
 test_url: http://www.gatopardo.com/ReportajesGP.php?R=95
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6531d81..9bc5613
@@ -1,6 +1,6 @@
-body: //div[@class="post-body"]\r
-\r
-# Remove 'content is restricted'\r
-strip: //div[@id='agegate_IDHERE']\r
-\r
+body: //div[@class="post-body"]
+
+# Remove 'content is restricted'
+strip: //div[@id='agegate_IDHERE']
+
 test_url: http://gawker.com/#!5782070/russian-bomb-squad-successfully-defuses-sex-toy
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 55586e1..89eb402
@@ -1,3 +1,3 @@
-author: substring-after(//span[@class='storyauthor'],'Posted by')\r
+author: substring-after(//span[@class='storyauthor'],'Posted by')
 date: //span[@class='storydate']
 test_url: http://www.geeksofdoom.com/2012/03/14/robert-rodriguez-says-machete-kills-and-sin-city-2-will-film-this-year/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f6dccf4..a664b4d
@@ -1,3 +1,3 @@
-body: //div[@id = 'article']\r
+body: //div[@id = 'article']
 strip: //div[@id = 'klasbox']
 test_url: http://www.geenstijl.nl/mt/archieven/2010/10/vrouw_lange_frans_wou_baas_b_d.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 537b4c2..e28d4b8
@@ -1,3 +1,3 @@
-body: //div[@class='post']\r
+body: //div[@class='post']
 strip: //ul[@id='bookmark_single']
 test_url: http://getnews.jp/archives/117312
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8a54bc0..61de51b
@@ -1,11 +1,11 @@
-# 2011-11-19 - carlo@... - Initial setup.\r
-\r
-strip_id_or_class: user-review-detail\r
-strip: //h1\r
-\r
-body: //div[@class="wiki-content"]  |  //div[@class="section-bd"]  |  //div[@class="news-story"]\r
-\r
-author: //span[@class="reviewer"]  |  //p[@class="byline"]/a/text()\r
-date: //span[@class="dtreviewed"]\r
+# 2011-11-19 - carlo@... - Initial setup.
+
+strip_id_or_class: user-review-detail
+strip: //h1
+
+body: //div[@class="wiki-content"]  |  //div[@class="section-bd"]  |  //div[@class="news-story"]
+
+author: //span[@class="reviewer"]  |  //p[@class="byline"]/a/text()
+date: //span[@class="dtreviewed"]
 
 test_url: http://www.giantbomb.com/the-elder-scrolls-v-skyrim/61-33394/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f60199a..e2689ea
@@ -1,20 +1,20 @@
-tidy:no\r
-title://h2[@class="title"]\r
-# author:"Ben Miller"\r
-date://div[@id="stats"]/span\r
-strip_id_or_class:stats\r
-strip_id_or_class:breadcrumbs\r
-strip_id_or_class:gn-why-content\r
-strip_id_or_class:single-social\r
-strip_id_or_class:sidebar-ads\r
-strip_id_or_class:sidebar-top\r
-strip_id_or_class:footer\r
-strip_id_or_class:post_meta\r
-# strip_id_or_class:\r
-# strip_id_or_class:\r
-# strip_id_or_class:\r
-# strip_id_or_class:\r
-# strip_id_or_class:\r
-# strip_id_or_class:\r
+tidy:no
+title://h2[@class="title"]
+# author:"Ben Miller"
+date://div[@id="stats"]/span
+strip_id_or_class:stats
+strip_id_or_class:breadcrumbs
+strip_id_or_class:gn-why-content
+strip_id_or_class:single-social
+strip_id_or_class:sidebar-ads
+strip_id_or_class:sidebar-top
+strip_id_or_class:footer
+strip_id_or_class:post_meta
+# strip_id_or_class:
+# strip_id_or_class:
+# strip_id_or_class:
+# strip_id_or_class:
+# strip_id_or_class:
+# strip_id_or_class:
 
 test_url: http://www.giga.de/benm/2011/10/17/probleme-mit-ios-5-wenn-die-daten-weg-sind/#more-58033
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 348bdf2..cc8fdfa
@@ -1,17 +1,12 @@
-date: //meta[@name='DC.date.issued']/@content\r
-date: //span[@class='post-meta the-date']\r
-\r
-title: //meta[@property='og:title']/@content\r
-\r
-author: //meta[@name='DC.creator']/@content\r
-\r
-body: //div[contains(@class, 'post-sub-head') or starts-with(@id, 'post-content-')]\r
-\r
-find_string: id="content"\r
-replace_string: id="content-ignore"\r
-\r
-strip_id_or_class: sharedaddy\r
-\r
-prune: no\r
-\r
-test_url: http://gigaom.com/2011/10/24/groupon-google-lawsuit/
\ No newline at end of file
+date: //meta[@name='dcterms.created']/@content
+title: //meta[@property='og:title']/@content
+author: //section[@class="post-meta"]//a[@rel="author"]
+
+body: //div[starts-with(@id, 'post-content-')]
+
+strip_id_or_class: sharedaddy
+
+prune: no
+
+test_url: http://gigaom.com/2011/10/24/groupon-google-lawsuit/
+test_url: http://gigaom.com/2012/12/26/snapchat-rises-why-pokes-decline-shows-facebooks-inability-to-invent/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 478b23a..d3534b2
@@ -1,3 +1,3 @@
-single_page_link: //p[@id='skip']//a[contains(@href, 'skip')]\r
+single_page_link: //p[@id='skip']//a[contains(@href, 'skip')]
 
 test_url: http://gihyo.jp/dev/serial/01/machine-learning/0010
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 53095b3..9020786
@@ -1,6 +1,6 @@
-body: //div[@class="highlight"]/pre\r
-\r
-prune: no\r
-tidy: no\r
-\r
+body: //div[@class="highlight"]/pre
+
+prune: no
+tidy: no
+
 test_url: https://gist.github.com/1258908
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 144ce04..0de0750
@@ -1,3 +1,3 @@
-single_page_link: //div[@id="content"]//h2/a\r
+single_page_link: //div[@id="content"]//h2/a
 
 test_url: http://givemesomethingtoread.com/post/6285838917/the-baddest-lawyer-in-the-history-of-jersey
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 285e76c..2eb82a6
@@ -1,7 +1,7 @@
-body: //div[@id="leadimage" or @class="postcontent"]\r
-author: //div[@class="contentauthor"]\r
-date: //div[@class="timestamp"]\r
-\r
-prune: no\r
-\r
+body: //div[@id="leadimage" or @class="postcontent"]
+author: //div[@class="contentauthor"]
+date: //div[@class="timestamp"]
+
+prune: no
+
 test_url: http://www.gizmodo.co.uk/2013/02/bbc-forcing-poor-old-sir-david-attenborough-to-go-on-twitter/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c953625..e73ec9d
@@ -1,7 +1,11 @@
-body: //div[@class="post-body" or contains(@class, 'illustration top')]\r
-author: (//cite//span[@class="plus-icon"])[1]\r
-date: //span[@class="date"]\r
-\r
-prune: no\r
-\r
-test_url: http://gizmodo.com/5880147/kuhn-rikon-improves-their-spice-grinder-with-grade-school-science
\ No newline at end of file
+#body: //div[@class="post-body" or contains(@class, 'illustration top')]
+body: //div[contains(@class, 'image-annotation-box') or contains(@class, 'post-content')]
+#author: (//cite//span[@class="plus-icon"])[1]
+author: //span[contains(@class, 'display-name')]
+date: //span[@class="date"]
+
+prune: no
+
+test_url: http://gizmodo.com/5880147/kuhn-rikon-improves-their-spice-grinder-with-grade-school-science
+test_url: http://gizmodo.com/what-van-goghs-paintings-would-look-like-if-they-came-874035680
+test_url: http://gizmodo.com/vip.xml
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gizmodo.uol.com.br.txt b/inc/3rdparty/site_config/standard/gizmodo.uol.com.br.txt
new file mode 100755 (executable)
index 0000000..d963d68
--- /dev/null
@@ -0,0 +1,6 @@
+title: //h1
+
+body: //div[@id='destaques']//div[contains(@class, 'img')] | //div[@id='maincontent']//p
+
+test_url: http://gizmodo.uol.com.br/nvidia-gtx-titan-z/
+test_url: http://gizmodo.uol.com.br/perfil-mark-zuckerberg-hackeado/
old mode 100644 (file)
new mode 100755 (executable)
index 135ed50..71fbc93
@@ -1,4 +1,18 @@
-# Look for Open Graph data - http://ogp.me\r
-title: //meta[@property="og:title"]/@content\r
-date: //meta[@property="article:published_time"]/@content\r
-# article:author is someties URL, e.g. on guardian.co.uk
\ No newline at end of file
+# Look for Open Graph data - http://ogp.me
+title: //meta[@property="og:title"]/@content
+date: //meta[@property="article:published_time"]/@content
+# article:author is sometimes a URL, e.g. on guardian.co.uk
+
+# Remove Google Publisher Tags: https://support.google.com/dfp_sb/answer/1649768?hl=en
+#strip_id_or_class: div-gpt-ad
+
+# Strip doubleclick image ads
+strip_image_src: doubleclick.net
+
+# If you get chunks of Javascript code appearing in the extracted output, try uncommenting the lines below.
+# This tries to convert script tags to hidden div elements (which Full-Text RSS removes).
+# If you notice issues with this approach, please let us know.
+#find_string: <script 
+#replace_string: <div style="display:none" 
+#find_string: </script>
+#replace_string: </div>
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 95d4bec..ee50f68
@@ -1,15 +1,15 @@
-body: //div[@id='content']\r
-\r
-strip: //p[@class='top']\r
-strip: //h2[.='Where next?']\r
-strip_id_or_class: where-next\r
-strip_id_or_class: social-bookmarks\r
-strip_id_or_class: link-to-here\r
-strip_id_or_class: options-heading\r
-strip_id_or_class: page-options-content\r
-strip_id_or_class: page-info-bottom\r
-\r
-tidy: no\r
-prune: no\r
-\r
+body: //div[@id='content']
+
+strip: //p[@class='top']
+strip: //h2[.='Where next?']
+strip_id_or_class: where-next
+strip_id_or_class: social-bookmarks
+strip_id_or_class: link-to-here
+strip_id_or_class: options-heading
+strip_id_or_class: page-options-content
+strip_id_or_class: page-info-bottom
+
+tidy: no
+prune: no
+
 test_url: http://www.globalissues.org/article/39/a-primer-on-neoliberalism
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/globoesporte.globo.com.txt b/inc/3rdparty/site_config/standard/globoesporte.globo.com.txt
new file mode 100755 (executable)
index 0000000..fd8e70f
--- /dev/null
@@ -0,0 +1,25 @@
+title: //h1[@class="entry-title"]
+
+body: //div[@class='materia-titulo']/h2 | //*[@id="materia-letra"]
+
+date: //abbr[@class="published"]
+date: //abbr[@class="updated"]
+
+author: //*[@class="author"]/strong
+
+strip: //div[contains(@class,'foto')]/strong
+strip: //div[contains(@class,'frase-materia')]/div[@class='autor']
+strip: //div[contains(@class,'saibamais')]
+strip: //*[contains(text(),'Clique aqui e veja mais')]/ancestor::p
+strip: //ul[@class="toolbar"]
+
+# quotes
+wrap_in(blockquote): //div[@id='materia-letra']//div[contains(@class,'frase-materia')]/div[@class='frase']
+
+prune: no
+
+replace_string([Clique aqui e veja mais vídeos do Fluminense]): []
+
+test_url: http://globoesporte.globo.com/atletismo/noticia/2013/08/michael-johnson-diz-que-bolt-e-melhor-da-historia-nao-ha-duvidas.html
+test_url: http://globoesporte.globo.com/futebol/futebol-internacional/futebol-espanhol/noticia/2013/08/barca-atropela-levante-e-neymar-passa-em-branco-em-estreia-oficial.html
+test_url: http://globoesporte.globo.com/futebol/times/fluminense/noticia/2013/08/poupado-no-sabado-felipe-se-diz-pronto-para-ser-titular-contra-o-goias.html
diff --git a/inc/3rdparty/site_config/standard/gloswielkopolski.pl.txt b/inc/3rdparty/site_config/standard/gloswielkopolski.pl.txt
new file mode 100755 (executable)
index 0000000..1648795
--- /dev/null
@@ -0,0 +1,8 @@
+title: //article[@id='material']/header/h1
+author: //article[@id='material']/header/div[2]/p
+date: //article[@id='material']/header/p/time[1]
+body: //section[@id='tresc']
+next_page_link: .//section[@id='tresc']/div[@class='stronicowanie']/a[@rel='next']
+strip://div[@class='podobneSonda']
+
+test_url: http://www.gloswielkopolski.pl/artykul/803547,abc-telemarketingu-praca-ktora-zwalnia-z-myslenia,id,t.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 075c4d2..e25e9a0
@@ -1,16 +1,16 @@
-title: //div[@id='article_headline']//h1\r
-date: //div[contains(@class, 'articleDate')]//h4\r
-body: //div[@id='article_headline']/h2 | //div[@id='large_article_image' or @id='article_content']\r
-\r
-strip_id_or_class: relatedLinksBox\r
-strip_id_or_class: betting-widget\r
-strip_image_src: install_flash.gif\r
-\r
-strip: //table[contains(@style, 'float: right; width: 285px;')]\r
-strip: //div[@class='caption']\r
-\r
-tidy: no\r
-prune: no\r
-\r
-test_url: http://www.goal.com/en-gb/news/3284/euro-2012/2012/05/31/3139032/video-profile-back-to-his-very-best-for-bayern-frances-flair-and-\r
+title: //div[@id='article_headline']//h1
+date: //div[contains(@class, 'articleDate')]//h4
+body: //div[@id='article_headline']/h2 | //div[@id='large_article_image' or @id='article_content']
+
+strip_id_or_class: relatedLinksBox
+strip_id_or_class: betting-widget
+strip_image_src: install_flash.gif
+
+strip: //table[contains(@style, 'float: right; width: 285px;')]
+strip: //div[@class='caption']
+
+tidy: no
+prune: no
+
+test_url: http://www.goal.com/en-gb/news/3284/euro-2012/2012/05/31/3139032/video-profile-back-to-his-very-best-for-bayern-frances-flair-and-
 test_url: http://www.goal.com/en-gb/news/3284/euro-2012/2012/05/31/3139869/lampard-injury-a-bitter-blow-for-england-and-sorry-way-to#
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6c5d1c4..6afdebe
@@ -1,25 +1,25 @@
-# Jens Kohl, jens.kohl@...\r
-# - Added publication date\r
-# - Striped pagination block\r
-# - Added single page link\r
-# - Added xpath-querys for the printer friendly version\r
-\r
-title: //h1\r
-body: //div[@class='formatted']\r
-prune: no\r
-\r
-date: substring-after(//li[2][@class="text1"], 'Datum:')\r
-strip: //ol[@class="list-chapters"]\r
-strip_comments: yes\r
-\r
-# next: commands for printer friendly pages\r
-single_page_link: //a[contains(@href, 'print.php?a=')]/@href\r
-title: //body/h3\r
-strip_image_src: staticrl/images/logo.jpg\r
-strip_image_src: http://cpx.golem.de/cpx.php?class=7\r
-strip: //body/h3\r
-strip: //body/b[1]\r
-strip: //body/b[2]\r
-strip: //body/b[3]\r
-strip: //div[1]\r
+# Jens Kohl, jens.kohl@...
+# - Added publication date
+# - Stripped pagination block
+# - Added single page link
+# - Added XPath queries for the printer-friendly version
+
+title: //h1
+body: //div[@class='formatted']
+prune: no
+
+date: substring-after(//li[2][@class="text1"], 'Datum:')
+strip: //ol[@class="list-chapters"]
+strip_comments: yes
+
+# next: commands for printer friendly pages
+single_page_link: //a[contains(@href, 'print.php?a=')]/@href
+title: //body/h3
+strip_image_src: staticrl/images/logo.jpg
+strip_image_src: http://cpx.golem.de/cpx.php?class=7
+strip: //body/h3
+strip: //body/b[1]
+strip: //body/b[2]
+strip: //body/b[3]
+strip: //div[1]
 test_url: http://www.golem.de/1112/88696.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 5cf6701..94159fb
@@ -1,4 +1,4 @@
-title: //div[@class="title"]/div/h1\r
-body: //div[@class="body"]\r
-date: //li[@class="date-time"]\r
+title: //div[@class="title"]/div/h1
+body: //div[@class="body"]
+date: //li[@class="date-time"]
 test_url: http://www.good.is/post/why-amazon-is-the-next-top-tech-company/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/goodfil.ms.txt b/inc/3rdparty/site_config/standard/goodfil.ms.txt
new file mode 100755 (executable)
index 0000000..f8bbbc6
--- /dev/null
@@ -0,0 +1,2 @@
+strip_id_or_class: gutter
+test_url: http://goodfil.ms/blog/posts/2012/08/13/angularjs-and-the-goodfilms-mobile-site-part-1/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c2fe4e4..e2d2d0b
@@ -1,14 +1,14 @@
-date: //meta[@name='og:article:published_time']/@value\r
-\r
-body: //div[@class='itemIntroText' or @class='itemImageBlock' or @class='itemFullText']\r
-\r
-strip_id_or_class: itemImageGallery\r
-\r
-# remove extras at end of post content\r
-find_string: <div style="margin:5px 0 10px;">\r
-replace_string: </div></body></html><!--\r
-\r
-prune: no\r
-\r
-test_url: http://www.gossip-tv.gr/story/158902/aggelike-daliane-semera-duskoleuontai-oloi-sta-epaggelmatika-tous\r
+date: //meta[@name='og:article:published_time']/@value
+
+body: //div[@class='itemIntroText' or @class='itemImageBlock' or @class='itemFullText']
+
+strip_id_or_class: itemImageGallery
+
+# remove extras at end of post content
+find_string: <div style="margin:5px 0 10px;">
+replace_string: </div></body></html><!--
+
+prune: no
+
+test_url: http://www.gossip-tv.gr/story/158902/aggelike-daliane-semera-duskoleuontai-oloi-sta-epaggelmatika-tous
 test_url: http://www.gossip-tv.gr/lifestyle/Taste/story/230266/lahtaristo-kai-ygieino-tost-sokolatas
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/goteborgsfria.se.txt b/inc/3rdparty/site_config/standard/goteborgsfria.se.txt
new file mode 100755 (executable)
index 0000000..c90aed0
--- /dev/null
@@ -0,0 +1,7 @@
+body: //div[contains(@class, 'layout__inner')]//div[contains(@class, 'file-image') or contains(@class, 'node__content')]
+author: //article//div[contains(@class, 'field-byline')]
+strip_id_or_class: rekommenderade
+strip_id_or_class: disqus
+strip_id_or_class: annonser
+
+test_url: http://www.goteborgsfria.se/artikel/112079
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 5179fc1..3645387
@@ -1,7 +1,7 @@
-title: //div[@class='entry-header']\r
-author: //span[@class='vcard author']\r
-date: //abbr[@class='published']\r
-#move_into(//div[@class='entry-body']): //img[@id='photo_1']\r
-body: //div[@class='entry-body']\r
+title: //div[@class='entry-header']
+author: //span[@class='vcard author']
+date: //abbr[@class='published']
+#move_into(//div[@class='entry-body']): //img[@id='photo_1']
+body: //div[@class='entry-body']
 strip: //div[@class='galleryEaseThumbs']
 test_url: http://gothamist.com/2012/03/15/fancy_cocktail_lounge_the_randolph.php
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7fb0ee0..f8af732
@@ -1,21 +1,21 @@
-title: //span[@id="showTitle"]\r
-author: //span[@id="showAuthor"]\r
-date: //span[@id="showRefDate"]\r
-\r
-strip: //span[@class="black_bold"]\r
-strip: //div[@id="sectionName"]\r
-strip: //div[@id="storyHeader"]\r
-\r
-body: //div[@id="newsBodyText"]\r
-\r
-strip_image_src: "http://www.gotomanager.com/img/mgrm/space.gif"\r
-strip_image_src: "http://www.gotomanager.com/images/separator.gif"\r
-strip_image_src: "http://www.gotomanager.com/images/spaces.gif"\r
-\r
-convert_double_br_tags: yes\r
-tidy: yes\r
-\r
-strip: //div[@id="smallLeadImage"]\r
-strip: //div[@id="truehitsSurvey"]\r
+title: //span[@id="showTitle"]
+author: //span[@id="showAuthor"]
+date: //span[@id="showRefDate"]
+
+strip: //span[@class="black_bold"]
+strip: //div[@id="sectionName"]
+strip: //div[@id="storyHeader"]
+
+body: //div[@id="newsBodyText"]
+
+strip_image_src: "http://www.gotomanager.com/img/mgrm/space.gif"
+strip_image_src: "http://www.gotomanager.com/images/separator.gif"
+strip_image_src: "http://www.gotomanager.com/images/spaces.gif"
+
+convert_double_br_tags: yes
+tidy: yes
+
+strip: //div[@id="smallLeadImage"]
+strip: //div[@id="truehitsSurvey"]
 strip: //table[@id="relatedInfoTable"]
 test_url: http://www.gotomanager.com/news/details.aspx?id=86759
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gov.ky.txt b/inc/3rdparty/site_config/standard/gov.ky.txt
new file mode 100755 (executable)
index 0000000..294ece3
--- /dev/null
@@ -0,0 +1,4 @@
+strip: //body//title
+
+test_url: http://www.gov.ky/pls/portal/PORTAL.wwv_media.show?p_id=7593947&p_settingssetid=1&p_settingssiteid=0&p_siteid=2425&p_type=basetext&p_textid=7593948
+test_url: http://www.rcips.ky/pls/portal/wlacomp.wlafeed.show_cignewsfeed_agency?p_sitecode=POL&p_agency=Police
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gp.se.txt b/inc/3rdparty/site_config/standard/gp.se.txt
new file mode 100755 (executable)
index 0000000..158ae4e
--- /dev/null
@@ -0,0 +1,11 @@
+body: //div[@id='articleContainer']
+author: //div[@id='articleContent']//div[contains(@class, 'byline')]//span[contains(@class, 'name fn')]
+strip_id_or_class: toolbar
+strip_id_or_class: ADad
+strip_id_or_class: articleSerieWrapper
+strip_id_or_class: articleFloatContainer
+strip: //div[contains(@class, 'byline')]//img
+prune: no
+
+test_url: http://www.gp.se/nyheter/bohuslan/1.2045564-styckade-mannen-hade-mordat-hustrun
+test_url: http://www.gp.se/1.16560
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 233c4a7..8ad8a14
@@ -1,9 +1,9 @@
-next_page_link: //div[@class='pagination']//span[@class='paginationNext']/a\r
-strip_id_or_class: utility\r
-strip_id_or_class: keywords\r
-strip_id_or_class: pagination\r
-strip_id_or_class: position2_content\r
-body: //div[@class='article']\r
-title: //h1[@class='content-headline']\r
+next_page_link: //div[@class='pagination']//span[@class='paginationNext']/a
+strip_id_or_class: utility
+strip_id_or_class: keywords
+strip_id_or_class: pagination
+strip_id_or_class: position2_content
+body: //div[@class='article']
+title: //h1[@class='content-headline']
 author: //span[@class='contributor']//a
 test_url: http://www.gq.com/news-politics/newsmakers/201203/terry-thompson-ohio-zoo-massacre-chris-heath-gq-february-2012
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3269e08..b8d419f
@@ -1,20 +1,20 @@
-# this is fragile with footnotes -- leave it for now\r
-\r
-#tidy: no\r
-#prune: no\r
-#move_into(//article): //aside[@id='footnotes']\r
-author: //cite/a\r
-date: //time\r
-\r
-strip: //a[text()='Grantland']\r
-strip_id_or_class: ad-wrapper\r
-strip_id_or_class: fb-connect-link\r
-strip_id_or_class: fb-status\r
-strip: //li[@class='print']\r
-strip: //cite\r
-strip: //a[contains(text(), '[+]')]\r
-strip: //a[@id='jump-nav-link']\r
-strip: //h1[text()='Share This']\r
-strip: //h1[text()='Top Stories']\r
-strip: //div[@id="update-text-size"]\r
+# this is fragile with footnotes -- leave it for now
+
+#tidy: no
+#prune: no
+#move_into(//article): //aside[@id='footnotes']
+author: //cite/a
+date: //time
+
+strip: //a[text()='Grantland']
+strip_id_or_class: ad-wrapper
+strip_id_or_class: fb-connect-link
+strip_id_or_class: fb-status
+strip: //li[@class='print']
+strip: //cite
+strip: //a[contains(text(), '[+]')]
+strip: //a[@id='jump-nav-link']
+strip: //h1[text()='Share This']
+strip: //h1[text()='Top Stories']
+strip: //div[@id="update-text-size"]
 test_url: http://www.grantland.com/story/_/id/8421241/examining-new-albums-rock-veterans-no-doubt-green-day
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a525803..31a4107
@@ -1,11 +1,11 @@
-title: //div[@class="blogpost"]/h2\r
-author: //div[@class="blogpost"]/p[@class="byline"]/a\r
-date: //div[@class="blogpost"]/p[@class="byline"]/span[@class="time_posted"]\r
-body: //div[@class="blogpost"]\r
-strip_id_or_class: flag\r
-strip_id_or_class: byline\r
-strip_id_or_class: post_footer\r
-strip_id_or_class: related_posts\r
-strip_id_or_class: post_author_bios\r
+title: //div[@class="blogpost"]/h2
+author: //div[@class="blogpost"]/p[@class="byline"]/a
+date: //div[@class="blogpost"]/p[@class="byline"]/span[@class="time_posted"]
+body: //div[@class="blogpost"]
+strip_id_or_class: flag
+strip_id_or_class: byline
+strip_id_or_class: post_footer
+strip_id_or_class: related_posts
+strip_id_or_class: post_author_bios
 strip: //h2
 test_url: http://greatergreaterwashington.org/post/12457/ask-ggw-what-will-happen-to-the-1000-series-railcars/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7e15a5c..0fe30ef
@@ -1,5 +1,5 @@
-title://h1\r
-author://span[@class="submitted"]/a\r
-date:substring-after(//span[@class="submitted"],'on ')\r
+title://h1
+author://span[@class="submitted"]/a
+date:substring-after(//span[@class="submitted"],'on ')
 body://div[@class="content"]
 test_url: http://groups.drupal.org/node/36816
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e69044b..97b620d
@@ -1,5 +1,5 @@
-body: //div[@class='wrapper_half']//ul[@class='details'] | //div[@class='wrapper_half']//p[@class='synopsis'] | //div[@class='wrapper_half']//div[@class='image'] | //div[@class='wrapper_half']//div[@class='article']\r
-strip: //div[@class='wrapper_half']//ul[@class='details']/li[position()>1]\r
-prune: no\r
-tidy: no\r
+body: //div[@class='wrapper_half']//ul[@class='details'] | //div[@class='wrapper_half']//p[@class='synopsis'] | //div[@class='wrapper_half']//div[@class='image'] | //div[@class='wrapper_half']//div[@class='article']
+strip: //div[@class='wrapper_half']//ul[@class='details']/li[position()>1]
+prune: no
+tidy: no
 test_url: http://gulfnews.com/news/gulf/uae/government/abu-dhabi-centre-offers-useful-information-1.811084
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 00255eb..f8327be
@@ -1,22 +1,22 @@
-# To administrator:\r
-# Please change the hostname to "www.guokr.com/article/*"\r
-# Not working for "www.guokr.com/post/" pages configured by carlosliu913@gmail.com\r
-\r
-# This filter is tested on:\r
-# http://www.guokr.com/article/274325/\r
-# http://www.guokr.com/article/275013/\r
-\r
-title://h1\r
-author://div[contains(@class, 'content-th-info')]/a\r
-date://div[contains(@class, 'content-th-info')]/span\r
-body://div[contains(@class, 'Content')]\r
-\r
-strip://div[contains(@class, 'bottom-i')]\r
-strip://div[contains(@class, 'copyright')]\r
-strip://div[contains(@class, 'fr')]\r
-strip://div[contains(@class, 'content-th-info')]\r
-strip://h1[contains(@id, 'articleTitle')]\r
-strip://div[contains(@class, 'side')]\r
-strip://div[contains(@class, 'top-wp')]\r
-test_url: http://www.guokr.com/article/275013/\r
+# To administrator:
+# Please change the hostname to "www.guokr.com/article/*"
+# Not working for "www.guokr.com/post/" pages configured by carlosliu913@gmail.com
+
+# This filter is tested on:
+# http://www.guokr.com/article/274325/
+# http://www.guokr.com/article/275013/
+
+title://h1
+author://div[contains(@class, 'content-th-info')]/a
+date://div[contains(@class, 'content-th-info')]/span
+body://div[contains(@class, 'Content')]
+
+strip://div[contains(@class, 'bottom-i')]
+strip://div[contains(@class, 'copyright')]
+strip://div[contains(@class, 'fr')]
+strip://div[contains(@class, 'content-th-info')]
+strip://h1[contains(@id, 'articleTitle')]
+strip://div[contains(@class, 'side')]
+strip://div[contains(@class, 'top-wp')]
+test_url: http://www.guokr.com/article/275013/
 test_url: http://www.guokr.com/article/338387/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index bc1ce68..1bb2bc7
@@ -1,5 +1,5 @@
-title: //div[@id="habermetni"]/h1[@id="haber_baslik"]\r
-body: //div[@id="habermetni"]/p\r
-strip: //img[@class='newsDetailLeft']\r
+title: //div[@id="habermetni"]/h1[@id="haber_baslik"]
+body: //div[@id="habermetni"]/p
+strip: //img[@class='newsDetailLeft']
 strip_image_src: /haber-resimleri/
 test_url: http://www.haberler.com/emniyete-atacakti-elinde-patladi-3198733-haberi/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hackmake.org.txt b/inc/3rdparty/site_config/standard/hackmake.org.txt
new file mode 100755 (executable)
index 0000000..9814011
--- /dev/null
@@ -0,0 +1,7 @@
+date: //article//time[@pubdate]
+body: //article/div[@id="post-wide"]
+title: //article/header/h2
+strip: //div[@id="comment"]
+strip: //footer
+author: substring-after(//footer/p[@class='byline'] , 'By')
+test_url: http://hackmake.org/2012/12/21/mindfulness-of-concentration
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7989d09..1802efe
@@ -1,5 +1,5 @@
-title:substring-before(id("maincontent")/table, 'Posted')\r
-body:id("maincontent")/p\r
-# eventually convert linebreaks better\r
+title:substring-before(id("maincontent")/table, 'Posted')
+body:id("maincontent")/p
+# eventually convert linebreaks better
 
 test_url: http://halo.bungie.org/fanfic/?story=Delahunt0312112316071.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 747f90a..33f7e72
@@ -1,7 +1,7 @@
-# Remove right column\r
-strip: //*[(@class = 'right_col')]\r
-\r
-# Remove comments etc.\r
-strip: //*[(@class = 'category')]\r
+# Remove right column
+strip: //*[(@class = 'right_col')]
+
+# Remove comments etc.
+strip: //*[(@class = 'category')]
 strip: /html/body/div[1][@class='absolute_content_high']/div[1][@class='wrapper']/div[1][@class='main_col']/div[@class='main_content']/h3
 test_url: http://hammers.theoffside.com/carling-cup/a-funny-thing-happened-on-the-way-to-4-nil.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/handelsblatt.com.txt b/inc/3rdparty/site_config/standard/handelsblatt.com.txt
new file mode 100755 (executable)
index 0000000..7d067aa
--- /dev/null
@@ -0,0 +1,31 @@
+#Single Page
+single_page_link: //li[contains(@class,"hcf-print")]/a
+
+# Title  hcf-headline
+title: //span[@class='hcf-headline']
+
+# Authors 
+author: //div[@class="hcf-author"]/a/text()
+author: substring-after(//div[@class='hcf-author'], 'von ')
+
+# Date 
+date: //div[@class='hcf-article-date']
+
+# Body
+body: //div[@class='article']
+
+# General removals
+strip: //div[contains(@class,"hcf-smartbox")]
+strip: //div[contains(@class,"hcf-stopper")]
+strip: //div[contains(@class,"hcf-img-controls")]
+strip: //span[@class='hcf-location-mark']
+strip: //span[@class='hcf-copyright']
+strip: //div[@class='hcf-copyright']
+strip: //div[@class='hcf-origin']
+
+
+
+
+# Fix picture captions
+wrap_in(small): //div[@class="hcf-caption"]
+test_url: http://www.handelsblatt.com/meinung/gastbeitraege/gastkommentar-zum-emissionshandel-kurskorrekturen-fuehren-zum-kentern/8044326.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d3ffeab..1dca632
@@ -1,4 +1,4 @@
-date: //span[@class="item-date"]\r
-body: //div[@class="item-content"]\r
+date: //span[@class="item-date"]
+body: //div[@class="item-content"]
 strip_comments: no
 test_url: http://www.hanselman.com/blog/BrainBytesBackBunsTheProgrammersPriorities.aspx
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 318885c..e4f1f6b
@@ -1,6 +1,6 @@
-title: //h1\r
-author: //a[@class='a_aut']\r
-body: //div[@class='content_dossier']\r
-strip: //div[@id='pagination']\r
+title: //h1
+author: //a[@class='a_aut']
+body: //div[@class='content_dossier']
+strip: //div[@id='pagination']
 next_page_link: //div[@class='sommaire_colonne']//span[@class='page_actuelle']/following::span[@class='autres_page']//a/@href
 test_url: http://www.hardware.fr/articles/850-1/pci-express-3-0-impact-performances.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hardware.no.txt b/inc/3rdparty/site_config/standard/hardware.no.txt
new file mode 100755 (executable)
index 0000000..cbbcf84
--- /dev/null
@@ -0,0 +1,16 @@
+title: //h1[@class='headline']
+title: //h2[@itemprop='alternativeHeadline']
+title: //h1[@itemprop='headline']
+author: //span[@itemprop='name']
+date: //time[@itemprop='datePublished']
+body: //div[@itemprop='reviewBody']
+
+wrap_in(blockquote): //div[@class='factBox']
+
+next_page_link: //a[@rel='next']
+
+strip_id_or_class: 'product-box'
+strip: //a[@rel='next']
+strip: //a[text()='Del på Facebook']
+strip: //a[text()='Del på Twitter']
+test_url: http://www.hardware.no/artikler/asus-vg248qe/132792
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index fd6145e..c2f292e
@@ -1,6 +1,7 @@
-title: //div[@id='article-title']\r
-author: //div[@id='articleAuthors']\r
-body: //div[@id='article']\r
-strip: //div[@class='module wide']\r
-next_page_link: //a[@title='Next Page']
-test_url: http://hbr.org/2012/04/the-real-leadership-lessons-of-steve-jobs/ar/
\ No newline at end of file
+title: //div[@id='article-title']
+author: //div[@id='articleAuthors']
+body: //div[@id='article']
+strip: //div[@class='module wide']
+#single_page_link: //a[@class='social-print']
+test_url: http://hbr.org/2012/04/the-real-leadership-lessons-of-steve-jobs/ar/
+test_url: http://hbr.org/2013/03/big-bang-disruption/ar/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/headrush.typepad.com.txt b/inc/3rdparty/site_config/standard/headrush.typepad.com.txt
new file mode 100755 (executable)
index 0000000..a314677
--- /dev/null
@@ -0,0 +1,14 @@
+title://div[@class='content']/h3[1]
+body://div[@class='content']
+
+# Article nav
+strip://div[@class='content']/p[1]
+
+# Comments and trackbacks
+strip://h2/following-sibling::p
+strip://h2
+
+# Posted on
+strip://b/p
+strip://div[@class='content']/p[@class='posted']
+test_url: http://headrush.typepad.com/creating_passionate_users/2005/05/the_case_for_ea.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 1da82ac..daff614
@@ -1,3 +1,3 @@
-body: //div[@id='content']/div\r
+body: //div[@id='content']/div
 date: //p[@class='author_date']/span[@class='date']
 test_url: http://heise-online.mobi/newsticker/meldung/Amazons-Appstore-in-der-Kritik-Ein-Desaster-fuer-Kunden-und-Entwickler-1273936.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 5f19d3f..c51af56
@@ -1,7 +1,7 @@
-single_page_link: //p[@class='news_option']/a\r
-\r
-date: //p[@class='news_datum']\r
-title: //h1\r
-body: //div[@class='meldung_wrapper']\r
-\r
+single_page_link: //p[@class='news_option']/a
+
+date: //p[@class='news_datum']
+title: //h1
+body: //div[@class='meldung_wrapper']
+
 test_url: http://www.heise.de/newsticker/meldung/Europa-soll-Grundrechteschutz-im-Netz-staerken-1392664.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hemmings.com.txt b/inc/3rdparty/site_config/standard/hemmings.com.txt
new file mode 100755 (executable)
index 0000000..a02b4a6
--- /dev/null
@@ -0,0 +1,9 @@
+title: //h2
+body: //div[@id='leftdetail']
+single_page_link: //a[contains(@href, 'printable=1')]
+strip: //a[contains(., 'Full Version')]
+
+prune: no
+
+test_url: http://www.hemmings.com/classifieds/dealer/ferrari/330gtc/1601235.html
+test_url: http://www.hemmings.com/rss/keyword.xml?adtype=carsforsale&make=ferrari
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/heroturko.me.txt b/inc/3rdparty/site_config/standard/heroturko.me.txt
new file mode 100755 (executable)
index 0000000..07b6adf
--- /dev/null
@@ -0,0 +1,6 @@
+title: //div[contains(@class, 'title')]//h1
+body: //div[contains(@class, 'story')]
+
+prune: no
+
+test_url: http://www.heroturko.me/5223034-ds-catia-p3-v5-6r2014-gasp0-x86x64-multilanguage-english-docs.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d866f62..4ed0b8b
@@ -1,7 +1,7 @@
-body: //div[@id='article_holder']//div[@class='image'] | //div[@id='article_body']\r
-\r
-prune: no\r
-tidy: no\r
-\r
-test_url: http://hespress.com/videos/73684.html\r
+body: //div[@id='article_holder']//div[@class='image'] | //div[@id='article_body']
+
+prune: no
+tidy: no
+
+test_url: http://hespress.com/videos/73684.html
 test_url: http://hespress.com/permalink/73678.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hiamag.com.txt b/inc/3rdparty/site_config/standard/hiamag.com.txt
new file mode 100755 (executable)
index 0000000..3c7ba5a
--- /dev/null
@@ -0,0 +1,3 @@
+body: (//div[contains(@class, 'gallery-slides')]//img)[1] | //div[contains(@class, 'node_body_inner')]
+
+test_url: http://www.hiamag.com/rss.xml
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index fd50b6a..5a808fa
@@ -1,3 +1,3 @@
-body: //div[@class='journal-entry-text']\r
+body: //div[@class='journal-entry-text']
 
 test_url: http://highscalability.com/blog/2011/3/14/6-lessons-from-dropbox-one-million-files-saved-every-15-minu.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c57c1aa..d869a86
@@ -1,4 +1,4 @@
-body: //div[@class = 'pd']\r
-strip: //div[@id = 'overzicht-albumrecensies']\r
+body: //div[@class = 'pd']
+strip: //div[@id = 'overzicht-albumrecensies']
 strip: //div[@id = 'jc']
 test_url: http://hiphopleeft.nl/index.php?option=com_content&view=article&id=2767:mark-ronson-record-collection&catid=66:m&Itemid=142
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index dc687f3..78fb60a
@@ -1,10 +1,10 @@
-body://div[@id = 'content']\r
-author://span[@class = 'authors']\r
-author://span[@class = 'ht-vtag'][1]\r
-date:substring-before(//meta[@name = 'dc.date']/@content,'T')\r
-strip://div[contains(@class, 'region-ubercontent')]\r
-strip://h1\r
-strip://div[@id = 'ht-author']\r
-strip://ul[@class = 'links inline'] \r
-strip://div[@id = 'ht-tools']\r
+body://div[@id = 'content']
+author://span[@class = 'authors']
+author://span[@class = 'ht-vtag'][1]
+date:substring-before(//meta[@name = 'dc.date']/@content,'T')
+strip://div[contains(@class, 'region-ubercontent')]
+strip://h1
+strip://div[@id = 'ht-author']
+strip://ul[@class = 'links inline'] 
+strip://div[@id = 'ht-tools']
 test_url: http://www.historytoday.com/carol-dyhouse/skin-deep-fall-fur
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index eeee159..2da13a8
@@ -1,5 +1,5 @@
-title: //*[@class='ptitle']\r
-date: //span[@class='date']\r
-body: //div[@class='body']\r
+title: //*[@class='ptitle']
+date: //span[@class='date']
+body: //div[@class='body']
 prune: no
 test_url: http://hmercer.com/2011/07/why-i-switched-to-jekyll/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hollywoodlife.com.txt b/inc/3rdparty/site_config/standard/hollywoodlife.com.txt
new file mode 100755 (executable)
index 0000000..975ffa2
--- /dev/null
@@ -0,0 +1,22 @@
+date: //meta[@name='sailthru.date']/@content
+body: //article[contains(@class, 'entry-content')]
+
+strip_image_src: subscribe.png
+
+strip_id_or_class: wpcom-iframe-form
+strip_id_or_class: gallery-thumbs
+strip_id_or_class: twitter
+strip_id_or_class: fb-link
+strip_id_or_class: pinterest
+
+strip: //div[@class='data']
+strip: //iframe[contains(@name, 'wpcom')]
+
+find_string: <a href="http://www.youtube.com/subscription_center?add_user_id=2rJLq19N0dGrxfib80M
+replace_string: </p></div></body></html><!--
+
+find_string: <h3>More
+replace_string: </div></body></html><!--
+
+test_url: http://hollywoodlife.com/2013/10/04/miriam-carey-dead-capitol-hill-car-chase-shooting-postpartum-depression/
+test_url: http://hollywoodlife.com/feed/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d43e644..8ed26ff
@@ -1,4 +1,4 @@
-body: //div[@id='entry-body']\r
-strip_id_or_class: paginate\r
+body: //div[@id='entry-body']
+strip_id_or_class: paginate
 strip: //p[contains(., 'Additional Resources')]
 test_url: http://hometheaterreview.com/dreamvision-starlight-3-three-chip-d-ila-projector-reviewed/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e19dd52..dfd8193
@@ -1,5 +1,5 @@
-body: //table[@class='ap-smallphoto-table'] | //div[@class='body']//*[@class='entry-content']\r
-tidy: no\r
-strip_image_src: analytics.apnewsregistry\r
-\r
+body: //table[@class='ap-smallphoto-table'] | //div[@class='body']//*[@class='entry-content']
+tidy: no
+strip_image_src: analytics.apnewsregistry
+
 test_url: http://hosted.ap.org/dynamic/stories/U/US_SPENDING_SHOWDOWN?SITE=FLPET&SECTION=HOME&TEMPLATE=DEFAULT&CTIME=2011-04-06-07-46-50
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/howtogeek.com.txt b/inc/3rdparty/site_config/standard/howtogeek.com.txt
new file mode 100755 (executable)
index 0000000..baa2ed4
--- /dev/null
@@ -0,0 +1,11 @@
+body: //div[contains(@class, 'thecontent')]
+
+strip_image_src: loading.gif
+find_string:src="http://cdn.howtogeek.com/public/images/blank.gif"
+replace_string:-
+find_string:data-href=
+replace_string:src=
+
+strip_id_or_class: relatedside
+
+test_url: http://www.howtogeek.com/school/microsoft-excel-formulas-and-functions/lesson1/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 67125fb..360dc72
@@ -1,3 +1,3 @@
-prune: yes\r
+prune: yes
 tidy: yes
 test_url: http://www.hs.fi/kotimaa/Teollisuushallin%20palo%20levitt%C3%A4%C3%A4%20vaarallista%20savua%20Tuusulassa/a1305571582405
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a8412d2..4653508
@@ -1,3 +1,3 @@
-single_page_link: //iframe[@id='hootFrame']/@src\r
-\r
+single_page_link: //iframe[@id='hootFrame']/@src
+
 test_url: http://ht.ly/bOiZV
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d40513b..d4618c1
@@ -1,16 +1,21 @@
-title: //meta[@property="og:title"]/@content\r
-body: //div[img[starts-with(@id, 'img_caption')]] | //div[@class="big_photo"] | //div[contains(@class, 'entry_body_text')]\r
-date: //meta[@name="publish_date"]/@content\r
-author: //a[@rel="author"]\r
-author: //meta[@name="author"]/@content\r
-prune: no\r
-tidy: no\r
-strip: //footer\r
-strip_id_or_class: ps-slideshow\r
-strip_id_or_class: fs-slideshow\r
-strip: //p[contains(., 'Related on HuffPost:')]\r
-# end early\r
-replace_string(<div class="sbm-main): </body></html><div class="not-interested \r
-\r
-test_url: http://www.huffingtonpost.com/mitch-moxley/tracking-beijings-boom-th_b_1209828.html\r
-test_url: http://www.huffingtonpost.com/2012/09/11/president-obama-iphone-throwdown_n_1873826.html
\ No newline at end of file
+title: //meta[@property="og:title"]/@content
+body: //div[img[starts-with(@id, 'img_caption')]] | //div[@class="big_photo"] | //div[contains(@class, 'entry_body_text')]
+date: //meta[@name="publish_date"]/@content
+author: //a[@rel="author"]
+author: //meta[@name="author"]/@content
+
+prune: no
+tidy: no
+
+strip: //footer
+strip_id_or_class: ps-slideshow
+strip_id_or_class: fs-slideshow
+strip: //p[contains(., 'Related on HuffPost:')]
+strip_id_or_class: contribute-story
+strip_id_or_class: promo_holder
+
+# end early
+replace_string(<div class="sbm-main): </body></html><div class="not-interested 
+
+test_url: http://www.huffingtonpost.com/mitch-moxley/tracking-beijings-boom-th_b_1209828.html
+test_url: http://www.huffingtonpost.com/2012/09/11/president-obama-iphone-throwdown_n_1873826.html
old mode 100644 (file)
new mode 100755 (executable)
index ec7d3c0..92d3c67
@@ -1,5 +1,5 @@
-title: //h3[@class="entry-header"]\r
-date: //h2[@class="date-header"]\r
-body: //div[contains(@class, 'entry')]\r
+title: //h3[@class="entry-header"]
+date: //h2[@class="date-header"]
+body: //div[contains(@class, 'entry')]
 
 test_url: http://www.humantransit.org/2012/06/can-network-primers-reduce-grief-about-network-design.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ccf09dc..68fd220
@@ -1,7 +1,7 @@
-title: //div[@class='HaberDetayTitleHold Title']/h1\r
-body: //div[@id='YazarDetayText']\r
-author: //div[@class='HaberDetayTitleHold Title']/h1\r
-prune: no\r
-\r
-test_url: http://www.hurriyet.com.tr/ekonomi/19490260.asp\r
+title: //div[@class='HaberDetayTitleHold Title']/h1
+body: //div[@id='YazarDetayText']
+author: //div[@class='HaberDetayTitleHold Title']/h1
+prune: no
+
+test_url: http://www.hurriyet.com.tr/ekonomi/19490260.asp
 test_url: http://www.hurriyet.com.tr/yazarlar/22078439.asp
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 06fa98d..05e7b5f
@@ -1,9 +1,9 @@
-title: //div[@id='pg-content']//h1\r
-body: //div[@id='articleBody0']\r
-replace_string(</table>): </table><br /><br />\r
-\r
-single_page_link: //div[@class="up-header"]/a\r
-\r
-prune: no\r
+title: //div[@id='pg-content']//h1
+body: //div[@id='articleBody0']
+replace_string(</table>): </table><br /><br />
+
+single_page_link: //div[@class="up-header"]/a
+
+prune: no
 
 test_url: http://hvg.hu/w/20111125_sparta
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 49b46da..23e4754
@@ -1,10 +1,10 @@
-body: //div[@id='content']//div[contains(@class, 'wp-image-') or contains(@class, 'entry')][1]\r
-author: //span[@class='author']/a\r
-\r
-strip_id_or_class: disqus\r
-strip_id_or_class: paginator\r
-strip_id_or_class: photo-number\r
-\r
-prune: no\r
-\r
+body: //div[@id='content']//div[contains(@class, 'wp-image-') or contains(@class, 'entry')][1]
+author: //span[@class='author']/a
+
+strip_id_or_class: disqus
+strip_id_or_class: paginator
+strip_id_or_class: photo-number
+
+prune: no
+
 test_url: http://hypebeast.com/2012/11/stussy-2012-fall-winter-november-releases/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/icannabis.tumblr.com.txt b/inc/3rdparty/site_config/standard/icannabis.tumblr.com.txt
new file mode 100755 (executable)
index 0000000..3bda753
--- /dev/null
@@ -0,0 +1,9 @@
+tidy:no
+prune:no
+
+body://div[contains(@id,'content')]
+
+strip_id_or_class:meta
+strip_id_or_class:notes
+strip_id_or_class:pagination
+test_url: http://icannabis.tumblr.com/post/28660592471/reviewmswireless3000
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/idealog.co.nz.txt b/inc/3rdparty/site_config/standard/idealog.co.nz.txt
new file mode 100755 (executable)
index 0000000..ca88f60
--- /dev/null
@@ -0,0 +1,12 @@
+body: //div[@class='content']
+
+strip: //p[@class='dateline']
+strip: //hr
+strip_id_or_class: share
+strip_id_or_class: comments
+strip_id_or_class: tags
+
+title: substring-before(//title,' ::')
+author: substring-before(//p[@class='dateline'],',') 
+date: //p[@class='dateline']/time
+test_url: http://www.idealog.co.nz/blog/2012/12/geeks-plane-help-kiwis-take-san-francisco
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e1badef..f3b3379
@@ -1,7 +1,7 @@
-title: //a[@class='post_title']\r
-body: //div[@class='entrybox']\r
-strip_id_or_class: post_title\r
-date: //div[@class='entrybox']/b[1]\r
-strip: //div[@class='entrybox']/b[1]\r
+title: //a[@class='post_title']
+body: //div[@class='entrybox']
+strip_id_or_class: post_title
+date: //div[@class='entrybox']/b[1]
+strip: //div[@class='entrybox']/b[1]
 author: string('Maciej Cegłowski')
 test_url: http://idlewords.com/2011/08/why_arabic_is_terrific.htm
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d7ec2da..45dd5f2
@@ -1,5 +1,5 @@
-author: substring-after(substring-after(//span[@class='submitted'],'- '),'- ')\r
-date: substring-before(//span[@class='submitted'], concat('- ',substring-after(substring-after(//span[@class='submitted'],'- '),'- ')))\r
-body: //div[@class='content clear-block zoneApple']\r
+author: substring-after(substring-after(//span[@class='submitted'],'- '),'- ')
+date: substring-before(//span[@class='submitted'], concat('- ',substring-after(substring-after(//span[@class='submitted'],'- '),'- ')))
+body: //div[@class='content clear-block zoneApple']
 
 test_url: http://www.igeneration.fr/iphone/l-iphone-et-l-ipad-chouchous-des-tpe-et-pme-55112
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f74178a..6063530
@@ -1,7 +1,7 @@
-title://h1[@class='page-title']\r
-body://*[@id='content']//div[contains(@class,'node-content')]\r
-\r
-author://*[@id='content']//div[contains(@class,'node-submitted')]/a\r
-\r
+title://h1[@class='page-title']
+body://*[@id='content']//div[contains(@class,'node-content')]
+
+author://*[@id='content']//div[contains(@class,'node-submitted')]/a
+
 date:substring-after(//div[contains(@class,'node-submitted')],' on ')
 test_url: http://ignoredbydinosaurs.com/2011/09/great-lie-lorem-ipsum
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ca1e54a..9880b51
@@ -1,13 +1,13 @@
-# Get proper Title, Author and Date info\r
-title: substring-before(//title, '|')\r
-author: substring-after(//h4/a[@href='http://www.ilounge.com/index.php/ilounge/aboutus/'], 'By')\r
-date: //span[@class='instapaper_date']\r
-\r
-# For Reviews & First Looks, get the intro paragraph and put it in front of the main body.\r
-move_into(//div[@id='instapaper_para1']): //div[@id='instapaper_body']\r
-body: //div[@id='instapaper_para1']\r
-strip: //div[@class='reviewinfo']\r
-\r
-# We don't use footnotes, so why bother checking for them? \r
+# Get proper Title, Author and Date info
+title: substring-before(//title, '|')
+author: substring-after(//h4/a[@href='http://www.ilounge.com/index.php/ilounge/aboutus/'], 'By')
+date: //span[@class='instapaper_date']
+
+# For Reviews & First Looks, get the intro paragraph and put it in front of the main body.
+move_into(//div[@id='instapaper_para1']): //div[@id='instapaper_body']
+body: //div[@id='instapaper_para1']
+strip: //div[@class='reviewinfo']
+
+# We don't use footnotes, so why bother checking for them? 
 footnotes: no
 test_url: http://www.ilounge.com/index.php/reviews/entry/luxa2-alum-x-for-iphone-4-4s/?utm_source=twitterfeed&utm_medium=twitter
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index da6a60f..51a7eb9
@@ -1,5 +1,5 @@
-title: //div[@class='published visible e2-smart-title']//span\r
-author: //span[@id='e2-blog-title']\r
-date: //p[@class='super-h']\r
+title: //div[@class='published visible e2-smart-title']//span
+author: //span[@id='e2-blog-title']
+date: //p[@class='super-h']
 body: //div[@class='text published visible']
 test_url: http://ilyabirman.ru/meanwhile/2011/11/15/2/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0589aaa..5410e64
@@ -1,21 +1,21 @@
-author: substring-after(substring-before(//div[@id='byline'],'|'),'By')\r
-author: //div[@class='byline']/a\r
-date: //span[@class='pubdate']\r
-# print friendly page\r
-body: //div[@id='text']\r
-# regular page\r
-body: //div[@id= 'articlecontent']\r
-\r
-strip: //div[@id= 'articlecontent']/h1\r
-strip: //div[@id='articlecontent']/p[@class='deck']\r
-strip: //div[@id='articlecontent']/div[@class='byline']\r
-strip: //div[@id='articlespacer']\r
-strip: //div[@id='incsharebox']\r
-strip: //div[@id='articlesidebar']\r
-\r
-prune: no\r
-\r
-single_page_link: //a[contains(@href, 'Printer_Friendly.html')]\r
-strip: //a[contains(., 'Dig Deeper')]\r
-test_url: http://www.inc.com/guides/2010/11/seven-tips-for-lobbying-politicians.html\r
+author: substring-after(substring-before(//div[@id='byline'],'|'),'By')
+author: //div[@class='byline']/a
+date: //span[@class='pubdate']
+# print friendly page
+body: //div[@id='text']
+# regular page
+body: //div[@id= 'articlecontent']
+
+strip: //div[@id= 'articlecontent']/h1
+strip: //div[@id='articlecontent']/p[@class='deck']
+strip: //div[@id='articlecontent']/div[@class='byline']
+strip: //div[@id='articlespacer']
+strip: //div[@id='incsharebox']
+strip: //div[@id='articlesidebar']
+
+prune: no
+
+single_page_link: //a[contains(@href, 'Printer_Friendly.html')]
+strip: //a[contains(., 'Dig Deeper')]
+test_url: http://www.inc.com/guides/2010/11/seven-tips-for-lobbying-politicians.html
 test_url: http://www.inc.com/eric-schurenberg/startups-are-we-geting-irrationally-exuberant.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 47baf36..af74220
@@ -1,9 +1,9 @@
-title: //meta[@property='og:title']/@content\r
-body: //div[contains(@class, 'articleContent')]\r
-date: //meta[@property='article:published_time']/@content\r
-author: //div[@id='main']//div[@class='byline']//span[@class='authorName']\r
-\r
-strip_id_or_class: RelatedArtTag\r
-\r
+title: //meta[@property='og:title']/@content
+body: //div[contains(@class, 'articleContent')]
+date: //meta[@property='article:published_time']/@content
+author: //div[@id='main']//div[@class='byline']//span[@class='authorName']
+
+strip_id_or_class: RelatedArtTag
+
 tidy: no
 test_url: http://www.independent.co.uk/news/world/middle-east/syria-could-face-human-rights-probe-2274326.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e7a35e8..8112105
@@ -1,6 +1,6 @@
-body: //figure[@class='mainVideo']\r
-strip: //figcaption\r
-\r
-prune: no\r
-\r
+body: //figure[@class='mainVideo']
+strip: //figcaption
+
+prune: no
+
 test_url: http://www.indiatimes.com/bollywood/kareena-insecure-about-saif-working-with-bipasha-23386.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 312cec4..5225245
@@ -1,5 +1,5 @@
-title: //div[@class='weblogPost']/h3[1]\r
-author: ("Brent Simmons")\r
-date: //span[@class="weblogPostDisplayDate"]\r
+title: //div[@class='weblogPost']/h3[1]
+author: ("Brent Simmons")
+date: //span[@class="weblogPostDisplayDate"]
 body: //div[@class='weblogPostBody']
 test_url: http://inessential.com/2011/10/25/why_just_store_the_app_data_on_dropbo
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 64cf3c8..dee69f8
@@ -1,4 +1,4 @@
-title://h1\r
-body://div[@id='texto_link']\r
+title://h1
+body://div[@id='texto_link']
 
 test_url: http://info.abril.com.br/noticias/internet/filme-do-youtube-vai-estrear-nos-cinemas-22042011-6.shl
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3a4e402..f4a328a
@@ -1,14 +1,14 @@
-body: //div[@id="intTranscript"]\r
-body: //div[@class="box-content"]\r
-title: //div[@class="box-content"]//h1[1]\r
-author: //p[@class="info"]/strong \r
-date: substring-before(substring-after(//p[@class="info"], "on"), "Length")\r
-strip: //div[@class="box-content"]//h1[1]\r
-strip: //div[@class="box-content"]//p[@class="info"]\r
-strip_id_or_class: vendor-content-box\r
-strip_id_or_class: tags2\r
-strip_id_or_class: instructions\r
-strip_id_or_class: comments\r
-strip_id_or_class: forum-list-tree\r
+body: //div[@id="intTranscript"]
+body: //div[@class="box-content"]
+title: //div[@class="box-content"]//h1[1]
+author: //p[@class="info"]/strong 
+date: substring-before(substring-after(//p[@class="info"], "on"), "Length")
+strip: //div[@class="box-content"]//h1[1]
+strip: //div[@class="box-content"]//p[@class="info"]
+strip_id_or_class: vendor-content-box
+strip_id_or_class: tags2
+strip_id_or_class: instructions
+strip_id_or_class: comments
+strip_id_or_class: forum-list-tree
 strip: //div[@class="addthis_toolbox addthis_default_style"]
 test_url: http://www.infoq.com/interviews/oleg-zhurakousky-javaone2011-interview
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index eedec24..7798749
@@ -1,9 +1,9 @@
-title: //div[@class='tituloInt']\r
-body: //div[@class='notaPortada']\r
-strip: //img[@id='imgHorizontalInt imgDetalleImg imagenNota']\r
-date: //span[@class='publi']\r
-author: //span[@class='autor']\r
-tidy: no\r
-prune: no\r
+title: //div[@class='tituloInt']
+body: //div[@class='notaPortada']
+strip: //img[@id='imgHorizontalInt imgDetalleImg imagenNota']
+date: //span[@class='publi']
+author: //span[@class='autor']
+tidy: no
+prune: no
 
 test_url: http://www.informador.com.mx/tecnologia/2011/337606/6/iran-desarrolla-antivirus-tras-afectaciones-por-duqu.htm
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6e3c3b1..3ade754
@@ -1,7 +1,7 @@
-title: //meta[@property='og:title']/@content\r
-author: //*[@property='dc:creator']\r
-date: //*[@property='dc:date']/@content\r
-body: //div[@id='page-content']//div[contains(@class, 'article-body')]\r
-\r
+title: //meta[@property='og:title']/@content
+author: //*[@property='dc:creator']
+date: //*[@property='dc:date']/@content
+body: //div[@id='page-content']//div[contains(@class, 'article-body')]
+
 tidy: no
 test_url: http://www.information.dk/282307
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 134306c..1330a04
@@ -1,10 +1,10 @@
-title://h1[@class="post_title"]\r
-body://article[@class="post"]\r
-date://h1[@class="section_separator"]\r
-author://span[@class="post_author"]\r
-strip://nav[@class="arrow_nav"]\r
-strip://section[@id="contact"]\r
-strip_id_or_class:post_title\r
-strip_id_or_class:post_author\r
+title://h1[@class="post_title"]
+body://article[@class="post"]
+date://h1[@class="section_separator"]
+author://span[@class="post_author"]
+strip://nav[@class="arrow_nav"]
+strip://section[@id="contact"]
+strip_id_or_class:post_title
+strip_id_or_class:post_author
 strip_id_or_class:section_separator
 test_url: http://informationarchitects.net/blog/nzz-relaunch-a-quick-review/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0879e9e..60b798e
@@ -1,6 +1,6 @@
-title: //head/title\r
-body: //table[@id='table3']//div[@class='postContent']\r
-prune: no\r
-tidy: no\r
-\r
+title: //head/title
+body: //table[@id='table3']//div[@class='postContent']
+prune: no
+tidy: no
+
 test_url: http://www.informationclearinghouse.info/article28238.htm
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 84c1fdc..24bf624
@@ -1,7 +1,7 @@
-title: //div[@id='content']/h1\r
-body: //div[@id="content"]\r
-strip: //img[contains(@src, 'informit_printer.png')]\r
-single_page_link: //div[contains(@class, 'articleTools')]//a[contains(@href, '/printerfriendly.')]\r
-prune: no\r
-\r
+title: //div[@id='content']/h1
+body: //div[@id="content"]
+strip: //img[contains(@src, 'informit_printer.png')]
+single_page_link: //div[contains(@class, 'articleTools')]//a[contains(@href, '/printerfriendly.')]
+prune: no
+
 test_url: http://www.informit.com/articles/article.aspx?p=1729268
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index dd588ed..d335bc4
@@ -1,12 +1,12 @@
-body: //div[@id='main_text']\r
-title: //div[@id='main_text']/h1\r
-strip: //div[@id='main_text']/h1\r
-strip: //div[@id='main_text']/h2\r
-strip_id_or_class: tools\r
-strip_id_or_class: articleTools\r
-strip_id_or_class: pagination\r
-strip_id_or_class: byline\r
-strip_id_or_class: tweet\r
-date: //div[@class='date']\r
+body: //div[@id='main_text']
+title: //div[@id='main_text']/h1
+strip: //div[@id='main_text']/h1
+strip: //div[@id='main_text']/h2
+strip_id_or_class: tools
+strip_id_or_class: articleTools
+strip_id_or_class: pagination
+strip_id_or_class: byline
+strip_id_or_class: tweet
+date: //div[@class='date']
 strip: //div[@class='date']
 test_url: http://www.infoworld.com/d/the-industry-standard/it-jobs-the-rise-both-offshore-and-in-us-187689
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 012c873..489d5af
@@ -1,9 +1,9 @@
-# This filter is tested on:\r
-# http://www.infzm.com/content/71068\r
-# http://www.infzm.com/content/41577\r
-\r
-author://em[contains(@class, 'toAuthor')]\r
-date:substring(//em[contains(@class, 'pubTime')],1)\r
-body://section[contains(@id, 'articleContent')]\r
+# This filter is tested on:
+# http://www.infzm.com/content/71068
+# http://www.infzm.com/content/41577
+
+author://em[contains(@class, 'toAuthor')]
+date:substring(//em[contains(@class, 'pubTime')],1)
+body://section[contains(@id, 'articleContent')]
 title://h1[contains(@class ,'articleHeadline clearfix')]
 test_url: http://www.infzm.com/content/41577
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6629daf..c63f53a
@@ -1,8 +1,8 @@
-# set body\r
-body: //div[@class='post-listing']\r
-\r
-# remove clutter\r
-strip: //a/big\r
-strip: //a/em\r
+# set body
+body: //div[@class='post-listing']
+
+# remove clutter
+strip: //a/big
+strip: //a/em
 strip: //p/em
 test_url: http://inhabitat.com/2010/11/18/sliding-walls-transform-this-tokyo-house-into-an-office/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ad9e821..522caeb
@@ -1,6 +1,6 @@
-title: //div[@class='caption']\r
-author: //p[@class='username']\r
-\r
-strip: //div[@class='contents']/h3\r
+title: //div[@class='caption']
+author: //p[@class='username']
+
+strip: //div[@class='contents']/h3
 strip: //div[@class='location']
 test_url: http://instagr.am/p/G-s_aciyDJ/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d196059..afe058d
@@ -1,7 +1,7 @@
-body: //div[@id = 'post']\r
-strip: //div[@class = 'postinfo']\r
-strip: //div[@id = 'postmetanew']\r
-strip: //div[@class = 'paginator']\r
-strip: //div[@class = 'col-2']\r
+body: //div[@id = 'post']
+strip: //div[@class = 'postinfo']
+strip: //div[@id = 'postmetanew']
+strip: //div[@class = 'paginator']
+strip: //div[@class = 'col-2']
 strip: //div[@id = 'adfactor-label']
 test_url: http://www.ipadclub.nl/15808/text-writer-ipad-tekstverwerker-met-functieknoppen/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a2e4900..dedb557
@@ -1,7 +1,7 @@
-body: //div[@id = 'post']\r
-strip: //div[@class = 'postinfo']\r
-strip: //div[@id = 'postmetanew']\r
-strip: //div[@class = 'paginator']\r
-strip: //div[@class = 'col-2']\r
+body: //div[@id = 'post']
+strip: //div[@class = 'postinfo']
+strip: //div[@id = 'postmetanew']
+strip: //div[@class = 'paginator']
+strip: //div[@class = 'col-2']
 strip: //div[@id = 'adfactor-label']
 test_url: http://www.ipadplanet.nl/11723/steve-jobs-bevestigt-verdwijnen-fysieke-rotatieschakelaar-in-ios-4-2/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f8d4f6a..850a24e
@@ -1,7 +1,7 @@
-body: //div[@id = 'post']\r
-strip: //div[@class = 'postinfo']\r
-strip: //div[@id = 'postmetanew']\r
-strip: //div[@class = 'paginator']\r
-strip: //div[@class = 'col-2']\r
-strip: //div[@id = 'adfactor-label']\r
+body: //div[@id = 'post']
+strip: //div[@class = 'postinfo']
+strip: //div[@id = 'postmetanew']
+strip: //div[@class = 'paginator']
+strip: //div[@class = 'col-2']
+strip: //div[@id = 'adfactor-label']
 test_url: http://www.iphoneclub.nl/105808/t-mobile-mobiel-internet-wordt-duurder-maar-blijft-onbeperkt/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c97ff43..e8ccea0
@@ -1,9 +1,9 @@
-title: //meta[@name='og:title']/@content\r
-body: //small[@class='postmetadata'] | //div[contains(@class, 'entry-content')]\r
-\r
-strip: //span[@vanilla-identifier]\r
-\r
-prune: no\r
-tidy: no\r
-\r
+title: //meta[@name='og:title']/@content
+body: //small[@class='postmetadata'] | //div[contains(@class, 'entry-content')]
+
+strip: //span[@vanilla-identifier]
+
+prune: no
+tidy: no
+
 test_url: http://www.iphonehacks.com/2012/07/app-review-process-behind-the-scenes.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a1c16a1..215fdf8
@@ -1,6 +1,6 @@
-# Remove social buttons\r
-strip: //div[@id='temp_Content_Right']\r
-\r
-# Remove duplicate article title\r
+# Remove social buttons
+strip: //div[@id='temp_Content_Right']
+
+# Remove duplicate article title
 strip: //*[(@class='storytitle')]
 test_url: http://isource.com/2010/10/24/swearch-a-cool-iphone-web-app/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8da78cb..3ba484a
@@ -1,6 +1,6 @@
-author: //p[@class = 'writer']\r
-\r
-date: //p[@class = 'published-time']\r
-\r
+author: //p[@class = 'writer']
+
+date: //p[@class = 'published-time']
+
 body: //div[@class = 'text main']
 test_url: http://www.itavisen.no/899786/old-republic-blir-gratis
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/itmedia.co.jp.txt b/inc/3rdparty/site_config/standard/itmedia.co.jp.txt
new file mode 100755 (executable)
index 0000000..97f00ce
--- /dev/null
@@ -0,0 +1,8 @@
+body: //div[@id='cmsBody']
+
+next_page_link: //span[@id='next']/a
+
+strip_id_or_class: cmsCopyright
+strip_id_or_class: masterSocialbuttonBtm
+
+test_url: http://www.itmedia.co.jp/enterprise/articles/0912/05/news002.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 550875e..b8cb461
@@ -1,12 +1,12 @@
-title: //h1[@class="entry-title"]\r
-body: //div[@class='format_text entry-content']\r
-author: //span[@class="author vcard"]/a\r
-date: //abbr[@class="published"]\r
-\r
-strip_id_or_class:  related-posts\r
-strip_id_or_class: membershipbox\r
-strip_id_or_class: share_this_compact_bt\r
-\r
-\r
+title: //h1[@class="entry-title"]
+body: //div[@class='format_text entry-content']
+author: //span[@class="author vcard"]/a
+date: //abbr[@class="published"]
+
+strip_id_or_class:  related-posts
+strip_id_or_class: membershipbox
+strip_id_or_class: share_this_compact_bt
+
+
 footnotes: no
 test_url: http://www.itstactical.com/warcom/knives/exclusive-triple-aught-design-production-dauntless-knife-video-walkthrough/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/itwire.com.txt b/inc/3rdparty/site_config/standard/itwire.com.txt
new file mode 100755 (executable)
index 0000000..72b4106
--- /dev/null
@@ -0,0 +1,5 @@
+author: //a[@rel="author"]
+date: //li[@class="itemDateCreated"]
+strip: //div[contains(@class, 'legend-rounded')]
+
+test_url: http://www.itwire.com/it-industry-news/market/59661-ibm-looks-to-high-value-solutions-to-meet-changing-demands
old mode 100644 (file)
new mode 100755 (executable)
index d4fa604..1ee0ee5
@@ -1,5 +1,5 @@
-title: //*[@id="article-title"]\r
-author: //*[@id="article-info"]/strong\r
-date: //*[@class="article-dateline"]/strong\r
+title: //*[@id="article-title"]
+author: //*[@id="article-info"]/strong
+date: //*[@class="article-dateline"]/strong
 body: //*[@id="article-content"]
 test_url: http://www.itworld.com/open-source/140916/android-sued-microsoft-not-linux
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index af3f299..b0114d3
@@ -1,4 +1,4 @@
-body: //div[starts-with(@id, 'news-id-')]\r
-prune: no\r
-\r
+body: //div[starts-with(@id, 'news-id-')]
+prune: no
+
 test_url: http://izismile.com/2011/06/13/uncanny_factoid_fashion_or_creepy_2_pics.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f1dd3d1..343fd6f
@@ -1,6 +1,6 @@
-body: //div[@id='content']//div[@class = 'post f']\r
-strip_id_or_class: comment-big\r
-strip_id_or_class: avatar\r
-strip: //div[@class='time_s']\r
+body: //div[@id='content']//div[@class = 'post f']
+strip_id_or_class: comment-big
+strip_id_or_class: avatar
+strip: //div[@class='time_s']
 
 test_url: http://jandan.net/2011/04/03/iphone-5-sony.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6e8af93..00e4cf6
@@ -1,22 +1,22 @@
-title: //h1\r
-author: //p[contains(@class, 'author')]/a\r
-date: //p[contains(@class, 'time')]\r
-body: //div[@class='content']/div[contains(@class, 'text')]\r
-\r
-# prevent "no text" errors on multi-page articles\r
-tidy: no\r
-\r
-# we use a custom next-link detector instead of the print view because\r
-# it's pretty hard to strip out the unwanted parts in the print view\r
-autodetect_next_page: no\r
-next_page_link: //div[contains(@class, 'text')]/div/div[contains(@class, 'paging')]/a[@class='more ']\r
-\r
-strip: //h1\r
-\r
-strip_id_or_class: meta\r
-strip_id_or_class: author\r
-strip_id_or_class: paging\r
-\r
-# prevent "Report an Error" from being recognized as footnote\r
+title: //h1
+author: //p[contains(@class, 'author')]/a
+date: //p[contains(@class, 'time')]
+body: //div[@class='content']/div[contains(@class, 'text')]
+
+# prevent "no text" errors on multi-page articles
+tidy: no
+
+# we use a custom next-link detector instead of the print view because
+# it's pretty hard to strip out the unwanted parts in the print view
+autodetect_next_page: no
+next_page_link: //div[contains(@class, 'text')]/div/div[contains(@class, 'paging')]/a[@class='more ']
+
+strip: //h1
+
+strip_id_or_class: meta
+strip_id_or_class: author
+strip_id_or_class: paging
+
+# prevent "Report an Error" from being recognized as footnote
 footnotes: no
 test_url: http://jetzt.sueddeutsche.de/texte/anzeigen/544308/Alles-flicken
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 95c45ee..d45c889
@@ -1,4 +1,4 @@
-body: //div[@class='entry']\r
-prune: no\r
+body: //div[@class='entry']
+prune: no
 
 test_url: http://www.jjahnke.net/rundbr87.html#2514
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index af8d7d1..1dbe207
@@ -1,5 +1,5 @@
-body: //div[@id='formatCont_en']\r
-\r
-prune: no\r
-\r
+body: //div[@id='formatCont_en']
+
+prune: no
+
 test_url: http://www.jobbank.gc.ca/detail-eng.aspx?Source=JobPosting&OrderNum=6397922
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 75fbee5..241a361
@@ -1,21 +1,21 @@
-# Works with old posts too, such as http://www.joelonsoftware.com/articles/fog0000000332.html\r
-\r
-author: substring-after(//div[@class="author"], 'by ')\r
-date: //div[@class="date"]\r
-\r
-## Clean stuff at top ##\r
-\r
-strip: //h1[1]\r
-strip: //h2[1]\r
-strip: //div[@class="date"]\r
-strip: //div[@class="author"]\r
-\r
-## Clean stuff at bottom ##\r
-\r
-strip: //blockquote[@class="textmessage"]\r
-strip: //div[@style="width:500px"]/p[last()]\r
-strip: //div[@style="width:500px"]/p[last()-1]\r
-strip: //div[@style="width:500px"]/h4[last()]\r
-strip: //div[@style="width:500px"]/h4[last()-1]\r
+# Works with old posts too, such as http://www.joelonsoftware.com/articles/fog0000000332.html
+
+author: substring-after(//div[@class="author"], 'by ')
+date: //div[@class="date"]
+
+## Clean stuff at top ##
+
+strip: //h1[1]
+strip: //h2[1]
+strip: //div[@class="date"]
+strip: //div[@class="author"]
+
+## Clean stuff at bottom ##
+
+strip: //blockquote[@class="textmessage"]
+strip: //div[@style="width:500px"]/p[last()]
+strip: //div[@style="width:500px"]/p[last()-1]
+strip: //div[@style="width:500px"]/h4[last()]
+strip: //div[@style="width:500px"]/h4[last()-1]
 strip: //div[@style="width:500px"]/div[last()]
 test_url: http://www.joelonsoftware.com/items/2011/09/15.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 535a501..3cf6067
@@ -1,3 +1,3 @@
-author: //h1\r
+author: //h1
 date: //p[contains(@class,'date')]
 test_url: http://jouire.com/2011/01/exquisite-whispers/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7fbd467..7a8e56f
@@ -1,8 +1,8 @@
-author: //a[@class="byline-author"]\r
-title: //h1[@class="headline"]\r
-strip: //div[@id="info-card"]\r
-strip: //div[@id="breaking-news"]\r
-strip: //div[@class="rmod list-post-mod"]\r
-strip: //div[@id="footer"]\r
+author: //a[@class="byline-author"]
+title: //h1[@class="headline"]
+strip: //div[@id="info-card"]
+strip: //div[@id="breaking-news"]
+strip: //div[@class="rmod list-post-mod"]
+strip: //div[@id="footer"]
 strip: //div[@id="GH_strip"]
 test_url: http://www.joystiq.com/2012/06/20/magic-the-gathering-duels-of-the-planeswalkers-2013-review/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index be844e5..ff5a024
@@ -1,19 +1,19 @@
-body: //div[@id='article_container']\r
-author: //h4//a[@class='author']\r
-title: //h1\r
-\r
-replace_string(lang="en"): lang="de"\r
-replace_string(/>1</a>):/></a>\r
-\r
-strip_id_or_class: share_toolbox\r
-strip_id_or_class: article_header\r
-strip_id_or_class: phototext\r
-\r
-strip_image_src: icon_author.gif\r
-\r
-strip: //img[@src='']\r
-strip: //h4[@id='author']\r
-\r
-prune: no\r
-\r
+body: //div[@id='article_container']
+author: //h4//a[@class='author']
+title: //h1
+
+replace_string(lang="en"): lang="de"
+replace_string(/>1</a>):/></a>
+
+strip_id_or_class: share_toolbox
+strip_id_or_class: article_header
+strip_id_or_class: phototext
+
+strip_image_src: icon_author.gif
+
+strip: //img[@src='']
+strip: //h4[@id='author']
+
+prune: no
+
 test_url: http://www.juedische-allgemeine.de/article/view/id/13366
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e2d07f2..fdf7cdc
@@ -1,8 +1,8 @@
-convert_double_br_tags: yes\r
-\r
-title: //div[@id="storycredits"]/p/span[@class="title"]\r
-author: //div[@id="storycredits"]/p/br[1]/following-sibling::text()\r
-\r
-strip: //div[@id="storycredits"]\r
+convert_double_br_tags: yes
+
+title: //div[@id="storycredits"]/p/span[@class="title"]
+author: //div[@id="storycredits"]/p/br[1]/following-sibling::text()
+
+strip: //div[@id="storycredits"]
 
 test_url: http://www.juppy.org/santa/stories.php?ForAuthorID=35&Year=2005
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 34404e9..535693c
@@ -1,3 +1,3 @@
-body: //div[contains(@class, 'inner_content')]\r
+body: //div[contains(@class, 'inner_content')]
 
 test_url: http://kachestvo.ru/promtovar/odezhda/denim.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/kachiblog.com.txt b/inc/3rdparty/site_config/standard/kachiblog.com.txt
new file mode 100755 (executable)
index 0000000..35baf8d
--- /dev/null
@@ -0,0 +1,7 @@
+title: //h3[contains(@class, 'entry-title')]
+date: //abbr[@itemprop='datePublished']/@title
+body: //div[@itemprop='articleBody']
+tidy: no
+
+test_url: http://www.kachiblog.com/2013/05/samsung-galaxy-s4-vs-samsung-galaxy.html
+test_url: http://www.kachiblog.com/feeds/posts/default
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/kathimerini.gr.txt b/inc/3rdparty/site_config/standard/kathimerini.gr.txt
new file mode 100755 (executable)
index 0000000..2c7c518
--- /dev/null
@@ -0,0 +1,4 @@
+title: //td[contains(@class, 'articleTitlos')]
+body: //td[contains(@class, 'eelantext')]
+
+test_url: http://www.kathimerini.gr/4dcgi/_w_articles_kathremote_1_03/12/2013_530490
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e6d100e..90c64cb
@@ -1,7 +1,7 @@
-# Ads\r
-strip: //table[@align="right"][@width="120"]\r
-\r
-# Affiliate link paragraphs\r
-strip: //a[.="Adorama"]/parent::p[contains(., "goodies")]\r
+# Ads
+strip: //table[@align="right"][@width="120"]
+
+# Affiliate link paragraphs
+strip: //a[.="Adorama"]/parent::p[contains(., "goodies")]
 strip: //a[.="Adorama"]/parent::p[contains(., "This free website's biggest source of")]
 test_url: http://www.kenrockwell.com/tech/composition.htm
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7d5daa4..db4f63c
@@ -1,21 +1,21 @@
-# set body\r
-body: //div[@id='ovArtikel']\r
-\r
-# set title\r
-title: //div[@id='ovArtikel']/h1\r
-# strip main title and leave sub title\r
-strip: //div[@id='ovArtikel']/h1\r
-\r
-date: //div[@class='publicdate']\r
-\r
-#remove captions\r
-strip: //*/div[@class='bu']\r
-strip: //*/div[@class='credit']\r
-\r
-#remove adds\r
-strip: //*/div[@class='ad-head']\r
-strip: //*/div[@class='linksebay']\r
-\r
-# remove video content\r
+# set body
+body: //div[@id='ovArtikel']
+
+# set title
+title: //div[@id='ovArtikel']/h1
+# strip main title and leave sub title
+strip: //div[@id='ovArtikel']/h1
+
+date: //div[@class='publicdate']
+
+#remove captions
+strip: //*/div[@class='bu']
+strip: //*/div[@class='credit']
+
+#remove adds
+strip: //*/div[@class='ad-head']
+strip: //*/div[@class='linksebay']
+
+# remove video content
 strip: //*/div[@class='ovVideo']
 test_url: http://www.kicker.de/news/fussball/frauen/wmfr/frauen-weltmeisterschaft/2011/3/1123662/spielbericht_frankreich-frauen_deutschland-frauen.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c055659..7b3daa5
@@ -1,7 +1,7 @@
-title: //h1[@id='name']\r
-body: //*[@id='leftcol']\r
-\r
-strip_id_or_class: 'share-box'\r
-strip_id_or_class: 'project-faqs'\r
+title: //h1[@id='name']
+body: //*[@id='leftcol']
+
+strip_id_or_class: 'share-box'
+strip_id_or_class: 'project-faqs'
 strip_id_or_class: 'report-issue-wrap'
 test_url: http://www.kickstarter.com/projects/hop/elevation-dock-the-best-dock-for-iphone
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 2f6783a..b27539f
@@ -1,4 +1,4 @@
-title: //div[@class='post']/h2\r
-body: //div[@class='entry']\r
+title: //div[@class='post']/h2
+body: //div[@class='entry']
 strip: //p[contains(.,'Tags:')]
 test_url: http://www.kingarthurflour.com/blog/2011/01/28/a-big-sandwich-for-the-big-game/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f93a61e..582f251
@@ -1,6 +1,6 @@
-title: //h2\r
-author: //*[@id='main']/div/a[1]\r
-date: substring-before(substring-after(//div[@class='meta'],'&bull;'),'&bull;')\r
-body: //div[@id='main']\r
-strip: //div[@class='meta']\r
+title: //h2
+author: //*[@id='main']/div/a[1]
+date: substring-before(substring-after(//div[@class='meta'],'&bull;'),'&bull;')
+body: //div[@id='main']
+strip: //div[@class='meta']
 test_url: http://kottke.org/08/02/king-of-kong-a-fistful-of-quarters
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9e15cc3..2f604de
@@ -1,3 +1,3 @@
-body: //div[@class = "entry-full"]\r
+body: //div[@class = "entry-full"]
 
 test_url: http://www.kumailplus.com/2011/12/02/24308
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3f0d236..fe35062
@@ -1,10 +1,10 @@
-title: //div[@id='centrediv']/h1\r
-\r
-author: substring-after(//div[@id='centrediv']/h3,'By: ')\r
-\r
-date: substring-after(substring-before(//div[@id='centrediv']/h3,'By: '),'Filed: ')\r
-\r
-body: //div[@class='KonaBody']\r
-\r
+title: //div[@id='centrediv']/h1
+
+author: substring-after(//div[@id='centrediv']/h3,'By: ')
+
+date: substring-after(substring-before(//div[@id='centrediv']/h3,'By: '),'Filed: ')
+
+body: //div[@class='KonaBody']
+
 convert_double_br_tags: yes
 test_url: http://www.kumb.com/story.php?id=126084
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 879b4d6..cf4d3b8
@@ -1,9 +1,9 @@
-date: //span[@class='datum']\r
-title: //div[@class='artikel']/h2\r
-body: //div[@class='entry']\r
-strip: //p[@class='tags']\r
-author: substring-after(//div[@class='authorinfo']/em,'Dies ist ein Artikel von ')\r
-strip: //div[@class='authorinfo']\r
-strip: //div[@class='authorpic']\r
+date: //span[@class='datum']
+title: //div[@class='artikel']/h2
+body: //div[@class='entry']
+strip: //p[@class='tags']
+author: substring-after(//div[@class='authorinfo']/em,'Dies ist ein Artikel von ')
+strip: //div[@class='authorinfo']
+strip: //div[@class='authorpic']
 
 test_url: http://kwerfeldein.de/index.php/2011/10/17/doppelbelichtungen-mit-konzept/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/landetsfria.se.txt b/inc/3rdparty/site_config/standard/landetsfria.se.txt
new file mode 100755 (executable)
index 0000000..e5317a5
--- /dev/null
@@ -0,0 +1,7 @@
+body: //div[contains(@class, 'layout__inner')]//div[contains(@class, 'file-image') or contains(@class, 'node__content')]
+author: //article//div[contains(@class, 'field-byline')]
+strip_id_or_class: rekommenderade
+strip_id_or_class: disqus
+strip_id_or_class: annonser
+
+test_url: http://www.landetsfria.se/artikel/112070
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a34e39d..d25999d
@@ -1,13 +1,13 @@
-title: //h1[@class='headline']\r
-body: //div[@class='article']\r
-strip: //div[@class='article']//h3[contains(@class, 'section')]\r
-strip: //div[@class='article']//ul[contains(@class, 'article-actions')]\r
-strip: //div[@id='syndication-upper']\r
-strip: //a[@id='syndication']\r
-strip: //dl[@id='article-tags']\r
-strip: //div[@id='article-like']\r
-prune: no\r
-\r
-single_page_link: //li[@class='single-page']/a\r
-\r
+title: //h1[@class='headline']
+body: //div[@class='article']
+strip: //div[@class='article']//h3[contains(@class, 'section')]
+strip: //div[@class='article']//ul[contains(@class, 'article-actions')]
+strip: //div[@id='syndication-upper']
+strip: //a[@id='syndication']
+strip: //dl[@id='article-tags']
+strip: //div[@id='article-like']
+prune: no
+
+single_page_link: //li[@class='single-page']/a
+
 test_url: http://www.laphamsquarterly.org/essays/balanced-diets.php
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e771f81..82374c0
@@ -1,3 +1,3 @@
-tidy: no\r
+tidy: no
 
 test_url: http://www.laprensagrafica.com/opinion/editorial/229252-reflexiones-sobre-la-educacion-que-necesitamos.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 5bad8e6..746bfca
@@ -1,10 +1,10 @@
-body: //div[@id='content-content']//div[@class='content']\r
-title: //h1[@class='title']\r
-date: substring-after(//*[@class='submitted'],'Submitted on')\r
-tidy: no\r
-strip: //div[@class='terms terms-inline']\r
-strip: //div[@class='more']\r
-strip: //div[@class='share-links']\r
-strip: //table[@id='attachments']\r
-\r
+body: //div[@id='content-content']//div[@class='content']
+title: //h1[@class='title']
+date: substring-after(//*[@class='submitted'],'Submitted on')
+tidy: no
+strip: //div[@class='terms terms-inline']
+strip: //div[@class='more']
+strip: //div[@class='share-links']
+strip: //table[@id='attachments']
+
 test_url: http://www.laquadrature.net/en/finalization-of-eu-parliaments-weak-net-neutrality-resolution
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 504dbea..25e3654
@@ -1,12 +1,12 @@
-#meta data\r
-title:substring-after(title,'|')\r
-\r
-author:substring-before( substring-after(//meta[@name = 'description']/@content, normalize-space(substring-after(//title,'|'))),' respond ')\r
-date://h5[@class = 'postDate']\r
-\r
-#text\r
-body://div[@class = 'articleBody']\r
-\r
-#clean up\r
-strip://center
-test_url: http://lareviewofbooks.org/post/14066007115/literary-transactions-and-their-vicissitudes
\ No newline at end of file
+#metadata
+title: substring-before(//title,' |')
+author: //a[contains(@class,'person') and starts-with(@href, '/contributor')]
+
+#text
+body: //div[contains(@class, 'article_body')]
+
+#clean up
+strip_id_or_class: recommended_section
+
+test_url: http://lareviewofbooks.org/review/american-politics-redeembale-robert-gates-hillary-clinton-two-memoirs-washington-dc
+test_url: http://lareviewofbooks.org/interview/souvenirs-future
old mode 100644 (file)
new mode 100755 (executable)
index 0d6ac85..b2db37b
@@ -1,11 +1,11 @@
-strip: //div[@id="tugs_story_display"]\r
-strip: //div[@id="search_overlay"]\r
-strip: //div[@id="adv_search"]\r
-body: //div[@class='story']\r
-tidy: no\r
-convert_double_br_tags: yes\r
-single_page_link: //a[contains(@href, ',print.')]\r
-strip: //p[starts-with(., 'latimes.com')]\r
-strip: //h1[starts-with(., 'latimes.com')]\r
+strip: //div[@id="tugs_story_display"]
+strip: //div[@id="search_overlay"]
+strip: //div[@id="adv_search"]
+body: //div[@class='story']
+tidy: no
+convert_double_br_tags: yes
+single_page_link: //a[contains(@href, ',print.')]
+strip: //p[starts-with(., 'latimes.com')]
+strip: //h1[starts-with(., 'latimes.com')]
 strip_id_or_class: cubead
 test_url: http://www.latimes.com/news/opinion/commentary/la-oe-gartonash-wilders-20110512,0,2876761.story
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 1814988..ab2f834
@@ -1,3 +1,3 @@
-title: //h1[@class='entry-title']\r
+title: //h1[@class='entry-title']
 body: //div[@class='entry-content']
 test_url: http://laughingsquid.com/mysterious-tiny-doors-appearing-around-san-francisco/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0a4c84b..e78cf7e
@@ -1,9 +1,9 @@
-title: //div[@id="content"]/h1[1]\r
-date: substring-before(//p[@class="postdate"], ' at ')\r
-author: ("Dr. Drang")\r
-\r
-strip: //div[@id="content"]/h1[1]\r
-strip: //p[@class="postdate"]\r
-strip: //h2[@id="respond"]\r
+title: //div[@id="content"]/h1[1]
+date: substring-before(//p[@class="postdate"], ' at ')
+author: ("Dr. Drang")
+
+strip: //div[@id="content"]/h1[1]
+strip: //p[@class="postdate"]
+strip: //h2[@id="respond"]
 strip: //blockquote[@class="bbpTweet"]/p/span/a/img
 test_url: http://www.leancrew.com/all-this/2011/12/more-shell-less-egg/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f5494b9..e720e37
@@ -1,8 +1,8 @@
-title: //meta[@name='title']/@content\r
-author: //span[@class='sign']//a[@class='journaliste']\r
-author: //meta[@name='author']/@content\r
-body: //*[@id='article']/div[@class='photo'] | //*[@id='article']/h2 | //*[@id='article']/div[@class='texte']\r
-date: //time[@pubdate]/@datetime\r
-prune: no\r
-test_url: http://www.lefigaro.fr/environnement/2011/11/10/01029-20111110ARTFIG00801-la-chine-confrontee-a-un-immense-defi-ecologique.php\r
+title: //meta[@name='title']/@content
+author: //span[@class='sign']//a[@class='journaliste']
+author: //meta[@name='author']/@content
+body: //*[@id='article']/div[@class='photo'] | //*[@id='article']/h2 | //*[@id='article']/div[@class='texte']
+date: //time[@pubdate]/@datetime
+prune: no
+test_url: http://www.lefigaro.fr/environnement/2011/11/10/01029-20111110ARTFIG00801-la-chine-confrontee-a-un-immense-defi-ecologique.php
 test_url: http://www.lefigaro.fr/conjoncture/2012/11/20/20002-20121120ARTFIG00609-l-usager-devrait-payer-plus-pour-financer-les-transports.php
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index eb20527..097999b
@@ -1,13 +1,18 @@
-title: //h1\r
-\r
-# they have a single component containing both author and date\r
-#author: //p[@class='source']\r
-#date: //p[@class='source']\r
-\r
-body: //div[@class='contenu_article']\r
-#Shoot the insane "conjugaison.lemonde.fr" links :\r
-strip: //a[contains(@class, 'listLink')]\r
-\r
-prune: no\r
-\r
-test_url: http://www.lemonde.fr/economie/article/2011/07/05/moody-s-abaisse-la-note-du-portugal-de-quatre-crans_1545237_3234.html
\ No newline at end of file
+title: //h1
+
+# We can have multiple authors
+author: //a[@class='auteur']
+
+# Last edition date (if any)
+date: //time[@itemprop='dateModified']/@datetime
+# Publication date
+date: //time[@itemprop='datePublished']/@datetime
+
+
+body: //div[@id='articleBody']
+#Shoot the insane "conjugaison.lemonde.fr" links :
+#strip: //a[contains(@class, 'conjug')]
+
+prune: no
+
+test_url: http://www.lemonde.fr/economie/article/2011/07/05/moody-s-abaisse-la-note-du-portugal-de-quatre-crans_1545237_3234.html
old mode 100644 (file)
new mode 100755 (executable)
index 9b57f72..51e025a
@@ -1,9 +1,9 @@
-title: //h1/following::span[@class='fn']\r
-# Author: should stop parsing until <br> reached, but I don't know how to do this.\r
-author: //following::div[@class='PDate2']\r
-date: //following::div[@class='PDate2']/strong\r
-\r
-body: //div[@class='ArTexte']\r
-body: //div[@id='prod_txt_b']\r
-body: //div[@class='ArPhotoP']\r
+title: //h1/following::span[@class='fn']
+# Author: should capture only the text before the first <br>, but I don't know how to do this.
+author: //following::div[@class='PDate2']
+date: //following::div[@class='PDate2']/strong
+
+body: //div[@class='ArTexte']
+body: //div[@id='prod_txt_b']
+body: //div[@class='ArPhotoP']
 test_url: http://www.lesnumeriques.com/disque-dur-multimedia/popcorn-hour-300-p12231/test.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c4bee7e..49b019f
@@ -1,3 +1,3 @@
-title: //h2\r
+title: //h2
 strip_image_src: logo.gif
 test_url: http://www.letemps.ch/Facet/print/Uuid/7c9f912c-07c9-11e0-9b50-4d96c9eca37f
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/libcom.org.txt b/inc/3rdparty/site_config/standard/libcom.org.txt
new file mode 100755 (executable)
index 0000000..d1404d1
--- /dev/null
@@ -0,0 +1,7 @@
+date: //span[contains(@class, 'page-date')]
+body: //div[@id='node-page']
+strip_id_or_class: book-navigation
+prune: no
+
+test_url: http://libcom.org/library/what-was-the-ussr-aufheben-1
+test_url: http://libcom.org/library-latest/feed
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c3888aa..0e1dceb
@@ -1,3 +1,3 @@
-title: //h2[@class="entry-title"]\r
+title: //h2[@class="entry-title"]
 body: //div[@class="entry-content"]
 test_url: http://www.lifeandculture.fr/digital/facebook-and-the-epiphanator-an-end-to-endings/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 32ade14..ec97f06
@@ -1,42 +1,47 @@
-# Adds author text: Gawker sites commonly show as "Author: View Profile"\r
-author://a[@class="plus-icon modfont"]\r
-\r
-# Add date and time\r
-date: //span[@class="date"]\r
-\r
-# Remove date and time from article text\r
-strip: //span[@class="date"]\r
-\r
-# Remove login/comment text\r
-strip: //*[(@class="presence_control_external smalltype")]\r
-\r
-strip: //div[@class="nodebyline modfont"]\r
-\r
-# Remove right sidebar\r
-strip: //div[@id="rightwrapper"]\r
-\r
-# Remove print header\r
-strip: //div[@id='printhead']/h1\r
-\r
-# Remove 'content is restricted'\r
-strip: //div[@id='agegate_IDHERE']\r
-\r
-# Remove follow text\r
-strip: //*[(@class="permalink_ads")]\r
-\r
-# Remove view/comment count\r
-strip: //div[@id='wrapper']/div[2][@class='postmeta_permalink_wrapper']/div[1][@class='postmeta_permalink']/div[2][@class='pm_line']\r
-\r
-# Remove contact text\r
-strip: //div[@id='wrapper']/div[1][@class='content permalink']/p[6][@class='contactinfo']\r
-\r
-# Remove medium duplicates of the article image\r
-strip_image_src: medium.jpg\r
-\r
-# Remove "arrow" class at bottom of page\r
-strip: //p[@class="arrow"]\r
-\r
-# Remove "track" image from article body\r
-strip: //img[@alt="track"]\r
-test_url: http://lifehacker.com/5925801/how-can-i-turn-vague-goals-into-actionable-to+dos\r
-test_url: http://lifehacker.com/5941600/hack-an-old-computer-mouse-into-a-wireless-bluetooth-mouse
\ No newline at end of file
+# Adds author text: Gawker sites commonly show as "Author: View Profile"
+author://a[@class="plus-icon modfont"]
+
+# Add date and time
+date: //span[@class="date"]
+
+body: //div[contains(@class, 'marquee-asset-wrapper') or contains(@class, 'post-content')]
+
+# Remove date and time from article text
+strip: //span[@class="date"]
+
+# Remove login/comment text
+strip: //*[(@class="presence_control_external smalltype")]
+
+strip: //div[@class="nodebyline modfont"]
+
+# Remove right sidebar
+strip: //div[@id="rightwrapper"]
+
+# Remove print header
+strip: //div[@id='printhead']/h1
+
+# Remove 'content is restricted'
+strip: //div[@id='agegate_IDHERE']
+
+# Remove follow text
+strip: //*[(@class="permalink_ads")]
+
+strip_id_or_class: inset_groups
+
+# Remove view/comment count
+strip: //div[@id='wrapper']/div[2][@class='postmeta_permalink_wrapper']/div[1][@class='postmeta_permalink']/div[2][@class='pm_line']
+
+# Remove contact text
+strip: //div[@id='wrapper']/div[1][@class='content permalink']/p[6][@class='contactinfo']
+
+# Remove medium duplicates of the article image
+strip_image_src: medium.jpg
+
+# Remove "arrow" class at bottom of page
+strip: //p[@class="arrow"]
+
+# Remove "track" image from article body
+strip: //img[@alt="track"]
+test_url: http://lifehacker.com/5925801/how-can-i-turn-vague-goals-into-actionable-to+dos
+test_url: http://lifehacker.com/5941600/hack-an-old-computer-mouse-into-a-wireless-bluetooth-mouse
+test_url: http://lifehacker.com/what-happens-to-the-brain-when-you-meditate-and-how-it-1202533314
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/lifestyle.inquirer.net.txt b/inc/3rdparty/site_config/standard/lifestyle.inquirer.net.txt
new file mode 100755 (executable)
index 0000000..25d544a
--- /dev/null
@@ -0,0 +1,7 @@
+title: //h1[@class='singlePageTitle']
+
+strip: //p[contains(text(), 'Follow Us')]
+strip: //p/strong[contains(text(), 'Recent Stories:')]
+strip: //div[@id="sharefeature"]
+
+test_url: http://lifestyle.inquirer.net/100223/dusting-your-ceiling-fan
diff --git a/inc/3rdparty/site_config/standard/lifeweek.com.cn.txt b/inc/3rdparty/site_config/standard/lifeweek.com.cn.txt
new file mode 100755 (executable)
index 0000000..e09f669
--- /dev/null
@@ -0,0 +1,23 @@
+# This filter is tested on:
+# http://www.lifeweek.com.cn/2012/1211/39439.shtml
+# http://www.lifeweek.com.cn/2013/0308/40213.shtml
+
+title:substring-before(//h1, '(')
+title://h1
+date://ul[@class='authorbox']/li
+author: substring-after(//ul[@class='authorbox']/li/following-sibling::li, '作者:')
+
+next_page_link: //div[@class='pageturn_list']/a[@class='pagedown']
+body: //div[@class='original ']
+
+strip://h1
+strip://ul[@class='authorbox']
+strip://span[@class='app_p']
+strip://div[@style='text-align:right;']
+strip://div[@class='pageturn_list']
+strip://div[@class='lifespeaks']
+strip://div[@class='vright fr']
+strip://div[@class='copyrt mg20']
+strip://div[@class='keyabout mg20']
+strip://ul[@class='readabout mg20']
+test_url: http://www.lifeweek.com.cn/2013/0308/40213.shtml
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/livescience.com.txt b/inc/3rdparty/site_config/standard/livescience.com.txt
new file mode 100755 (executable)
index 0000000..5275d34
--- /dev/null
@@ -0,0 +1,20 @@
+title: //div[@class="album_title"]//h1
+author: substring-before(//div[@class='by_line'], ',')
+date: substring-after(substring-before(//div[@class="album_time"], ' Time'), 'Date: ')
+body: //div[@class="about_text"]
+
+strip: //div[@class='large_popper']
+strip: //span[contains(@id, 'mag_glass')]
+strip: //span[contains(@class, 'img_overlay')]
+strip: //td//span
+strip: //div[@class="center_adsense"]
+strip: //div[@class="article_info"]//div[@class='asset_section']
+strip: //div[@class="article_additional"]
+strip: //div[contains(@style, 'overflow:hidden')]
+strip: //div[@class="aa_text"]
+strip: //div[@id='nointelliTXT']
+
+prune: no
+autodetect_on_failure: no
+
+test_url: http://www.livescience.com/34569-why-flowers-close-at-night-nyctinasty.html
old mode 100644 (file)
new mode 100755 (executable)
index 48d5e1a..1310ec0
@@ -1,3 +1,3 @@
-single_page_link: //div[@class="post"]/div[@class="title"]/a\r
+single_page_link: //div[@class="post"]/div[@class="title"]/a
 
 test_url: http://longform.org/2011/05/06/disconcerting-new-answers-in-models-suicide/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 08ad90c..730af94
@@ -1,9 +1,9 @@
-body: //div[@class='container_16']//div[@class='grid_11']\r
-strip: //h2[@class='mast']\r
-strip: //div[@class='container_16']//div[@class='grid_11']/h1\r
-strip: //div[@class='container_16']//div[@class='grid_11']/p[1]\r
-strip: //div[@class='container_16']//div[@class='grid_11']/div\r
-author: //a[starts-with(@title, 'Posts by')]\r
-date: substring-before(substring-after(//time, 'Posted on '), ' at')\r
-test_url: http://www.loopinsight.com/2012/09/13/forget-iphone-5-naysayers-this-thing-is-big/\r
+body: //div[@class='container_16']//div[@class='grid_11']
+strip: //h2[@class='mast']
+strip: //div[@class='container_16']//div[@class='grid_11']/h1
+strip: //div[@class='container_16']//div[@class='grid_11']/p[1]
+strip: //div[@class='container_16']//div[@class='grid_11']/div
+author: //a[starts-with(@title, 'Posts by')]
+date: substring-before(substring-after(//time, 'Posted on '), ' at')
+test_url: http://www.loopinsight.com/2012/09/13/forget-iphone-5-naysayers-this-thing-is-big/
 test_url: http://www.loopinsight.com/2011/05/20/playbook-returns-high-misses-sales-targets-by-90/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a823e64..d7eb0fa
@@ -1,3 +1,3 @@
-prune: no\r
+prune: no
 convert_double_br_tags: yes
 test_url: http://www.lostgarden.com/2012/04/loops-and-arcs.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/lovefm.com.txt b/inc/3rdparty/site_config/standard/lovefm.com.txt
new file mode 100755 (executable)
index 0000000..20d26c5
--- /dev/null
@@ -0,0 +1,6 @@
+title: //*[@id='title']
+date: //*[@id='date']
+body: //*[@id='desc']
+tidy: no
+
+test_url: http://www.lovefm.com/local_news.php?item=2176
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/lovetv.com.bz.txt b/inc/3rdparty/site_config/standard/lovetv.com.bz.txt
new file mode 100755 (executable)
index 0000000..a71fccd
--- /dev/null
@@ -0,0 +1,9 @@
+title: //div[contains(@class, 'post')]//h1
+body: //div[contains(@class, 'post')]
+strip: //hr
+strip_id_or_class: post-meta
+
+prune: no
+
+test_url: http://www.lovetv.com.bz/2013/06/28/recently-discovered-ancient-maya-wooden-canoe-paddle-to-be-handed-over-to-archaeology/
+test_url: http://www.lovetv.com.bz/feed/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ce5053d..f1aacb7
@@ -1,8 +1,12 @@
-title: substring-before(//title, ' · LRB')\r
-\r
-body: //div[@class="article-body indent"]\r
-\r
-date: substring-after(//p[@class="meta-info"]/a, '· ')\r
-\r
-prune: no
-test_url: http://www.lrb.co.uk/v33/n18/james-meek/its-already-happened
\ No newline at end of file
+title: //div[contains(@class, "article-body")]/hgroup/h1
+body: //div[contains(@class, "article-body")]
+
+date: substring-after(//p[@class="meta-info"]/a, '· ')
+
+author: //div[contains(@class, "article-body")]/hgroup/h2
+
+strip_id_or_class: print-hide
+strip_id_or_class: books
+
+test_url: http://www.lrb.co.uk/v33/n18/james-meek/its-already-happened
+test_url: http://www.lrb.co.uk/v36/n13/benjamin-kunkel/paupers-and-richlings
old mode 100644 (file)
new mode 100755 (executable)
index 92ccf3b..b445f5e
@@ -1,6 +1,6 @@
-title: //h2\r
-\r
-body: // div[@id='content']\r
-\r
+title: //h2
+
+body: // div[@id='content']
+
 strip: //div[@class='sidebar_wrapper']
 test_url: http://www.luminous-landscape.com/tutorials/optimizing_exposure.shtml
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/luxuo.com.txt b/inc/3rdparty/site_config/standard/luxuo.com.txt
new file mode 100755 (executable)
index 0000000..a3d5cb1
--- /dev/null
@@ -0,0 +1,4 @@
+body: //div[@class='post-content']
+prune: no
+
+test_url: http://www.luxuo.com/watches/feed
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a8af543..d1ff0b4
@@ -1,8 +1,8 @@
-title: //div[@class="story-body"]/div[@class="story-inner"]/h1\r
-body: //div[@class="story-body"]\r
-date: //p[@class='date']/strong\r
-author: substring-after(//div[@class="story-inner"]/div[@class="byline"]//span[@class='name'], 'By')\r
-\r
-strip: //div[@class="story-inner"]/div[@class="byline"]\r
+title: //div[@class="story-body"]/div[@class="story-inner"]/h1
+body: //div[@class="story-body"]
+date: //p[@class='date']/strong
+author: substring-after(//div[@class="story-inner"]/div[@class="byline"]//span[@class='name'], 'By')
+
+strip: //div[@class="story-inner"]/div[@class="byline"]
 
 test_url: http://m.bbc.co.uk/news/science-environment-19144464
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/m.douban.com.txt b/inc/3rdparty/site_config/standard/m.douban.com.txt
new file mode 100755 (executable)
index 0000000..ce9a316
--- /dev/null
@@ -0,0 +1,13 @@
+# This filter is tested on: 
+# http://m.douban.com/note/240776310/?session=6ac86d1e 
+# http://m.douban.com/note/208270705/?session=e00ec732_3433229
+
+title: //h2 
+author: //a[@class='founder'] 
+date: substring-after(//span[@class='info'],' | ') 
+body: //div[contains(@class,'entry item')] 
+
+strip://span[contains(@class,'info')] 
+
+convert_double_br_tags: yes
+test_url: http://m.douban.com/note/240776310/?session=6ac86d1e 
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/m.vanityfair.com.txt b/inc/3rdparty/site_config/standard/m.vanityfair.com.txt
new file mode 100755 (executable)
index 0000000..e47ce2c
--- /dev/null
@@ -0,0 +1,11 @@
+# Article Metadata
+title: //h1
+author: //span[@class="name"]/a
+date: //time
+
+# Content Pruning
+strip: //h5
+strip: //time
+strip: //div[@class="byline"]
+strip: //h2[@class="headline "]
+test_url: http://m.vanityfair.com/politics/2012/10/michael-lewis-profile-barack-obama
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 892b47f..9999758
@@ -1,5 +1,5 @@
-author: substring-after(//div[@class='author'],'Par ')\r
-date: //div[@class='date']\r
-body: //div[@class='content']\r
+author: substring-after(//div[@class='author'],'Par ')
+date: //div[@class='date']
+body: //div[@class='content']
 
 test_url: http://www.mac4ever.com/news/64182/icloud_les_prix_en_euros_et_en_chf/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index fd1ede7..e57bd64
@@ -1,2 +1,2 @@
-title: substring-before(//title,' « Macdrifter')
+title: substring-before(//title,' « Macdrifter')
 test_url: http://www.macdrifter.com/2012/03/instacast-on-my-mac/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 109eae4..522efb4
@@ -1,9 +1,9 @@
-# Remove news feed\r
-strip: //div[@id='news_feed_front']\r
-\r
-# Remove pull quote\r
-strip: //div[@class='field field-type-text field-field-pull-quote']\r
-\r
-# Remove login\r
+# Remove news feed
+strip: //div[@id='news_feed_front']
+
+# Remove pull quote
+strip: //div[@class='field field-type-text field-field-pull-quote']
+
+# Remove login
 strip: //div[@class='right_bar_login']
 test_url: http://macformat.techradar.com/blog/solid-state-storage-bringing-parity-back-mac-29-10-10&article=89189666
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e6bbe28..739eff4
@@ -1,5 +1,5 @@
-author: substring-before(substring-after(//div[@class='dateNews'],'par '),' le')\r
-date: substring-after(//div[@class='dateNews'],' le ')\r
-body: //div[@class='singleNews zoneApple']\r
+author: substring-before(substring-after(//div[@class='dateNews'],'par '),' le')
+date: substring-after(//div[@class='dateNews'],' le ')
+body: //div[@class='singleNews zoneApple']
 
 test_url: http://www.macgeneration.com/news/voir/211162/dropbox-encore-un-mac-et-deux-comptes-dropbox
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 47ebfd7..da7df69
@@ -1,21 +1,21 @@
-# Remove sliders\r
-strip: //*[(@class="slides_container")]\r
-strip: //div[(@id="slides_two")]\r
-\r
-# Remove tag cloud\r
-strip: //span[(@class="secao")]\r
-\r
-# Fix date article\r
-# TODO\r
-\r
-# Remove other stuff\r
-strip: //div[(@id="idc-container")]\r
-strip: //div[(@id="idc-noscript")]\r
-strip: //div[(@class="linkwithin_div")]\r
-strip: //div[(@class="navPosts")]\r
-strip: //div[(@id="lateral")]\r
-strip: //div[(@id="autor")]\r
-strip: //div[(@id="rodape")]\r
-strip: //div[(@id="post")]/h1\r
+# Remove sliders
+strip: //*[(@class="slides_container")]
+strip: //div[(@id="slides_two")]
+
+# Remove tag cloud
+strip: //span[(@class="secao")]
+
+# Fix date article
+# TODO
+
+# Remove other stuff
+strip: //div[(@id="idc-container")]
+strip: //div[(@id="idc-noscript")]
+strip: //div[(@class="linkwithin_div")]
+strip: //div[(@class="navPosts")]
+strip: //div[(@id="lateral")]
+strip: //div[(@id="autor")]
+strip: //div[(@id="rodape")]
+strip: //div[(@id="post")]/h1
 strip: //div[(@id="post")]/div[(@id="boxInformacoes")]
 test_url: http://macmagazine.com.br/2011/08/01/skype-para-ipad-esta-finalmente-chegando-a-app-store/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 76f999d..83cfb4a
@@ -1,10 +1,12 @@
-author: substring-after(//div[@class='byline'], " by ")\r
-date: substring-before(//div[@class='byline'], " by ")\r
-\r
-# set body\r
-body: //div[@class='content']\r
-\r
-# set title\r
-title: //h3\r
+author: substring-after(//div[@class='byline'], " by ")
+date: substring-before(//div[@class='byline'], " by ")
+
+# set body
+body: //div[@class='content']
+strip_id_or_class: commentsContainer
+strip_id_or_class: linkback
+
+# set title
+title: //h3
 #strip: //div[@class='content']/h3
-test_url: http://www.macrumors.com/2010/11/10/apple-debuts-new-apple-tv-and-itunes-movie-content-in-japan/
\ No newline at end of file
+test_url: http://www.macrumors.com/2010/11/10/apple-debuts-new-apple-tv-and-itunes-movie-content-in-japan/
old mode 100644 (file)
new mode 100755 (executable)
index 6e651ca..639fdd1
@@ -1,8 +1,8 @@
-strip: //*[(@id = "featured")]\r
-\r
-author:substring-after( //div[@class='posttitle']/h2[@class='author'],'by ')\r
-\r
-date: concat(//div[@class='month'],' ',//div[@class='day'])\r
-\r
-#macstories doesn't provide a year, but month/day is better than nothing\r
+strip: //*[(@id = "featured")]
+
+author:substring-after( //div[@class='posttitle']/h2[@class='author'],'by ')
+
+date: concat(//div[@class='month'],' ',//div[@class='day'])
+
+#macstories doesn't provide a year, but month/day is better than nothing
 test_url: http://www.macstories.net/news/instapaper-4-0-available-completely-redesigned-ipad-ui-new-features-search-subscription/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e8d6052..9be865a
@@ -1,4 +1,4 @@
-author://div[@class="article_username_container_full"]\r
-date://div[@class="article_username_container"]\r
+author://div[@class="article_username_container_full"]
+date://div[@class="article_username_container"]
 body://div[@class="article cms_clear restore postcontainer"]
 test_url: http://www.mactalk.com.au/content/chat-basil-shkara-developer-taptax-2452/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c3fc0e4..5c03518
@@ -1,3 +1,3 @@
-title: substring-after(substring-after(//title, '>'), '>')\r
+title: substring-after(substring-after(//title, '>'), '>')
 body: //div[@class='NewsArticleContent']
 test_url: http://www.mactechnews.de/news/index/Apple-Pressekonferenz-zum-iPhone-4-147316.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9617587..e7d9720
@@ -1,24 +1,24 @@
-title: //article//h1\r
-date: //meta[@name="date"]/@content\r
-author: //div[@class="author-name" or @class="article-byline"]/a[1]\r
-\r
-body: //section[@class="page"]\r
-\r
-# remove 'From the Lab' and 'Recent posts' text\r
-strip: //div[@class='blogLabel']\r
-\r
-# remove byline and meta info\r
-strip: //div[@class="article-meta"]\r
-strip: //div[@class="author-info"]\r
-\r
-#strip tags and categories\r
-strip: //div[@class="department"]\r
-\r
-#strip product cap links\r
-strip: //div[@class="cap-main"]\r
-strip: //div[@id="compare-lede"]\r
-\r
-prune: no\r
-\r
-# copes less well with Review pages, seems fine for News\r
+title: //article//h1
+date: //meta[@name="date"]/@content
+author: //div[@class="author-name" or @class="article-byline"]/a[1]
+
+body: //section[@class="page"]
+
+# remove 'From the Lab' and 'Recent posts' text
+strip: //div[@class='blogLabel']
+
+# remove byline and meta info
+strip: //div[@class="article-meta"]
+strip: //div[@class="author-info"]
+
+#strip tags and categories
+strip: //div[@class="department"]
+
+#strip product cap links
+strip: //div[@class="cap-main"]
+strip: //div[@id="compare-lede"]
+
+prune: no
+
+# copes less well with Review pages, seems fine for News
 test_url: http://www.macworld.com/article/163184/2011/10/the_ipod_as_an_iconic_cultural_force.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e701207..414a2f5
@@ -1,3 +1,3 @@
-body: //div[@class='NewsArticle']\r
+body: //div[@class='NewsArticle']
 
 test_url: http://mainichi.jp/select/weathernews/20110311/news/20110520k0000e040062000c.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a2d25d5..2136de3
@@ -1,28 +1,28 @@
-title: substring-before(//title, '|')\r
-body: //*[@id='content-left']\r
-\r
-# Why is this not working here?\r
-# body: //*[@id='content-left']/div[@class='content-container'][2]/div[@class='content-body']/div[@class='inner-container']/div[@class='detail']\r
-\r
-\r
-#Header\r
-strip_id_or_class: 'subHead'\r
-strip_id_or_class: 'fl_right'\r
-strip_id_or_class: 'infolink'\r
-strip_id_or_class: 'content-head'\r
-strip_id_or_class: 'tab'\r
-strip_id_or_class: 'tab-active'\r
-strip: //*[contains(@class,'trenner')]\r
-\r
-# Headline\r
-strip: //h1/*\r
-strip_id_or_class: 'font16'\r
-\r
-#Images\r
-strip_id_or_class: 'leftimage'\r
-strip_id_or_class: 'rightimage'\r
-\r
-#Comments\r
-strip: //table\r
+title: substring-before(//title, '|')
+body: //*[@id='content-left']
+
+# Why is this not working here?
+# body: //*[@id='content-left']/div[@class='content-container'][2]/div[@class='content-body']/div[@class='inner-container']/div[@class='detail']
+
+
+#Header
+strip_id_or_class: 'subHead'
+strip_id_or_class: 'fl_right'
+strip_id_or_class: 'infolink'
+strip_id_or_class: 'content-head'
+strip_id_or_class: 'tab'
+strip_id_or_class: 'tab-active'
+strip: //*[contains(@class,'trenner')]
+
+# Headline
+strip: //h1/*
+strip_id_or_class: 'font16'
+
+#Images
+strip_id_or_class: 'leftimage'
+strip_id_or_class: 'rightimage'
+
+#Comments
+strip: //table
 strip: //p/following-sibling::*[0]
 test_url: http://www.mainpost.de/ueberregional/meinung/Dioxin-Skandal-bringt-Agrarministerin-in-Bedraengnis;art9517,5920211
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6809afe..078e8d0
@@ -1,3 +1,9 @@
-tidy: no
+title: //h1[@class='entry-title']
 
-test_url: http://www.makeuseof.com/dir/kindle-it-web-pages-kindle-friendly/
\ No newline at end of file
+body: //article//header//img | //article//section[@class='post']
+
+strip: //article//section[@class='post']/aside
+strip: //article//section[@class='post']/footer
+
+test_url: http://www.makeuseof.com/tag/cool-websites-and-tools-advanced-photo-editor-keep-your-kids-stuff-online-identify-60-languages/
+test_url: http://www.makeuseof.com/tag/what-do-you-think-of-our-new-look-makeuseof-poll/
diff --git a/inc/3rdparty/site_config/standard/manager.co.th.txt b/inc/3rdparty/site_config/standard/manager.co.th.txt
new file mode 100755 (executable)
index 0000000..cd6c5c0
--- /dev/null
@@ -0,0 +1,26 @@
+title: //td[@class="headline"]
+author: //font[@color="#003366"]
+date: //td[@class="date"]
+
+strip: //td[@class="headline"]
+strip: //font[@color="#003366"]
+strip: //td[@class="date"]
+
+strip: //img[@src="images/2009/logo_en.gif"]
+
+body: //tbody[@class="body"]
+convert_double_br_tags:yes
+
+strip: //img[@src="/images/TabOver.gif"]
+strip: //td[@width="160"]
+strip: //img[@src="/images/TabUnder.gif"]
+
+strip: //td[@class="small"]
+strip: //td[@height="47"]
+
+strip: //td[@valign="middle"]
+strip: //td[@background="/images/menu_bottombg.gif"]
+strip: //img[@src="/images/sc_footer_l.gif"]
+strip: //img[@src="/images/sc_footer_m.gif"]
+strip: //img[@src="/images/sc_footer_r.gif"]
+test_url: http://www.manager.co.th/Entertainment/ViewNews.aspx?NewsID=9550000101979
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ef2e03d..4bb24a6
@@ -1,8 +1,8 @@
-tidy: no\r
-prune: no\r
-date: //article//time[@pubdate]\r
-title: //article/header/h2\r
-body: //article\r
-strip: //header\r
-test_url: http://www.marco.org/2012/09/08/businessweek-gruber\r
+tidy: no
+prune: no
+date: //article//time[@pubdate]
+title: //article/header/h2
+body: //article
+strip: //header
+test_url: http://www.marco.org/2012/09/08/businessweek-gruber
 test_url: http://www.marco.org/2012/04/24/might-upgrade-someday
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8e0e349..4ff4a9c
@@ -1,8 +1,8 @@
-date: //div[@id="main"]/p[@class="date"]\r
-author: string("Martin Fowler")\r
-body: //div[@id="main"]\r
-strip_id_or_class: date\r
-strip_id_or_class: tags\r
-strip_id_or_class: tagLabel\r
+date: //div[@id="main"]/p[@class="date"]
+author: string("Martin Fowler")
+body: //div[@id="main"]
+strip_id_or_class: date
+strip_id_or_class: tags
+strip_id_or_class: tagLabel
 strip: //div[@id="main"]/h1[1]
 test_url: http://martinfowler.com/bliki/DatabaseThaw.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 2c5a14a..b6efb6c
@@ -1,4 +1,11 @@
-title: //header[@class='entry-title']/h1
-body: //div[@class='description']
+title: //h1[@class='title']
+author: substring-after(//span[@class='author_name'], 'By ')
+date: //time
+
+body: //article
 strip: //div[@class='ytm-gallery-box']
-test_url: http://mashable.com/2011/12/05/india-wants-google-and-facebook-to-censor-user-content/
\ No newline at end of file
+strip: //div[contains(@class, 'adsense')]
+strip: //aside[contains(@class, 'social')]
+strip_id_or_class: article-topics
+
+test_url: http://mashable.com/2013/05/24/myspace-architects-rebuilding-a-brand/
diff --git a/inc/3rdparty/site_config/standard/matt.might.net.txt b/inc/3rdparty/site_config/standard/matt.might.net.txt
new file mode 100755 (executable)
index 0000000..30d585c
--- /dev/null
@@ -0,0 +1,5 @@
+title: //h1
+author: string("Matt Might")
+strip: //h1/following-sibling::div
+
+test_url: http://matt.might.net/articles/oo-cesk/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
old mode 100644 (file)
new mode 100755 (executable)
index 94f27b7..4c333aa
@@ -1,2 +1,4 @@
-strip: //div[contains(@class, 'article-tools')]\r
+strip_id_or_class: article-tools
+strip_id_or_class: pagenav
+prune: no
 test_url: http://www.medialens.org/index.php/alerts/alert-archive/2012/713-the-illusion-of-democracy.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/medium.com.txt b/inc/3rdparty/site_config/standard/medium.com.txt
new file mode 100755 (executable)
index 0000000..acf7cc9
--- /dev/null
@@ -0,0 +1,7 @@
+body: //div[contains(@class, 'post-content-inner')]
+strip_id_or_class: follow-ups
+strip_id_or_class: footer
+
+prune: no
+
+test_url: https://medium.com/p/6844c0d7893b
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/megamp3.eu.txt b/inc/3rdparty/site_config/standard/megamp3.eu.txt
new file mode 100755 (executable)
index 0000000..1b6a127
--- /dev/null
@@ -0,0 +1,8 @@
+title: //h3[@class='episode_title']
+body: //ul[contains(@class, 'episode_imgdesc')]/li/descendant::*
+prune: no
+strip://*[contains(@class, 'plugin')]
+strip://*[contains(@class, 'episode_keywords')]
+
+test_url: http://www.megamp3.eu/?p=episode&name=2013-04-19_la_filiere_progressive_431.mp3
+test_url: http://www.megamp3.eu/feed.xml
old mode 100644 (file)
new mode 100755 (executable)
index e7e1e26..a1a46f6
@@ -1,16 +1,16 @@
-# need to find a way to eliminate <span> content for "related content" without eliminating important content\r
-\r
-convert_double_br_tags: [yes]\r
-#body: //div[@id='leftside']\r
-title: //h1\r
-title: //h2\r
-Author: substring-after(//h4, 'By ')\r
-Author: substring-after(//h4, 'By: ')\r
-#Strip: //span\r
-strip_id_or_class: morefromcat\r
-strip_id_or_class: mostpopular\r
-strip_id_or_class: articlepagination\r
-strip_id_or_class: toolbar\r
-body: //div[@id='zmodcontent']\r
-single_page_link: //li[@class='onepage'] //a[contains (@href, 'printer.php')]\r
+# need to find a way to eliminate <span> content for "related content" without eliminating important content
+
+convert_double_br_tags: [yes]
+#body: //div[@id='leftside']
+title: //h1
+title: //h2
+Author: substring-after(//h4, 'By ')
+Author: substring-after(//h4, 'By: ')
+#Strip: //span
+strip_id_or_class: morefromcat
+strip_id_or_class: mostpopular
+strip_id_or_class: articlepagination
+strip_id_or_class: toolbar
+body: //div[@id='zmodcontent']
+single_page_link: //li[@class='onepage'] //a[contains (@href, 'printer.php')]
 test_url: http://www.menshealth.com/mhlists/pursuit_of_happiness/index.php
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/metafilter.com.txt b/inc/3rdparty/site_config/standard/metafilter.com.txt
new file mode 100755 (executable)
index 0000000..a2f3ada
--- /dev/null
@@ -0,0 +1,8 @@
+body: //div[contains(@class, 'copy') or contains(@class, 'comments')]
+strip_id_or_class: related
+strip: //a[. = 'Subscribe']
+strip: //h1/span[@class = 'smallcopy']
+strip: //a[@class = 'skip']
+strip: //div[@id = 'logo']
+strip: //div[contains(@class, 'comments') and contains(., 'You are not currently logged in')]
+test_url: http://www.metafilter.com/128101/Probably-more-secure-than-the-Drafts-folder-on-a-shared-Gmail-account
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mforum.cari.com.my.txt b/inc/3rdparty/site_config/standard/mforum.cari.com.my.txt
new file mode 100755 (executable)
index 0000000..c295d73
--- /dev/null
@@ -0,0 +1,6 @@
+body: (//td[starts-with(@id, 'postmessage_')])[1]
+
+prune: no
+
+test_url: http://mforum.cari.com.my/forum.php?mod=viewthread&tid=788033
+test_url: http://mforum.cari.com.my/forum.php?mod=rss&fid=265&auth=0
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index af8a7d3..abaa6a8
@@ -1,5 +1,5 @@
-title: //div[@class="blogtitle"]\r
-strip: //div[@class="blogtitle"]\r
-\r
+title: //div[@class="blogtitle"]
+strip: //div[@class="blogtitle"]
+
 author: substring-after(//span[@class="blogheader"], 'Author: ')
 test_url: http://www.mikeash.com/pyblog/friday-qa-2012-01-13-the-mac-toolbox.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3d488e1..fb4636c
@@ -1,9 +1,9 @@
-title: //div[@class='post_content']/h2\r
-date: //div[@class='dateline']\r
-body: //div[@class='entry']\r
-\r
-strip: //div[@class='closer']\r
-strip: //div[@class='navigation']\r
-strip: //div[@class='aux_pane']\r
+title: //div[@class='post_content']/h2
+date: //div[@class='dateline']
+body: //div[@class='entry']
+
+strip: //div[@class='closer']
+strip: //div[@class='navigation']
+strip: //div[@class='aux_pane']
 strip: //div[@class='aux_aux_pane']
 test_url: http://www.mikeindustries.com/blog/archive/2011/10/never-be-another
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7e43d63..773a627
@@ -1,10 +1,10 @@
-title: //*[@class="article"]/h1\r
-date: //*[@class="article"]/div[@class="date"]\r
-\r
-# strip the title and date from the article text\r
-strip: //*[@class="article"]/h1\r
-strip: //*[@class="article"]/div[@class="date"]\r
-\r
-# strip annoying <br> between metadata and article\r
+title: //*[@class="article"]/h1
+date: //*[@class="article"]/div[@class="date"]
+
+# strip the title and date from the article text
+strip: //*[@class="article"]/h1
+strip: //*[@class="article"]/div[@class="date"]
+
+# strip annoying <br> between metadata and article
 strip: //*[@class="article"]/div[@class="date"]/following-sibling::br
 test_url: http://minnesota.publicradio.org/display/web/2012/06/19/health/senators-want-health-care-ruling-on-tv/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 51a0630..dc926a6
@@ -1,5 +1,5 @@
-title: //*[@id="content-header"]/h1\r
-author: //*[contains(@class, 'byline')]/a/text()\r
-date: substring-after(//*[contains(@class, 'byline')]/text()[2], '|')\r
+title: //*[@id="content-header"]/h1
+author: //*[contains(@class, 'byline')]/a/text()
+date: substring-after(//*[contains(@class, 'byline')]/text()[2], '|')
 body: //*[contains(@class, 'node-body')]
 test_url: http://www.minnpost.com/eric-black-ink/2012/06/overturning-obamacare-would-be-game-changer-supreme-court
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4215a05..2033cf3
@@ -1,3 +1,3 @@
-# Remove extra links\r
+# Remove extra links
 strip: //*[@class='appended_html']
 test_url: http://www.mirrorfootball.co.uk/news/West-Ham-crisis-Carlton-Cole-slams-diabolical-performance-and-rips-into-Avram-Grant-lack-of-tactical-nous-following-Liverpool-mauling-article636151.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ae542aa..73c485e
@@ -1,5 +1,5 @@
-strip_id_or_class: 'book-ad'\r
-strip_id_or_class: 'bigger pullquote'\r
-strip_id_or_class: 'subscribe'\r
+strip_id_or_class: 'book-ad'
+strip_id_or_class: 'bigger pullquote'
+strip_id_or_class: 'subscribe'
 strip_id_or_class: 'blog-link'
 test_url: http://mises.org/daily/4804
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 30e8aff..765fab3
@@ -1,14 +1,14 @@
-title: //h1[@class='article-headline']\r
-date: //span[@class='timeStamp']\r
-author: substring-before(//p[@class='article-byline'], '/')\r
-body: //div[@id='article']\r
-#strip: //div[@class='inner']\r
-strip: //div[@id='article_head']\r
-strip: //p[@class='tagLine']\r
-strip: //div[@id='article_related_links']\r
-strip: //div[@id='article_related_mlb']\r
-strip: //span[@class='more']\r
-strip: //div[@class='article_component']\r
-strip: //span[@class='screen_reader']\r
-strip: //ul[@class='columnists_blurb']\r
+title: //h1[@class='article-headline']
+date: //span[@class='timeStamp']
+author: substring-before(//p[@class='article-byline'], '/')
+body: //div[@id='article']
+#strip: //div[@class='inner']
+strip: //div[@id='article_head']
+strip: //p[@class='tagLine']
+strip: //div[@id='article_related_links']
+strip: //div[@id='article_related_mlb']
+strip: //span[@class='more']
+strip: //div[@class='article_component']
+strip: //span[@class='screen_reader']
+strip: //ul[@class='columnists_blurb']
 test_url: http://mlb.mlb.com/news/article.jsp?ymd=20120403&content_id=27880830
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c4e3389..8480e30
@@ -1,14 +1,14 @@
-title: //h1[@id = 'stream_title']\r
-author: //p[@class = 'byline']/a\r
-date: //span[@class = 'datetime']\r
-\r
-body: //div[@id = 'stream_container']\r
-strip: //p[@class = 'byline']\r
-strip_id_or_class: stream_summary\r
-strip_id_or_class: social-spoken\r
-strip_id_or_class: datetime\r
-strip_id_or_class: author-mini-profile\r
-strip_id_or_class: social-tools\r
-strip_id_or_class: entry-tags\r
+title: //h1[@id = 'stream_title']
+author: //p[@class = 'byline']/a
+date: //span[@class = 'datetime']
+
+body: //div[@id = 'stream_container']
+strip: //p[@class = 'byline']
+strip_id_or_class: stream_summary
+strip_id_or_class: social-spoken
+strip_id_or_class: datetime
+strip_id_or_class: author-mini-profile
+strip_id_or_class: social-tools
+strip_id_or_class: entry-tags
 strip_id_or_class: fb-like-box
 test_url: http://mlb.sbnation.com/2011/10/17/2495845/2011-world-series-st-louis-cardinals-texas-rangers-home-field-advantage
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 41e1513..5d706f8
@@ -1,6 +1,6 @@
-title: //*[@class="header_title"]/h1\r
-date: //*[@class="field-date"]\r
-author: //*[@class="field-author"]\r
-body: //div[contains(@class, 'content')]\r
+title: //*[@class="header_title"]/h1
+date: //*[@class="field-date"]
+author: //*[@class="field-author"]
+body: //div[contains(@class, 'content')]
 
 test_url: http://www.mlssoccer.com/news/article/2012/06/19/lack-depth-front-forces-arena-alter-las-formation
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 918fae3..50d8a24
@@ -1,5 +1,5 @@
-title: //h1\r
-body: //div[@id = 'article_content']/div[contains(@class,'article')]\r
-author: //sub[@class = 'article_promoted_text']/a[starts-with(@href, 'member')]\r
+title: //h1
+body: //div[@id = 'article_content']/div[contains(@class,'article')]
+author: //sub[@class = 'article_promoted_text']/a[starts-with(@href, 'member')]
 date: //div[@class = 'article_username_container']
 test_url: http://www.mmo-champion.com/content/2688-Other-Press-Tour-Interviews-A-Night-in-Mists-of-Pandaria-Blue-Posts-MoP-Screenshot
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ddfe6fa..d3576df
@@ -1,11 +1,11 @@
-tidy: no\r
-author: //div[@id="above-content"]//img/@alt | //div[@class="comment-auth"]/span[1]/a/text()\r
-date: //div[@class="comment-auth"]/div | //div[@class="comment-auth"]/span[2]\r
-body: //div[@class="node"]\r
-\r
-strip_id_or_class: vertical-social-bar\r
-strip_id_or_class: blogs_paginator\r
-strip_id_or_class: horizontal-social-links\r
-strip_id_or_class: servicelinksdiv\r
+tidy: no
+author: //div[@id="above-content"]//img/@alt | //div[@class="comment-auth"]/span[1]/a/text()
+date: //div[@class="comment-auth"]/div | //div[@class="comment-auth"]/span[2]
+body: //div[@class="node"]
+
+strip_id_or_class: vertical-social-bar
+strip_id_or_class: blogs_paginator
+strip_id_or_class: horizontal-social-links
+strip_id_or_class: servicelinksdiv
 
 test_url: http://www.mnn.com/green-tech/research-innovations/blogs/5-breakthroughs-that-will-make-solar-power-cheaper-than-coal
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ba15895..8a3f939
@@ -1,14 +1,14 @@
-title: //title\r
-\r
-author: //div[@class="author"]\r
-\r
-strip_id_or_class: 'header'\r
-strip_id_or_class: 'cikk_ajanlo'\r
-strip_id_or_class: 'buttons'\r
-strip_id_or_class: 'related'\r
-strip_id_or_class: 'adbox ad_cikk_kozepre'\r
-strip_id_or_class: 'cikk-cimkek'\r
-strip_id_or_class: 'cikk_ertekeles'\r
-\r
+title: //title
+
+author: //div[@class="author"]
+
+strip_id_or_class: 'header'
+strip_id_or_class: 'cikk_ajanlo'
+strip_id_or_class: 'buttons'
+strip_id_or_class: 'related'
+strip_id_or_class: 'adbox ad_cikk_kozepre'
+strip_id_or_class: 'cikk-cimkek'
+strip_id_or_class: 'cikk_ertekeles'
+
 strip_comments: yes
 test_url: http://mno.hu/grund/a-gumibottal-hadonaszo-rendort-joval-konnyebb-utalni-1055351
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mobile.nytimes.com.txt b/inc/3rdparty/site_config/standard/mobile.nytimes.com.txt
new file mode 100755 (executable)
index 0000000..c60252e
--- /dev/null
@@ -0,0 +1,4 @@
+title: //h1[contains(@class, 'headline')]
+body: //article[contains(@class, 'full-art')]
+strip_id_or_class: image-credit
+test_url: http://mobile.nytimes.com/2014/06/19/opinion/gail-collins-romney-and-the-2016-contenders-huddle.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d5d8103..6ffcd18
@@ -1,5 +1,5 @@
-title: //h2[@class="article_title"]\r
-strip: //a[@class="houseAdLink"]\r
-strip: //h1\r
+title: //h2[@class="article_title"]
+strip: //a[@class="houseAdLink"]
+strip: //h1
 strip: //div[@class="more_articles"]
 test_url: http://mobile.slate.com/rss.jsp?rssid=411&item=http%3a%2f%2fwww.slate.com%2fdefault.aspx%3fdisplaymode%3d201%26id%3d2293749%26device%3drss
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a1cc531..82da4ae
@@ -1,11 +1,11 @@
-body: //div[@class='post uncustomized-post-template']\r
-\r
-# remove duplicate of post title, which is a link\r
-strip: //h3[@class='post-title']\r
-\r
-# remove permalink and timestamp, which isn't useful as it's a time with no date\r
-strip: //span[@class='post-timestamp']\r
-\r
-# remove labels (tags)\r
+body: //div[@class='post uncustomized-post-template']
+
+# remove duplicate of post title, which is a link
+strip: //h3[@class='post-title']
+
+# remove permalink and timestamp, which isn't useful as it's a time with no date
+strip: //span[@class='post-timestamp']
+
+# remove labels (tags)
 strip: //span[@class='post-labels']
 test_url: http://mobileopportunity.blogspot.com/2010/12/rims-q3-financials-tale-of-two.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4c93d0c..306ef8d
@@ -1,8 +1,8 @@
-title: //meta[@property="og:title"]/@content\r
-author: //meta[@name="author"]/@content\r
-date: //span[@class='date1']\r
-body: //div[@id='newsimage'] | //div[@id='bodytext']\r
-tidy: no\r
-prune: no\r
-\r
+title: //meta[@property="og:title"]/@content
+author: //meta[@name="author"]/@content
+date: //span[@class='date1']
+body: //div[@id='newsimage'] | //div[@id='bodytext']
+tidy: no
+prune: no
+
 test_url: http://www.modernghana.com/news/323765/1/039ghost039-teachers-removed-salaries-allowances-p.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a0d1628..d5e03d2
@@ -1,24 +1,24 @@
-title: //meta[@property="og:title"]/@content\r
-title: //h1[@class='storyheadline']\r
-author: //meta[@name="AUTHOR"]/@content\r
-date: //span[@class='cnnDateStamp']\r
-date: //meta[@name="DATE"]/@content\r
-body: //div[@id='storytext' or @class='storytext']\r
-\r
-strip_id_or_class: ie_column\r
-strip_id_or_class: sharewidgets\r
-strip_image_src: bug.gif\r
-\r
-strip: //div[@class="hed_side"]\r
-strip: //span[@class="byline"]\r
-strip: //a[@class="soc-twtname"]\r
-strip: //span[@class="cnnDateStamp"]\r
-strip: //div[@class="storytimestamp"]\r
-strip: //div[@class="cnnCol_side"]\r
-\r
-prune: no\r
-tidy: no\r
-\r
-test_url: http://money.cnn.com/2011/03/15/news/companies/steve_jobs_thought_process.fortune/index.htm?section=money_topstories&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fmoney_topstories+%28Top+Stories%29\r
-test_url: http://money.cnn.com/2012/01/27/markets/markets_newyork/index.htm\r
+title: //meta[@property="og:title"]/@content
+title: //h1[@class='storyheadline']
+author: //meta[@name="AUTHOR"]/@content
+date: //span[@class='cnnDateStamp']
+date: //meta[@name="DATE"]/@content
+body: //div[@id='storytext' or @class='storytext']
+
+strip_id_or_class: ie_column
+strip_id_or_class: sharewidgets
+strip_image_src: bug.gif
+
+strip: //div[@class="hed_side"]
+strip: //span[@class="byline"]
+strip: //a[@class="soc-twtname"]
+strip: //span[@class="cnnDateStamp"]
+strip: //div[@class="storytimestamp"]
+strip: //div[@class="cnnCol_side"]
+
+prune: no
+tidy: no
+
+test_url: http://money.cnn.com/2011/03/15/news/companies/steve_jobs_thought_process.fortune/index.htm?section=money_topstories&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fmoney_topstories+%28Top+Stories%29
+test_url: http://money.cnn.com/2012/01/27/markets/markets_newyork/index.htm
 test_url: http://money.cnn.com/2012/05/13/technology/yahoo-ceo-out-rumor/index.htm
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 061a8d5..55026ee
@@ -1,13 +1,13 @@
-strip_image_src: menu\r
-strip_image_src: templates\r
-strip: //div/a\r
-strip: //div/b\r
-strip: //div/strong\r
-strip: //td[@width='30%']\r
-strip: //br[1]\r
-strip: //br[2]\r
-strip: //br[3]\r
-strip: //br[4]\r
-strip: //a[@href='http://www.moonsault.de/newzboard/index.php?act=home']\r
+strip_image_src: menu
+strip_image_src: templates
+strip: //div/a
+strip: //div/b
+strip: //div/strong
+strip: //td[@width='30%']
+strip: //br[1]
+strip: //br[2]
+strip: //br[3]
+strip: //br[4]
+strip: //a[@href='http://www.moonsault.de/newzboard/index.php?act=home']
 strip_id_or_class: cse-branding-right
 test_url: http://www.moonsault.de/newzboard/index.php?news=22321&act=previous
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a7e59c3..780cca4
@@ -1,7 +1,7 @@
-title: //h1[@class='print-title']\r
-body: //div[@class='print-submitted' or @class='print-created' or @class='print-content']\r
-prune: no\r
-\r
-single_page_link: //li[@class='print']/a\r
-\r
+title: //h1[@class='print-title']
+body: //div[@class='print-submitted' or @class='print-created' or @class='print-content']
+prune: no
+
+single_page_link: //li[@class='print']/a
+
 test_url: http://moreintelligentlife.com/content/places/paul-markillie/they-trash-cars-dont-they
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6faf1c9..c6312c0
@@ -1,5 +1,5 @@
-author: //span[@class="author"]/a\r
-date: //span[@class="date"]\r
-body: //div[@class="story-content"]\r
-strip: //aside\r
+author: //span[@class="author"]/a
+date: //span[@class="date"]
+body: //div[@class="story-content"]
+strip: //aside
 test_url: http://motherboard.vice.com/blog/you-can-carry-a-copy-of-the-pirate-bay-in-your-pocket
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a9d9195..a34adff
@@ -1,7 +1,7 @@
-title: //h2[contains(@class,'post_headline')]\r
-body: //div[@class='entry']\r
-convert_double_br_tags: yes\r
-strip_image_src: _selected.gif\r
-strip_id_or_class: addthis_\r
+title: //h2[contains(@class,'post_headline')]
+body: //div[@class='entry']
+convert_double_br_tags: yes
+strip_image_src: _selected.gif
+strip_id_or_class: addthis_
 strip: //a[contains(@href,'feedburner.com')]
 test_url: http://mothering.com/all-things-mothering/inspiration/motherhood-brings-me-down
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d58c7d2..851feb7
@@ -1,15 +1,15 @@
-title: //h1\r
-body: //div[@id = 'content-area']\r
-next_page_link: //div[@class='node-pager']/a[contains(@class, 'next')]\r
-tidy: no\r
-author: //p[contains(@class, 'byline')]/a\r
-\r
-strip_id_or_class: node-header\r
-strip_id_or_class: hdr-tools\r
-strip_id_or_class: node-body-break\r
-strip_id_or_class: pullquote\r
-strip_id_or_class: node-pager\r
-strip_id_or_class: author-bio\r
-strip_id_or_class: node-footer\r
+title: //h1
+body: //div[@id = 'content-area']
+next_page_link: //div[@class='node-pager']/a[contains(@class, 'next')]
+tidy: no
+author: //p[contains(@class, 'byline')]/a
+
+strip_id_or_class: node-header
+strip_id_or_class: hdr-tools
+strip_id_or_class: node-body-break
+strip_id_or_class: pullquote
+strip_id_or_class: node-pager
+strip_id_or_class: author-bio
+strip_id_or_class: node-footer
 
 test_url: http://motherjones.com/politics/2012/02/mac-mcclelland-free-online-shipping-warehouses-labor
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/movie.douban.com.txt b/inc/3rdparty/site_config/standard/movie.douban.com.txt
new file mode 100755 (executable)
index 0000000..eae211e
--- /dev/null
@@ -0,0 +1,12 @@
+# This filter is tested on:
+# http://movie.douban.com/review/1062013/
+
+title: //span[contains(@property, 'v:summary')]
+author: //span[contains(@property, 'v:reviewer')]
+date://span[contains(@property, 'v:dtreviewed')]
+body://div[contains(@class, 'main-bd')]
+
+strip://img[contains(@class,'rating')]|//img[contains(@class,'review-stat')]
+convert_double_br_tags: yes
+test_url: http://movie.douban.com/review/1062013/
+test_url: http://movie.douban.com/review/1021870/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f4f2045..7a28427
@@ -1,3 +1,3 @@
-body: //div[class="mainBody"]\r
+body: //div[class="mainBody"]
 footnotes: no
 test_url: http://msdn.microsoft.com/en-us/library/hh542796(VS.103).aspx
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ad89cda..f008d2d
@@ -1,21 +1,21 @@
-title: //title\r
-author: //div[@id='byline']\r
-\r
-date: //div[contains(@class,'timestamp')]/abbr/text()\r
-\r
-body: //div[@id='intellitTXT']\r
-\r
-strip: //div[@id='byline']\r
-strip: //div[contains(@class,'timestamp')]\r
-strip: //div[contains(@class, 'ad-label')]\r
-strip: //div[contains(@class, 'ad-break')]\r
-strip: //span[contains(@class, 'x-video')]\r
-strip: //span[contains(@class, 'inline')]\r
-strip: //div[contains(@class, 'video')]\r
-strip: //div[contains(@class, 'discuss')]\r
-strip: //div[@id='most-popular']\r
-strip: //div[contains(@class,'drawer')]\r
-strip: //*[contains(@class, 'hide')]\r
-\r
+title: //title
+author: //div[@id='byline']
+
+date: //div[contains(@class,'timestamp')]/abbr/text()
+
+body: //div[@id='intellitTXT']
+
+strip: //div[@id='byline']
+strip: //div[contains(@class,'timestamp')]
+strip: //div[contains(@class, 'ad-label')]
+strip: //div[contains(@class, 'ad-break')]
+strip: //span[contains(@class, 'x-video')]
+strip: //span[contains(@class, 'inline')]
+strip: //div[contains(@class, 'video')]
+strip: //div[contains(@class, 'discuss')]
+strip: //div[@id='most-popular']
+strip: //div[contains(@class,'drawer')]
+strip: //*[contains(@class, 'hide')]
+
 footnotes: no
 test_url: http://www.msnbc.msn.com/id/44748412/ns/business-world_business/#.TolUv-vfDbE
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/myfoxatlanta.com.txt b/inc/3rdparty/site_config/standard/myfoxatlanta.com.txt
new file mode 100755 (executable)
index 0000000..8a7590a
--- /dev/null
@@ -0,0 +1,5 @@
+body: //div[@id='WNStoryBody']
+author: //div[@id='WNStoryByline']
+prune: no
+
+test_url: http://www.myfoxatlanta.com/category/233685/local-news?clienttype=rss
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 1a35b4f..9ad8ce0
@@ -1,4 +1,4 @@
-body: //div[@class="col1"]//div[@class="photo"] | //div[@class="detail"]/p[@class="fontStyle21"] | //div[@class="story last"]\r
-tidy: no\r
-\r
+body: //div[@class="col1"]//div[@class="photo"] | //div[@class="detail"]/p[@class="fontStyle21"] | //div[@class="story last"]
+tidy: no
+
 test_url: http://www.myfoxboston.com/dpp/news/local/transit-police-say-woman-spat-on-mbta-bus-driver-2010611
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8b99d22..956be1e
@@ -1,12 +1,12 @@
-title: //h2[contains(@class, 'name')]\r
-body: //div[@class='printFullPageContentContainer']//div[contains(@class, 'recipe')]\r
-\r
-strip_id_or_class: photoBy\r
-strip_id_or_class: link\r
-\r
-single_page_link: //li[@class='print']/a[contains(@href, '/print/')]\r
-\r
-prune: no\r
-tidy: no\r
-\r
+title: //h2[contains(@class, 'name')]
+body: //div[@class='printFullPageContentContainer']//div[contains(@class, 'recipe')]
+
+strip_id_or_class: photoBy
+strip_id_or_class: link
+
+single_page_link: //li[@class='print']/a[contains(@href, '/print/')]
+
+prune: no
+tidy: no
+
 test_url: http://www.myrecipes.com/recipe/hummingbird-cake-10000000387218/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d95530f..7df1112
@@ -1,8 +1,8 @@
-title: //div[@class='address']/span\r
-author: substring-before(//span[@class='credits'],',')\r
-date: //div[@class='promodatepress']/span\r
-body: //div[@class='default_style_wrap']\r
-strip: //div[@class='text_adjust']\r
-strip: //div[@class='skiplink']\r
+title: //div[@class='address']/span
+author: substring-before(//span[@class='credits'],',')
+date: //div[@class='promodatepress']/span
+body: //div[@class='default_style_wrap']
+strip: //div[@class='text_adjust']
+strip: //div[@class='skiplink']
 strip: //h2
 test_url: http://www.nasa.gov/mission_pages/kepler/news/kepler-21b.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0b722d3..2645d40
@@ -1,10 +1,10 @@
-date://span[contains(@class,'date')]\r
-\r
-body://div[contains(@class,'contWarp')]\r
-\r
-strip://div[contains(@class,'keyWord')]\r
-strip://div[contains(@class,'submitComt')]\r
-strip://div[contains(@class,'cmts')]\r
-strip://div[contains(@class,'notice')]\r
+date://span[contains(@class,'date')]
+
+body://div[contains(@class,'contWarp')]
+
+strip://div[contains(@class,'keyWord')]
+strip://div[contains(@class,'submitComt')]
+strip://div[contains(@class,'cmts')]
+strip://div[contains(@class,'notice')]
 strip://div[contains(@class,'part pt-second')]
 test_url: http://www.nbweekly.com/news/china/201203/29316.aspx
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 45136a2..e7cc431
@@ -1,17 +1,17 @@
-#host configuration should be http://www.neh.gov/news/humanities/\r
-\r
-\r
-#meta data \r
-title:substring-after(substring-after(//title,':'),':')\r
-author:substring-after(//h2[@class = 'subHead'],'By')\r
-date:substring-before(substring-after(//title,':'),':')\r
-\r
-#img and caption handling\r
-wrap_in(small)://div[@id = 'mainContent']/table/descendant::p/descendant::text()\r
-wrap_in(fieldset)://div[@id = 'mainContent']/table\r
-\r
-# clean up\r
-strip: //table[@class = 'marginpaddingTop']\r
-strip: //h2[@class = 'subHead']\r
+#host configuration should be http://www.neh.gov/news/humanities/
+
+
+#meta data 
+title:substring-after(substring-after(//title,':'),':')
+author:substring-after(//h2[@class = 'subHead'],'By')
+date:substring-before(substring-after(//title,':'),':')
+
+#img and caption handling
+wrap_in(small)://div[@id = 'mainContent']/table/descendant::p/descendant::text()
+wrap_in(fieldset)://div[@id = 'mainContent']/table
+
+# clean up
+strip: //table[@class = 'marginpaddingTop']
+strip: //h2[@class = 'subHead']
 
 test_url: http://www.neh.gov/news/humanities/2011-11/IslamicScholar.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 564d549..2089fc3
@@ -1,3 +1,3 @@
-title: //*[@class="header_title"]/h1\r
+title: //*[@class="header_title"]/h1
 body: //div[contains(@class, 'content')]
 test_url: http://neomoney.co/personal/expatriate-and-migrant-loans/expatriate-loans/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4e6d66d..b7fedbf
@@ -1,7 +1,7 @@
-title: //div[@class='content-title']\r
-#date: substring-after(//div[@class='dernek-text-under'],'Posted on')\r
-body: //div[@class='content-item']\r
-next_page_link: //li[@class='next']/a\r
-convert_double_br_tags: yes\r
+title: //div[@class='content-title']
+#date: substring-after(//div[@class='dernek-text-under'],'Posted on')
+body: //div[@class='content-item']
+next_page_link: //li[@class='next']/a
+convert_double_br_tags: yes
 
 test_url: http://www.net-security.org/article.php?id=1732
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8688544..dcea047
@@ -1,16 +1,16 @@
-title: //h1\r
-author: //div[@class="submitted"]/span\r
-\r
-# seems like this should work, but nothing is returned. Issue with xpath parser?\r
-date: //div[@class="submitted"]/time\r
-\r
-body: //div[@id="main-content"]\r
-\r
-strip_comments: no\r
-\r
-strip: //h1\r
-strip: //div[@class="submitted"]\r
-strip: //dd[@class="profile-avatar"]\r
-strip: //div[@class="author-profile"]/dl/dt[1]\r
+title: //h1
+author: //div[@class="submitted"]/span
+
+# seems like this should work, but nothing is returned. Issue with xpath parser?
+date: //div[@class="submitted"]/time
+
+body: //div[@id="main-content"]
+
+strip_comments: no
+
+strip: //h1
+strip: //div[@class="submitted"]
+strip: //dd[@class="profile-avatar"]
+strip: //div[@class="author-profile"]/dl/dt[1]
 strip: //div[@id="right-col"]
 test_url: http://www.netmagazine.com/opinions/nielsen-wrong-mobile
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 87dc3cd..7fa43fd
@@ -1,6 +1,6 @@
-title: //h1[@class='entry-title']\r
-author: //a[@ref='author']\r
-date: //span[@class='entry-date']\r
-body: //div[@class='entry-content']\r
+title: //h1[@class='entry-title']
+author: //a[@ref='author']
+date: //span[@class='entry-date']
+body: //div[@class='entry-content']
 
 test_url: http://netzpolitik.org/2011/buch-generation-facebook/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/newleftproject.org.txt b/inc/3rdparty/site_config/standard/newleftproject.org.txt
new file mode 100755 (executable)
index 0000000..d9af99d
--- /dev/null
@@ -0,0 +1,3 @@
+title: //div[contains(@class, 'article_header')]//h3
+
+test_url: http://www.newleftproject.org/index.php/site/article_comments/do_we_need_a_facebook_of_the_left
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ab76684..f17ecdc
@@ -1,9 +1,9 @@
-title: //div[@id="maincontent"]/h1\r
-body: //div[@id="maincontent"]\r
-date: //div[@id="maincontent"]/p[2]\r
-author: //ul[@id="contributors"]/li/p/b\r
-\r
-strip: //p[@*]\r
-strip: //h1\r
+title: //div[@id="maincontent"]/h1
+body: //div[@id="maincontent"]
+date: //div[@id="maincontent"]/p[2]
+author: //ul[@id="contributors"]/li/p/b
+
+strip: //p[@*]
+strip: //h1
 strip: //div[@id="maincontent"]/div
 test_url: http://newmatilda.com/2011/07/22/turnbull-makes-sense-climate
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/newrepublic.com.txt b/inc/3rdparty/site_config/standard/newrepublic.com.txt
new file mode 100755 (executable)
index 0000000..039f038
--- /dev/null
@@ -0,0 +1,8 @@
+author: //span[@class="authors"]
+date: //span[@class="date"]
+body: //div[@class="primary"]
+
+strip: //div[@id="controls"]
+strip: //div[@id="read-next"]
+
+test_url: http://www.newrepublic.com/article/112731/moocs-will-online-education-ruin-university-experience
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 1f1e5d3..2b35270
@@ -1,8 +1,8 @@
-title: //div[@id="main-content"]//h2\r
-\r
-author: //div[@id="main-content"]//span[@class="authors"]\r
-\r
-date: //div[@id="main-content"]//span[@class="timestamp"]\r
-\r
+title: //div[@id="main-content"]//h2
+
+author: //div[@id="main-content"]//span[@class="authors"]
+
+date: //div[@id="main-content"]//span[@class="timestamp"]
+
 body: //div[@id="main-content"]//div[@class="content"]
 test_url: http://www.news-gazette.com/news/business/economy/2011-08-08/ibm-drops-out-blue-waters-project.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b7ab224..78af70f
@@ -1,12 +1,12 @@
-#This should apply to *.cnet.com. Not just news.cnet.com.\r
-title: //h1\r
-author: //img[@class="mugshot"]/@alt\r
-strip: //h1\r
-strip_id_or_class: breadcrumb\r
-strip: //p[@id="introP"]\r
-strip: //div[@class="postByline"]\r
-strip: //div[@class="editorBio"]\r
-strip: //div[@class="inline-slideshow"]\r
-strip: //div[@class="related"]\r
+#This should apply to *.cnet.com. Not just news.cnet.com.
+title: //h1
+author: //img[@class="mugshot"]/@alt
+strip: //h1
+strip_id_or_class: breadcrumb
+strip: //p[@id="introP"]
+strip: //div[@class="postByline"]
+strip: //div[@class="editorBio"]
+strip: //div[@class="inline-slideshow"]
+strip: //div[@class="related"]
 body: //div[@class="postBody txtWrap"]
 test_url: http://news.cnet.com/8301-27076_3-57405303-248/apple-ipad-charging-fine-keep-it-plugged-in/?tag=mncol;posts
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3ed1dc8..629bc91
@@ -1,8 +1,8 @@
-title://div[@class="content_detail"]/h1\r
-\r
-author://div[@class="author"]/strong\r
-\r
-date:substring-before(substring-after(//div[@class="content_detail"]/span[@class="date"], ','), ' WIB')\r
-\r
+title://div[@class="content_detail"]/h1
+
+author://div[@class="author"]/strong
+
+date:substring-before(substring-after(//div[@class="content_detail"]/span[@class="date"], ','), ' WIB')
+
 body://div[@class="text_detail"]
 test_url: http://news.detik.com/read/2012/05/22/225531/1922307/10/menkeu-cek-soal-lolosnya-315-kg-sabu-dari-bea-cukai
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6fc8613..5754d47
@@ -1,9 +1,9 @@
-body: //div[@id='main']\r
-strip: //div[@id='sbs']\r
-strip: //div[@id='fsizeSwitch']\r
-strip: //div[@id='googleAd']\r
-strip: //div[@id='detailFoot']\r
-strip_image_src: counter?key\r
-convert_double_br_tags: yes\r
+body: //div[@id='main']
+strip: //div[@id='sbs']
+strip: //div[@id='fsizeSwitch']
+strip: //div[@id='googleAd']
+strip: //div[@id='detailFoot']
+strip_image_src: counter?key
+convert_double_br_tags: yes
 
 test_url: http://news.kanaloco.jp/localnews/article/1105200018/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ded680f..1df4731
@@ -1,11 +1,11 @@
-title: //h2[@class="lyt-hdg-02-04"]\r
-\r
-author: //div[@class="lyt-namearea"]/a\r
-\r
-date: //div[@class="lyt-namearea"]/text()\r
-\r
-body: //div[@class="articleContent"]\r
-\r
-strip: //div[@id="tab-aside"]\r
+title: //h2[@class="lyt-hdg-02-04"]
+
+author: //div[@class="lyt-namearea"]/a
+
+date: //div[@class="lyt-namearea"]/text()
+
+body: //div[@class="articleContent"]
+
+strip: //div[@id="tab-aside"]
 
 test_url: http://news.mynavi.jp/articles/2011/12/07/nico/index.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b60deea..3b1d3cc
@@ -1,11 +1,11 @@
-single_page_link: //div[@id='content']//p[@class='readMore']/a\r
-\r
-title: //div[@class='hidden offscreen']/h2\r
-body: //div[@id="storyText"]\r
-move_into(//div[@id='storyText']): //div[@class='fact']\r
-strip: //small[@class='credit']\r
-strip: //small[@class='caption']\r
-date: substring-after(//div[@id='storyMeta']//p[@class='date'],'Publiziert am')\r
-strip: //p[@class='toplink']\r
+single_page_link: //div[@id='content']//p[@class='readMore']/a
+
+title: //div[@class='hidden offscreen']/h2
+body: //div[@id="storyText"]
+move_into(//div[@id='storyText']): //div[@class='fact']
+strip: //small[@class='credit']
+strip: //small[@class='caption']
+date: substring-after(//div[@id='storyMeta']//p[@class='date'],'Publiziert am')
+strip: //p[@class='toplink']
 
 test_url: http://news.orf.at/stories/2084731/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 743245f..1d54733
@@ -1,9 +1,9 @@
-body: //article\r
-title: //h1\r
-author: //span[@class='b-article-source-dropdown']\r
-strip: //span[@class='b-article-photo-incut__source']\r
-strip: //a[@class='b-read-more b-read-more_bottom']\r
-\r
-\r
+body: //article
+title: //h1
+author: //span[@class='b-article-source-dropdown']
+strip: //span[@class='b-article-photo-incut__source']
+strip: //a[@class='b-read-more b-read-more_bottom']
+
+
 tidy:no
 test_url: http://news.rambler.ru/12972208/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c80c332..ba4db82
@@ -1,4 +1,4 @@
-body: //div[@class='main']/div[@class='item']\r
-strip: //div[@class='right']\r
-\r
+body: //div[@class='main']/div[@class='item']
+strip: //div[@class='right']
+
 test_url: http://news.techmeme.com/110516/fh-rip
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 5ee0404..fc1739c
@@ -1,12 +1,12 @@
-title: //meta[@property='og:title']/@content\r
-title: //h1[@class='headline']\r
-author: //cite[contains(@class, 'byline') and contains(@class, 'vcard')]//span[@class='fn']\r
-date: //cite[contains(@class, 'byline') and contains(@class, 'vcard')]//abbr/@title\r
-body: //div[@id='mediaarticlelead']//a[@class='media'] | //div[contains(@class,'yom-art-content')]\r
-#strip: //cite/abbr\r
-strip_id_or_class: action\r
-strip_id_or_class: prefetch\r
-tidy: no\r
-prune: no\r
+title: //meta[@property='og:title']/@content
+title: //h1[@class='headline']
+author: //cite[contains(@class, 'byline') and contains(@class, 'vcard')]//span[@class='fn']
+date: //cite[contains(@class, 'byline') and contains(@class, 'vcard')]//abbr/@title
+body: //div[@id='mediaarticlelead']//a[@class='media'] | //div[contains(@class,'yom-art-content')]
+#strip: //cite/abbr
+strip_id_or_class: action
+strip_id_or_class: prefetch
+tidy: no
+prune: no
 
 test_url: http://news.yahoo.com/cold-la-nina-winter-forecast-west-coast-183535067.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0b01f8a..f7441d1
@@ -1,3 +1,3 @@
-strip_comments: no\r
+strip_comments: no
 strip: //a[. = 'reply']
 test_url: http://news.ycombinator.com/item?id=1516461
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/news.zing.vn.txt b/inc/3rdparty/site_config/standard/news.zing.vn.txt
new file mode 100755 (executable)
index 0000000..af81e90
--- /dev/null
@@ -0,0 +1,3 @@
+body://div[@class="newsdetail_wrapper"]
+strip://div[@class="more_news"]
+test_url: http://news.zing.vn/xa-hoi/s-phat-nang-xe-may-di-duong-tren-cao-ha-noi/a280838.html#home_noibat1
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/news247.gr.txt b/inc/3rdparty/site_config/standard/news247.gr.txt
new file mode 100755 (executable)
index 0000000..87637be
--- /dev/null
@@ -0,0 +1,6 @@
+title: //h1[@class='title']
+
+body: //img[@id='relPicsMainPic'] | //div[contains(@class, 'storyContent')]
+
+test_url: http://news247.gr/eidiseis/katatheseis_fwtia_htan_apofasismenoi_akomh_kai_na_afairesoyn_zwes_an_thewrousan_oti_to_thuma_htan_antipalos_toys.2433351.html
+test_url: http://news247.gr/?widget=rssfeed&view=feed&contentId=38291
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0500890..5eb0ea4
@@ -1,9 +1,9 @@
-date: //meta[@name='og:article:published_time']/@value\r
-\r
-body: //div[@class='itemIntroText' or @class='itemImageBlock' or @class='itemFullText']\r
-\r
-strip_id_or_class: itemImageGallery\r
-\r
-prune: no\r
-\r
+date: //meta[@name='og:article:published_time']/@value
+
+body: //div[@class='itemIntroText' or @class='itemImageBlock' or @class='itemFullText']
+
+strip_id_or_class: itemImageGallery
+
+prune: no
+
 test_url: http://www.newsbomb.gr/gossip/story/257234/i-proin-moy-protimoyse-na-serfarei-apo-to-na-kanoyme-sex
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index eb7d335..1a99031
@@ -1,12 +1,12 @@
-title: //h1\r
-body: (//div[@class='articleImg']//img)[1] | //p[contains(@class, 'commentTextArticle') or contains(@class, 'articlePublished')] | //div[@id='articleLeftContent']\r
-author: //div[@class='byline']//a[contains(@href, '/user/')]\r
-\r
-strip_id_or_class: facts\r
-strip_id_or_class: articleBlogsHolder\r
-strip_id_or_class: byline\r
-\r
-prune: no\r
-tidy: no\r
-\r
+title: //h1
+body: (//div[@class='articleImg']//img)[1] | //p[contains(@class, 'commentTextArticle') or contains(@class, 'articlePublished')] | //div[@id='articleLeftContent']
+author: //div[@class='byline']//a[contains(@href, '/user/')]
+
+strip_id_or_class: facts
+strip_id_or_class: articleBlogsHolder
+strip_id_or_class: byline
+
+prune: no
+tidy: no
+
 test_url: http://www.newsmill.se/artikel/2012/05/06/medielogiken-v-ger-tyngre-n-reportrarnas-sikter
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 860ad66..247bbeb
@@ -1,10 +1,10 @@
-body: //div[@class='right']//div[@class='articles']\r
-author: //div[@id='artinfo']//a[contains(@href, '/author/')]\r
-strip: //div[@id='artinfo']\r
-strip: //table[//a[contains(@href, 'twitter.com')]]\r
-strip_id_or_class: twitter\r
-\r
-prune: no\r
-tidy: no\r
-\r
+body: //div[@class='right']//div[@class='articles']
+author: //div[@id='artinfo']//a[contains(@href, '/author/')]
+strip: //div[@id='artinfo']
+strip: //table[//a[contains(@href, 'twitter.com')]]
+strip_id_or_class: twitter
+
+prune: no
+tidy: no
+
 test_url: http://www.newsunspun.org/eotn/bbc-headline-change-iran-goes-from-not-building-to-undecided-on-nuclear-bomb
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/newsweek.com.txt b/inc/3rdparty/site_config/standard/newsweek.com.txt
new file mode 100755 (executable)
index 0000000..565648b
--- /dev/null
@@ -0,0 +1,6 @@
+body: //div[@class = 'article-body']
+title: //h1[@class = 'article-title']
+strip: //aside
+
+test_url: http://www.newsweek.com/day-steve-mcqueen-met-his-new-nazi-neighbor-keith-moon-229741
+test_url: http://www.newsweek.com/2014/06/13/how-greylock-partners-finds-next-facebook-253329.html
diff --git a/inc/3rdparty/site_config/standard/newswise.com.txt b/inc/3rdparty/site_config/standard/newswise.com.txt
new file mode 100755 (executable)
index 0000000..10120ea
--- /dev/null
@@ -0,0 +1,17 @@
+prune: no
+tidy: no
+
+title: //h1/a[2]
+body: //div[@id="main"]
+author: //span[@id="articlesource"]
+date: //span[contains(@class, 'releasedate')]
+
+strip: //div[@class="inst-logo"]
+strip: //h1[1]
+
+strip_id_or_class: addthis
+strip_id_or_class: released
+strip_id_or_class: skiptranslate
+strip_id_or_class: flash
+
+test_url: http://www.newswise.com/articles/first-heat-wave-of-season-puts-elderly-at-risk
old mode 100644 (file)
new mode 100755 (executable)
index 5624aa8..950324a
@@ -1,10 +1,11 @@
-title: //h1[@id='articlehed'] | //h2[@id="articleintro"]\r
-body: //div[@id='articletext']\r
-\r
-strip: //ul[@id="bc"] | //div[@id="yrail"] | //div[@class="entry-keywords"] | //div[@class="entry-categories"] | //div[@class="socialUtils"] | //div[@id="footer"]\r
-\r
-date: //h4[@id='articleauthor']/span[@class='dd dds']\r
-date: //div[@id="pagebody"]/div[@class='hentry entry']/div[@class='published']\r
-\r
-single_page_link: //div[@class='paginationViewSinglePage']/a\r
-test_url: http://www.newyorker.com/online/blogs/culture/2012/06/mug-shot-web-sites.html
\ No newline at end of file
+title: //h1[@id='articlehed'] | //h2[@id="articleintro"]
+body: //div[@id='articletext']
+
+strip: //ul[@id="bc"] | //div[@id="yrail"] | //div[@class="entry-keywords"] | //div[@class="entry-categories"] | //div[@class="socialUtils"] | //div[@id="footer"] | //div[@class="cartoon"]
+
+date: //h4[@id='articleauthor']/span[@class='dd dds']
+date: //div[@id="pagebody"]/div[@class='hentry entry']/div[@class='published']
+
+single_page_link: //div[@class='paginationViewSinglePage']/a
+test_url: http://www.newyorker.com/online/blogs/culture/2012/06/mug-shot-web-sites.html
+test_url: http://www.newyorker.com/reporting/2013/04/22/130422fa_fact_bilger?currentPage=all&mobify=0
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 806a3df..b8d235d
@@ -1,16 +1,16 @@
-# 2011-08-22 [carlo@...] initial version\r
-# 2011-08-22 [carlo@...] removed comments & social links\r
-\r
-tidy: no\r
-\r
-single_page_link: //a[@class="single active"]\r
-\r
-body: //div[@id="main"]//div[@class="content-region"]/article\r
-author: //span[@class="author-name"]\r
-date: //time/text()\r
-\r
-strip_id_or_class: //aside[@id="related"]\r
-strip: //footer\r
-\r
+# 2011-08-22 [carlo@...] initial version
+# 2011-08-22 [carlo@...] removed comments & social links
+
+tidy: no
+
+single_page_link: //a[@class="single active"]
+
+body: //div[@id="main"]//div[@class="content-region"]/article
+author: //span[@class="author-name"]
+date: //time/text()
+
+strip_id_or_class: //aside[@id="related"]
+strip: //footer
+
 title: //h1
 test_url: http://www.next-gen.biz/reviews/deus-ex-human-revolution-review
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 70f9247..956b288
@@ -1,11 +1,11 @@
-# doesn't look like selecting an attribute value works?\r
-# author: //meta[@id="authorName"]@value\r
-\r
-author: substring-after(//li[@id="article-hdr-meta-author"]/text(), "By ")\r
-date: //abbr[@id="article-time"]\r
-title: //div[@id="article-hdr"]/h1\r
-body: //div[@class="articleText"]\r
-\r
-# strip miscellaneous teasers & etc\r
+# selecting the attribute value did not work: the XPath below is missing the "/" before @value
+# author: //meta[@id="authorName"]@value
+
+author: substring-after(//li[@id="article-hdr-meta-author"]/text(), "By ")
+date: //abbr[@id="article-time"]
+title: //div[@id="article-hdr"]/h1
+body: //div[@class="articleText"]
+
+# strip miscellaneous teasers, etc.
 strip: //div[@class="removeformobile"]
 test_url: http://www.nfl.com/news/story/09000d5d82388707/article/close-shave-chiefs-haley-perseveres-through-rough-start?module=HP11_content_stream
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6083486..44a82a9
@@ -1,7 +1,7 @@
-next_page_link: //div[@class='nextpage_continue']/a\r
-strip: //div[@class='nextpage_continue']\r
-strip_id_or_class: nextpage\r
-title: //div[@class='article_title']//h1\r
-body: //div[@class='article_title']/..\r
+next_page_link: //div[@class='nextpage_continue']/a
+strip: //div[@class='nextpage_continue']
+strip_id_or_class: nextpage
+title: //div[@class='article_title']//h1
+body: //div[@class='article_title']/..
 body: //div[@class='content']
 test_url: http://ngm.nationalgeographic.com/2012/02/tsunami/folger-text
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
old mode 100644 (file)
new mode 100755 (executable)
index 409a897..f0e28af
@@ -1,13 +1,13 @@
-body: //div[@id="main"]\r
-title: //div[@id="main"]/h3\r
-\r
-# Remove &lsquo;Review&rsquo; and &lsquo;Wii&rsquo;.\r
-strip: //div[@class="badge"]\r
-\r
-# Remove duplicate title and country flag.\r
-strip: //h3\r
-\r
-# Commented out below are attempts to extract the author and date, which did not work.\r
-# author: //p[@class="extra "]/a\r
+body: //div[@id="main"]
+title: //div[@id="main"]/h3
+
+# Remove 'Review' and 'Wii'.
+strip: //div[@class="badge"]
+
+# Remove duplicate title and country flag.
+strip: //h3
+
+# Commented out below are attempts to extract the author and date, which did not work.
+# author: //p[@class="extra "]/a
 # date: //p[@class="extra "]/span[@class="when"]
 test_url: http://www.nintendoworldreport.com/review/28400
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ae2d7e4..b15f061
@@ -1,5 +1,5 @@
-author: //span[@class='meta']/span[@class='username']\r
-body: //div[@class='article-content']\r
-\r
+author: //span[@class='meta']/span[@class='username']
+body: //div[@class='article-content']
+
 strip_id_or_class: 'article-actions'
 test_url: http://nojesguiden.se/blogg/maja-bredberg/maja-laser-tidningen-en-helt-vanlig-lordag-i
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 04a0a34..88429a7
@@ -1,11 +1,11 @@
-title: //h1\r
-body: //div[@id='pn-maincontent']\r
-strip_id_or_class: z-menu\r
-strip_id_or_class: news_category\r
-strip_id_or_class: news_title\r
-strip_id_or_class: news_modify\r
-strip_id_or_class: news_morearticlesincat\r
-strip_id_or_class: ezc_comments\r
-strip_comments: yes\r
-\r
+title: //h1
+body: //div[@id='pn-maincontent']
+strip_id_or_class: z-menu
+strip_id_or_class: news_category
+strip_id_or_class: news_title
+strip_id_or_class: news_modify
+strip_id_or_class: news_morearticlesincat
+strip_id_or_class: ezc_comments
+strip_comments: yes
+
 test_url: http://www.northumberlandview.ca/index.php?module=news&func=display&sid=5972
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/nosalty.hu.txt b/inc/3rdparty/site_config/standard/nosalty.hu.txt
new file mode 100755 (executable)
index 0000000..7e20cad
--- /dev/null
@@ -0,0 +1,6 @@
+title: //div[@id='tab-recept']//h1
+body: //div[@id='tab-recept']//div[contains(@class, 'column-container')]
+strip_id_or_class: ajanlo-box
+prune: no
+
+test_url: http://www.nosalty.hu/recept/szupergyors-fank
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 205b1af..1b817c0
@@ -1,6 +1,6 @@
-title: /html/body/div[3]/div/div/h1\r
-\r
-body: //*[@id="article-body"]\r
-\r
+title: /html/body/div[3]/div/div/h1
+
+body: //*[@id="article-body"]
+
 
 test_url: http://nplusonemag.com/the-outskirts-of-progress
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index afab0eb..acd73e4
@@ -1,32 +1,34 @@
-title: //div[contains(@class, 'storytitle')]//h1\r
-author: //p[@class="byline"]/span\r
-body: //div[@id='storyspan02']//*[@class='duration' or @class='download' or contains(@class, 'photo')] | //div[@id='storytext'] | //div[@class='transcript']\r
-date: //meta[@name="date"]/@content\r
-\r
-strip: //div[@class='enlarge_measure']\r
-strip: //div[@class='enlarge_html']\r
-strip: //a[@class='enlargeicon']\r
-strip: //div[contains(@class, 'bookedition')]\r
-strip: //div[@class='textsize']\r
-strip: //ul[@class='genres']\r
-strip: //span[@class='bull']\r
-strip_id_or_class: secondary\r
-strip_id_or_class: con1col\r
-strip: //h3[@class='conheader']\r
-\r
-replace_string(<a name="more">&nbsp;</a>): <!-- no more -->\r
-replace_string(<div class="transcript">): <div class="transcript"><h2>Transcript</h2>\r
-\r
-prune: no\r
-strip://div[@class="ecommercepop"]\r
-strip://span[@class="bull"]\r
-strip://span[@class="purchaseLink"]\r
-strip://div[@class="enlarge_html"]\r
-strip://div[@class="enlarge_measure"]\r
-strip://div[@class="container con1col small"]\r
-strip://a[contains(@class, "enlargebtn")]\r
-strip://div[contains(@class, "bucketwrap internallink")]\r
-\r
-test_url: http://www.npr.org/blogs/thetwo-way/2011/07/12/137799301/sports-loses-its-escapist-gleam-in-a-summer-of-court-dates\r
-test_url: http://www.npr.org/2012/07/04/156190948/feeling-under-siege-catholic-leadership-shifts-right\r
-test_url: http://www.npr.org/2012/12/13/166480907/the-years-best-sci-fi-crosses-galaxies-and-genres
\ No newline at end of file
+title: //div[contains(@class, 'storytitle')]//h1
+author: //p[@class="byline"]/span
+body: //div[@id='primaryaudio']//*[@class='duration' or @class='download' or contains(@class, 'photo')] | //div[@id='storytext' or @id='supplementarycontent' or contains(@class, 'transcript')]
+date: //meta[@name="date"]/@content
+
+strip_id_or_class: enlarge_measure
+strip_id_or_class: enlarge_html
+strip: //a[contains(@class, 'enlargeicon')]
+strip: //div[contains(@class, 'bookedition')]
+strip: //div[@class='textsize']
+strip: //ul[@class='genres']
+strip: //span[@class='bull']
+strip_id_or_class: secondary
+strip_id_or_class: con1col
+strip: //h3[@class='conheader']
+
+replace_string(<a name="more">&nbsp;</a>): <!-- no more -->
+replace_string(<div class="transcript">): <div class="transcript"><h2>Transcript</h2>
+replace_string(<div class="transcript storytext">): <div class="transcript storytext"><h2>Transcript</h2>
+
+prune: no
+strip://div[@class="ecommercepop"]
+strip://span[@class="bull"]
+strip://span[@class="purchaseLink"]
+strip://div[@class="enlarge_html"]
+strip://div[@class="enlarge_measure"]
+strip://div[@class="container con1col small"]
+strip://a[contains(@class, "enlargebtn")]
+strip://div[contains(@class, "bucketwrap internallink")]
+
+test_url: http://www.npr.org/blogs/thetwo-way/2011/07/12/137799301/sports-loses-its-escapist-gleam-in-a-summer-of-court-dates
+test_url: http://www.npr.org/2012/07/04/156190948/feeling-under-siege-catholic-leadership-shifts-right
+test_url: http://www.npr.org/2012/12/13/166480907/the-years-best-sci-fi-crosses-galaxies-and-genres
+test_url: http://www.npr.org/templates/story/story.php?storyId=229103221
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8ecb896..d95ec68
@@ -1,13 +1,13 @@
-strip_id_or_class: sIFR-alternate\r
-title: //div[@id='page-title-wrapper']/div[@id='page-title']/h2\r
-single_page_link: //a[contains(@href, 'pagination=false') and not(contains(@href, 'printpage=true'))]\r
-\r
-body: //div[@id = 'article-body']\r
-strip_id_or_class:article-tools\r
-strip_id_or_class:js_target\r
-strip_id_or_class:marker\r
-author://div[@id = 'page-title']/h3\r
-date://div[@id = 'page-title']/h5/a[starts-with(@href,'/issues/')]\r
-\r
-\r
+strip_id_or_class: sIFR-alternate
+title: //div[@id='page-title-wrapper']/div[@id='page-title']/h2
+single_page_link: //a[contains(@href, 'pagination=false') and not(contains(@href, 'printpage=true'))]
+
+body: //div[@id = 'article-body']
+strip_id_or_class:article-tools
+strip_id_or_class:js_target
+strip_id_or_class:marker
+author://div[@id = 'page-title']/h3
+date://div[@id = 'page-title']/h5/a[starts-with(@href,'/issues/')]
+
+
 test_url: http://www.nybooks.com/articles/archives/2012/feb/23/were-more-unequal-you-think/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f664c93..7a1d62d
@@ -1,8 +1,8 @@
-title: //h2[contains(@class, 'primary')]\r
-body: //div[@id='story']\r
-author: //*[@class='by']/a\r
-date: substring-after(//*[@class='date'], 'Published')\r
-\r
-next_page_link: //div[@class='page-navigation']//li[@class='next']/a\r
-\r
+title: //h2[contains(@class, 'primary')]
+body: //div[@id='story']
+author: //*[@class='by']/a
+date: substring-after(//*[@class='date'], 'Published')
+
+next_page_link: //div[@class='page-navigation']//li[@class='next']/a
+
 test_url: http://nymag.com/news/features/wall-street-2012-2/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8c9e37f..f4bedb6
@@ -1,8 +1,8 @@
-title: //div[@class="article default-article"]/h1\r
-author: //p[@class="author"]/a[2]\r
-\r
-# Article introduction:\r
-#move_into(//div[@class="article-bread"]): //p[@class="lead"]\r
-\r
+title: //div[@class="article default-article"]/h1
+author: //p[@class="author"]/a[2]
+
+# Article introduction:
+#move_into(//div[@class="article-bread"]): //p[@class="lead"]
+
 body: //div[@class="article-bread"]
 test_url: http://www.nyteknik.se/nyheter/energi_miljo/energi/article3391426.ece
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8d9a794..23c9ad1
@@ -1,36 +1,49 @@
-title://h1[@class="articleHeadline"]\r
-body://div[@id="article"]\r
-strip_id_or_class:articleTools\r
-strip_id_or_class:readerscomment\r
-#strip://div[contains(@class, "articleInline runaroundLeft")]\r
-strip: //div[contains(@class, "doubleRule")]\r
-# strip image credit - appears as a bold heading\r
-strip: //div[contains(@class, "articleInline")]//h6\r
-strip_id_or_class:enlargeThis\r
-strip_id_or_class:pageLinks\r
-strip_id_or_class:memberTools\r
-strip_id_or_class:articleExtras\r
-strip_id_or_class:singleAd\r
-strip_id_or_class:byline\r
-strip_id_or_class:dateline\r
-strip_id_or_class:articleheadline\r
-strip_id_or_class:articleBottomExtra\r
-strip://a[contains(@href, 'nytimes.com/adx/')]\r
-strip: //nyt_byline\r
-strip: //span[contains(@class, 'slideshow') or contains(@class, 'video')]\r
-strip: //p[@class='caption']//a[contains(., 'More Photos')]\r
-\r
-prune: no\r
-tidy: no\r
-\r
-date: substring-after(//*[contains(@class, 'dateline')], 'Published:')\r
-\r
-single_page_link: //link[contains(@href, 'pagewanted=all')]\r
-#single_page_link: //a[contains(@href, 'pagewanted=all') and not(contains(@href, 'login'))]\r
-\r
-strip://ul[@id = 'toolsList']\r
-strip://h6[@class = 'kicker']\r
-author:substring-after(//h6[@class='byline'],'By ')\r
-\r
-test_url: http://www.nytimes.com/2011/07/24/books/review/an-academic-authors-unintentional-masterpiece.html\r
-test_url: http://www.nytimes.com/2012/06/10/arts/television/the-newsroom-aaron-sorkins-return-to-tv.html
\ No newline at end of file
+title://h1[@class="articleHeadline"]
+body://div[@id="article"]
+body://*[@itemprop="articleBody"]
+strip_id_or_class:articleTools
+strip_id_or_class:readerscomment
+#strip://div[contains(@class, "articleInline runaroundLeft")]
+strip: //div[contains(@class, "doubleRule")]
+# strip image credit - appears as a bold heading
+strip: //div[contains(@class, "articleInline")]//h6
+strip_id_or_class:enlargeThis
+strip_id_or_class:pageLinks
+strip_id_or_class:memberTools
+strip_id_or_class:articleExtras
+strip_id_or_class:singleAd
+strip_id_or_class:byline
+strip_id_or_class:dateline
+strip_id_or_class:articleheadline
+strip_id_or_class:articleBottomExtra
+strip_id_or_class:shareTools
+strip://a[contains(@href, 'nytimes.com/adx/')]
+strip: //nyt_byline
+strip: //span[contains(@class, 'slideshow') or contains(@class, 'video')]
+strip: //p[@class='caption']//a[contains(., 'More Photos')]
+
+prune: no
+tidy: no
+
+find_string: <script 
+replace_string: <div style="display:none" 
+find_string: </script>
+replace_string: </div>
+
+date: substring-after(//*[contains(@class, 'dateline')], 'Published:')
+
+single_page_link: //link[contains(@href, 'pagewanted=all')]
+single_page_link: //link[@rel='alternate' and contains(@href, 'mobile.nytimes.com')]/@href
+single_page_link: concat(substring-before(//div[@id='pageLinks']//a[contains(@href, 'pagewanted=')]/@href, 'pagewanted='), 'pagewanted=all') 
+#single_page_link: //a[contains(@href, 'pagewanted=all') and not(contains(@href, 'login'))]
+
+strip://ul[@id = 'toolsList']
+strip://h6[@class = 'kicker']
+author:substring-after(//h6[@class='byline'],'By ')
+
+test_url: http://www.nytimes.com/2011/07/24/books/review/an-academic-authors-unintentional-masterpiece.html
+test_url: http://www.nytimes.com/2012/06/10/arts/television/the-newsroom-aaron-sorkins-return-to-tv.html
+test_url: http://www.nytimes.com/2013/03/25/world/middleeast/israeli-military-responds-after-patrols-come-under-fire-from-syria.html
+test_url: http://www.nytimes.com/2013/08/15/nyregion/when-the-new-york-city-subway-ran-without-rails.html
+test_url: http://www.nytimes.com/2004/02/29/weekinreview/correspondence-class-consciousness-china-s-wealthy-live-creed-hobbes-darwin-meet.html
+test_url: http://www.nytimes.com/2014/06/19/opinion/gail-collins-romney-and-the-2016-contenders-huddle.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 81faaba..749f4f2
@@ -1,12 +1,12 @@
-body: //*[@class='article-full']\r
-title: //h3\r
-strip: //header[@class='group']\r
-#body: //p[@class='lead']\r
-#move_into(//p[@class='lead']): //*[@class='article-full']/figure\r
-#move_into(//p[@class='lead']): //div[@id='articleBodyText']\r
-strip: //div[@id='social-media-floater']\r
-strip: //div[@class='advertisement']\r
-strip: //div[@class='infobox']\r
-strip: //div[@id='articleComments']\r
-\r
+body: //*[@class='article-full']
+title: //h3
+strip: //header[@class='group']
+#body: //p[@class='lead']
+#move_into(//p[@class='lead']): //*[@class='article-full']/figure
+#move_into(//p[@class='lead']): //div[@id='articleBodyText']
+strip: //div[@id='social-media-floater']
+strip: //div[@class='advertisement']
+strip: //div[@class='infobox']
+strip: //div[@id='articleComments']
+
 test_url: http://www.nzz.ch/wissen/wissenschaft/sonnenschutz-fuer-die-erde-1.17282213
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e409ca2..0b10753
@@ -1,7 +1,7 @@
-body: //article[contains(@class, 'instapaper_body')]\r
-\r
-prune: no\r
-\r
-single_page_link: //a[@id='print-button']\r
-\r
+body: //article[contains(@class, 'instapaper_body')]
+
+prune: no
+
+single_page_link: //a[@id='print-button']
+
 test_url: http://www.observer.com/2008/would-you-take-tumblr-man
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a2fb5f2..bf10787
@@ -1,7 +1,7 @@
-body: //div[(@id = "content")]\r
-strip: //div[(@class = "links-bar")]\r
-strip: //div[(@class = "povrzani")]\r
-strip: //div[(@class = "povrzani-dolu")]\r
-strip: //div[(@class = "tags")]\r
+body: //div[(@id = "content")]
+strip: //div[(@class = "links-bar")]
+strip: //div[(@class = "povrzani")]
+strip: //div[(@class = "povrzani-dolu")]
+strip: //div[(@class = "tags")]
 strip: //h1[(@id = "page-title")]
 test_url: http://off.net.mk/zhivot-i-zabava/gadzheti/dzhabe-raboti-dzhabe-ne-dishi
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
old mode 100644 (file)
new mode 100755 (executable)
index 1b39b62..4b3a720
@@ -1,9 +1,9 @@
-title: //div[@id='squeeze']/h1\r
-strip: //div[@id='squeeze']/h1\r
-author: //div[@class='submitted']/a\r
-strip: //div[@class='submitted']/a\r
-convert_double_br_tags: yes\r
-\r
-\r
+title: //div[@id='squeeze']/h1
+strip: //div[@id='squeeze']/h1
+author: //div[@class='submitted']/a
+strip: //div[@class='submitted']/a
+convert_double_br_tags: yes
+
+
 
 test_url: http://omiliya.org/content/predchuvstvie.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index be7a17e..a95c2b0
@@ -1,5 +1,5 @@
-body: //div[(@class = "statija")]\r
-strip: //div[(@class = "relatedBlock")]\r
-strip: //div[(@class = "swftools")]\r
+body: //div[(@class = "statija")]
+strip: //div[(@class = "relatedBlock")]
+strip: //div[(@class = "swftools")]
 strip: //table[(@class = "links")]
 test_url: http://on.net.mk/video/na-trkala/lamborghini-aventador-avionot-shto-ne-leta
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index edb5285..448bb7e
@@ -1,23 +1,25 @@
-title: //meta[@property="og:title"]/@content\r
-body: //div[@id='article_story_body']\r
-\r
-author: //h3[@class='byline']/a\r
-# for slid show content\r
-body: //ul[@id='imageSlide']//li[@class='firstSlide']//img | (//div[@class='txt_body']//p)[1]\r
-date: //li[@class='dateStamp']/small\r
-\r
-strip_id_or_class: insetFullBracket\r
-strip_id_or_class: insettipBox\r
-#strip_id_or_class: legacyInset\r
-strip_id_or_class: recipeACShopAndBuyText\r
-\r
-strip: //div[contains(@class, 'insetContent')]//cite\r
-strip: //*[contains(@style, 'visibility: hidden;')]\r
-strip: //div[contains(@class, 'insetContent') and not(contains(@class, 'image'))]\r
-\r
-prune: no\r
-tidy: no\r
-\r
-test_url: http://online.wsj.com/article/SB10001424052970203363504577185322849515102.html\r
-# slide show\r
-test_url: http://online.wsj.com/article/SB10001424052970204791104577110550376458164.html
\ No newline at end of file
+title: //meta[@property="og:title"]/@content
+body: //div[@id='article_story_body']
+
+author: //h3[@class='byline']/a
+# for slide show content
+body: //ul[@id='imageSlide']//li[@class='firstSlide']//img | (//div[@class='txt_body']//p)[1]
+date: //li[@class='dateStamp']/small
+
+strip_id_or_class: insetFullBracket
+strip_id_or_class: insettipBox
+#strip_id_or_class: legacyInset
+strip_id_or_class: recipeACShopAndBuyText
+
+strip: //div[contains(@class, 'insetContent')]//cite
+strip: //*[contains(@style, 'visibility: hidden;')]
+strip: //div[contains(@class, 'insetContent') and not(contains(@class, 'image'))]
+strip: //div[contains(@class, 'carousel')]
+
+prune: no
+tidy: no
+
+test_url: http://online.wsj.com/news/articles/SB10001424052702304626304579509100018004342
+test_url: http://online.wsj.com/article/SB10001424052970203363504577185322849515102.html
+# slide show
+test_url: http://online.wsj.com/article/SB10001424052970204791104577110550376458164.html
diff --git a/inc/3rdparty/site_config/standard/ontologicalgeek.com.txt b/inc/3rdparty/site_config/standard/ontologicalgeek.com.txt
new file mode 100755 (executable)
index 0000000..a9bf71e
--- /dev/null
@@ -0,0 +1,8 @@
+title: //h1[@class='entry-title']
+
+author: //a[@rel='author']
+
+date: substring-before(//aside[@class='entry-meta'], '|')
+
+body: //div[@class='entry-content']
+test_url: http://ontologicalgeek.com/change-or-live-final-fantasy-x-as-catholic-dystopia/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 510eb25..6913eb0
@@ -1,4 +1,4 @@
-body: //div[@id = 'content-inner']\r
-strip: //div[@id = 'content-bottom']\r
+body: //div[@id = 'content-inner']
+strip: //div[@id = 'content-bottom']
 strip_id_or_class: print_sharebutton
 test_url: http://openthemagazine.com/article/nation/sania-vs-saina
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b766354..a5dcdb5
@@ -1,4 +1,4 @@
-body: //div[@class="chapter"]\r
-prune: no\r
-tidy: no\r
+body: //div[@class="chapter"]
+prune: no
+tidy: no
 test_url: http://openwebx.org/docs/springext.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ff16ca7..fb4f218
@@ -1,11 +1,11 @@
-single_page_link: //div[@id='content']//p[@class='readMore']/a\r
-\r
-title: //div[@class='hidden offscreen']/h2\r
-body: //div[@id="storyText"]\r
-move_into(//div[@id='storyText']): //div[@class='fact']\r
-strip: //small[@class='credit']\r
-strip: //small[@class='caption']\r
-date: substring-after(//div[@id='storyMeta']//p[@class='date'],'Publiziert am')\r
-strip: //p[@class='toplink']\r
+single_page_link: //div[@id='content']//p[@class='readMore']/a
+
+title: //div[@class='hidden offscreen']/h2
+body: //div[@id="storyText"]
+move_into(//div[@id='storyText']): //div[@class='fact']
+strip: //small[@class='credit']
+strip: //small[@class='caption']
+date: substring-after(//div[@id='storyMeta']//p[@class='date'],'Publiziert am')
+strip: //p[@class='toplink']
 
 test_url: http://orf.at/stories/2084731/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0dedac3..50717f2
@@ -1,18 +1,18 @@
-title: /html/body/div[5]/div[2]/h1\r
-body: /html/body/div[5]/div[2]/div[6]/div/div\r
-body: //*[@id="cikk"]\r
-strip: /html/body/div[5]/div[2]/h1\r
-strip: /html/body/div[5]/div[2]/div[4]\r
-strip: //*[@id="multidoboz"]\r
-strip: /html/body/div[5]/div[2]/div[6]/div[2]\r
-strip: //*[@id="comments"]\r
-strip: //*[@id="rating-doboz"]\r
-strip: /html/body/div[5]/div[2]/div[10]\r
-strip: /html/body/div[5]/div[2]/a\r
-strip: /html/body/div[5]/div[2]/span\r
-strip: /html/body/div[5]/div[2]/span[2]\r
-strip: /html/body/div[5]/div[2]/span[3]\r
-strip: /html/body/div[5]/div[2]/span[4]\r
-strip: /html/body/div[5]/div[2]/span[5]\r
+title: /html/body/div[5]/div[2]/h1
+body: /html/body/div[5]/div[2]/div[6]/div/div
+body: //*[@id="cikk"]
+strip: /html/body/div[5]/div[2]/h1
+strip: /html/body/div[5]/div[2]/div[4]
+strip: //*[@id="multidoboz"]
+strip: /html/body/div[5]/div[2]/div[6]/div[2]
+strip: //*[@id="comments"]
+strip: //*[@id="rating-doboz"]
+strip: /html/body/div[5]/div[2]/div[10]
+strip: /html/body/div[5]/div[2]/a
+strip: /html/body/div[5]/div[2]/span
+strip: /html/body/div[5]/div[2]/span[2]
+strip: /html/body/div[5]/div[2]/span[3]
+strip: /html/body/div[5]/div[2]/span[4]
+strip: /html/body/div[5]/div[2]/span[5]
 strip: //*[@id="kommentszam"]
 test_url: http://www.origo.hu/itthon/20110119-lemondott-a-kulturaert-felelos-helyettes-allamtitkar.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/oschina.net.txt b/inc/3rdparty/site_config/standard/oschina.net.txt
new file mode 100755 (executable)
index 0000000..5645153
--- /dev/null
@@ -0,0 +1,3 @@
+title: //h1
+strip_id_or_class: syntaxhighlighter
+test_url: http://www.oschina.net/translate/event-based-programming-what-async-has-over-sync?print
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f03c955..7e2985e
@@ -1,11 +1,11 @@
-#body: (//div[@class='ftr-yt-vid'])[1]\r
-body: (//blockquote[contains(@class, 'postcontent')])[1]\r
-body: (//div[starts-with(@id, 'post_message')])[1]\r
-\r
-prune: no\r
-tidy: no\r
-\r
-#replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player"\r
-#replace_string(</iframe>): </iframe>&nbsp;</div>\r
-\r
+#body: (//div[@class='ftr-yt-vid'])[1]
+body: (//blockquote[contains(@class, 'postcontent')])[1]
+body: (//div[starts-with(@id, 'post_message')])[1]
+
+prune: no
+tidy: no
+
+#replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player"
+#replace_string(</iframe>): </iframe>&nbsp;</div>
+
 test_url: http://pakistantvdekho.com/showthread.php?647741-Sitam-Gar-by-HUM-TV-Episode-07&p=659080#post659080
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pakmedia.tv.txt b/inc/3rdparty/site_config/standard/pakmedia.tv.txt
new file mode 100755 (executable)
index 0000000..5d6e4c8
--- /dev/null
@@ -0,0 +1,17 @@
+title: //h1[@class='entry-title']
+body: //article//div[@class='entry']
+strip_id_or_class: addthis
+strip_id_or_class: gdsrcacheloader
+strip_id_or_class: entry-meta
+strip_id_or_class: entry-tags
+strip_id_or_class: authorbox
+strip: //div[@class='entry']/p[1]
+strip: //img[@width='600' and @height='70']
+# related posts
+strip: //h3[contains(., 'Related posts')]
+strip: //div[contains(@style, 'border: 0pt none ; margin: 0pt; padding: 0pt;')]
+
+prune: no
+tidy: no
+
+test_url: http://pakmedia.tv/tv-one/feed
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d0d2a5d..35121e1
@@ -1,5 +1,5 @@
-title://h2\r
-author://div[@class="posted"]/a\r
-date://div[@class="date"]\r
+title://h2
+author://div[@class="posted"]/a
+date://div[@class="date"]
 body://div[@class="entry"]
 test_url: http://pandagon.net/index.php/site/its-okay-to-admit-that-mass-hysteria-is-real
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7d1c218..a5d427a
@@ -1,5 +1,5 @@
-tidy: no\r
-body: //article\r
-date: //time/@datetime\r
+tidy: no
+body: //article
+date: //time/@datetime
 strip_id_or_class: sharedaddy
 test_url: http://pandodaily.com/2012/01/19/ibooks-author-is-not-going-to-hurt-publishers-it-might-even-help-them/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0361f06..e0e2595
@@ -1,3 +1,3 @@
-body: //div[@class='entry']\r
+body: //div[@class='entry']
 date: //h3[@class='postDate']
 test_url: http://www.panic.com/blog/2011/07/panic-is-ready-for-lion/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/papodehomem.com.br.txt b/inc/3rdparty/site_config/standard/papodehomem.com.br.txt
new file mode 100755 (executable)
index 0000000..2c522da
--- /dev/null
@@ -0,0 +1,6 @@
+title: //h2[@class="page_title"]
+body: //div[@class="entry arquivo"]
+author: //span[@class="author"]
+footnotes: yes
+prune: yes
+test_url: http://papodehomem.com.br/um-relato-confessional-sobre-a-maioridade-penal/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a3bd4b0..cd9bd55
@@ -1,6 +1,6 @@
-title: //h2[@class="post-title"]\r
-author: substring-after(//div[@class="description"],'Words by ')\r
-date: //li[@class="date"]\r
-strip: //h2[@class="post-title"]\r
+title: //h2[@class="post-title"]
+author: substring-after(//div[@class="description"],'Words by ')
+date: //li[@class="date"]
+strip: //h2[@class="post-title"]
 body: //div[@class="copy"]
 test_url: http://parislemon.com/post/13462682469/the-15-inch-air
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 478a669..caaa2e9
@@ -1,3 +1,3 @@
-title: //h1\r
+title: //h1
 body: //div[@id='news-article']
 test_url: http://www.parliament.uk/business/committees/committees-a-z/commons-select/backbench-business-committee/news/guidance-for-e-petitioners/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 89d13b2..03b67b7
@@ -1,6 +1,6 @@
-title://div[@class="paste_box_line1"]/h1\r
-author://div[@class="paste_box_line2"]/a\r
-body://div[@class="text"]\r
-date:substring-before(substring-after(//div[@class="paste_box_line2"],'|'),'|')\r
+title://div[@class="paste_box_line1"]/h1
+author://div[@class="paste_box_line2"]/a
+body://div[@class="text"]
+date:substring-before(substring-after(//div[@class="paste_box_line2"],'|'),'|')
 dissolve://li
 test_url: http://pastebin.com/LAykd1es
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 40a049e..c535158
@@ -1,5 +1,5 @@
-title: //h1\r
-body: //div[@id='ff-pastepad-content']\r
-prune: no\r
+title: //h1
+body: //div[@id='ff-pastepad-content']
+prune: no
 # todo: add test file
 test_url: http://pastepad.fivefilters.org/test.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 1a4cd25..2504222
@@ -1,8 +1,8 @@
-title://*[contains(@class,'post-title')]\r
-body://div[contains(@class,'post-body')]\r
-body://div[contains(@class,'entry-content')]\r
-strip_comments:no\r
-prune:no\r
-convert_double_br_tags:yes\r
+title://*[contains(@class,'post-title')]
+body://div[contains(@class,'post-body')]
+body://div[contains(@class,'entry-content')]
+strip_comments:no
+prune:no
+convert_double_br_tags:yes
 tidy:yes
 test_url: http://www.pathawks.com/2011/06/crazyawesomecoloradotrip.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
old mode 100644 (file)
new mode 100755 (executable)
index cebea4d..96bdd95
@@ -1,10 +1,10 @@
-prune:yes\r
-\r
-date://*[contains(@class,'date')]\r
-\r
-body://div[contains(@id,'content')]\r
-\r
-next_page_link://a[contains(.,'Next >')]\r
-\r
+prune:yes
+
+date://*[contains(@class,'date')]
+
+body://div[contains(@id,'content')]
+
+next_page_link://a[contains(.,'Next >')]
+
 strip_id_or_class:sponsors
 test_url: http://www.pcmag.com/article2/0,2817,2401676,00.asp
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 30ccbb5..7193f87
@@ -1,19 +1,19 @@
-title: //div[@class='articleHead']//h1\r
-author: //div[@class="author-name"]/a[1]\r
-body: //div[@class="main"]\r
-\r
-# remove 'From the Lab' and 'Recent posts' text\r
-strip: //div[@class='blogLabel']\r
-\r
-# remove byline and meta info\r
-strip: //h1\r
-strip: //div[@class="article-meta"]\r
-strip: //div[@class="author-info"]\r
-\r
-#strip tags and categories\r
-strip: //div[@class="department"]\r
-\r
-#strip product cap links\r
-strip: //div[@class="cap-main"]\r
-strip: //div[@id="compare-lede"]\r
+title: //div[@class='articleHead']//h1
+author: //div[@class="author-name"]/a[1]
+body: //div[@class="main"]
+
+# remove 'From the Lab' and 'Recent posts' text
+strip: //div[@class='blogLabel']
+
+# remove byline and meta info
+strip: //h1
+strip: //div[@class="article-meta"]
+strip: //div[@class="author-info"]
+
+#strip tags and categories
+strip: //div[@class="department"]
+
+#strip product cap links
+strip: //div[@class="cap-main"]
+strip: //div[@id="compare-lede"]
 test_url: http://www.pcworld.com/article/262034/are-printer-companies-gouging-us-on-laser-toner-pricing.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f97615f..a0d5099
@@ -1,23 +1,23 @@
-# 2012-01-14 carlo@... - fixed title, body; added author, date\r
-\r
-title: //div[@class="title"]/h2/a\r
-# body: //div[@class="post"]\r
-# author: //p[@class="iconEmail"]/a\r
-# date: //p[@class="iconDate"]\r
-\r
-# 1/24/2013 yosoyju - fixed author, date, and body, added support for PA Report\r
-\r
-# Penny Arcade\r
-\r
-author: //li[@class="iconEmail"]/a\r
-date: //li[@class="iconDate"]\r
-body: //div[@class="body"]\r
-\r
-# PA Report\r
-\r
-author: //div[@class="meta"]/p/a\r
-date: substring-after(//div[@class="meta"]/p, '/ ')\r
-title: substring-after(//title, '- ')\r
-\r
-test_url: http://penny-arcade.com/2012/01/13/i-put-some-news-in-your-news\r
+# 2012-01-14 carlo@... - fixed title, body; added author, date
+
+title: //div[@class="title"]/h2/a
+# body: //div[@class="post"]
+# author: //p[@class="iconEmail"]/a
+# date: //p[@class="iconDate"]
+
+# 1/24/2013 yosoyju - fixed author, date, and body, added support for PA Report
+
+# Penny Arcade
+
+author: //li[@class="iconEmail"]/a
+date: //li[@class="iconDate"]
+body: //div[@class="body"]
+
+# PA Report
+
+author: //div[@class="meta"]/p/a
+date: substring-after(//div[@class="meta"]/p, '/ ')
+title: substring-after(//title, '- ')
+
+test_url: http://penny-arcade.com/2012/01/13/i-put-some-news-in-your-news
 test_url: http://penny-arcade.com/report/editorial-article/the-dystopian-future-of-casual-games-personalized-targeted-pricing-and-mech
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a369fd6..5ba5f77
@@ -1,6 +1,6 @@
-prune: no\r
-tidy: no\r
-body: //div[@class='article-content']\r
-dissolve: //nobr/a\r
+prune: no
+tidy: no
+body: //div[@class='article-content']
+dissolve: //nobr/a
 dissolve: //nobr
 test_url: http://www.philadelphiaeagles.com/news/article-1/Jacksons-Light-Shined-On-Sunday-Night/51a862de-42b4-40f1-a5a8-ba0fb8a435b7
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 41318f6..accbd60
@@ -1,10 +1,10 @@
-title: //h1[@class='entry-title']\r
-author: //p[@class='byline']/span\r
-body: //@id='body-content'\r
-date: //div[@class='article_timestamp']/span\r
-\r
-strip: //@class=b-group\r
-strip: //*[contains(@style, 'none')]\r
-strip: //a[contains(@href, 'comments')]\r
+title: //h1[@class='entry-title']
+author: //p[@class='byline']/span
+body: //@id='body-content'
+date: //div[@class='article_timestamp']/span
+
+strip: //@class=b-group
+strip: //*[contains(@style, 'none')]
+strip: //a[contains(@href, 'comments')]
 strip: //*[contains(@class, 'comment')]
 test_url: http://www.philly.com/philly/sports/eagles/20120127_Ohio_State_s_Posey_didn_t_waste_time_lost_to_suspension.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4e2ccb0..7f7e383
@@ -1,6 +1,6 @@
-author: substring-before(//div[@class='post_meta'],' on')\r
-date: substring-after(substring-before(//div[@class='post_meta'],'with'),' on')\r
-title: //h1[class='post_title']\r
-body: //div[@class='article']\r
+author: substring-before(//div[@class='post_meta'],' on')
+date: substring-after(substring-before(//div[@class='post_meta'],'with'),' on')
+title: //h1[class='post_title']
+body: //div[@class='article']
 
 test_url: http://photo.tutsplus.com/articles/news/a-brilliant-beginners-guide-to-architectural-photography/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7c57a84..cc643f0
@@ -1,6 +1,6 @@
-body: //div[@id='content']\r
-strip_id_or_class: manualnavbar\r
-\r
-prune: no\r
+body: //div[@id='content']
+strip_id_or_class: manualnavbar
+
+prune: no
 
 test_url: http://www.php.net/manual/en/migration5.incompatible.php
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a816399..624055b
@@ -1,7 +1,7 @@
-title: //div[@class='abstitle']//h1\r
-author: //div[@class='authorList']\r
-body: //div[@id='fulltext_body']\r
-\r
-prune: no\r
+title: //div[@class='abstitle']//h1
+author: //div[@class='authorList']
+body: //div[@id='fulltext_body']
+
+prune: no
 
 test_url: http://www.physicstoday.org/resource/1/phtoad/v64/i10/p48_s1?bypassSSO=1
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pinterest.com.txt b/inc/3rdparty/site_config/standard/pinterest.com.txt
new file mode 100755 (executable)
index 0000000..01b6df4
--- /dev/null
@@ -0,0 +1,5 @@
+title: //title
+body: //div[contains(@class, 'imageContainer')]
+
+test_url: http://pinterest.com/pin/380906080954441188/
+test_url: http://pinterest.com/michaelsorm/architecture/rss
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3decc53..eee96a9
@@ -1,16 +1,16 @@
-title:concat(//h1,' - ',//h2,' - ',//h3)\r
-author://address\r
-date://span[@class='pub-date']\r
-body://div[@id='main']\r
-single_page_link://link[@rel='canonical']\r
-strip://div[@class='info']\r
-strip_id_or_class:'object-grid related-content'\r
-strip_id_or_class:'object-prevnext'\r
-strip_id_or_class:'object-header'\r
-strip_id_or_class:'source'\r
-strip_id_or_class:'label'\r
-strip_id_or_class:'title'\r
-dissolve://ul\r
-strip://li[@class='next']\r
+title:concat(//h1,' - ',//h2,' - ',//h3)
+author://address
+date://span[@class='pub-date']
+body://div[@id='main']
+single_page_link://link[@rel='canonical']
+strip://div[@class='info']
+strip_id_or_class:'object-grid related-content'
+strip_id_or_class:'object-prevnext'
+strip_id_or_class:'object-header'
+strip_id_or_class:'source'
+strip_id_or_class:'label'
+strip_id_or_class:'title'
+dissolve://ul
+strip://li[@class='next']
 strip://li[@class='prev']
 test_url: http://pitchfork.com/features/why-we-fight/8796-on-the-far-slope-of-the-uncanny-valley/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9277707..c302526
@@ -1,8 +1,8 @@
-title: //h2[@class='post-title']\r
-author: substring-before(substring-after(//h3[@class='post-byline'],'By:'),'/')\r
-date: substring-before(substring-after(//p[@class='post-details'],'Posted on '),'in')\r
-strip: //h2[@class='post-title']\r
-strip: //p[@class='post-details']\r
-strip: //h3[@class='post-byline']\r
+title: //h2[@class='post-title']
+author: substring-before(substring-after(//h3[@class='post-byline'],'By:'),'/')
+date: substring-before(substring-after(//p[@class='post-details'],'Posted on '),'in')
+strip: //h2[@class='post-title']
+strip: //p[@class='post-details']
+strip: //h3[@class='post-byline']
 body: //div[@id='content']
 test_url: http://pittnews.com/newsstory/mens-basketball-pitt-recruit-robinson-to-bring-leadership/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 824cb06..f294852
@@ -1,15 +1,15 @@
-title: substring-before(//title,'pirates.com')\r
-date: //span[@class='timeStamp']\r
-author: substring-before(substring-after(//div[@class='byLine'],'By'),'/')\r
-body: //div[@id='article']\r
-#strip: //div[@class='inner']\r
-strip: //div[@id='article_head']\r
-strip: //p[@class='tagLine']\r
-strip: //div[@id='article_related_links']\r
-strip: //div[@id='article_related_mlb']\r
-strip: //div[@id='article_related_club']\r
-strip: //span[@class='more']\r
-strip: //div[@class='article_component']\r
-strip: //span[@class='screen_reader']\r
+title: substring-before(//title,'pirates.com')
+date: //span[@class='timeStamp']
+author: substring-before(substring-after(//div[@class='byLine'],'By'),'/')
+body: //div[@id='article']
+#strip: //div[@class='inner']
+strip: //div[@id='article_head']
+strip: //p[@class='tagLine']
+strip: //div[@id='article_related_links']
+strip: //div[@id='article_related_mlb']
+strip: //div[@id='article_related_club']
+strip: //span[@class='more']
+strip: //div[@class='article_component']
+strip: //span[@class='screen_reader']
 strip: //ul[@class='columnists_blurb']
 test_url: http://pittsburgh.pirates.mlb.com/news/article.jsp?ymd=20120330&content_id=27759040&vkey=news_pit&c_id=pit
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b3e6616..cc7891f
@@ -1,7 +1,7 @@
-title: substring-before(//title,'- Pittsburgh Tribune')\r
-author: substring-before(substring-after(//div[@class='byline'],'By '),',')\r
-date: substring-after(substring-after(//div[@class='byline'],','),',')\r
-body: //div[@id='storyBody']\r
-strip: //div[@class='morestories']\r
+title: substring-before(//title,'- Pittsburgh Tribune')
+author: substring-before(substring-after(//div[@class='byline'],'By '),',')
+date: substring-after(substring-after(//div[@class='byline'],','),',')
+body: //div[@id='storyBody']
+strip: //div[@class='morestories']
 dissolve: //p[@class='subheader']
 test_url: http://www.pittsburghlive.com/x/pittsburghtrib/sports/columnists/s_785654.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index dd715d8..4d02f6b
@@ -1,8 +1,8 @@
-title: //title\r
-author: substring-after(//div[@class='by-line'],'BY')\r
-\r
-body: //div[@id='article-body']\r
-\r
-strip: //div[@class='by-line']\r
+title: //title
+author: substring-after(//div[@class='by-line'],'BY')
+
+body: //div[@id='article-body']
+
+strip: //div[@class='by-line']
 strip: //div[@id='article-body']/h1
 test_url: http://www.pittsburghmagazine.com/Pittsburgh-Magazine/May-2012/Verde-Lights-the-Night/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6113b96..c372284
@@ -1,4 +1,4 @@
-title: //span[@class='StoryHeadline']\r
-strip: //div[@class='fivevert']\r
+title: //span[@class='StoryHeadline']
+strip: //div[@class='fivevert']
 body: //div[@id='Content']
 test_url: http://www.pittsburghpanthers.com/sports/m-baskbl/recaps/031412aaa.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3936310..571874a
@@ -1,8 +1,8 @@
-title: //h1[@class='articletitle']\r
-author: substring-after(//span[@class='author'],'by')\r
-date: //span[@class='created']\r
-body: //div[@class='article']\r
-strip: //div[@class='headline']\r
-strip: //p[@class='articleinfo']\r
+title: //h1[@class='articletitle']
+author: substring-after(//span[@class='author'],'by')
+date: //span[@class='created']
+body: //div[@class='article']
+strip: //div[@class='headline']
+strip: //p[@class='articleinfo']
 #dissolve: //p[@class='subheader']
 test_url: http://www.pittscriptblog.com/2012-articles/march/2012-football-opponents-set-and-the-attendance-dilemma.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/planetvita.de.txt b/inc/3rdparty/site_config/standard/planetvita.de.txt
new file mode 100755 (executable)
index 0000000..bfc3342
--- /dev/null
@@ -0,0 +1,5 @@
+title: //div[@id='frnRahmen']/div/div[@id='content']/div[2]/h2
+author: //div[@id='content']/div[1]/div/a
+body: //div[@id='content']/div[2]/span
+strip: //div[@id='commenthead']
+test_url: http://www.planetvita.de/news/10389-psn-store-update-vom-03-april-neue-inhalte-fuer-psvita.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 07b347a..9283494
@@ -1,6 +1,6 @@
-author: //article//*[@class="author"]\r
-date: //article//*[@class="publication-date"]\r
-body: //article\r
-strip: //article/header\r
+author: //article//*[@class="author"]
+date: //article//*[@class="publication-date"]
+body: //article
+strip: //article/header
 strip: //article/section
 test_url: http://www.playboy.com/playground/view/playboy-interview-jon-hamm
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 50a5dbf..4a7ea12
@@ -1,17 +1,17 @@
-body: //div[@id='contentPane']//div[@class='vg']\r
-body: //div[@id='contentPane']\r
-\r
-# Grab the author by finding the first profile pic, then backing up a node and getting the title of <a> tag which will be the author hopefully. Sorry can't test this due to parser errors, thanks google :(\r
-\r
-author: //div[@id='contentPane']//img[contains(@alt, 'profile photo')][1]/../@title\r
-\r
-\r
-strip: //*[@title="People who +1'd this"]/../..\r
-strip: //*[contains(@class, 'a-b-f-i-Hg-Uf')]\r
-strip: //*[@role='menu']\r
-strip: //img[contains(@alt, 'profile photo')]\r
-strip: //*[@class='a-f-i-Ad']\r
-\r
-tidy: no\r
-\r
+body: //div[@id='contentPane']//div[@class='vg']
+body: //div[@id='contentPane']
+
+# Grab the author by finding the first profile pic, then backing up a node and getting the title of <a> tag which will be the author hopefully. Sorry can't test this due to parser errors, thanks google :(
+
+author: //div[@id='contentPane']//img[contains(@alt, 'profile photo')][1]/../@title
+
+
+strip: //*[@title="People who +1'd this"]/../..
+strip: //*[contains(@class, 'a-b-f-i-Hg-Uf')]
+strip: //*[@role='menu']
+strip: //img[contains(@alt, 'profile photo')]
+strip: //*[@class='a-f-i-Ad']
+
+tidy: no
+
 test_url: http://plus.google.com/u/0/117840649766034848455/posts/FddaP6jeCqp
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index bb9be0a..ec151b4
@@ -1,4 +1,4 @@
-title: //h2[@class='jcw-pagetitle'\r
-date: //p[@class='postinfo']\r
+title: //h2[@class='jcw-pagetitle'
+date: //p[@class='postinfo']
 body: //div[@class='contenttext']
 test_url: http://plzkthxbai.com/blog/2011/06/28/1password-and-internet-security/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 880311d..65ddba5
@@ -1,4 +1,4 @@
-body: //div[@id="content"]/div[1]\r
-\r
+body: //div[@id="content"]/div[1]
+
 title: //h1[@class="entry-title"]
 test_url: http://pogue.blogs.nytimes.com/2011/05/12/the-future-of-skype/
\ No newline at end of file
index c5302d1bac527785345b3ec4e1f58fccff63d32c..d8f5e5758d973433b6fc992f4c35d28488400ce4 100755 (executable)
@@ -1,17 +1,13 @@
-title://div[contains(@class, "article")]/h1\r
-body://div[contains(@class,"story-text")]\r
-\r
-# Why doesn't this work? next_page_link://ul[contains(@class,"pagination")]/li/a[@rel="next"]\r
-\r
-next_page_link://ul[contains(@class,"pagination")]/li[contains(@class, "current")]/following-sibling::node()/a\r
-next_page_link://div[contains(@class,"pagination")]/ol/li[contains(@class, "current")]/following-sibling::node()/a\r
-date://meta[@name="publish_date"]/@content\r
-\r
-strip://div[contains(@class, "breadcrumbs")]\r
-strip://a[contains(@class, "hidden")]\r
-strip://div[contains(@class, "story-embed")]\r
+title://div[contains(@class, "article")]/h1
+body://div[contains(@class,"story-text")]
+
+# Why doesn't this work? next_page_link://ul[contains(@class,"pagination")]/li/a[@rel="next"]
+
+next_page_link://ul[contains(@class,"pagination")]/li[contains(@class, "current")]/following-sibling::node()/a
+date://meta[@name="publish_date"]/@content
+
+strip://div[contains(@class, "breadcrumbs")]
+strip://a[contains(@class, "hidden")]
+strip://div[contains(@class, "story-embed")]
 strip://div[contains(@class, "story-text")]//p/a[contains(text(), "Also on POLITICO:")]/..
-strip://div[contains(@class, "story-interrupt")]\r
-strip://footer[contains(@class, "author-bio")]\r
-\r
 test_url: http://www.politico.com/news/stories/0712/78105.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index fd247b5..65a8fc5
@@ -1,4 +1,4 @@
-body: //div[@id="content"]\r
-\r
+body: //div[@id="content"]
+
 strip: //div[@class="pfcontentmid"]/div[position()>4]|//div[@class="pfad"]
 test_url: http://www.politifact.com/truth-o-meter/statements/2011/may/30/barbara-boxer/barbara-boxer-says-medicare-overhead-far-lower-pri/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8deecbc..b13f8f8
@@ -1,13 +1,13 @@
-# 21/10-2011:\r
-# Added Author+Date\r
-# Remove fakta-boks if found\r
-# Deleted 'Læs også...' filter \r
-#  - Change in markup caused it to strip too much.\r
-\r
-author://span[@class='autor-name']\r
-date:substring-after(//div[@class='art-created'], ' ')\r
-title: //h1[contains(@class, 'stor-type')]\r
-body: //div[@id='art-body']\r
-strip: //div[@class='art-fakta article-box']\r
+# 21/10-2011:
+# Added Author+Date
+# Remove fakta-boks if found
+# Deleted 'Læs også...' filter 
+#  - Change in markup caused it to strip too much.
+
+author://span[@class='autor-name']
+date:substring-after(//div[@class='art-created'], ' ')
+title: //h1[contains(@class, 'stor-type')]
+body: //div[@id='art-body']
+strip: //div[@class='art-fakta article-box']
 
 test_url: http://politiken.dk/kultur/boger/skonlitteratur_boger/ECE1426386/makabre-tegneserie-zombier-aeder-alt-levende/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/polygon.com.txt b/inc/3rdparty/site_config/standard/polygon.com.txt
new file mode 100755 (executable)
index 0000000..8fe9b1b
--- /dev/null
@@ -0,0 +1,34 @@
+body: //div[@id='article-content']
+body: //article[@id='entry-top']/div[@class='float_wrapper']
+author: //header/p[@class='byline']/em/a
+date: //header/p[@class='byline']/span[@class='timestamp']
+
+strip: //div[@id='article-content']//header
+strip: //label
+
+#photos on left column (delete all)
+strip: //div[@class='big_photo']
+
+#photos on left column (remove extras used for scroll effect)
+#strip: //div[@class='big_photo']/div[./img]
+#strip: //div[@class='big_photo']/img[position()>1]
+
+strip_id_or_class: vox-lazy-load
+strip_id_or_class: social_buttons
+strip_id_or_class: feature_toc
+
+prune: no
+
+find_string: <noscript>
+replace_string: <div>
+find_string: </noscript>
+replace_string: </div>
+
+#find_string: <script
+#replace_string: <div style="display:none"
+#find_string: </script>
+#replace_string: </div>
+
+strip: //div[@class='float_wrapper']/header
+test_url: http://www.polygon.com/2013/4/5/4189028/donkey-kong-country-returns-3d-new-content
+test_url: http://www.polygon.com/features/2013/8/22/4602568/30-years-xbox-360-playstation-3-wii
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 85b7656..2582e6f
@@ -1,8 +1,8 @@
-next_page_link: //div[@id='longPagination']/a[@class='next']\r
-\r
-title: //div[@id='contentHeader']//h1\r
-\r
-body: //div[@id='articleBody']\r
-# this is so sad\r
+next_page_link: //div[@id='longPagination']/a[@class='next']
+
+title: //div[@id='contentHeader']//h1
+
+body: //div[@id='articleBody']
+# this is so sad
 body: //div[@id='intelliTXT']
 test_url: http://www.popularmechanics.com/technology/aviation/crashes/what-really-happened-aboard-air-france-447-6611877
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/portertech.ca.txt b/inc/3rdparty/site_config/standard/portertech.ca.txt
new file mode 100755 (executable)
index 0000000..2897cb5
--- /dev/null
@@ -0,0 +1,3 @@
+author: //*[(@class = "author")]
+date: //*[(@class = "date")]
+test_url: http://portertech.ca/2012/12/10/iac-morning-market/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 21cd833..f8eeb0a
@@ -1,19 +1,19 @@
-title: //div[@id="newsDetailTitle"]\r
-author: //span[@id="showAuthor"]\r
-date: //span[@id="showRefDate"]\r
-\r
-strip: //div[@id="breadcrumbs"]\r
-strip: //span[@id="PageTitle"]\r
-strip: //div[@id="newsDetailAuthorPublish"]\r
-\r
-strip: //div[@class="leadPix"]\r
-\r
-strip: //span[@id="ctl00_PageTitle"]\r
-strip: //div[@id="newsDetailTitle"]\r
-convert_double_br_tags:yes\r
-\r
-strip: //div[@id="newsDetailCredential"]\r
-strip: //div[@id="sidebar2"]\r
-strip: //div[@id="footer"]\r
+title: //div[@id="newsDetailTitle"]
+author: //span[@id="showAuthor"]
+date: //span[@id="showRefDate"]
+
+strip: //div[@id="breadcrumbs"]
+strip: //span[@id="PageTitle"]
+strip: //div[@id="newsDetailAuthorPublish"]
+
+strip: //div[@class="leadPix"]
+
+strip: //span[@id="ctl00_PageTitle"]
+strip: //div[@id="newsDetailTitle"]
+convert_double_br_tags:yes
+
+strip: //div[@id="newsDetailCredential"]
+strip: //div[@id="sidebar2"]
+strip: //div[@id="footer"]
 
 test_url: http://www.positioningmag.com/magazine/details.aspx?id=41083
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 1ea945a..baa9d69
@@ -1,26 +1,26 @@
-title: //div[@class='story_headline']\r
-author: substring-before(substring-after(//div[@class='story_byline'],'By'),'/')\r
-date: //div[@class='story_lastupdate'] \r
-body: //div[@id='story']\r
-strip: //div[@class='story_byline']\r
-strip: //div[@class='story_lastupdate']\r
-strip: //div[@class='story_headline']\r
-strip: //div[@id='abuse']\r
-strip: //h2\r
-strip: //div[@class='pagenumbers_wrap']\r
-strip: //ul[@class='pagenumbers']\r
-strip: //div[starts-with(., 'To report inappropriate comments')]\r
-\r
-strip_id_or_class: story_share\r
-strip_id_or_class: OUTBRAIN\r
-strip_id_or_class: story_box_right\r
-strip: //div[a[@href='http://www.post-gazette.com/pg/12062/1213990-42.stm']]\r
-strip: //ul[@id='pikame']/li[position()>1]\r
-\r
-prune: no\r
-tidy: no\r
-\r
-single_page_link: //a[contains(@href, '?p=0')]\r
-\r
-test_url: http://www.post-gazette.com/stories/sports/penguins/pens-crosby-expects-to-return-thursday-226648/\r
+title: //div[@class='story_headline']
+author: substring-before(substring-after(//div[@class='story_byline'],'By'),'/')
+date: //div[@class='story_lastupdate'] 
+body: //div[@id='story']
+strip: //div[@class='story_byline']
+strip: //div[@class='story_lastupdate']
+strip: //div[@class='story_headline']
+strip: //div[@id='abuse']
+strip: //h2
+strip: //div[@class='pagenumbers_wrap']
+strip: //ul[@class='pagenumbers']
+strip: //div[starts-with(., 'To report inappropriate comments')]
+
+strip_id_or_class: story_share
+strip_id_or_class: OUTBRAIN
+strip_id_or_class: story_box_right
+strip: //div[a[@href='http://www.post-gazette.com/pg/12062/1213990-42.stm']]
+strip: //ul[@id='pikame']/li[position()>1]
+
+prune: no
+tidy: no
+
+single_page_link: //a[contains(@href, '?p=0')]
+
+test_url: http://www.post-gazette.com/stories/sports/penguins/pens-crosby-expects-to-return-thursday-226648/
 test_url: http://www.post-gazette.com/stories/sports/pirates/pirates-fork-over-changes-for-fans-at-pnc-park-629789
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 86cb5d0..0f01149
@@ -1,15 +1,15 @@
-title: //div[@id='divAdnetKeyword']/h1\r
-body: //div[@id='_middle_content_bottom']\r
-\r
-wrap_in(fieldset)://div[@id='_middle_content_bottom_child2']/img\r
-\r
-strip: //div[@id='_middle_content_bottom_child1']\r
-strip: //div[@id='_middle_content_bottom_child4']\r
-strip: //div[@class='cls']\r
-strip: //div[@class='iphoneBox']\r
-strip: //ul[@class='ilgiliHaber']\r
-strip: //div[@class='yorumlar']\r
-strip: //div[@class='kategoriler']\r
-strip: //div[@class='textSize']\r
+title: //div[@id='divAdnetKeyword']/h1
+body: //div[@id='_middle_content_bottom']
+
+wrap_in(fieldset)://div[@id='_middle_content_bottom_child2']/img
+
+strip: //div[@id='_middle_content_bottom_child1']
+strip: //div[@id='_middle_content_bottom_child4']
+strip: //div[@class='cls']
+strip: //div[@class='iphoneBox']
+strip: //ul[@class='ilgiliHaber']
+strip: //div[@class='yorumlar']
+strip: //div[@class='kategoriler']
+strip: //div[@class='textSize']
 strip: //span[@class='tarih']
 test_url: http://www.posta.com.tr/yasam/teknoloji/HaberDetay/Fedailer_Istanbul_da.htm?ArticleID=101044
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7f7a503..3952ea9
@@ -1,8 +1,8 @@
-title: //h1\r
-date: /html/head/meta[@name="date"]/@content\r
-body: //div[@id="featuredlinksbox"]\r
-strip: //div[@class="relatedbox"]\r
-strip: //h1\r
-strip: //br\r
+title: //h1
+date: /html/head/meta[@name="date"]/@content
+body: //div[@id="featuredlinksbox"]
+strip: //div[@class="relatedbox"]
+strip: //h1
+strip: //br
 strip_image_src: "/images"
 test_url: http://www.prb.org/Journalists/Webcasts/2011/military-families.aspx
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 906c27a..9a49557
@@ -1,9 +1,9 @@
-title: //h1\r
-body: //div[@id='left']\r
-strip: //h1\r
-convert_double_br_tags: yes\r
-strip_id_or_class: entry-footer\r
-strip: //h1[. = 'Previously']/following::*\r
-author: string('James Hague')\r
+title: //h1
+body: //div[@id='left']
+strip: //h1
+convert_double_br_tags: yes
+strip_id_or_class: entry-footer
+strip: //h1[. = 'Previously']/following::*
+author: string('James Hague')
 date: //div[@class = 'entry-footer']/text()
 test_url: http://prog21.dadgum.com/105.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index cef811d..82ebf6b
@@ -1,4 +1,4 @@
-body: //div[@class='body']\r
-title: //h2[@class='title']\r
+body: //div[@class='body']
+title: //h2[@class='title']
 date: //span[@class='posted-on']
 test_url: http://prolost.com/blog/2011/10/13/real-men-comp-with-film.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 11e63bd..d141ac9
@@ -1,11 +1,11 @@
-title: //h1[@class="article-title"]\r
-author: //meta[@name="author"]/@content\r
-body: //div[@class="article-full"]\r
-strip_id_or_class: sidebar_inject\r
-strip_id_or_class: callout\r
-strip_id_or_class: content-inset\r
-strip_id_or_class: byline-block\r
-strip_id_or_class: photo-caption\r
-strip_id_or_class: foot-tools\r
+title: //h1[@class="article-title"]
+author: //meta[@name="author"]/@content
+body: //div[@class="article-full"]
+strip_id_or_class: sidebar_inject
+strip_id_or_class: callout
+strip_id_or_class: content-inset
+strip_id_or_class: byline-block
+strip_id_or_class: photo-caption
+strip_id_or_class: foot-tools
 
 test_url: http://www.propublica.org/article/pardon-applicants-benefit-from-friends-in-high-places
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index dedd33d..ba9ce8b
@@ -1,4 +1,4 @@
-author: //p[@class='name']\r
-date: substring-before(//p[@class='date'], ' | ')\r
+author: //p[@class='name']
+date: substring-before(//p[@class='date'], ' | ')
 body: //div[@class='news_single_item']
 test_url: http://www.prosa.dk/aktuelt/nyhed/artikel/internetaktivisten-uden-maske/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 19059c4..739d1b9
@@ -1,26 +1,26 @@
-#basics\r
-author: (//div[contains(@class,'author')])[1]\r
-date: substring-before(//a[@class='issue'], '&mdash;')\r
-#body://div[@class = 'entry']\r
-# use this until move_into support is ready\r
-body: //div[@class = 'entry' or @class='standfirst' or @class='lead_image']\r
-\r
-#moves header image and tagline into body\r
-move_into(//div[@class='entry']/div)://div[@class = 'lead_image']\r
-move_into(//div[@class='entry']/div)://div[@class = 'standfirst']\r
-\r
-\r
-# moves author info to end of text\r
-move_into(//p[strong[string(.) = 'Follow Prospect on Twitter']])://div[@id='sidebar_content']/p/em\r
-\r
-prune: no\r
-\r
-# strips social links\r
-strip_id_or_class:login-status\r
-strip_id_or_class:shareinpost\r
-strip_id_or_class:content_subscribe\r
-strip_id_or_class:postinfo\r
-strip_id_or_class:postutils\r
-strip_id_or_class:comments\r
-strip://strong[string(.) = 'Follow Prospect on Twitter']\r
+#basics
+author: (//div[contains(@class,'author')])[1]
+date: substring-before(//a[@class='issue'], '&mdash;')
+#body://div[@class = 'entry']
+# use this until move_into support is ready
+body: //div[@class = 'entry' or @class='standfirst' or @class='lead_image']
+
+#moves header image and tagline into body
+move_into(//div[@class='entry']/div)://div[@class = 'lead_image']
+move_into(//div[@class='entry']/div)://div[@class = 'standfirst']
+
+
+# moves author info to end of text
+move_into(//p[strong[string(.) = 'Follow Prospect on Twitter']])://div[@id='sidebar_content']/p/em
+
+prune: no
+
+# strips social links
+strip_id_or_class:login-status
+strip_id_or_class:shareinpost
+strip_id_or_class:content_subscribe
+strip_id_or_class:postinfo
+strip_id_or_class:postutils
+strip_id_or_class:comments
+strip://strong[string(.) = 'Follow Prospect on Twitter']
 test_url: http://www.prospectmagazine.co.uk/2011/07/postmodernism-is-dead-va-exhibition-age-of-authenticism/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/protothema.gr.txt b/inc/3rdparty/site_config/standard/protothema.gr.txt
new file mode 100755 (executable)
index 0000000..fae261b
--- /dev/null
@@ -0,0 +1,6 @@
+body: //a[contains(@rel, 'mainphotos')] | //div[contains(@class, 'article-content')]
+
+prune: no
+
+test_url: http://www.protothema.gr//politics/article/326464/diamadopoulou-floridis-kaminis-kai-boutaris-se-ekdilosi-ton-europaion-fileleutheron/
+test_url: http://www.protothema.gr/rss/news/politics/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3da3cea..1bb63c2
@@ -1,9 +1,9 @@
-title: //div[@class="page-title"]/h1\r
-author: //a[@title="View Bio"]\r
-date: substring-before(substring-after(//span[@class="submitted"], 'Published on '), ' by')\r
-strip://div[@class="page-title"]/h1\r
-strip://div[@class="article-abstract"]\r
-strip://div[@class="article-meta"]\r
-strip://div[@id="rightColumn"]\r
+title: //div[@class="page-title"]/h1
+author: //a[@title="View Bio"]
+date: substring-before(substring-after(//span[@class="submitted"], 'Published on '), ' by')
+strip://div[@class="page-title"]/h1
+strip://div[@class="article-abstract"]
+strip://div[@class="article-meta"]
+strip://div[@id="rightColumn"]
 strip://div[@id="inline-content-bottom-left"]
 test_url: http://www.psychologytoday.com/blog/how-happiness/201205/my-quibble-facebook
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index fa09947..8f32d7a
@@ -1,4 +1,4 @@
-author: //meta[@name="Author"]\r
-date: //meta[@name="Date"]\r
+author: //meta[@name="Author"]
+date: //meta[@name="Date"]
 strip: //h5
 test_url: http://www.publications.parliament.uk/pa/ld201011/ldhansrd/text/111109-0003.htm
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/publico.pt.txt b/inc/3rdparty/site_config/standard/publico.pt.txt
new file mode 100755 (executable)
index 0000000..bb6a05e
--- /dev/null
@@ -0,0 +1,12 @@
+title: //h1[@class="entry-title"]
+author: //span[@class="author"]
+body: //article[@itemtype="http://schema.org/Article"]
+date: //time[@itemprop="dateCreated"]
+
+strip: //header[@class="entry-header single-header"]
+strip: //aside[@class="entry-assets"]
+strip: //div[@class="entry-options entry-options-above group"]
+strip: //div[@class="entry-options entry-options-below group"]
+
+convert_double_br_tags: yes
+test_url: http://www.publico.pt/politica/noticia/passos-diz-que-se-limitacao-de-mandatos-fosse-para-todos-os-concelhos-estaria-claro-na-lei-1577691
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 126f9e2..0f1392a
@@ -1,4 +1,4 @@
-title: //div[@class='title']\r
-body: //div[@class='body']\r
+title: //div[@class='title']
+body: //div[@class='body']
 next_page_link: //div[@class='source']/text()[contains(., 'page')]/following-sibling::a
 test_url: http://purpleplanetmedia.com/eye/inte/ngaiman.php
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/qctimes.com.txt b/inc/3rdparty/site_config/standard/qctimes.com.txt
new file mode 100755 (executable)
index 0000000..3c3edfe
--- /dev/null
@@ -0,0 +1,5 @@
+# this site seems to work OK in the web view, but only occasionally in the instapaper app itself.
+
+body: //div[@class='entry-content']
+author: //span[@class='byline']
+test_url: http://qctimes.com/news/local/woman-faces-perjury-charges-in-meth-case/article_83f4c470-956a-11e2-a921-001a4bcf887a.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a366c1b..c17fb31
@@ -1,14 +1,14 @@
-title: //div[contains(@class, "hentry")]/h3\r
-\r
-author: //div[contains(@class, "hentry")]/h2[contains(@class, "author_bio")]\r
-\r
-date: substring-before(substring-after(normalize-space(//p[contains(@class, "postmetadata")]/small), "was posted on "), " and is filed under")\r
-\r
-body: //div[contains(@class, "entry")]\r
-\r
-strip_id_or_class: addtoany_share_save_container\r
-strip_id_or_class: postmetadata\r
-strip_id_or_class: author_bio\r
-strip_id_or_class: author_bio_2\r
+title: //div[contains(@class, "hentry")]/h3
+
+author: //div[contains(@class, "hentry")]/h2[contains(@class, "author_bio")]
+
+date: substring-before(substring-after(normalize-space(//p[contains(@class, "postmetadata")]/small), "was posted on "), " and is filed under")
+
+body: //div[contains(@class, "entry")]
+
+strip_id_or_class: addtoany_share_save_container
+strip_id_or_class: postmetadata
+strip_id_or_class: author_bio
+strip_id_or_class: author_bio_2
 strip: //div[contains(@class, "hentry")]/h3
 test_url: http://www.quantumdiaries.org/2011/10/25/piling-up/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 655f8b8..fc7ab37
@@ -1,3 +1,3 @@
-body: //div[@class='copy']\r
+body: //div[@class='copy']
 title: //h1[@class='hed']
 test_url: http://www.queerty.com/rawhide-radicals-meet-five-heroes-from-the-leather-community-20120302/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index fae4e6a..fb09a8f
@@ -1,6 +1,6 @@
-title: //h1\r
-\r
-body: //div[@class="cuerpoArticulo"]\r
-\r
+title: //h1
+
+body: //div[@class="cuerpoArticulo"]
+
 
 test_url: http://www.quepasa.cl/magazine/articulo/print.html?id=5299
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3d34f2f..732d12d
@@ -1,17 +1,17 @@
-tidy: no\r
-prune: no\r
-body: //div[contains(@class, 'main_col')]\r
-title: //h1\r
-\r
-strip_id_or_class: hidden\r
-strip_id_or_class: item_action_bar\r
-strip_id_or_class: answer_voters\r
-strip_id_or_class: question_topics\r
-strip_id_or_class: answer_header_text\r
-strip_id_or_class: editor_link\r
-strip_id_or_class: view_tag\r
-strip_id_or_class: include_details\r
-strip_id_or_class: sig_edit\r
-strip_id_or_class: profile_photo_img\r
+tidy: no
+prune: no
+body: //div[contains(@class, 'main_col')]
+title: //h1
+
+strip_id_or_class: hidden
+strip_id_or_class: item_action_bar
+strip_id_or_class: answer_voters
+strip_id_or_class: question_topics
+strip_id_or_class: answer_header_text
+strip_id_or_class: editor_link
+strip_id_or_class: view_tag
+strip_id_or_class: include_details
+strip_id_or_class: sig_edit
+strip_id_or_class: profile_photo_img
 
 test_url: http://www.quora.com/What-everyday-habit-do-you-wish-you-had-developed-earlier-in-life
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/racjonalista.pl.txt b/inc/3rdparty/site_config/standard/racjonalista.pl.txt
new file mode 100755 (executable)
index 0000000..19c719d
--- /dev/null
@@ -0,0 +1,5 @@
+author: /html/body/center/b
+date: /html/body/table/tr[2]/td/i
+single_page_link: //*[@id='oTxt']/table[3]/tr[2]/td/a[1]
+
+test_url: http://www.racjonalista.pl/kk.php/s,7214/q,Geneza.szubrawstwa
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 99ab4bb..fa66b81
@@ -1,3 +1,3 @@
-date://span[@class='date']\r
+date://span[@class='date']
 body://div[@class='entry-body']
 test_url: http://radar.oreilly.com/2012/01/genome-cloud-digital-humanities-hadoop-world-strata.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e2617dc..2496dda
@@ -1,3 +1,3 @@
-body: //div[@class='body']\r
+body: //div[@class='body']
 title: //div[@class='newsstory']/h2
 test_url: http://www.radionz.co.nz/news/stories/2010/07/18/12481029a86d
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f0c91c5..6970a74
@@ -1,11 +1,11 @@
-title: //div[@id='center-col']/h4\r
-author: substring-before(//title,'In')\r
-date: substring-after(//div[@class='commenttext']/span,'#')\r
-body: //div[@id='center-col']\r
-strip: //div[@id='center-col']/h4\r
-strip: //div[@class='graytext']\r
-\r
-# Anthony Perez-Sanz 2012.3.14\r
-# Removed long gif from the end\r
-strip: //img[@src='http://www.randsinrepose.com/spreader.gif']\r
+title: //div[@id='center-col']/h4
+author: substring-before(//title,'In')
+date: substring-after(//div[@class='commenttext']/span,'#')
+body: //div[@id='center-col']
+strip: //div[@id='center-col']/h4
+strip: //div[@class='graytext']
+
+# Anthony Perez-Sanz 2012.3.14
+# Removed long gif from the end
+strip: //img[@src='http://www.randsinrepose.com/spreader.gif']
 test_url: http://www.randsinrepose.com/archives/2012/03/13/hacking_is_important.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8033729..2d5aba7
@@ -1,3 +1,3 @@
-single_page_link: //link[@rel='canonical']/@href\r
+single_page_link: //link[@rel='canonical']/@href
 
 test_url: http://www.readability.com/read?url=http://feeds.gawker.com/~r/lifehacker/full/~3/jaxAjSay_Rw/add-a-rain-gutter-to-a-picnic-table-for-a-built+in-drink-cooler
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ff799aa..e2aabda
@@ -1,8 +1,8 @@
-title: //h1[@class="titlelink"]\r
-date: //span[@class="timestamp"]/@data-published\r
-body: //div[@class="asset-content"]\r
-strip_id_or_class: related-entries\r
-strip_id_or_class: like-and-retweet\r
-\r
-author: //div[@id="submeta"]/a[1]\r
+title: //h1[@class="titlelink"]
+date: //span[@class="timestamp"]/@data-published
+body: //div[@class="asset-content"]
+strip_id_or_class: related-entries
+strip_id_or_class: like-and-retweet
+
+author: //div[@id="submeta"]/a[1]
 test_url: http://www.readwriteweb.com/archives/why_facebook_terrifies_google.php
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index fe5ab67..1a33610
@@ -1,3 +1,3 @@
-body: //div[@id='_ctl12__ctl0_Article']\r
-prune: no\r
+body: //div[@id='_ctl12__ctl0_Article']
+prune: no
 autodetect_on_failure: no
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8c8f0e0..a01aaef
@@ -1,10 +1,10 @@
-body: //div[@class='recipedetailsleft' or @id='recipePrepAndServe' or @id='recipeingredients']\r
-\r
-strip_id_or_class: location\r
-strip_id_or_class: savings\r
-strip_id_or_class: recipeDetailDescButton\r
-\r
-prune: no\r
-tidy: no\r
-\r
+body: //div[@class='recipedetailsleft' or @id='recipePrepAndServe' or @id='recipeingredients']
+
+strip_id_or_class: location
+strip_id_or_class: savings
+strip_id_or_class: recipeDetailDescButton
+
+prune: no
+tidy: no
+
 test_url: http://www.recipe.com/avocado-basil-pasta/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3ae959b..0403ee8
@@ -1,5 +1,5 @@
-body: //div[@class='short-text' or starts-with(@id, 'news-id-')]\r
-prune: no\r
-tidy: no\r
-\r
+body: //div[@class='short-text' or starts-with(@id, 'news-id-')]
+prune: no
+tidy: no
+
 test_url: http://red-hot-girls.com/2011/06/10/the_red_hot_natalia_maria_53_pics.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 58ca9ec..8871f56
@@ -1,16 +1,20 @@
-# This setup grabs the text from a Reddit self post. It ignores all comments etc.\r
-\r
-title: //p[@class="title"]/a/text()\r
-\r
-author: //p[@class="tagline"]/a\r
-\r
-# this doesn't work for some reason...?\r
-date: //p[@class="tagline"]//@datetime\r
-\r
-body: //div[@class="expando"]//div[@class="usertext-body"]\r
-\r
-strip_id_or_class: tagline\r
-strip_id_or_class: unvotable-message\r
-strip_id_or_class: buttons\r
-\r
-test_url: http://www.reddit.com/r/truegaming/comments/wfe7r/i_wrote_about_the_problems_i_honestly_feel_that/
\ No newline at end of file
+# This setup grabs the text from a Reddit self post. It ignores all comments etc.
+
+title: //p[@class="title"]/a/text()
+
+author: //p[@class="tagline"]/a
+
+# this doesn't work for some reason...?
+date: //p[@class="tagline"]//@datetime
+
+body: //div[@class="expando"]//div[@class="usertext-body"]
+
+strip_id_or_class: tagline
+strip_id_or_class: unvotable-message
+strip_id_or_class: buttons
+
+# follow the posted link (unless it's a self post - relative URL, no http://)
+single_page_link: //p[@class="title"]/a[contains(@href, 'http://')]
+
+test_url: http://www.reddit.com/r/truegaming/comments/wfe7r/i_wrote_about_the_problems_i_honestly_feel_that/
+test_url: http://www.reddit.com/r/worldnews/comments/1as37r/twelve_north_korean_soldiers_attempting_to_defect/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 12a9618..66cc170
@@ -1,13 +1,13 @@
-title: //div[@class='posthead']//h2\r
-body: //div[contains(@class, 'postcontent') or @class='posthead']\r
-author: //div[@class='posthead']//a[@rel='author']\r
-\r
-strip: //div[@class='posthead']//h2\r
-replace_string(>Advertisements</div>): ></div>\r
-replace_string(<p>You can follow us on): <p style="display:none;">\r
-strip_id_or_class: likeThisPost\r
-\r
-prune: no\r
-tidy: no\r
-\r
+title: //div[@class='posthead']//h2
+body: //div[contains(@class, 'postcontent') or @class='posthead']
+author: //div[@class='posthead']//a[@rel='author']
+
+strip: //div[@class='posthead']//h2
+replace_string(>Advertisements</div>): ></div>
+replace_string(<p>You can follow us on): <p style="display:none;">
+strip_id_or_class: likeThisPost
+
+prune: no
+tidy: no
+
 test_url: http://www.redmondpie.com/how-to-play-music-directly-from-home-screen-folders-on-iphone/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4f195a0..8541a0d
@@ -1,20 +1,20 @@
-# Think there might be something up with your parser that it strips out 'print' from the title :)\r
-\r
-title: //meta[@name='title']/@content\r
-author: //meta[@name='author']/@content\r
-date: //meta[@name='date']/@content\r
-\r
-body: //div[@class='articleText']\r
-\r
-strip: //div[contains(@class, 'day')]\r
-strip: //div[contains(@class, 'month')]\r
-strip: //div[contains(@class, 'year')]\r
-strip: //div[contains(@class, 'time')]\r
-strip: //h1[@class='gl_headline']\r
-strip: //div[@class='byline']\r
-strip: //div[@id='left_ear']\r
-strip: //div[@id='right_ear']\r
-strip: //div[contains(@class, 'PopularPosts')]\r
-strip ://div[@class='discuss_page_break']\r
+# Think there might be something up with your parser that it strips out 'print' from the title :)
+
+title: //meta[@name='title']/@content
+author: //meta[@name='author']/@content
+date: //meta[@name='date']/@content
+
+body: //div[@class='articleText']
+
+strip: //div[contains(@class, 'day')]
+strip: //div[contains(@class, 'month')]
+strip: //div[contains(@class, 'year')]
+strip: //div[contains(@class, 'time')]
+strip: //h1[@class='gl_headline']
+strip: //div[@class='byline']
+strip: //div[@id='left_ear']
+strip: //div[@id='right_ear']
+strip: //div[contains(@class, 'PopularPosts')]
+strip ://div[@class='discuss_page_break']
 strip ://div[contains(@class, 'p-content_TagList')]
 test_url: http://redtape.msnbc.msn.com/_news/2011/09/28/8020661-sprint-raises-fee-but-wont-free-users-from-two-year-contracts?preview=true
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4a9fab6..98a2bbf
@@ -1,5 +1,5 @@
-body://div[@class='storycontent']\r
-date://div[@class='date']\r
-strip://li[@class='sharing_label']\r
+body://div[@class='storycontent']
+date://div[@class='date']
+strip://li[@class='sharing_label']
 strip://a[@class='FlattrButton']
 test_url: http://reflets.info/orange-nokia-siemens-deep-packet-inspection/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0b3dee1..a5361fd
@@ -1,3 +1,3 @@
-title: //*[@class='entry-title']\r
+title: //*[@class='entry-title']
 body: //div[@class='entry-content']
 test_url: http://www.renenekuda.cz/recept-na-produktivitu/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/resume.se.txt b/inc/3rdparty/site_config/standard/resume.se.txt
new file mode 100755 (executable)
index 0000000..17122a9
--- /dev/null
@@ -0,0 +1,9 @@
+date: //meta[@name='bi3dPubDate']/@content
+body: //div[contains(@class, 'articleBody')]
+
+prune: no
+
+test_url: http://www.resume.se/nyheter/media/2013/09/18/kvallspress-och-tv-slass-om-playtittarna-men-youtube-ohotat-storst/
+test_url: http://www.resume.se/nyheter/media/2013/09/18/cecilia-blankens-lamnar-mama-for-konkurrent/
+test_url: http://www.resume.se/nyheter/reklam/2013/09/18/ravelli-trodde-jag-var-med-i-blasningen/
+test_url: http://www.resume.se/rss-nyheter
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 1264ee3..a0a2394
@@ -1,6 +1,6 @@
-single_page_link://a[contains(@href, 'print')]\r
-\r
-# Grab metadata from the "printer-friendly" page, after specifying single_page_link\r
-title://h2\r
+single_page_link://a[contains(@href, 'print')]
+
+# Grab metadata from the "printer-friendly" page, after specifying single_page_link
+title://h2
 date://cite
 test_url: http://www.retrieverweekly.com/?cmd=displaystory&story_id=7548&format=html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c5c94a4..7411e62
@@ -1,10 +1,10 @@
-title: //h1[@class='headline3']\r
-author: substring-after(//p[@class="byline"], 'By ')\r
-date: //meta[@name="REVISION_DATE"]/@content\r
-body: //div[@id='articleImage' or @id='frame_fd1fade'] | //span[@id='articleText'] | //div[@class='pageNavigation']\r
-strip: //li[@class='next']\r
-strip: //span[@class='articleLocation']\r
-prune: no\r
-tidy: no\r
-\r
+title: //h1[@class='headline3']
+author: substring-after(//p[@class="byline"], 'By ')
+date: //meta[@name="REVISION_DATE"]/@content
+body: //div[@id='articleImage' or @id='frame_fd1fade'] | //span[@id='articleText'] | //div[@class='pageNavigation']
+strip: //li[@class='next']
+strip: //span[@class='articleLocation']
+prune: no
+tidy: no
+
 test_url: http://www.reuters.com/article/2011/04/08/us-ivorycoast-killings-idUSTRE73732A20110408
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index dbe4293..30e627d
@@ -1,10 +1,10 @@
-title: //div[@class="article_header"]/h3\r
-author: //div[@class="autor"]/p/*\r
-date: substring-after(substring-after(//div[@class="flt-left"],"> "), "> ")\r
-\r
-move_into(//div[@class="new_article"]): //div[@class="img_article"]/img\r
-\r
-body: //div[@class="article_content"]\r
-convert_double_br_tags: yes\r
+title: //div[@class="article_header"]/h3
+author: //div[@class="autor"]/p/*
+date: substring-after(substring-after(//div[@class="flt-left"],"> "), "> ")
+
+move_into(//div[@class="new_article"]): //div[@class="img_article"]/img
+
+body: //div[@class="article_content"]
+convert_double_br_tags: yes
 
 test_url: http://revistapiaui.estadao.com.br/edicao-68/questoes-latino-americanas/filhos-da-guerra-suja
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/rezeptwelt.de.txt b/inc/3rdparty/site_config/standard/rezeptwelt.de.txt
new file mode 100644 (file)
index 0000000..2093573
--- /dev/null
@@ -0,0 +1,5 @@
+body: //div[@class='step-content'] | //div[@class='global-active ingredients-box']
+title: //div[@class='step-1-container']
+
+tidy: no
+test_url: http://www.rezeptwelt.de/backen-herzhaft-rezepte/w%C3%BCrstchen-schlangen/530372
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 904a11d..b0ee92d
@@ -1,5 +1,5 @@
-body: //div[@id="post"]\r
-strip: //div[@id="author-description"]\r
-date: //span[@class="entry-date"]\r
+body: //div[@id="post"]
+strip: //div[@id="author-description"]
+date: //span[@class="entry-date"]
 author: //span[@class="author vcard"]
 test_url: http://richardmuscat.wordpress.com/2011/06/20/the-price-of-free/?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+TheBrooksReview+%28The+Brooks+Review%29
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 82cfaf2..ed72915
@@ -1,5 +1,5 @@
-body: //div[@class='post-body entry-content']\r
-strip: //div[@id='lws_0']\r
-prune: no\r
-\r
+body: //div[@class='post-body entry-content']
+strip: //div[@id='lws_0']
+prune: no
+
 test_url: http://ritemail.blogspot.com/2011/06/hayden-panettiere-candids-in-los.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ritholtz.com.txt b/inc/3rdparty/site_config/standard/ritholtz.com.txt
new file mode 100755 (executable)
index 0000000..d598479
--- /dev/null
@@ -0,0 +1,5 @@
+title: //div[@class='post']/h2
+author: substring-before(substring-after(//div[@class='alignright']/small, 'By '),'-')
+date: substring-after(//div[@class='alignright']/small, '-')
+strip: //div[@class='alignleft']
+test_url: http://www.ritholtz.com/blog/2012/09/situational-awareness/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/robertsspaceindustries.com.txt b/inc/3rdparty/site_config/standard/robertsspaceindustries.com.txt
new file mode 100755 (executable)
index 0000000..b0b90fb
--- /dev/null
@@ -0,0 +1,4 @@
+strip_id_or_class: 'sharedaddy'
+strip_id_or_class: 'respond'
+strip_id_or_class: 'meta'
+test_url: http://www.robertsspaceindustries.com/news-update-ai-pilots/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/robots.thoughtbot.com.txt b/inc/3rdparty/site_config/standard/robots.thoughtbot.com.txt
new file mode 100755 (executable)
index 0000000..da5b7bd
--- /dev/null
@@ -0,0 +1,5 @@
+body: //section[@class='post text']
+title: //h1[@class='title']
+date: //p[@class='post-date']
+strip: //section[@class='meta-info']
+test_url: http://robots.thoughtbot.com/post/32455387133/four-phase-test
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3035527..f8c9541
@@ -1,8 +1,8 @@
-title: //h2\r
-\r
-strip: //div[ contains(@class, 'respond') ]  |  //h2  |  //h1\r
-\r
-date: substring-after(//p[@class='info'], ' on ')\r
-\r
+title: //h2
+
+strip: //div[ contains(@class, 'respond') ]  |  //h2  |  //h1
+
+date: substring-after(//p[@class='info'], ' on ')
+
 author: //p[@class='info']//a
 test_url: http://www.rockpapershotgun.com/2010/07/29/rps-half-verdict-starcraft-2/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index abe7035..eef8b11
@@ -1,7 +1,7 @@
-author: //article/header/span[@class='author']\r
-title://article/header/h1\r
-body: //article\r
-strip: //article/header\r
-strip: //article/p[@class='metadata']\r
+author: //article/header/span[@class='author']
+title://article/header/h1
+body: //article
+strip: //article/header
+strip: //article/p[@class='metadata']
 footnotes: yes
 test_url: http://rodrigo.sharpcube.com/2010/06/20/using-and-sharing-a-vpn-connection-on-your-mac/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 2679233..da21510
@@ -1,8 +1,8 @@
-title: substring-before(//title,':')\r
-author: substring-after(substring-before(//div[@class='text']/b,'/'),'BY')\r
-\r
-body: //div[@class='text']\r
-\r
-strip: //a[contains(@href,'printart')]\r
+title: substring-before(//title,':')
+author: substring-after(substring-before(//div[@class='text']/b,'/'),'BY')
+
+body: //div[@class='text']
+
+strip: //a[contains(@href,'printart')]
 strip_id_or_class: enlarge_photo
 test_url: http://rogerebert.com/apps/pbcs.dll/article?AID=/20120411/REVIEWS/120419998/1005/GLOSSARY
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d618c23..2365c42
@@ -1,6 +1,6 @@
-body: //div[contains(@class, 'inhoud')]\r
-date: //span[@class ='published']\r
-author: //span[@class ='author']\r
-strip: //div[@class = 'grid_2']\r
-strip: //div[@class = 'block-citation-text']\r
+body: //div[contains(@class, 'inhoud')]
+date: //span[@class ='published']
+author: //span[@class ='author']
+strip: //div[@class = 'grid_2']
+strip: //div[@class = 'block-citation-text']
 test_url: http://www.rolfinjapan.nl/2011/06/duizend-kraanvogels/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b5b29fe..ef32769
@@ -1,11 +1,11 @@
-body: //div[@class='movie_content_area']\r
-strip_id_or_class: tomatometer_bar_help\r
-strip_id_or_class: critic-links\r
-strip_id_or_class: top-critics-numbers\r
-strip_id_or_class: fan_side\r
-strip_id_or_class: fblike\r
-strip_id_or_class: rating_widget\r
-strip_id_or_class: friend_reviews\r
-prune: no\r
+body: //div[@class='movie_content_area']
+strip_id_or_class: tomatometer_bar_help
+strip_id_or_class: critic-links
+strip_id_or_class: top-critics-numbers
+strip_id_or_class: fan_side
+strip_id_or_class: fblike
+strip_id_or_class: rating_widget
+strip_id_or_class: friend_reviews
+prune: no
 
 test_url: http://www.rottentomatoes.com/m/thor/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f2f0039..a012a67
@@ -1,5 +1,5 @@
-body: //div[@class='content']\r
-strip: //p[@class='postmeta']/following::*\r
-strip: //p[@class='postmeta']\r
+body: //div[@class='content']
+strip: //p[@class='postmeta']/following::*
+strip: //p[@class='postmeta']
 strip: //p[@align='left']
 test_url: http://www.roughtype.com/archives/2012/01/power_to_the_da.php
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e7f29bb..9ddbf0f
@@ -1,4 +1,4 @@
-body:  //div[@id='news-text']\r
-prune: no\r
-test_url: http://www.rpgsite.net/news/1964-tetsuya-nomura-says-hell-soon-show-the-future-of-final-fantasy\r
+body:  //div[@id='news-text']
+prune: no
+test_url: http://www.rpgsite.net/news/1964-tetsuya-nomura-says-hell-soon-show-the-future-of-final-fantasy
 test_url: http://www.rpgsite.net/news/1965-new-atelier-totori-plus-screens-and-artwork
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d9df768..d9d9a43
@@ -1,9 +1,9 @@
-author: //div[contains(@class, 'author_text')]/h4/text()\r
-date: //li[@class='date']\r
-\r
-# stripping excessive tags\r
-strip: //div[contains(@class, 'entry_meta')]\r
-strip: //div[contains(@class, 'single_meta')]\r
-strip: //br[contains(@class, 'clear')]\r
+author: //div[contains(@class, 'author_text')]/h4/text()
+date: //li[@class='date']
+
+# stripping excessive tags
+strip: //div[contains(@class, 'entry_meta')]
+strip: //div[contains(@class, 'single_meta')]
+strip: //br[contains(@class, 'clear')]
 strip: //h3[contains(., 'Komentarz')]
 test_url: http://rubysfera.pl/2011/09/10-porad-o-rvm/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7a21c4a..e54b0f0
@@ -1,6 +1,6 @@
-title: //h1[@class='entry-title']\r
-author: ///span[@class='author vcard']\r
-date: //abbr[@class='published']\r
-body: //div[@class='entry-content']\r
+title: //h1[@class='entry-title']
+author: ///span[@class='author vcard']
+date: //abbr[@class='published']
+body: //div[@class='entry-content']
 
 test_url: http://ruhlman.com/2009/05/cookbooks-that-teach/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c036dcf..43e130a
@@ -1,3 +1,3 @@
-author: //a[@class='author']\r
+author: //a[@class='author']
 tidy: no
 test_url: http://ruttloff.org/2012/06/13/intervention
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 04f8afd..2b47f74
@@ -1,11 +1,11 @@
-title: //meta[@property='og:title']/@content\r
-author: (//span[@class="byline"]/a)[1]\r
-date: //span[contains(@class, "toLocalTime")]\r
-body: (//div[contains(@class, "articleInner")]//img[contains(@src, 'media.salon.com') and contains(@src, '460x')])[1] | //div[contains(@class, "articleContent") or contains(@class, "writerMeta")]\r
-\r
-prune: no\r
-\r
-# deal with singleton links\r
-single_page_link: (//h1/a[contains(@href, '/singleton')])[1]\r
-\r
+title: //meta[@property='og:title']/@content
+author: (//span[@class="byline"]/a)[1]
+date: //span[contains(@class, "toLocalTime")]
+body: (//div[contains(@class, "articleInner")]//img[contains(@src, 'media.salon.com') and contains(@src, '460x')])[1] | //div[contains(@class, "articleContent") or contains(@class, "writerMeta")]
+
+prune: no
+
+# deal with singleton links
+single_page_link: (//h1/a[contains(@href, '/singleton')])[1]
+
 test_url: http://www.salon.com/2011/10/25/occupying_the_rust_belt/singleton/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3106748..464f99f
@@ -1,6 +1,6 @@
-body: //p[@class='teaser1 darkgrey myriad']\r
-move_into(//p[@class='teaser1 darkgrey myriad']): //div[@class='artikel clear']\r
-strip: //div[@class='hidden']\r
-strip: //div[@id='article_related_source']\r
-\r
+body: //p[@class='teaser1 darkgrey myriad']
+move_into(//p[@class='teaser1 darkgrey myriad']): //div[@class='artikel clear']
+strip: //div[@class='hidden']
+strip: //div[@id='article_related_source']
+
 test_url: http://www.salzburg.com/nachrichten/oesterreich/politik/sn/artikel/deutliche-nachbesserungen-bei-lehrerdienstrecht-19469/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sanpedrosun.com.txt b/inc/3rdparty/site_config/standard/sanpedrosun.com.txt
new file mode 100755 (executable)
index 0000000..3f19cce
--- /dev/null
@@ -0,0 +1,10 @@
+title: //div[contains(@class, 'post')]//h1
+date: //div[contains(@class, 'post')]//h6
+body: //div[contains(@class, 'entry')]
+strip_id_or_class: post_stats
+strip_id_or_class: related-posts
+strip_id_or_class: after_story
+prune: no
+
+test_url: http://www.sanpedrosun.com/community-and-society/2013/06/05/little-angelspre-school-talent-show/
+test_url: http://www.sanpedrosun.com/feed/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 354f591..5a5605d
@@ -1,25 +1,25 @@
-title://h1\r
-\r
-# my section divs seem to interfere with the Instapaper parser, so I ditch 'em\r
-dissolve://div[contains(@class, 'section')]\r
-\r
-#these don't seem to be necessary, but just in case\r
-strip_id_or_class:'masthead'\r
-strip_id_or_class:'footer'\r
-\r
-#again, Instapaper seems to understand where my content is, but just in case\r
-body://div[@id='content']\r
-\r
-# in general, I want the Instapaper view to look like my print CSS, so I remove things specified for the screen or non-printing\r
-strip_id_or_class:'screen-only'\r
-strip_id_or_class:'no-print'\r
-\r
-#other misc removals and simplifications\r
-strip_id_or_class:'popup'\r
-strip_id_or_class:'ZoomSpin'\r
-\r
-#I have a lot of content in sidebars and "meta" asides that can work inline just fine, but has to be distinguished somehow with some minimal formatting, so I put them in blockquotes\r
-wrap_in(blockquote)://div[contains(@class, 'sidebar')]\r
-wrap_in(blockquote)://div[contains(@class, 'meta')]\r
+title://h1
+
+# my section divs seem to interfere with the Instapaper parser, so I ditch 'em
+dissolve://div[contains(@class, 'section')]
+
+#these don't seem to be necessary, but just in case
+strip_id_or_class:'masthead'
+strip_id_or_class:'footer'
+
+#again, Instapaper seems to understand where my content is, but just in case
+body://div[@id='content']
+
+# in general, I want the Instapaper view to look like my print CSS, so I remove things specified for the screen or non-printing
+strip_id_or_class:'screen-only'
+strip_id_or_class:'no-print'
+
+#other misc removals and simplifications
+strip_id_or_class:'popup'
+strip_id_or_class:'ZoomSpin'
+
+#I have a lot of content in sidebars and "meta" asides that can work inline just fine, but has to be distinguished somehow with some minimal formatting, so I put them in blockquotes
+wrap_in(blockquote)://div[contains(@class, 'sidebar')]
+wrap_in(blockquote)://div[contains(@class, 'meta')]
 wrap_in(blockquote)://p[contains(@class, 'meta')]
 test_url: http://saveyourself.ca/tutorials/low-back-pain.php
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sayidaty.net.txt b/inc/3rdparty/site_config/standard/sayidaty.net.txt
new file mode 100755 (executable)
index 0000000..2d9f188
--- /dev/null
@@ -0,0 +1,4 @@
+date: //meta[@property='article:published_time']/@content
+body: (//div[contains(@class, 'article-slider')]//img)[1] | //div[contains(@class, 'bottom-article-con')]
+
+test_url: http://www.sayidaty.net/taxonomy/term/10/all/feed
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c213843..41b3675
@@ -1,28 +1,28 @@
-title: //h1[@id='stream_title']\r
-\r
-# Author and date don't work\r
-author: //div[@class='byline']\r
-date: //div[@class='date-stamp']\r
-\r
-body: //div[@class='node-article']\r
-\r
-strip_id_or_class: fb-like-box\r
-strip_id_or_class: stream-fb-like\r
-strip_id_or_class: social-meta\r
-strip_id_or_class: social-spoken\r
-strip_id_or_class: twitter-share-button\r
-strip_id_or_class: twitter-follow-button\r
-strip_id_or_class: spinner_node_list\r
-strip_id_or_class: node-sort-link\r
-strip_id_or_class: stream_title\r
-strip_id_or_class: stream_summary\r
-strip_id_or_class: update-count-container\r
-strip_id_or_class: major-updates\r
-strip_id_or_class: newsletter-slide\r
-strip_id_or_class: author-mini-profile\r
-strip_id_or_class: byline\r
-strip_id_or_class: header\r
-strip_id_or_class: footer\r
-\r
+title: //h1[@id='stream_title']
+
+# Author and date don't work
+author: //div[@class='byline']
+date: //div[@class='date-stamp']
+
+body: //div[@class='node-article']
+
+strip_id_or_class: fb-like-box
+strip_id_or_class: stream-fb-like
+strip_id_or_class: social-meta
+strip_id_or_class: social-spoken
+strip_id_or_class: twitter-share-button
+strip_id_or_class: twitter-follow-button
+strip_id_or_class: spinner_node_list
+strip_id_or_class: node-sort-link
+strip_id_or_class: stream_title
+strip_id_or_class: stream_summary
+strip_id_or_class: update-count-container
+strip_id_or_class: major-updates
+strip_id_or_class: newsletter-slide
+strip_id_or_class: author-mini-profile
+strip_id_or_class: byline
+strip_id_or_class: header
+strip_id_or_class: footer
+
 # Works, but "no text" errors on: http://www.sbnation.com/nba/2012/3/9/2856780/nba-scores-dwight-howard-bulls-magic-mavs-suns
 test_url: http://www.sbnation.com/nba/2012/3/13/2867226/dwight-howard-trade-rumors-2012-faq-orlando-magic
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 67181b6..0074a86
@@ -1,25 +1,25 @@
-author: //p[@class='mastname']\r
-\r
-body: //div[@class='indivbody']\r
-date: //div[@class='indivbody']/h2[1]\r
-\r
-# Remove blog title. Specify first occurrence in case h1 is used in article\r
-strip: //div[@class='indivbody']/h1[1]\r
-\r
-# Remove blog description (the first p element)\r
-strip: //div[@class='indivbody']/p[1]\r
-\r
-# Remove navigation (second p element)\r
-strip: //div[@class='indivbody']/p[2]\r
-\r
-# Remove duplicate of article title. Specify first occurrence in case h3 is used in article\r
-strip: //div[@class='indivbody']/h3[1]\r
-\r
-# Remove publishing date, it's extracted by rule above\r
-strip: //div[@class='indivbody']/h2[1]\r
-\r
-# Remove duplicate of date at end, and newsletter signup\r
-strip: //p[@class='posted']\r
-\r
-# Leave date at top\r
+author: //p[@class='mastname']
+
+body: //div[@class='indivbody']
+date: //div[@class='indivbody']/h2[1]
+
+# Remove blog title. Specify first occurrence in case h1 is used in article
+strip: //div[@class='indivbody']/h1[1]
+
+# Remove blog description (the first p element)
+strip: //div[@class='indivbody']/p[1]
+
+# Remove navigation (second p element)
+strip: //div[@class='indivbody']/p[2]
+
+# Remove duplicate of article title. Specify first occurrence in case h3 is used in article
+strip: //div[@class='indivbody']/h3[1]
+
+# Remove publishing date, it's extracted by rule above
+strip: //div[@class='indivbody']/h2[1]
+
+# Remove duplicate of date at end, and newsletter signup
+strip: //p[@class='posted']
+
+# Leave date at top
 test_url: http://www.schneier.com/blog/archives/2010/12/security_in_202.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 89ebfe0..c4b2183
@@ -1,11 +1,11 @@
-body: //div[@class="storybox"]\r
-title: //div[@class="storybox"]//h1\r
-strip: //p[@class='metaline']\r
-date: substring-after(//*[@class='time'],'Erstellt am')\r
-strip: //div[@class='fact']\r
-strip: //p[@class='backlink']\r
-strip: //div[@class='mailto']\r
-strip: //div[@id='forumDisclaimer']\r
-strip: //div[@class='forum']\r
+body: //div[@class="storybox"]
+title: //div[@class="storybox"]//h1
+strip: //p[@class='metaline']
+date: substring-after(//*[@class='time'],'Erstellt am')
+strip: //div[@class='fact']
+strip: //p[@class='backlink']
+strip: //div[@class='mailto']
+strip: //div[@id='forumDisclaimer']
+strip: //div[@class='forum']
 
 test_url: http://science.orf.at/stories/1700900/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 08c1684..b0dec3d
@@ -1,12 +1,12 @@
-single_page_link: //div[@class='c2c1']/div[@class='toptheme further line']//ul//li/a\r
-\r
-author: //div[@class='details clear']//a[@class='hi']\r
-body: //div[@class='title']\r
-strip: //p[@class='entrypagination']\r
-strip: //p[@class='details_top']\r
-date: //p[@class='details_top']\r
-title: //div[@class='title']/h1\r
-strip: //p[@class='details']\r
-strip: //p[@class='details_bottom']\r
+single_page_link: //div[@class='c2c1']/div[@class='toptheme further line']//ul//li/a
+
+author: //div[@class='details clear']//a[@class='hi']
+body: //div[@class='title']
+strip: //p[@class='entrypagination']
+strip: //p[@class='details_top']
+date: //p[@class='details_top']
+title: //div[@class='title']/h1
+strip: //p[@class='details']
+strip: //p[@class='details_bottom']
 
 test_url: http://www.scienceblogs.de/astrodicticum-simplex/2011/10/weltuntergang-reloaded-das-jungste-gericht-findet-am-21-oktober-statt.php
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 75a5282..2a06f73
@@ -1,11 +1,11 @@
-body: //div[@class='post']\r
-title: //h1[@id='singlePageTitle']\r
-date: substring-before(//small,'&bull; Rubrik')\r
-\r
-strip: //div[@class='post-ratings']\r
-strip: //div[@class='post-ratings-loading']\r
-strip: //a[@title='Empfehlen Sie den Text weiter!']\r
-strip: //a[@title='Drucken']\r
-strip: //div[@class='share']\r
+body: //div[@class='post']
+title: //h1[@id='singlePageTitle']
+date: substring-before(//small,'&bull; Rubrik')
+
+strip: //div[@class='post-ratings']
+strip: //div[@class='post-ratings-loading']
+strip: //a[@title='Empfehlen Sie den Text weiter!']
+strip: //a[@title='Drucken']
+strip: //div[@class='share']
 
 test_url: http://www.scienceticker.info/2011/11/24/forscher-finden-gedachtnismolekul/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d510407..1b3f31c
@@ -1,25 +1,25 @@
-#\r
-# After site revisions at SciAm, this configuration does\r
-# not work, especially for multi-page articles. For\r
-# every article there is now a "Print" link which\r
-# is far more reliable. So this configuration should be\r
-# removed or  disabled.\r
-# 2/3/13\r
-#\r
-\r
-# meta data\r
-title://h1[@class = 'articleTitle']\r
-author:substring-after(//span[@class = 'byline'],'By ')\r
-date:substring-before(//span[@class = 'datestamp'],'|')\r
-\r
-#body content\r
-body://div[@id = 'articleContent']\r
-#next_page_link://li[@id = 'flairPagination']/a[last()]\r
-\r
-single_page_link: //a[contains(@href, 'print=true')]\r
-\r
-#cleanup\r
-strip://div[@class = 'fsgBooks']\r
-\r
-test_url: http://www.scientificamerican.com/article.cfm?id=do-brain-scans-comatose-patients-reveal-conscious-state\r
+#
+# After site revisions at SciAm, this configuration does
+# not work, especially for multi-page articles. For
+# every article there is now a "Print" link which
+# is far more reliable. So this configuration should be
+# removed or  disabled.
+# 2/3/13
+#
+
+# meta data
+title://h1[@class = 'articleTitle']
+author:substring-after(//span[@class = 'byline'],'By ')
+date:substring-before(//span[@class = 'datestamp'],'|')
+
+#body content
+body://div[@id = 'articleContent']
+#next_page_link://li[@id = 'flairPagination']/a[last()]
+
+single_page_link: //a[contains(@href, 'print=true')]
+
+#cleanup
+strip://div[@class = 'fsgBooks']
+
+test_url: http://www.scientificamerican.com/article.cfm?id=do-brain-scans-comatose-patients-reveal-conscious-state
 test_url: http://www.scientificamerican.com/article.cfm?id=solar-wind-transforms-venus-into-shape-of-comet
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/scilogs.de.txt b/inc/3rdparty/site_config/standard/scilogs.de.txt
new file mode 100755 (executable)
index 0000000..b24d784
--- /dev/null
@@ -0,0 +1,15 @@
+title: //h1
+author: //div[@class='date']/a
+date: substring-after(//div[@class='date'], ',')
+body: //div[@class='entrybody']
+
+strip_id_or_class: socialshareprivacy
+strip: //div[@class='entrybody']/br[1]
+
+# Strip related articles
+# 'p'-Tag strips 'Ähnliche Artikel: ' (<br> tags become <p>)
+strip: //div[@class='entrybody']/p[last()]
+strip: //div[@class='entrybody']/ul[last()]
+
+convert_double_br_tags: yes
+test_url: http://www.scilogs.de/wblogs/blog/formbar/fusion/2012-10-08/rundgang-durch-deutschlands-gr-tes-fusionsexperiment
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f29e37f..8881bb4
@@ -1,8 +1,8 @@
-title: //title\r
-author: //p[@id='author-name-role']/a\r
-date: substring-after(//p[@class='time'],'Posted')\r
-body: //div[@id='main']\r
-strip: //div[@id='author-info']\r
-strip: //div[@id='author-links']\r
+title: //title
+author: //p[@id='author-name-role']/a
+date: substring-after(//p[@class='time'],'Posted')
+body: //div[@id='main']
+strip: //div[@id='author-info']
+strip: //div[@id='author-links']
 strip: //h1
 test_url: http://www.scotusblog.com/2012/04/shaken-baby-case-an-update/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 84be27f..ca7ec19
@@ -1,3 +1,3 @@
-title: //h2\r
+title: //h2
 body: //div[@class='body']
 test_url: http://scraplab.net/2010/10/26/please-keep-your-belongings-with-you-at-all-times/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d8b969b..5fb0ee7
@@ -1,8 +1,8 @@
-strip: //a[starts-with(@href, '#')]\r
-strip: //*[@class='storyByline']\r
-body: //*[@class='storyPageText']/..\r
-author: string('Dave Winer')\r
-date: substring-before(substring-after(//*[@class='storyByline'], 'on'), 'at')\r
-title: //h1\r
+strip: //a[starts-with(@href, '#')]
+strip: //*[@class='storyByline']
+body: //*[@class='storyPageText']/..
+author: string('Dave Winer')
+date: substring-before(substring-after(//*[@class='storyByline'], 'on'), 'at')
+title: //h1
 footnotes: no
 test_url: http://scripting.com/stories/2011/07/08/yeahImStillYawning.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9927675..55f2417
@@ -1,5 +1,5 @@
-body: //*[@class="entry-content"]\r
-title: //h1[@class="entry-title"]\r
-date: //*[@class="entry-date"]\r
+body: //*[@class="entry-content"]
+title: //h1[@class="entry-title"]
+date: //*[@class="entry-date"]
 author: //*[@class="author vcard"]
 test_url: http://sct.temple.edu/blogs/news-events/2011/05/congratulations-sct-class-of-2011/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/searchenginejournal.com.txt b/inc/3rdparty/site_config/standard/searchenginejournal.com.txt
new file mode 100755 (executable)
index 0000000..dc98af3
--- /dev/null
@@ -0,0 +1,5 @@
+strip: //ul[contains(@id, "social")]
+strip: //div[contains(@class, "ts-fab-wrapper")]
+strip: //div[contains(@id, 'gpt-ad')]
+
+test_url: http://www.searchenginejournal.com/web-design-vs-seo-it-doesnt-make-much-sense/62294/
old mode 100644 (file)
new mode 100755 (executable)
index f176d7c..fb6a107
@@ -1,20 +1,20 @@
-body: //div[@class="storyBox"]\r
-title: //div[@class="storyBox"]/h1\r
-author: //a[@rel="author"]\r
-date: substring-before(//span[@class="dateline"], 'by')\r
-\r
-#Removes related content but cleans up article text\r
-strip: //h1\r
-strip: //p[@class="homeStory tdmSideInfo"]\r
-strip: //div[@id="bylineShare"]\r
-strip: //script\r
-strip: //hr\r
-\r
-strip_id_or_class: homeStory\r
-strip_id_or_class: authorpic\r
-strip_id_or_class: insideComments\r
-strip_id_or_class: authorbio\r
-strip_id_or_class: gpt-ad-sel-cube\r
-strip_id_or_class: smxTextAd\r
+body: //div[@class="storyBox"]
+title: //div[@class="storyBox"]/h1
+author: //a[@rel="author"]
+date: substring-before(//span[@class="dateline"], 'by')
+
+#Removes related content but cleans up article text
+strip: //h1
+strip: //p[@class="homeStory tdmSideInfo"]
+strip: //div[@id="bylineShare"]
+strip: //script
+strip: //hr
+
+strip_id_or_class: homeStory
+strip_id_or_class: authorpic
+strip_id_or_class: insideComments
+strip_id_or_class: authorbio
+strip_id_or_class: gpt-ad-sel-cube
+strip_id_or_class: smxTextAd
 
 test_url: http://searchengineland.com/googles-jaw-dropping-sponsored-post-campaign-for-chrome-106348
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3e800a1..b6d9c92
@@ -1,4 +1,4 @@
-title: substring-before(//title, '«')\r
-body: //div[@class = 'entry']\r
+title: substring-before(//title, '«')
+body: //div[@class = 'entry']
 strip_id_or_class: 'postmetabox'
 test_url: http://sebbo.net/2010/12/akkus/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/select.yeeyan.org.txt b/inc/3rdparty/site_config/standard/select.yeeyan.org.txt
new file mode 100755 (executable)
index 0000000..6e98b14
--- /dev/null
@@ -0,0 +1,18 @@
+# This filter is tested on:
+# http://select.yeeyan.org/view/18312/332365
+# http://select.yeeyan.org/view/365295/333788
+# http://select.yeeyan.org/view/174464/332336
+
+tidy:no
+prune:no
+title://h1
+author: //div[@class='sa_author']/span/a
+date: substring-after(//div[@class='sa_author']/span/following-sibling::span, ':')
+body: //div[@class='sa_left closetag']
+wrap_in(b)://div[@class='sa_abstract']
+
+strip://ul[@class='sa_next clearfix']
+strip: //div[@class='sa_author']
+strip: //div[@class='sa_title_box']
+
+test_url: http://select.yeeyan.org/view/258033/333481
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d7b4788..5e63347
@@ -1,15 +1,15 @@
-body: //div[@id='content']\r
-\r
-# clean up recipe pages\r
-strip: //h2[@class='fn'] | //h2[@class='double-lined'] | //h3 | //div[@id='threeColumn2'] | //div[@id='threeColumn3']\r
-\r
-#recipe pages\r
-strip_id_or_class: "recipe-feedback"\r
-strip_id_or_class: "comments"\r
-strip_id_or_class: "procedure-number"\r
-strip_id_or_class: "more-with-author"\r
-\r
-#slice\r
-strip_id_or_class: "inner"\r
+body: //div[@id='content']
+
+# clean up recipe pages
+strip: //h2[@class='fn'] | //h2[@class='double-lined'] | //h3 | //div[@id='threeColumn2'] | //div[@id='threeColumn3']
+
+#recipe pages
+strip_id_or_class: "recipe-feedback"
+strip_id_or_class: "comments"
+strip_id_or_class: "procedure-number"
+strip_id_or_class: "more-with-author"
+
+#slice
+strip_id_or_class: "inner"
 
 test_url: http://www.seriouseats.com/recipes/2010/09/peking-duck-mandarin-pancakes-plum-sauce-recipe.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9f443d5..4c10e9c
@@ -1,7 +1,7 @@
-title: //h1[@class='post-title']\r
-author: //div[@class='post-byline']/a\r
-date: substring-before(//div[@class='post-byline'], ', by')\r
-\r
-body: //div[@class='post-body']\r
+title: //h1[@class='post-title']
+author: //div[@class='post-byline']/a
+date: substring-before(//div[@class='post-byline'], ', by')
+
+body: //div[@class='post-body']
 dissolve: //noscript
 test_url: http://sf.curbed.com/archives/2011/10/17/lower_haight_loft_would_really_really_really_like_a_buyer.php
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index fca656d..1e7c85a
@@ -1,7 +1,7 @@
-title: //h1[@class="post-title"]\r
-author: //div[@class="post-byline"]/a\r
-date: substring-before(//div[@class='post-byline'], ', by')\r
-\r
-body: //div[@class='post-body']\r
+title: //h1[@class="post-title"]
+author: //div[@class="post-byline"]/a
+date: substring-before(//div[@class='post-byline'], ', by')
+
+body: //div[@class='post-body']
 strip_id_or_class: post-kicker
 test_url: http://sf.eater.com/archives/2012/05/22/nate_pollack_talks_about_the_american_grilled_cheese_kitchen_moving_into_the_mission.php
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 5f73fbc..5469112
@@ -1,12 +1,12 @@
-title: /html/head/title\r
-\r
-body: //div[@id = 'articlecontent']/div[contains(@class, 'bodytext')]\r
-author: //div[@class = 'articleheadings']/p[contains(@class,'author')]/span[@class = 'fn']\r
-date: //div[@class = 'articleheadings']/span[@class = 'updated']\r
-strip: //div[div[contains(@class, 'imgbox')]]\r
-\r
-body: //div[@class = 'blogitem']\r
-author: //p[@class="credit"]/span[@class="author"]/a[position() = 1]\r
-date: //span[@class = 'pubdate']\r
+title: /html/head/title
+
+body: //div[@id = 'articlecontent']/div[contains(@class, 'bodytext')]
+author: //div[@class = 'articleheadings']/p[contains(@class,'author')]/span[@class = 'fn']
+date: //div[@class = 'articleheadings']/span[@class = 'updated']
+strip: //div[div[contains(@class, 'imgbox')]]
+
+body: //div[@class = 'blogitem']
+author: //p[@class="credit"]/span[@class="author"]/a[position() = 1]
+date: //span[@class = 'pubdate']
 
 test_url: http://www.sfgate.com/columnists/garchik/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a11fe4c..73c3017
@@ -1,3 +1,3 @@
-body: //div[contains(@class, 'content_body')]\r
+body: //div[contains(@class, 'content_body')]
 strip_id_or_class: det_rel
 test_url: http://www.sfweekly.com/2012-03-14/news/cia-lsd-wayne-ritchie-george-h-white-mk-ultra/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b175720..9a0d60a
@@ -1,3 +1,3 @@
-date: //span[@class='date']\r
+date: //span[@class='date']
 body: //div[@class='post_content']
 test_url: http://www.shabayek.com/blog/2011/10/16/%D8%AF%D8%B1%D9%88%D8%B3-%D9%85%D9%86-%D9%82%D8%B5%D8%A9-%D8%AA%D8%A3%D8%B3%D9%8A%D8%B3-%D8%AA%D9%88%D9%8A%D8%AA%D8%B1-%E2%80%93%D8%AC3/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b536fc3..bd8438f
@@ -1,11 +1,11 @@
-title://*[@class='primary']/h1\r
-date: //*[@class='articledate']\r
-author: substring-before(substring-after(//*[@class='block first']/p,'2012 '),'.')\r
-body: //div[@class='primary']\r
-footnotes: yes\r
-strip: //*[@class='primary']/h1\r
-strip: //*[@class='articledate']\r
-strip: //*[@class='detailsarticle']\r
-strip: //*[@class='endnav']\r
-strip: //*[@class='endmeta']\r
+title://*[@class='primary']/h1
+date: //*[@class='articledate']
+author: substring-before(substring-after(//*[@class='block first']/p,'2012 '),'.')
+body: //div[@class='primary']
+footnotes: yes
+strip: //*[@class='primary']/h1
+strip: //*[@class='articledate']
+strip: //*[@class='detailsarticle']
+strip: //*[@class='endnav']
+strip: //*[@class='endmeta']
 test_url: http://shawnblanc.net/2011/11/kindle-touch-review/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 68059ae..43fd871
@@ -1,6 +1,6 @@
-body: //div[ @class='entry-content' ]\r
-\r
-strip: //div[ contains(@class, 'sharing') ]\r
-\r
+body: //div[ @class='entry-content' ]
+
+strip: //div[ contains(@class, 'sharing') ]
+
 date: //div[ @class='entry-meta' ]/a
 test_url: http://shifteleven.com/articles/2008/05/10/issue-tracking-git-ticgit
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a82ce69..b10e12d
@@ -1,11 +1,11 @@
-#body: (//div[@class='ftr-yt-vid'])[1]\r
-body: (//blockquote[contains(@class, 'postcontent')])[1]\r
-body: (//div[starts-with(@id, 'post_message')])[1]\r
-\r
-prune: no\r
-tidy: no\r
-\r
-#replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player"\r
-#replace_string(</iframe>): </iframe>&nbsp;</div>\r
-\r
+#body: (//div[@class='ftr-yt-vid'])[1]
+body: (//blockquote[contains(@class, 'postcontent')])[1]
+body: (//div[starts-with(@id, 'post_message')])[1]
+
+prune: no
+tidy: no
+
+#replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player"
+#replace_string(</iframe>): </iframe>&nbsp;</div>
+
 test_url: http://www.siasat.pk/forum/showthread.php?107668-Policy-Matters-17th-March-2012-Dr-Shahid-Masood-Gen-Hameed-gul-amp-Fawad-Chudhary-Pak-US-Relationship&p=787733
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/signalscv.com.txt b/inc/3rdparty/site_config/standard/signalscv.com.txt
new file mode 100755 (executable)
index 0000000..2d3c388
--- /dev/null
@@ -0,0 +1,10 @@
+author: //span[contains(@class, 'byline_1')]
+date: //span[@class='posted_date']
+body: //*[contains(@class, 'bigimage_container') or contains(@class, 'overlay_text') or contains(@id, 'articlebody')]
+
+strip_id_or_class: leftWrapper
+
+prune: no
+
+test_url: http://www.signalscv.com/section/46/article/102948/
+test_url: http://www.signalscv.com/syndication/feeds/rss/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e3ad6e4..6999969
@@ -1,5 +1,5 @@
-body: //div[contains(@class, "entry")]\r
-\r
-date: //div[contains(@class, "entryFooter")]/a\r
+body: //div[contains(@class, "entry")]
+
+date: //div[contains(@class, "entryFooter")]/a
 
 test_url: http://simonwillison.net/2009/Oct/22/redis/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a1b6b67..46e2d5f
@@ -1,5 +1,5 @@
-body: //div[@class='post-body']\r
-strip: //div[@id='lws_0']\r
-prune: no\r
+body: //div[@class='post-body']
+strip: //div[@id='lws_0']
+prune: no
 
 test_url: http://singaporeanstocksinvestor.blogspot.com/2011/04/aims-amp-capital-industrial-reit.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 822bbeb..0d05c40
@@ -1,6 +1,6 @@
-title: //div[@class='headline']//h2\r
-body: //div[contains(@class, 'storycontent')]\r
-\r
-prune: no\r
-\r
+title: //div[@class='headline']//h2
+body: //div[contains(@class, 'storycontent')]
+
+prune: no
+
 test_url: http://sintagoulis.gr/sokolatenia/sokolatenia-mpompa-me-amaretti-
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sivers.org.txt b/inc/3rdparty/site_config/standard/sivers.org.txt
new file mode 100755 (executable)
index 0000000..a88f30d
--- /dev/null
@@ -0,0 +1,6 @@
+title: //article[@class='post']/header[@class='wrapper']//h1/a
+author: //header[@id='masthead']//h1/a
+date: //article[@class='post']/header[@class='wrapper']//p[@class='postdate']
+body: //div[@id='body-content']
+
+test_url: http://sivers.org/delegate/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/skanesfria.se.txt b/inc/3rdparty/site_config/standard/skanesfria.se.txt
new file mode 100755 (executable)
index 0000000..a0ddac7
--- /dev/null
@@ -0,0 +1,7 @@
+body: //div[contains(@class, 'layout__inner')]//div[contains(@class, 'file-image') or contains(@class, 'node__content')]
+author: //article//div[contains(@class, 'field-byline')]
+strip_id_or_class: rekommenderade
+strip_id_or_class: disqus
+strip_id_or_class: annonser
+
+test_url: http://www.skanesfria.se/artikel/112045
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 78d38ec..4d17176
@@ -1,15 +1,15 @@
-title: substring-before(//title,'| /Film')\r
-date: substring-before(substring-after(//p[@class='post-info'],'Posted on '),'by')\r
-strip: //div[@class='pm-left']\r
-strip: //div[@class='pm-right']\r
-strip: //h2/span\r
-next_page_link: //h2/strong/a\r
-strip: //h2/strong/a\r
-strip: //p[contains(text(),'we have to split this post over')]\r
-strip: //p[@class='post-info']\r
-strip: //h1/a\r
-strip: //img[contains(@src,'siteimages/authors')]\r
-strip: //div[@id='header']\r
-strip: //div[@class='topad-right']\r
-strip: //strong[contains(text(),'Cool Posts From Around the Web:')]\r
+title: substring-before(//title,'| /Film')
+date: substring-before(substring-after(//p[@class='post-info'],'Posted on '),'by')
+strip: //div[@class='pm-left']
+strip: //div[@class='pm-right']
+strip: //h2/span
+next_page_link: //h2/strong/a
+strip: //h2/strong/a
+strip: //p[contains(text(),'we have to split this post over')]
+strip: //p[@class='post-info']
+strip: //h1/a
+strip: //img[contains(@src,'siteimages/authors')]
+strip: //div[@id='header']
+strip: //div[@class='topad-right']
+strip: //strong[contains(text(),'Cool Posts From Around the Web:')]
 test_url: http://www.slashfilm.com/superhero-bits-206/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e92f6a0..d5798e0
@@ -1,19 +1,19 @@
-title: //h1[@class="sl-art-head-dek"]\r
-body: //article//div[@class='sl-art-body']/div[contains(@class, 'body')]\r
-strip: //div[@class="department_kicker"]\r
-strip: //div[@id="insider_ad_wrapper" or @id="insider_ad_inner"]\r
-strip: //div[@id="bottom_sponsored_links"]\r
-strip: //div[@class="sl-art-ad-midflex"]\r
-#strip: //dl\r
-#strip: //p[em/a[contains(@href, 'facebook.com')]]\r
-prune: no\r
-\r
-author: //div[@id='author_bio']//a[contains(@href, '/author/')]\r
-author: //a[contains(@href, '/authors.')]\r
-\r
-date: substring-before(substring-after(//span[@class='sl-art-byline'], 'Posted '), ', at ')\r
-\r
-single_page_link: //a[@class='sl-art-sinpage']\r
-\r
-test_url: http://www.slate.com/id/2274583/pagenum/all/\r
+title: //h1[@class="sl-art-head-dek"]
+body: //article//div[@class='sl-art-body']/div[contains(@class, 'body')]
+strip: //div[@class="department_kicker"]
+strip: //div[@id="insider_ad_wrapper" or @id="insider_ad_inner"]
+strip: //div[@id="bottom_sponsored_links"]
+strip: //div[@class="sl-art-ad-midflex"]
+#strip: //dl
+#strip: //p[em/a[contains(@href, 'facebook.com')]]
+prune: no
+
+author: //div[@id='author_bio']//a[contains(@href, '/author/')]
+author: //a[contains(@href, '/authors.')]
+
+date: substring-before(substring-after(//span[@class='sl-art-byline'], 'Posted '), ', at ')
+
+single_page_link: //a[@class='sl-art-sinpage']
+
+test_url: http://www.slate.com/id/2274583/pagenum/all/
 test_url: http://www.slate.com/id/2293116/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 1a902b9..e62a396
@@ -1,15 +1,15 @@
-body: //div[@id='content']\r
-\r
-# clean up recipe pages\r
-strip: //h2[@class='fn'] | //h2[@class='double-lined'] | //h3 | //div[@id='threeColumn2'] | //div[@id='threeColumn3']\r
-\r
-#recipe pages\r
-strip_id_or_class: "recipe-feedback"\r
-strip_id_or_class: "comments"\r
-strip_id_or_class: "procedure-number"\r
-strip_id_or_class: "more-with-author"\r
-\r
-#slice\r
-strip_id_or_class: "inner"\r
+body: //div[@id='content']
+
+# clean up recipe pages
+strip: //h2[@class='fn'] | //h2[@class='double-lined'] | //h3 | //div[@id='threeColumn2'] | //div[@id='threeColumn3']
+
+#recipe pages
+strip_id_or_class: "recipe-feedback"
+strip_id_or_class: "comments"
+strip_id_or_class: "procedure-number"
+strip_id_or_class: "more-with-author"
+
+#slice
+strip_id_or_class: "inner"
 
 test_url: http://slice.seriouseats.com/archives/2010/10/the-pizza-lab-how-to-make-great-new-york-style-pizza.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index daa5e31..f952694
@@ -1,4 +1,4 @@
-strip_id_or_class: postCategory\r
-title: //h3[@class='postTitle']\r
+strip_id_or_class: postCategory
+title: //h3[@class='postTitle']
 body: //div[@class='postBody']
 test_url: http://slog.thestranger.com/slog/archives/2010/10/12/sl-letter-of-the-day-leave-it-alone
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ec6c55c..85ca46d
@@ -1,5 +1,5 @@
-title: //td[@class='hweissblau2']\r
-body: //p[@class='copy'] | //div[@class='Section1']\r
-prune: no\r
+title: //td[@class='hweissblau2']
+body: //p[@class='copy'] | //div[@class='Section1']
+prune: no
 
 test_url: http://www.smartinvestor.de/news/smartinvestor/detail.hbs?itemid=item949496655&recnr=14593
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c3d01ff..d41612c
@@ -1,3 +1,3 @@
-title: //meta[@property='og:title']/@content\r
+title: //meta[@property='og:title']/@content
 date: //p[@class='autor_line']/b/text()
 test_url: http://www.sme.sk/c/6268206/lipsic-vidi-malcharkove-uplatky.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 10a3f71..3e8fee9
@@ -1,20 +1,20 @@
-# meta data\r
-title://h1[@id = 'articleTitle']\r
-author:substring-after(//ul[@id = 'byLine']/li[1],'By ')\r
-date:substring-before(substring-after(//ul[@id = 'byLine']/li[last()],','),',')\r
-body://div[@id = 'article-body']\r
-\r
-# full content\r
-single_page_link://td/li[@class = 'article-singlepage']/a\r
-\r
-# caption clean up\r
-wrap_in(i)://span[@class='articleImageCaptionwide']\r
-move_into (//span[@class='articleImageCaptionwide'])://div[@id = 'articleImage']/p\r
-\r
-\r
-# clean up\r
-strip://p[@id = 'articlePaginationWrapper']\r
-strip://ul[contains(@class, 'cat-breadcrumb')]\r
-strip://div [@class= 'viewMorePhotos']\r
+# meta data
+title://h1[@id = 'articleTitle']
+author:substring-after(//ul[@id = 'byLine']/li[1],'By ')
+date:substring-before(substring-after(//ul[@id = 'byLine']/li[last()],','),',')
+body://div[@id = 'article-body']
+
+# full content
+single_page_link://td/li[@class = 'article-singlepage']/a
+
+# caption clean up
+wrap_in(i)://span[@class='articleImageCaptionwide']
+move_into (//span[@class='articleImageCaptionwide'])://div[@id = 'articleImage']/p
+
+
+# clean up
+strip://p[@id = 'articlePaginationWrapper']
+strip://ul[contains(@class, 'cat-breadcrumb')]
+strip://div [@class= 'viewMorePhotos']
 
 test_url: http://www.smithsonianmag.com/history-archaeology/The-Goddess-Goes-Home.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e22af7a..c68c132
@@ -1,5 +1,5 @@
-title: //h2[@class='custom-entry-title']\r
-author: substring-after(//span[@class='author vcard'],'by ')\r
-date: substring-after(//span[@class='publ'],'Published on ')\r
-body: //div[@class='postentry-content']\r
+title: //h2[@class='custom-entry-title']
+author: substring-after(//span[@class='author vcard'],'by ')
+date: substring-after(//span[@class='publ'],'Published on ')
+body: //div[@class='postentry-content']
 test_url: http://smokingapples.com/software/popclip-for-mac/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/somethingawful.com.txt b/inc/3rdparty/site_config/standard/somethingawful.com.txt
new file mode 100755 (executable)
index 0000000..4854794
--- /dev/null
@@ -0,0 +1,17 @@
+title: //h1
+body: //div[@id = 'content-area']
+author: //p[contains(@class, 'byline')]/a
+autodetect_next_page: yes
+tidy: no
+
+strip_id_or_class: articleid
+strip_id_or_class: logo
+strip_id_or_class: pagebar
+strip_id_or_class: featurenavlinks
+strip_id_or_class: featured_frontpage
+strip_id_or_class: sidebar
+strip_id_or_class: footer
+strip_id_or_class: byline
+strip_id_or_class: logo
+strip_id_or_class: nav_network
+test_url: http://www.somethingawful.com/d/dungeons-and-dragons/wtf-monster-manual.php
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/songshuhui.net.txt b/inc/3rdparty/site_config/standard/songshuhui.net.txt
new file mode 100755 (executable)
index 0000000..a923359
--- /dev/null
@@ -0,0 +1,7 @@
+# This filter is tested on:
+# http://songshuhui.net/archives/65522
+# http://songshuhui.net/archives/75760
+title://h2/span/a
+date:substring-before(substring-after(//div[@class='atrctitle']/div, '发表于'),' |')
+body://div[@class='entry']
+test_url: http://songshuhui.net/archives/74819
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 668fc44..b52169d
@@ -1,4 +1,4 @@
-#grab the actual content div\r
-body: //div[@class='rt-article']\r
-\r
+#grab the actual content div
+body: //div[@class='rt-article']
+
 test_url: http://www.sourcebooks.com/next/sourcebooks-next-our-blog/1601-another-piece-of-the-e-puzzle-or-when-good-ebook-promotions-go-bad.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a05c839..d0605ed
@@ -1,7 +1,7 @@
-author: /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body']/a[@class='author-link']/child::text()\r
-\r
-body: /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body']\r
-\r
-# Not very helpfull, the title and author are container by the same element that contains the body\r
+author: /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body']/a[@class='author-link']/child::text()
+
+body: /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body']
+
+# Not very helpful: the title and author are contained in the same element that contains the body
 strip: /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body']/h2 | /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body']/a[@class='author-link']
 test_url: http://www.spectator.co.uk/arts-and-culture/night-and-day/7449683/spotify-sunday-my-personal-soundtrack.thtml
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4b0704a..aea3627
@@ -1,3 +1,3 @@
-body://div[@class="articleBody"]\r
+body://div[@class="articleBody"]
 author://p[@class="articleBodyTtl"]
 test_url: http://spectrum.ieee.org/semiconductors/processors/behind-intels-new-randomnumber-generator/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 390c075..413e015
@@ -1,75 +1,75 @@
-# A. Niepel, narya.de@...\r
-# - added single_page_link\r
-# - added author for default and single page view\r
-# - added date for single page view\r
-# fforst@...\r
-# - Fixed it\r
-# bode2104@...\r
-# - Fixed single_page_link\r
-# - Included intro text in single page view\r
-# - Added body in default view\r
-\r
-# set body\r
-tidy: no\r
-# body in single page view\r
-body: //div[@id="spArticleContent"]\r
-# body in default view\r
-body: //div[@id="spArticleSection"]\r
-# body in "Fotostrecke"\r
-body: //div[@id="spBigaContent"]\r
-\r
-# set date in single page view\r
-date: //div[@id="spArticleContent"]/h3\r
-# strip date\r
-strip: //div[@id="spArticleContent"]/h3\r
-# set date in "Fotostrecke"\r
-date: //div[@id="spBigaDatum"]\r
-\r
-#set title in single page view\r
-title: //div[@id='spArticleContent']/h2\r
-# strip title\r
-strip: //div[@id='spArticleContent']/h1\r
-strip: //div[@id='spArticleContent']/h2\r
-#set title in "Fotostrecke"\r
-title: //div[@class='spBigaHeadline']\r
-\r
-# set author\r
-author: //p[@class="spAuthor"]/a\r
-author: substring-after(//p[@class="spAuthor"], 'Von ')\r
-# strip author\r
-strip: //p[@class='spAuthor']\r
-\r
-# remove captions\r
-strip: //*/span[@class='spPicLayerText']\r
-strip: //*/div[@class='spPanoPlayerPaneControl']\r
-strip: //*/div[@class='spCredit']\r
-strip: //*/div[@class='spCredit']/following-sibling::p\r
-\r
-# remove ads\r
-strip: //div[@class='spMInline']\r
-\r
-# remove photogalleries and extras\r
-strip: //div[@class='spPhotoGallery']\r
-strip: //div[@class='spPhotoGallery']/following-sibling::br\r
-strip: //div[@class='spAssetAlignleft']\r
-strip: //div[contains(@class,'spAsset')]\r
-strip: //br[@clear='all']\r
-\r
-# remove community functions\r
-strip: //div[@id='spSocialBookmark']\r
-strip: //div[contains(@class, 'spCommunityBox')]\r
-strip: //div[contains(@class, 'spArticleNewsfeedBox')]\r
-strip: //div[@class='spArticleCredit']\r
-\r
-# remove clutter in "Fotostrecke"\r
-strip: //div[@id='spBreadcrumb']\r
-strip: //div[@id='spBigaLatestEntries']\r
-strip: //div[contains(@class, 'spBigaNavi')]\r
-strip: //div[@class='spDottedLine']\r
-\r
-# Use link to print article for single page view\r
-single_page_link: //a[contains(@href, '-druck')]\r
-\r
-# use next link in "Fotostrecke"\r
-next_page_link: //a[@class='spBigaControlForw']\r
+# A. Niepel, narya.de@...
+# - added single_page_link
+# - added author for default and single page view
+# - added date for single page view
+# fforst@...
+# - Fixed it
+# bode2104@...
+# - Fixed single_page_link
+# - Included intro text in single page view
+# - Added body in default view
+
+# set body
+tidy: no
+# body in single page view
+body: //div[@id="spArticleContent"]
+# body in default view
+body: //div[@id="spArticleSection"]
+# body in "Fotostrecke"
+body: //div[@id="spBigaContent"]
+
+# set date in single page view
+date: //div[@id="spArticleContent"]/h3
+# strip date
+strip: //div[@id="spArticleContent"]/h3
+# set date in "Fotostrecke"
+date: //div[@id="spBigaDatum"]
+
+#set title in single page view
+title: //div[@id='spArticleContent']/h2
+# strip title
+strip: //div[@id='spArticleContent']/h1
+strip: //div[@id='spArticleContent']/h2
+#set title in "Fotostrecke"
+title: //div[@class='spBigaHeadline']
+
+# set author
+author: //p[@class="spAuthor"]/a
+author: substring-after(//p[@class="spAuthor"], 'Von ')
+# strip author
+strip: //p[@class='spAuthor']
+
+# remove captions
+strip: //*/span[@class='spPicLayerText']
+strip: //*/div[@class='spPanoPlayerPaneControl']
+strip: //*/div[@class='spCredit']
+strip: //*/div[@class='spCredit']/following-sibling::p
+
+# remove ads
+strip: //div[@class='spMInline']
+
+# remove photogalleries and extras
+strip: //div[@class='spPhotoGallery']
+strip: //div[@class='spPhotoGallery']/following-sibling::br
+strip: //div[@class='spAssetAlignleft']
+strip: //div[contains(@class,'spAsset')]
+strip: //br[@clear='all']
+
+# remove community functions
+strip: //div[@id='spSocialBookmark']
+strip: //div[contains(@class, 'spCommunityBox')]
+strip: //div[contains(@class, 'spArticleNewsfeedBox')]
+strip: //div[@class='spArticleCredit']
+
+# remove clutter in "Fotostrecke"
+strip: //div[@id='spBreadcrumb']
+strip: //div[@id='spBigaLatestEntries']
+strip: //div[contains(@class, 'spBigaNavi')]
+strip: //div[@class='spDottedLine']
+
+# Use link to print article for single page view
+single_page_link: //a[contains(@href, '-druck')]
+
+# use next link in "Fotostrecke"
+next_page_link: //a[@class='spBigaControlForw']
 test_url: http://www.spiegel.de/politik/deutschland/0,1518,787602,00.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/spiked-online.com.txt b/inc/3rdparty/site_config/standard/spiked-online.com.txt
new file mode 100755 (executable)
index 0000000..7ec39c2
--- /dev/null
@@ -0,0 +1,7 @@
+title: //div[@id='articleTitleWrapper' or @id='mainFeature']//h1
+author: //*[@id='authorNameJob']//a
+date: //div[@id='articleMeta']/p
+body: //div[@id='mainFeature']//img | //div[contains(@class, 'fullText')]
+
+test_url: http://www.spiked-online.com/newsite/article/standing_up_to_the_white-coated_gods_of_fortune/13785
+test_url: http://www.spiked-online.com/newsite/article/sex_box_and_the_crisis_of_intimacy/14168
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 66f6192..88eb454
@@ -1,5 +1,5 @@
-tidy: no\r
-body: //section[contains(@class, 'main')]\r
-strip: //footer\r
+tidy: no
+body: //section[contains(@class, 'main')]
+strip: //footer
 strip: //a[@class='paginated']
 test_url: http://www.spin.com/articles/bathlands-deep-heart-americas-new-drug-nightmare
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d567165..3e05a22
@@ -1,5 +1,5 @@
-author:string('Dan Frommer/SplatF')\r
-date://div[@class='postdate']\r
-body://div[@class='entry']\r
+author:string('Dan Frommer/SplatF')
+date://div[@class='postdate']
+body://div[@class='entry']
 title://div[@class='post']/h1
 test_url: http://www.splatf.com/2012/02/month-six/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d1d392e..4bbc7aa
@@ -1,4 +1,4 @@
-author: //div[@class='byline']/a\r
-date: //div[@id='date']\r
+author: //div[@class='byline']/a
+date: //div[@id='date']
 body: //div[@class='entry']
 test_url: http://splitsider.com/2011/10/saturday-nights-children-rob-riggle-2004-2005/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b404b82..18552d1
@@ -1,8 +1,8 @@
-title://div[@class="content_detail"]/h1\r
-\r
-author://div[@class="author"]/strong\r
-\r
-date:substring-before(substring-after(//div[@class="content_detail"]/*[@class="date"], ','), ' WIB')\r
-\r
+title://div[@class="content_detail"]/h1
+
+author://div[@class="author"]/strong
+
+date:substring-before(substring-after(//div[@class="content_detail"]/*[@class="date"], ','), ' WIB')
+
 body://div[@class='text_detail']
 test_url: http://sport.detik.com/sepakbola/read/2012/05/23/065011/1922350/71/agen-silva-ingin-bertahan-di-milan?b99220270
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a794ded..f0be85c
@@ -1,11 +1,11 @@
-single_page_link: //div[@id='content']//p[@class='readMore']/a\r
-\r
-title: //div[@class='hidden offscreen']/h2\r
-body: //div[@id="storyText"]\r
-move_into(//div[@id='storyText']): //div[@class='fact']\r
-strip: //small[@class='credit']\r
-strip: //small[@class='caption']\r
-date: substring-after(//div[@id='storyMeta']//p[@class='date'],'Publiziert am')\r
-strip: //p[@class='toplink']\r
+single_page_link: //div[@id='content']//p[@class='readMore']/a
+
+title: //div[@class='hidden offscreen']/h2
+body: //div[@id="storyText"]
+move_into(//div[@id='storyText']): //div[@class='fact']
+strip: //small[@class='credit']
+strip: //small[@class='caption']
+date: substring-after(//div[@id='storyMeta']//p[@class='date'],'Publiziert am')
+strip: //p[@class='toplink']
 
 test_url: http://sport.orf.at/stories/2084851/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sport365.fr.txt b/inc/3rdparty/site_config/standard/sport365.fr.txt
new file mode 100755 (executable)
index 0000000..8688f40
--- /dev/null
@@ -0,0 +1,8 @@
+body: //h2[contains(@class, 'body_head')] | //div[@id='img_article' or contains(@class, 'body_content')]
+body: //div[contains(@class, 'cpanel')]//div[contains(@class, 'thumbnails')]
+prune: no
+strip: //div[starts-with(@class, 'actu_')]
+strip: //div[contains(@class, 'data')]
+
+test_url: http://www.sport365.fr/basketball/nba/new-york-accord-avec-toronto-pour-bargnani-1038773.shtml
+test_url: http://www.sport365.fr/rss.xml
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e0f8223..8c21ef2
@@ -1,12 +1,12 @@
-title: //div[@class='headline'] | //div[@class='mod-header']/h3\r
-body: //div[contains(@class, 'article')]\r
-strip: //div[contains(@class, 'mod-inline')]\r
-strip: //*/span[@class='page-actions']/a\r
-strip: //*/span[@class='page-actions']/a\r
-strip: //div[@class='page-actions']/*\r
-strip: //div[@class='headline'] | //div[@class='mod-header']/h3\r
-strip: //div[@class='mod-blog-navigation']\r
-strip: //div[@class='monthday']\r
-strip: //div[@class='time']\r
-strip: //div[@class='timeofday']\r
+title: //div[@class='headline'] | //div[@class='mod-header']/h3
+body: //div[contains(@class, 'article')]
+strip: //div[contains(@class, 'mod-inline')]
+strip: //*/span[@class='page-actions']/a
+strip: //*/span[@class='page-actions']/a
+strip: //div[@class='page-actions']/*
+strip: //div[@class='headline'] | //div[@class='mod-header']/h3
+strip: //div[@class='mod-blog-navigation']
+strip: //div[@class='monthday']
+strip: //div[@class='time']
+strip: //div[@class='timeofday']
 test_url: http://sports.espn.go.com/espn/page2/story?page=simmonsnfl2010/lebron_james_return_clevelend&sportCat=nba
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 96a3bb7..b0f57e2
@@ -1,9 +1,9 @@
-title: //div[@id='article']/div[@class='hd']/h1\r
-body: //p[@id='byline'] | //div[@id='article']//div[@class='body_copy 0']\r
-strip: //div[@class='foot']\r
-strip: //div[@id='sidebar']//div[@class='ft']\r
-strip: //p[@id='byline']//em\r
-tidy: no\r
-prune: no\r
+title: //div[@id='article']/div[@class='hd']/h1
+body: //p[@id='byline'] | //div[@id='article']//div[@class='body_copy 0']
+strip: //div[@class='foot']
+strip: //div[@id='sidebar']//div[@class='ft']
+strip: //p[@id='byline']//em
+tidy: no
+prune: no
 
 test_url: http://sports.yahoo.com/nba/news?slug=ap-nbafinals
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6500e75..1e58b52
@@ -1,22 +1,22 @@
-title://div[@id='ardContent']/h1\r
-\r
-author://p[@id='ardAutor']\r
-author://span[@id='ardQuelle']\r
-author:string('sportschau.de')\r
-\r
-date:substring-after(//span[@id='ardStand'], 'Stand: ')\r
-\r
-body://div[@id='ardContent']\r
-\r
-strip://div[@id='ardContent']/h1\r
-strip://p[@id='ardAutor']\r
-strip: //div[@class='embeddedPlayer_clipinfo']\r
-strip: //div[@class='ardMehrZumThemaRechts']\r
-strip: //*[contains(@class, 'inv')]\r
-\r
-strip: //p[@id='ardAbbinder']\r
-strip: //div[@class='socialBookmarks']\r
-strip: //div[@id='ardContentEnd']\r
-strip: //div[@id='ardDisclaimer']\r
+title://div[@id='ardContent']/h1
+
+author://p[@id='ardAutor']
+author://span[@id='ardQuelle']
+author:string('sportschau.de')
+
+date:substring-after(//span[@id='ardStand'], 'Stand: ')
+
+body://div[@id='ardContent']
+
+strip://div[@id='ardContent']/h1
+strip://p[@id='ardAutor']
+strip: //div[@class='embeddedPlayer_clipinfo']
+strip: //div[@class='ardMehrZumThemaRechts']
+strip: //*[contains(@class, 'inv')]
+
+strip: //p[@id='ardAbbinder']
+strip: //div[@class='socialBookmarks']
+strip: //div[@id='ardContentEnd']
+strip: //div[@id='ardDisclaimer']
 strip: //div[@id='ardRechteSpalte']
 test_url: http://www.sportschau.de/sp/fussball/news201203/17/analyse_leverkusen_gladbach.jsp
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index afc5879..b3da813
@@ -1,26 +1,26 @@
-# main sportsillustrated.com articles\r
-#\r
-body: //div[@id="cnnStoryContent"]\r
-title: //div[@id="cnnStoryHeadline"]//h1\r
-author: //div[@id="cnnSubBanner"]//strong\r
-date: substring-after(//div[@id="cnnTimeStamp"], "Updated: ")\r
-date: substring-after(//div[@id="cnnTimeStamp"], "Posted: ")\r
-\r
-# kill ugly font buttons\r
-strip: //div[@id="cnnSCFontButtons"]\r
-\r
-# kill misc filler videos & etc\r
-strip: //div[@class="cnnDivideContent"]\r
-strip: //*[@class="cnnTMbox"]\r
-\r
-# si vault articles\r
-# -------------\r
-body: //div[@class="siv_artPara"]\r
-title: //div[@class="siv_artHeader"]//h1\r
-author: //div[@class="byline"]\r
-date: //div[@class="date"]\r
-\r
-next_page_link: //div[@id='cnnStoryContinue']/a\r
-strip_id_or_class: cnnstorypagination\r
-\r
+# main sportsillustrated.com articles
+#
+body: //div[@id="cnnStoryContent"]
+title: //div[@id="cnnStoryHeadline"]//h1
+author: //div[@id="cnnSubBanner"]//strong
+date: substring-after(//div[@id="cnnTimeStamp"], "Updated: ")
+date: substring-after(//div[@id="cnnTimeStamp"], "Posted: ")
+
+# kill ugly font buttons
+strip: //div[@id="cnnSCFontButtons"]
+
+# kill misc filler videos & etc
+strip: //div[@class="cnnDivideContent"]
+strip: //*[@class="cnnTMbox"]
+
+# si vault articles
+# -------------
+body: //div[@class="siv_artPara"]
+title: //div[@class="siv_artHeader"]//h1
+author: //div[@class="byline"]
+date: //div[@class="date"]
+
+next_page_link: //div[@id='cnnStoryContinue']/a
+strip_id_or_class: cnnstorypagination
+
 test_url: http://sportsillustrated.cnn.com/2012/writers/peter_king/02/27/combine/index.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 16636bc..5b68381
@@ -1,5 +1,5 @@
-title: //h2\r
-author: string('Michael Spreng')\r
-date: //div[@class='date']\r
+title: //h2
+author: string('Michael Spreng')
+date: //div[@class='date']
 body: //div[@class='entry']
 test_url: http://www.sprengsatz.de/?p=3691
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4872519..15763c3
@@ -1,7 +1,7 @@
-body: //div[@id='ff-body']\r
-\r
-replace_string(<h1 align=center>): <div id="ff-body"><h1 align=center>\r
-\r
-prune: no\r
-\r
+body: //div[@id='ff-body']
+
+replace_string(<h1 align=center>): <div id="ff-body"><h1 align=center>
+
+prune: no
+
 test_url: http://www.sqlite.org/fileformat2.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 388209a..8eae13e
@@ -1,4 +1,4 @@
-body: //div[@class='content']\r
-date: substring-before( //div[@class='unit dateAndNotes'], 'with')\r
+body: //div[@class='content']
+date: substring-before( //div[@class='unit dateAndNotes'], 'with')
 title: //h3
 test_url: http://squashed.tumblr.com/post/17613522228/lets-stop-blaming-the-victims-of-predatory-lending
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e5317ba..bb95e93
@@ -1,14 +1,14 @@
-body: //div[@class='post-text' or @class='user-action-time' or @class='user-details' or @class='vote'] | //div[@id='answers-header']//h2\r
-\r
-replace_string(<div class="user-details"><br></div>): <!-- nothing -->\r
-replace_string(<div class="vote">): <div class="vote"><h3>Vote count: \r
-\r
-strip_id_or_class: vote-up\r
-strip_id_or_class: vote-down\r
-strip_id_or_class: star-off\r
-strip_id_or_class: favoritecount\r
-strip_id_or_class: -share\r
-strip_id_or_class: badgecount\r
-\r
+body: //div[@class='post-text' or @class='user-action-time' or @class='user-details' or @class='vote'] | //div[@id='answers-header']//h2
+
+replace_string(<div class="user-details"><br></div>): <!-- nothing -->
+replace_string(<div class="vote">): <div class="vote"><h3>Vote count: 
+
+strip_id_or_class: vote-up
+strip_id_or_class: vote-down
+strip_id_or_class: star-off
+strip_id_or_class: favoritecount
+strip_id_or_class: -share
+strip_id_or_class: badgecount
+
 
 test_url: http://stackoverflow.com/questions/4484289/id-like-to-understand-the-jquery-plugin-syntax
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index bde1421..a0f1587
@@ -1,14 +1,14 @@
-title: //div[@class='articleLeft']/h3\r
-\r
-author: substring-after(//span[@class='articleAuthor']/a,'By ')\r
-\r
-date: substring-before(//span[@class='articleDateTime'],'in ')\r
-\r
-body: //div[@class='articleLeft']\r
-strip: //div[@class='articleMoreNews']\r
-strip: //div[@class='articleLeft']/h3\r
-strip: //div[@class='articleLeft']/p[@class='articleInfo clearfix']\r
-\r
-# Remove duplicate title from text\r
-strip: //div[@id='site']/div[5][@class='holder']/div[1][@class='hBlock']/div[1][@class='sglCol article']/h3\r
+title: //div[@class='articleLeft']/h3
+
+author: substring-after(//span[@class='articleAuthor']/a,'By ')
+
+date: substring-before(//span[@class='articleDateTime'],'in ')
+
+body: //div[@class='articleLeft']
+strip: //div[@class='articleMoreNews']
+strip: //div[@class='articleLeft']/h3
+strip: //div[@class='articleLeft']/p[@class='articleInfo clearfix']
+
+# Remove duplicate title from text
+strip: //div[@id='site']/div[5][@class='holder']/div[1][@class='hBlock']/div[1][@class='sglCol article']/h3
 test_url: http://www.stalbansreview.co.uk/news/9581446.New_roundabout_in_King_Harry_Lane/r/?ref=rss
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 22a3348..71a2bda
@@ -1,16 +1,16 @@
-autodetect_next_page: no\r
-footnotes: no\r
-dissolve: //div[@class="column-2"]//div[@class="widget"]\r
-dissolve: //div[@class="column-2"]//div\r
-\r
-author: //div[@class="innerbyline"]/a\r
-strip: //div[@class="innerbyline"]/a\r
-\r
-strip: //p[@class="dateline"]\r
-date: //p[@class="dateline"]\r
-\r
-title: //h1[@class="title"]\r
-author: //div[@class="innerbyline"]/a\r
-date: //p[@class="dateline"]\r
+autodetect_next_page: no
+footnotes: no
+dissolve: //div[@class="column-2"]//div[@class="widget"]
+dissolve: //div[@class="column-2"]//div
+
+author: //div[@class="innerbyline"]/a
+strip: //div[@class="innerbyline"]/a
+
+strip: //p[@class="dateline"]
+date: //p[@class="dateline"]
+
+title: //h1[@class="title"]
+author: //div[@class="innerbyline"]/a
+date: //p[@class="dateline"]
 body: //div[@class="column-2"]
 test_url: http://www.standard.co.uk/lifestyle/esmagazine/grace-and-flavour-pizarro-7938350.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0579455..254e2c2
@@ -1,11 +1,11 @@
-title: //h1[@id='storyTitle']\r
-author: substring-after(//span[@class='hsa_postCredit'], 'By ') \r
-date://span[@class='hsa_dateStamp']\r
-body: //div[@class='storytext']\r
-strip_id_or_class: insideStoryAd \r
-strip_id_or_class: printDesc\r
-strip_id_or_class: sb_2010_story_tools\r
-strip_id_or_class: FBConnectButton_Text\r
-strip_id_or_class: breadcrumbs\r
-prune: no\r
+title: //h1[@id='storyTitle']
+author: substring-after(//span[@class='hsa_postCredit'], 'By ') 
+date://span[@class='hsa_dateStamp']
+body: //div[@class='storytext']
+strip_id_or_class: insideStoryAd 
+strip_id_or_class: printDesc
+strip_id_or_class: sb_2010_story_tools
+strip_id_or_class: FBConnectButton_Text
+strip_id_or_class: breadcrumbs
+prune: no
 test_url: http://www.staradvertiser.com/news/20111112_World_leaders_step_onto_isle_stage.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 1169984..efd1ec2
@@ -1,8 +1,8 @@
-title: /html/head/meta[@name='title']/@content\r
-author: //span[contains(concat(' ',normalize-space(@class),' '),' article_author ')]/a\r
-date: //span[contains(concat(' ',normalize-space(@class),' '),' article_date ')]\r
-\r
-body: //div[@class='entry-content']\r
-\r
+title: /html/head/meta[@name='title']/@content
+author: //span[contains(concat(' ',normalize-space(@class),' '),' article_author ')]/a
+date: //span[contains(concat(' ',normalize-space(@class),' '),' article_date ')]
+
+body: //div[@class='entry-content']
+
 single_page_link: //p[@class='pagination']/a
 test_url: http://www.stephenfry.com/2011/10/06/steve-jobs/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d66fee9..75379a9
@@ -1,5 +1,5 @@
-title: article/h1\r
-author: //p[@class='byline']\r
-date:  //p[@class='date']\r
+title: article/h1
+author: //p[@class='byline']
+date:  //p[@class='date']
 body: //div[@class='body']
 test_url: https://www.stlbeacon.org/#!/content/23404/mogop_caucus_031712
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 073043d..2f4f8cb
@@ -1,5 +1,5 @@
-strip_id_or_class: 'left'\r
-strip_id_or_class: 'right'\r
-strip_id_or_class: 'block-belowcontent'\r
+strip_id_or_class: 'left'
+strip_id_or_class: 'right'
+strip_id_or_class: 'block-belowcontent'
 
 test_url: http://stockholm.etc.se/reportage/bakom-stangda-dorrar-pa-fas-3-massa
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/stockholmsfria.nu.txt b/inc/3rdparty/site_config/standard/stockholmsfria.nu.txt
new file mode 100755 (executable)
index 0000000..cc8c28b
--- /dev/null
@@ -0,0 +1,7 @@
+body: //div[contains(@class, 'layout__inner')]//div[contains(@class, 'file-image') or contains(@class, 'node__content')]
+author: //article//div[contains(@class, 'field-byline')]
+strip_id_or_class: rekommenderade
+strip_id_or_class: disqus
+strip_id_or_class: annonser
+
+test_url: http://www.stockholmsfria.nu/artikel/112068
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/straightdope.com.txt b/inc/3rdparty/site_config/standard/straightdope.com.txt
new file mode 100755 (executable)
index 0000000..f01d7ad
--- /dev/null
@@ -0,0 +1,6 @@
+body: //div[@id='article' or @id='current_illustration']
+title: //div[@id='article']//h1
+date: //div[@id='article']//div[@class='date']
+prune: no
+
+test_url: http://www.straightdope.com/columns/read/947/whatever-happened-to-adoption-of-the-metric-system-in-the-u-s
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0b62a3d..6cf03ca
@@ -1,7 +1,7 @@
-title: //h2[@class="post-title"]\r
-date: //span[@class="post-date"]\r
-body: //div[@class="post-entry"]\r
-\r
-#This is also good for *.streetsblog.org, for example:\r
+title: //h2[@class="post-title"]
+date: //span[@class="post-date"]
+body: //div[@class="post-entry"]
+
+#This is also good for *.streetsblog.org, for example:
 #http://dc.streetsblog.org/2011/10/21/friday-job-market/
 test_url: http://streetsblog.net/2011/10/20/look-out-below-one-in-nine-bridges-structurally-deficient-reports-t4a/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 12fd093..3756092
@@ -1,22 +1,22 @@
-title://div[@id='left_col']/h1\r
-author:substring-after(//span[contains(@class,'storycredit')],'BY ')\r
-author://span[contains(@class,'storycredit')]\r
-date:substring-after(//div[contains(@class,'toolbox_date')],'Last updated ')\r
-date://div[contains(@class,'toolbox_date')]\r
-body://div[@id='left_col']\r
-\r
-strip_id_or_class: toolbox\r
-strip_id_or_class: story_features\r
-strip_id_or_class: sharebox_new\r
-strip_id_or_class: related_box\r
-strip_id_or_class: sponsored_links\r
-strip_id_or_class: hidden_ad\r
-strip_id_or_class: story_content_top\r
-strip_id_or_class: total_number\r
-strip_id_or_class: sort_order\r
-strip_id_or_class: subscribe_order\r
-\r
-strip://div[contains(@class,'ad_story')]\r
-\r
-test_url: http://www.stuff.co.nz/national/politics/3930344/PM-issues-challenge\r
+title://div[@id='left_col']/h1
+author:substring-after(//span[contains(@class,'storycredit')],'BY ')
+author://span[contains(@class,'storycredit')]
+date:substring-after(//div[contains(@class,'toolbox_date')],'Last updated ')
+date://div[contains(@class,'toolbox_date')]
+body://div[@id='left_col']
+
+strip_id_or_class: toolbox
+strip_id_or_class: story_features
+strip_id_or_class: sharebox_new
+strip_id_or_class: related_box
+strip_id_or_class: sponsored_links
+strip_id_or_class: hidden_ad
+strip_id_or_class: story_content_top
+strip_id_or_class: total_number
+strip_id_or_class: sort_order
+strip_id_or_class: subscribe_order
+
+strip://div[contains(@class,'ad_story')]
+
+test_url: http://www.stuff.co.nz/national/politics/3930344/PM-issues-challenge
 test_url: http://www.stuff.co.nz/entertainment/7045944/International-praise-for-Ladyhawke
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8568216..9adc3c5
@@ -1,3 +1,3 @@
-single_page_link: //iframe[@id='stumbleFrame']/@src\r
-\r
-test_url: www.stumbleupon.com/su/35V0wB/zouchmagazine.com/poetry-violet/
\ No newline at end of file
+single_page_link: //iframe[@id='tb-stumble-frame']/@src
+
+test_url: http://www.stumbleupon.com/su/35V0wB/zouchmagazine.com/poetry-violet/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 454e37b..9ba6eb7
@@ -1,17 +1,17 @@
-title: //*[@id='posts']/div[1]/h2\r
-author: //*[@id='posts']/div[1]/div[2]/span[2]/a\r
-date: //*[@class='date']\r
-body: //div[@class='body-lead']\r
-\r
-# take out the bit saying 'body'\r
-strip: //div[@class='body-lead']/div[@class='info-label']\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
+title: //*[@id='posts']/div[1]/h2
+author: //*[@id='posts']/div[1]/div[2]/span[2]/a
+date: //*[@class='date']
+body: //div[@class='body-lead']
+
+# take out the bit saying 'body'
+strip: //div[@class='body-lead']/div[@class='info-label']
+
+
+
+
+
+
+
+
+
 test_url: http://www.subtraction.com/2011/02/01/unnecessary-explanations
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4aa9410..74b8d45
@@ -1,18 +1,18 @@
-# 2012-12-04: complete rewrite after Süddeutsche.de relaunch - carlo@...\r
-\r
-single_page_link: //a[ contains( @href, "/2.220/" ) ]\r
-\r
-body: //article[@id="sitecontent"]/section[@class="body"]\r
-author: //address[@class="author"]\r
-date: //div[@class="header"]//h1//span[@class="updated"]\r
-wrap_in(small): //div[@class="footer"]\r
-wrap_in(i): //figcaption/h3\r
-dissolve: //figcaption//h3\r
-dissolve: //figure/div[@class="body"]\r
-dissolve: //figure/a\r
-\r
-strip: //figure[ not( contains(@class, "zoomimage" ) ) ]\r
-strip: //div[@data-onlineonly="true"]\r
-strip: //address[@class="author"]\r
-\r
+# 2012-12-04: complete rewrite after Süddeutsche.de relaunch - carlo@...
+
+single_page_link: //a[ contains( @href, "/2.220/" ) ]
+
+body: //article[@id="sitecontent"]/section[@class="body"]
+author: //address[@class="author"]
+date: //div[@class="header"]//h1//span[@class="updated"]
+wrap_in(small): //div[@class="footer"]
+wrap_in(i): //figcaption/h3
+dissolve: //figcaption//h3
+dissolve: //figure/div[@class="body"]
+dissolve: //figure/a
+
+strip: //figure[ not( contains(@class, "zoomimage" ) ) ]
+strip: //div[@data-onlineonly="true"]
+strip: //address[@class="author"]
+
 test_url: http://www.sueddeutsche.de/muenchen/mietshaus-am-gaertnerplatz-alles-muss-raus-1.1556693
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 13390e4..6d4594c
@@ -1,14 +1,14 @@
-title: //div[@class='story-details']/h1\r
-date: //span[@class='date-time']\r
-Author: substring-after(//p[@class='by-line'], 'By ')\r
-\r
-strip: //div[@class='videoThumbnails']\r
-strip: //div[@class='ad-square2-container']\r
-strip: //div[@class='homeDeliveryContainer5']\r
-\r
-strip: //div[@class='image-description']\r
-strip: //div[@id='internal-side-bar']\r
-\r
-strip: //span[@class='hide']\r
+title: //div[@class='story-details']/h1
+date: //span[@class='date-time']
+Author: substring-after(//p[@class='by-line'], 'By ')
+
+strip: //div[@class='videoThumbnails']
+strip: //div[@class='ad-square2-container']
+strip: //div[@class='homeDeliveryContainer5']
+
+strip: //div[@class='image-description']
+strip: //div[@id='internal-side-bar']
+
+strip: //span[@class='hide']
 strip: //div[@class='date']
 test_url: http://www.suntimes.com/technology/ihnatko/8816567-452/review-kindle-fire-is-no-ipad-killer-but-it-is-a-killer-device.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 02b5b8c..bc0a1ca
@@ -1,4 +1,14 @@
-# Ads\r
-strip_id_or_class: articlead\r
+body: //div[@id='article-content']
+author: //div[@id='article']//div[@class='byline']/p
 
-test_url: http://www.svd.se/nyheter/inrikes/manga-huggormsbitna-golfare_5004031.svd
\ No newline at end of file
+# Ads
+strip_id_or_class: articlead
+
+# Sharing
+strip_id_or_class: share
+
+prune: no
+
+test_url: http://www.svd.se/nyheter/inrikes/oppositionen-stoppar-skattesankning_8531228.svd
+test_url: http://www.svd.se/nyheter/inrikes/manga-huggormsbitna-golfare_5004031.svd
+test_url: http://www.svd.se/?service=rss&type=senastenytt
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/svt.se.txt b/inc/3rdparty/site_config/standard/svt.se.txt
new file mode 100755 (executable)
index 0000000..ba35f7d
--- /dev/null
@@ -0,0 +1,16 @@
+title: //article[@role='main']//h1
+body: //article[@role='main']
+strip: //aside
+replace_string(<noscript>): <div>
+replace_string(</noscript>): </div>
+strip_id_or_class: svtHide-No-Js
+strip_id_or_class: aside
+strip_id_or_class: Aside
+strip_id_or_class: hidden
+strip_id_or_class: Share
+tidy: no
+prune: no
+
+test_url: http://www.svt.se/ug/framtidsdrommar-om-jobb-blev-lackande-gifthal
+test_url: http://www.svt.se/nyheter/het-debatt-mellan-borg-och-andersson
+test_url: http://www.svt.se/nyheter/regionalt/svtsormland/sj-tag-evakuerades-efter-rokdrama
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index da6772a..24ba142
@@ -1,11 +1,18 @@
-title: //h1\r
-\r
-author: //a[contains(@href, '/sok/?')]/text()\r
-\r
-date: substring-after(//span[@class='date'], 'Publicerad ')\r
-\r
-body: //div[@class='two_column_left']\r
-strip_id_or_class: story\r
-strip: //div[@class='leadText saplo:lead']/h5\r
-
-test_url: http://www.sydsvenskan.se/kultur-och-nojen/-jag-vill-garna--stanna--
\ No newline at end of file
+title: //h1
+
+author: //a[contains(@href, '/sok/?')]/text()
+
+date: //meta[@name='bi3dPubDate']/@content
+
+body: (//div[contains(@class, 'slider_wrapper')])[1] | //div[@id='article_image' or @class='two_column_left']
+strip_id_or_class: story
+strip_id_or_class: article_body_ad
+strip: //div[@class='leadText saplo:lead']/h5
+
+replace_string(<br />): <br /><br />
+
+prune: no
+
+test_url: http://www.sydsvenskan.se/malmo/allt-jag-ager-ligger-pa-botten/
+test_url: http://www.sydsvenskan.se/kultur-och-nojen/-jag-vill-garna--stanna--
+test_url: http://www.sydsvenskan.se/rss.xml
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3109c0e..5bcfb9e
@@ -1,12 +1,12 @@
-title: //div[contains(@class, "post")]/h2\r
-\r
-author: //div[contains(@class, "post")]/p[position()=last()]/text()[1]\r
-\r
-date: //div[contains(@class, "post")]/p[1]\r
-\r
-body: //div[contains(@class, "post")]\r
-\r
-strip: //div[contains(@class, "post")]/h2[1]\r
-strip: //div[contains(@class, "post")]/p[1]\r
+title: //div[contains(@class, "post")]/h2
+
+author: //div[contains(@class, "post")]/p[position()=last()]/text()[1]
+
+date: //div[contains(@class, "post")]/p[1]
+
+body: //div[contains(@class, "post")]
+
+strip: //div[contains(@class, "post")]/h2[1]
+strip: //div[contains(@class, "post")]/p[1]
 strip: //div[contains(@class, "post")]/p[position()=last()]
 test_url: http://www.symmetrymagazine.org/breaking/?p=12784
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c3e3497..e058032
@@ -1,15 +1,15 @@
-title: //h1\r
-body://div[@class='drucken']\r
-author: substring-after(//span[@class='autor'], 'Von ')\r
-author: //span[@class='autor']\r
-\r
-single_page_link://a[contains(@href, '/drucken/')]\r
-convert_double_br_tags:yes\r
-\r
-dissolve://div[@class='vorspann']\r
-\r
-strip://h1\r
-strip_id_or_class: klassifizierung\r
-strip_id_or_class: source\r
+title: //h1
+body://div[@class='drucken']
+author: substring-after(//span[@class='autor'], 'Von ')
+author: //span[@class='autor']
+
+single_page_link://a[contains(@href, '/drucken/')]
+convert_double_br_tags:yes
+
+dissolve://div[@class='vorspann']
+
+strip://h1
+strip_id_or_class: klassifizierung
+strip_id_or_class: source
 strip_id_or_class: autor
 test_url: http://sz-magazin.sueddeutsche.de/texte/anzeigen/37567
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sz.de.txt b/inc/3rdparty/site_config/standard/sz.de.txt
new file mode 100755 (executable)
index 0000000..f67637d
--- /dev/null
@@ -0,0 +1,18 @@
+# 2012-12-04: complete rewrite after Süddeutsche.de relaunch - carlo@...
+
+single_page_link: //a[ contains( @href, "/2.220/" ) ]
+
+body: //article[@id="sitecontent"]/section[@class="body"]
+author: //address[@class="author"]
+date: //div[@class="header"]//h1//span[@class="updated"]
+wrap_in(small): //div[@class="footer"]
+wrap_in(i): //figcaption/h3
+dissolve: //figcaption//h3
+dissolve: //figure/div[@class="body"]
+dissolve: //figure/a
+
+strip: //figure[ not( contains(@class, "zoomimage" ) ) ]
+strip: //div[@data-onlineonly="true"]
+strip: //address[@class="author"]
+
+test_url: http://sz.de/1.1556693
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8ce8a90..be76cd0
@@ -1,23 +1,23 @@
-title://h1[1]\r
-\r
-author: substring-after(//em, 'Von ')\r
-author:string('tagesschau.de')\r
-\r
-date:substring-after(//div[@class='standDatum'], 'Stand: ')\r
-\r
-body://div[contains(@class, 'article')] | //div[contains(@class, 'centerCol')]\r
-\r
-strip://h1[1]\r
-strip: //div[contains(@class, 'directLinks')]\r
-strip: //div[contains(@class, 'zitatBox')]\r
-strip: //div[contains(@class, 'teaserBox metaBlock')]\r
-strip: //*[contains(@class, 'inv')]\r
-strip: //span[@class='imgSubline']\r
-strip: //*[contains(@class, 'topline')][1]\r
-strip: //div[@id='rightCol'][1]\r
-strip: //div[@id="footer"][1]\r
-strip: //div[@class="fPlayer"] \r
-strip: //div[@id='seitenanfang']\r
-strip: //div[@class='standDatum']\r
+title://h1[1]
+
+author: substring-after(//em, 'Von ')
+author:string('tagesschau.de')
+
+date:substring-after(//div[@class='standDatum'], 'Stand: ')
+
+body://div[contains(@class, 'article')] | //div[contains(@class, 'centerCol')]
+
+strip://h1[1]
+strip: //div[contains(@class, 'directLinks')]
+strip: //div[contains(@class, 'zitatBox')]
+strip: //div[contains(@class, 'teaserBox metaBlock')]
+strip: //*[contains(@class, 'inv')]
+strip: //span[@class='imgSubline']
+strip: //*[contains(@class, 'topline')][1]
+strip: //div[@id='rightCol'][1]
+strip: //div[@id="footer"][1]
+strip: //div[@class="fPlayer"] 
+strip: //div[@id='seitenanfang']
+strip: //div[@class='standDatum']
 strip: //em
 test_url: http://www.tagesschau.de/ausland/wahlkampffrankreich102.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index bfe841c..47a6ffa
@@ -1,5 +1,5 @@
-title: //span[@class="entry-title"]\r
-author: //*[contains(@class, 'item')]/p/a/text()\r
-date: substring-after(//*[contains(@class, 'item')]/p/text()[3], 'Posted:')\r
+title: //span[@class="entry-title"]
+author: //*[contains(@class, 'item')]/p/a/text()
+date: substring-after(//*[contains(@class, 'item')]/p/text()[3], 'Posted:')
 body: //div[@class="entry-content"]
 test_url: http://www.tampabay.com/news/salvador-dali-leaders-want-st-petersburg-city-council-to-put-brakes-on/1236349
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 13de70e..e1e7942
@@ -1,4 +1,4 @@
-title: //h3[@class="storytitle"]\r
-body: //div[@class="post"]\r
+title: //h3[@class="storytitle"]
+body: //div[@class="post"]
 strip: //div[@class="blurbBox"]
 test_url: http://taptaptap.com/blog/apples-precedents-vs-apples-guidelines/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7777336..f3234f3
@@ -1,15 +1,11 @@
-title: //span[@id='ctl00_ctl00_MainContent_MainContent_RecipeImage1_lblRecipeTitle']\r
-body: //div[@id='RDNEW']//*[@class='Recipe-imgCon' or @class='Recipe-Intro' or @class='recipeDetails']\r
-strip_id_or_class: rec-ExRightPanel\r
-strip_id_or_class: divCarousel\r
-strip_id_or_class: preptimeOuter\r
-strip_id_or_class: cooktimeOuter\r
-strip_id_or_class: durationOuter\r
-strip_id_or_class: divImageFooter\r
-strip_id_or_class: microFormatFnIngred\r
-strip: //span[@class='Recipe-Intro']//*[@class='link' or @class='rating']\r
-\r
-prune: no\r
-tidy: no\r
-
-test_url: http://www.tasteofhome.com/recipes/Grinch-Punch
\ No newline at end of file
+title: //div[@id='ctl00_MainContent_ctl00_Div1']//h2
+body: //div[@id='ctl00_MainContent_ctl00_Div1']
+
+single_page_link: //div[contains(@class, 'recipeHeader')]//a[contains(@href, '/print')]
+
+strip_image_src: tohPrintL.png
+
+prune: no
+
+test_url: http://www.tasteofhome.com/recipes/Grinch-Punch
+test_url: http://www.tasteofhome.com/recipes/lactose-free-chocolate-chip-cookies
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6e84527..cf85366
@@ -1,8 +1,8 @@
-date: //div[@class='secthead']\r
-body: //div[@class='sectbody']\r
-title: concat(//div[@class='sectbody']/h4,': ',//div[@class='sectbody']/h1)\r
-author: //span[@class='author']\r
-strip: //p[@class='caption']\r
-strip_id_or_class: rack\r
+date: //div[@class='secthead']
+body: //div[@class='sectbody']
+title: concat(//div[@class='sectbody']/h4,': ',//div[@class='sectbody']/h1)
+author: //span[@class='author']
+strip: //p[@class='caption']
+strip_id_or_class: rack
 
 test_url: http://www.taz.de/Protestbewegung-Occupy/!80188/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index fbe94fa..558dc9c
@@ -1,5 +1,5 @@
-body: //div[@id='centercontent']\r
-strip: //div[@id='rightcontent']\r
-date: substring-before( //div[@id='cats'], '·')\r
+body: //div[@id='centercontent']
+strip: //div[@id='rightcontent']
+date: substring-before( //div[@id='cats'], '·')
 title: //h1
 test_url: http://www.tbray.org/ongoing/When/201x/2012/03/04/Mobile-Money
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tcmanila.tk.txt b/inc/3rdparty/site_config/standard/tcmanila.tk.txt
new file mode 100755 (executable)
index 0000000..f6032ec
--- /dev/null
@@ -0,0 +1,7 @@
+title: //h2
+body: //div[@class="post_content"]
+author: //span[@class="fn"]
+date: //time[@class="updated"]
+strip_comments: //yes
+footnotes: //yes
+test_url: http://tcmanila.tk/post/29189064358/my-2012-roadmap-is-almost-complete-look-at-the
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 765224e..4873b50
@@ -1,4 +1,4 @@
-title: //div[@id='main-content']/h1\r
-body: //div[@id='main-content']\r
+title: //div[@id='main-content']/h1
+body: //div[@id='main-content']
 strip: //div[@id='main-content']/h1
 test_url: http://www.tcng.org/index.php/blog/view/teaching-basic-health-cutting-down-costs
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b6d17da..da19862
@@ -1,4 +1,4 @@
-title: //h1[@class='storyheadline']\r
-body: //div[@class='storytext']\r
+title: //h1[@class='storyheadline']
+body: //div[@class='storytext']
 strip: //strong
 test_url: http://tech.fortune.cnn.com/2011/03/17/why-startups-dont-go-public-anymore/?section=money_topstories&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fmoney_topstories+%28Top+Stories%29
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tech.gilt.com.txt b/inc/3rdparty/site_config/standard/tech.gilt.com.txt
new file mode 100755 (executable)
index 0000000..ab56460
--- /dev/null
@@ -0,0 +1,5 @@
+title: //div[@class="title"]/h1
+title: //div[@class="caption"]/h1
+author: substring-after(//div[@class="metadata"]/div[@class="date"]/a[2], 'by ')
+date: //div[@class="metadata"]/div[@class="date"]/a
+test_url: http://tech.gilt.com/post/46359463184/26-3-13-todays-noon-outage-and-what-were-doing-to
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f7228eb..75126f9
@@ -1,11 +1,11 @@
-title://h1[contains(@id,'artibodyTitle')]\r
-\r
-date://span[contains(@id,'pub_date')]\r
-\r
-body://div[contains(@id,'artibody')]\r
-\r
-strip://div[contains(@class,'otherContent')]\r
-\r
-next_page_link://p[@class='page']/a[contains(.,'下一页')]\r
+title://h1[contains(@id,'artibodyTitle')]
+
+date://span[contains(@id,'pub_date')]
+
+body://div[contains(@id,'artibody')]
+
+strip://div[contains(@class,'otherContent')]
+
+next_page_link://p[@class='page']/a[contains(.,'下一页')]
 
 test_url: http://tech.sina.com.cn/mobile/n/2012-03-22/07476863046.shtml
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f436acb..1509c46
@@ -1,18 +1,18 @@
-body: //div[contains(@class, 'media-container') or contains(@class, 'body-copy')]\r
-\r
-author: //a[@class="name"]\r
-\r
-date: //div[@class="post-time"]\r
-\r
-title: //h1[@class="headline"]\r
-strip_id_or_class: module-crunchbase\r
-\r
-# The following is for the mobile site\r
-body: //div[@id="singlentry"]\r
-author: substring-after(//span[@class="single-post-meta-top"],'rsaquo; ')\r
-date: substring-before(//div[@class="single-post-meta-top"],' @')\r
-title: //a[@class="sh2"]\r
-\r
-prune: no\r
-\r
+body: //div[contains(@class, 'media-container') or contains(@class, 'body-copy')]
+
+author: //a[@class="name"]
+
+date: //div[@class="post-time"]
+
+title: //h1[@class="headline"]
+strip_id_or_class: module-crunchbase
+
+# The following is for the mobile site
+body: //div[@id="singlentry"]
+author: substring-after(//span[@class="single-post-meta-top"],'rsaquo; ')
+date: substring-before(//div[@class="single-post-meta-top"],' @')
+title: //a[@class="sh2"]
+
+prune: no
+
 test_url: http://techcrunch.com/2011/10/18/apples-insanely-great-q1-2012/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 727f370..7db2f95
@@ -1,12 +1,12 @@
-body: //div[@class='story']\r
-title: //div[@class='story']/h1\r
-strip: //div[@class='story']/h1\r
-\r
-author: //div[@class='details']/p[contains(., 'by ')]/a\r
-date: //p[@class='storydate']\r
-\r
-strip: //p[a[contains(., 'Leave a Comment')]]\r
-strip_id_or_class: share\r
-strip_id_or_class: maincolumn_head\r
+body: //div[@class='story']
+title: //div[@class='story']/h1
+strip: //div[@class='story']/h1
+
+author: //div[@class='details']/p[contains(., 'by ')]/a
+date: //p[@class='storydate']
+
+strip: //p[a[contains(., 'Leave a Comment')]]
+strip_id_or_class: share
+strip_id_or_class: maincolumn_head
 strip_id_or_class: maincolmod
 test_url: http://www.techdirt.com/articles/20120112/17455117394/sega-gets-it-right-about-sopa-its-time-hard-reset-copyright-law-congress.shtml
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/techhive.com.txt b/inc/3rdparty/site_config/standard/techhive.com.txt
new file mode 100755 (executable)
index 0000000..29720b0
--- /dev/null
@@ -0,0 +1,18 @@
+title: //div[@class='articleHead']//h1
+author: //div[@class="author-name"]/a[1]
+body: //div[@class="main"]
+
+# remove 'From the Lab' and 'Recent posts' text
+strip: //div[@class='blogLabel']
+
+# remove byline and meta info
+strip: //div[@class="article-meta"]
+strip: //div[@class="author-info"]
+
+#strip tags and categories
+strip: //div[@class="department"]
+
+# strip product cap links
+strip: //div[@class="cap-main"]
+strip: //div[@id="compare-lede"]
+test_url: http://www.techhive.com/article/2010549/up-close-with-blackberry-10.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8644e00..0b4bfbd
@@ -1,3 +1,3 @@
-single_page_link_in_feed: //b/a\r
-\r
+single_page_link_in_feed: //b/a
+
 test_url_feed: http://www.techmeme.com/feed.xml
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index cc26ee4..d871b60
@@ -1,8 +1,8 @@
-title: //h2\r
-author: //meta[@name="author"]/@content\r
-date: //h3\r
-body: //div[@class="postBody"]\r
-strip: //h1\r
-strip: //h2\r
-strip: //h3\r
+title: //h2
+author: //meta[@name="author"]/@content
+date: //h3
+body: //div[@class="postBody"]
+strip: //h1
+strip: //h2
+strip: //h3
 test_url: http://technicallyjordan.tumblr.com/post/22914659822/facebook-to-launch-app-store-knock-off
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/technologizer.com.txt b/inc/3rdparty/site_config/standard/technologizer.com.txt
new file mode 100755 (executable)
index 0000000..179bf5a
--- /dev/null
@@ -0,0 +1,5 @@
+next_page_link: //a[contains(., 'NEXT PAGE')]
+# following::node() selects text nodes too whereas following::* selects only elements.
+strip: //span[@class='pageo']/following::node()
+strip: //span[@class='pageo']
+test_url: http://technologizer.com/2010/03/08/the-secret-origin-of-windows/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 41f21d4..d405eb1
@@ -1,16 +1,16 @@
-title: //header[@class='article-meta']/h1\r
-title: substring-before(//title, '|')\r
-\r
-body: //section[contains(@class, 'body')]\r
-\r
-# Author & Date for News and Featured Stories\r
-author: //ul[@class='byline']/li/a\r
-author: substring-before(substring-after(//ul[@class='byline']/li, 'By '), ' on')\r
-date: substring-after(//ul[@class='byline']/li, 'on ')\r
-\r
-# Author & Date for "Views"\r
-author: //div[@class='view-byline']/div[@class='meta']/h2[1]\r
-date: //div[@class='view-byline']/div[@class='meta']/h2[2]\r
-\r
-next_page_link: //section[@class='pagination']/a[contains(@class, 'continue')]\r
+title: //header[@class='article-meta']/h1
+title: substring-before(//title, '|')
+
+body: //section[contains(@class, 'body')]
+
+# Author & Date for News and Featured Stories
+author: //ul[@class='byline']/li/a
+author: substring-before(substring-after(//ul[@class='byline']/li, 'By '), ' on')
+date: substring-after(//ul[@class='byline']/li, 'on ')
+
+# Author & Date for "Views"
+author: //div[@class='view-byline']/div[@class='meta']/h2[1]
+date: //div[@class='view-byline']/div[@class='meta']/h2[2]
+
+next_page_link: //section[@class='pagination']/a[contains(@class, 'continue')]
 test_url: http://www.technologyreview.com/news/427567/facebooks-telescope-on-human-behavior/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 89ed834..8e1aa96
@@ -1,7 +1,7 @@
-body: //div[@class="post"]\r
-\r
-strip: //div[@class="post-meta"]\r
-strip: //div[@id="socialicons"]\r
-strip: //div[@id="authorbox"]\r
+body: //div[@class="post"]
+
+strip: //div[@class="post-meta"]
+strip: //div[@id="socialicons"]
+strip: //div[@id="authorbox"]
 
 test_url: http://techpinions.com/why-google-and-microsoft-hate-siri/3572
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ed92a97..0a0ca61
@@ -1,12 +1,12 @@
-# Title without news/reviews etc. appended\r
-title: //div[@id='subColumn1Pad']/div[1][@class='article']/div[1][@class='articleHead']/h1\r
-\r
-# Remove home link\r
-strip: //div[@id='page_logo']/a\r
-\r
-# Remove utilities\r
-strip: //*[(@id = "utilities")]\r
-\r
-# Remove comments link\r
+# Title without news/reviews etc. appended
+title: //div[@id='subColumn1Pad']/div[1][@class='article']/div[1][@class='articleHead']/h1
+
+# Remove home link
+strip: //div[@id='page_logo']/a
+
+# Remove utilities
+strip: //*[(@id = "utilities")]
+
+# Remove comments link
 strip: //div[@id='subColumn1Pad']/div[1][@class='article']/div[1][@class='articleHead']/p[@class='tiny']
 test_url: http://www.techradar.com/news/television/sky-to-rebrand-living-as-sky-living-903105
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ff3cd06..91b5baf
@@ -1,9 +1,9 @@
-body: //div[@id='artikelKolom']\r
-strip: //div[@class='broodMediaBox']/div[@class='docbox' or @class='artBannerWrapper']\r
-strip: //div[@id='artikeltoolbar']\r
-strip: //div[@class='reactiebalk artspacer' or @class='bannercenter clearfix artspacer']\r
-strip: //div[@id='artikelKolomRechts' or @id='TMGTweetWidget']\r
-tidy: no\r
-prune: no\r
+body: //div[@id='artikelKolom']
+strip: //div[@class='broodMediaBox']/div[@class='docbox' or @class='artBannerWrapper']
+strip: //div[@id='artikeltoolbar']
+strip: //div[@class='reactiebalk artspacer' or @class='bannercenter clearfix artspacer']
+strip: //div[@id='artikelKolomRechts' or @id='TMGTweetWidget']
+tidy: no
+prune: no
 
 test_url: http://www.telegraaf.nl/binnenland/10275097/__Identiteit_man_in_sloot_onbekend__.html?cid=rss
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e1faf23..8dcdb42
@@ -1,10 +1,10 @@
-body: //div[@class='byline' or @id='storyEmbSlide' or @id='mainBodyArea']\r
-strip: //p[@class='comments']\r
-strip: //div[@id='storyEmbSlide']//div[contains(@class, "hide")]\r
-strip: //div[@id='tmg-related-links' or @id='outbrain-related-links' or @id='onespot-related-links']\r
-strip: //p[@class='bbpTweet']/span[@class='timestamp']\r
-strip: //p[@class='bbpTweet']/span[@class='metadata']//img\r
-tidy: no\r
-prune: no\r
+body: //div[@class='byline' or @id='storyEmbSlide' or @id='mainBodyArea']
+strip: //p[@class='comments']
+strip: //div[@id='storyEmbSlide']//div[contains(@class, "hide")]
+strip: //div[@id='tmg-related-links' or @id='outbrain-related-links' or @id='onespot-related-links']
+strip: //p[@class='bbpTweet']/span[@class='timestamp']
+strip: //p[@class='bbpTweet']/span[@class='metadata']//img
+tidy: no
+prune: no
 
 test_url: http://www.telegraph.co.uk/news/worldnews/europe/ireland/8663451/Is-Ireland-divorcing-from-the-Catholic-Church.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thanhnien.com.vn.txt b/inc/3rdparty/site_config/standard/thanhnien.com.vn.txt
new file mode 100755 (executable)
index 0000000..596ecc9
--- /dev/null
@@ -0,0 +1,4 @@
+body://div[@id="print-news"]
+strip://a
+strip://span[@class="date-line"]
+test_url: http://www.thanhnien.com.vn/pages/20121006/hon-90-trieu-usd-nang-cap-do-thi-can-tho.aspx
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/the-magazine.org.txt b/inc/3rdparty/site_config/standard/the-magazine.org.txt
new file mode 100755 (executable)
index 0000000..0886465
--- /dev/null
@@ -0,0 +1,3 @@
+tidy: no
+
+test_url: http://the-magazine.org/1/alone-together-again
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/theage.com.au.txt b/inc/3rdparty/site_config/standard/theage.com.au.txt
new file mode 100755 (executable)
index 0000000..ea27c31
--- /dev/null
@@ -0,0 +1,5 @@
+author: //h3[@class='authorName']
+date: //time
+body: //div[@class='articleBody']
+strip_id_or_class: adspot
+test_url: http://www.theage.com.au/victoria/top-cops-warns-outlaw-bikies-we-have-a-gang-too-20130331-2h1l8.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/theamericanscholar.org.txt b/inc/3rdparty/site_config/standard/theamericanscholar.org.txt
new file mode 100755 (executable)
index 0000000..38b9667
--- /dev/null
@@ -0,0 +1,13 @@
+# Article Metadata
+title: //meta[@property="og:title"]/@content
+author: substring-after(//h3, 'By ')
+date: //h4/a[2]
+
+# Content Pruning
+strip: //h4
+strip: //a[@id="print_button"]
+strip: //p[@class="excerpt"]
+strip: //h3
+strip: //div[@class="caption"]
+strip: //center/a/img
+test_url: http://theamericanscholar.org/too-big-to-fail-and-too-risky-to-exist/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3bd555f..caa5ae0
@@ -1,3 +1,3 @@
-# Remove home link\r
+# Remove home link
 strip: //div[@id='blog-title']/a
 test_url: http://theappleblog.com/2010/10/21/the-new-macbook-air-is-underwhelming/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 267fd39..aa41b15
@@ -1,18 +1,20 @@
-title: //div[@id='article']/h1\r
-title: //h1\r
-\r
-body: //div[@class='articleText']\r
-body: //div[@class='articleContent']\r
-body: //div[@id='article']\r
-date: //*[contains(@class, 'date')]\r
-author: //div[@id='profile']//*[@class='authors']//a[1]\r
-author: //*[@class='author']/span\r
-prune: no\r
-\r
-strip: //div[@class='moreOnBoxWithImages']\r
-\r
-single_page_link: //a[@class='print']\r
-\r
-test_url: http://www.theatlantic.com/technology/archive/2011/04/want-to-see-how-crazy-a-bot-run-market-can-be/237773/\r
-test_url: http://www.theatlantic.com/magazine/archive/2007/11/the-autumn-of-the-multitaskers/6342/\r
+title: //div[contains(@class, 'articleHead')]//h1
+
+body: //div[@class='articleText']
+body: //div[@class='articleContent']
+body: //div[@id='article']
+date: //*[contains(@class, 'date')]
+author: //div[@id='profile']//*[@class='authors']//a[1]
+author: //*[@class='author']/span
+prune: no
+
+strip: //div[@class='moreOnBoxWithImages']
+strip: //p[contains(., 'This article available online at:')]
+strip: //p[contains(., 'This article available online at:')]/following::*
+strip: //div[@class='earthbox']
+
+single_page_link: //article//a[contains(@class, 'print')]
+
+test_url: http://www.theatlantic.com/technology/archive/2011/04/want-to-see-how-crazy-a-bot-run-market-can-be/237773/
+test_url: http://www.theatlantic.com/magazine/archive/2007/11/the-autumn-of-the-multitaskers/6342/
 test_url: http://www.theatlantic.com/entertainment/archive/2012/04/30-rock-live-a-funny-reminder-of-why-sitcoms-arent-shot-live-anymore/256447/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/theatlanticcities.com.txt b/inc/3rdparty/site_config/standard/theatlanticcities.com.txt
new file mode 100755 (executable)
index 0000000..880f207
--- /dev/null
@@ -0,0 +1,17 @@
+# To administrator:
+# Please replace the hostname with "*.theatlanticcities.com"
+
+# This filter is tested on:
+# http://m.theatlanticcities.com/arts-and-lifestyle/2012/04/invisible-borders-define-american-culture/1839/
+# http://www.theatlanticcities.com/housing/2012/11/chinas-holdouts/3981/
+# http://www.theatlanticcities.com/arts-and-lifestyle/2012/12/christmas-time-here/4133/
+
+title://h1
+author: //ul[@class='meta']/li/a
+date: //ul[@class='meta']/li/following-sibling::li
+body://article[@class='post']
+
+strip://h1
+strip://ul[@class='meta']
+strip://div[@class='newsletter-slug']
+test_url: http://www.theatlanticcities.com/arts-and-lifestyle/2012/12/christmas-time-here/4133/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 64df90c..b74442d
@@ -1,7 +1,7 @@
-title: //meta[@name='og:title']/@content\r
-date: //meta[@name='created']/@content\r
-body: //div[@class="StoryBody" or @class="storyTeaser"]\r
-\r
-replace_string(<p></p>): <br /><br />\r
-\r
+title: //meta[@name='og:title']/@content
+date: //meta[@name='created']/@content
+body: //div[@class="StoryBody" or @class="storyTeaser"]
+
+replace_string(<p></p>): <br /><br />
+
 test_url: http://www.thebostonchannel.com/slideshow/news/28210648/detail.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c3c2050..807e7da
@@ -1,10 +1,10 @@
-title: //h2[contains(@class, 'page-title')]\r
-body: //div[@id='content']/div[contains(@id, 'node-')]/div[@class='content']\r
-\r
-prune: no\r
-\r
-strip: //div[contains(@class, 'node-book')]//a[@class='button']\r
-\r
-single_page_link: //a[@class='tool-print']\r
+title: //h2[contains(@class, 'page-title')]
+body: //div[@id='content']/div[contains(@id, 'node-')]/div[@class='content']
+
+prune: no
+
+strip: //div[contains(@class, 'node-book')]//a[@class='button']
+
+single_page_link: //a[@class='tool-print']
 
 test_url: http://thebrowser.com/interviews/yotam-ottolenghi-on-his-favourite-cookery-books
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9ef4ed8..13fa35a
@@ -1,10 +1,10 @@
-title: substring-before(//title, ' &ndash; ') \r
-author:string('Shawn')\r
-date: //*/time/@pubdate\r
-\r
-\r
-strip: //header\r
-strip: //div[@id='prev_next']\r
-strip: //div[@id='masthead']\r
-\r
+title: substring-before(//title, ' &ndash; ') 
+author:string('Shawn')
+date: //*/time/@pubdate
+
+
+strip: //header
+strip: //div[@id='prev_next']
+strip: //div[@id='masthead']
+
 test_url: http://thecarton.net/2012/12/20/imdb
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 24ebbba..e255e6a
@@ -1,24 +1,24 @@
-#keep all body text\r
-prune: no\r
-\r
-#title, body, metadata\r
-title: //div[@class='story_header']/h1\r
-body: //div[@id='content']\r
-author: substring-after(//span[@class='byline'], "by ")\r
-author: substring-after(//span[@class='byline'], "By ")\r
-author: //span[@class='byline']\r
-date: //span[@class='date']\r
-\r
-#formatting\r
-convert_double_br_tags: yes\r
-dissolve: //div[@class='slides_full']/ul/li\r
-\r
-# cleanup\r
-strip: //a[@id='story_note']\r
-strip: //br\r
-strip: //div[@class='intro']\r
-strip: //div[@class='share-block']\r
-strip: //div[@class='sidebar-social']\r
-strip: //div[@class='top-stories']\r
-strip: //div[@class='prevnext']\r
+#keep all body text
+prune: no
+
+#title, body, metadata
+title: //div[@class='story_header']/h1
+body: //div[@id='content']
+author: substring-after(//span[@class='byline'], "by ")
+author: substring-after(//span[@class='byline'], "By ")
+author: //span[@class='byline']
+date: //span[@class='date']
+
+#formatting
+convert_double_br_tags: yes
+dissolve: //div[@class='slides_full']/ul/li
+
+# cleanup
+strip: //a[@id='story_note']
+strip: //br
+strip: //div[@class='intro']
+strip: //div[@class='share-block']
+strip: //div[@class='sidebar-social']
+strip: //div[@class='top-stories']
+strip: //div[@class='prevnext']
 test_url: http://www.thedaily.com/page/2012/01/09/010912-news-college-costs-1-5/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4781c65..f5e938a
@@ -1,7 +1,7 @@
-title: //h1\r
-body: //article/div[contains(@class, 'article-body')]\r
-#strip: //header/hgroup/h1\r
-strip: //footer[@class='storyFooter']\r
-single_page_link: //li[@class='print']/a\r
-prune: no\r
+title: //h1
+body: //article/div[contains(@class, 'article-body')]
+#strip: //header/hgroup/h1
+strip: //footer[@class='storyFooter']
+single_page_link: //li[@class='print']/a
+prune: no
 test_url: http://www.thedailybeast.com/articles/2010/04/06/how-mastercard-predicts-divorce.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0f15558..a83a6cf
@@ -1,14 +1,14 @@
-# Remove duplicated title\r
-strip: //div[@id='content']/div[1][@class='full_intro']/h2\r
-\r
-# Remove links, ads etc.\r
-strip: //*[(@class= "aside")]\r
-\r
-# Remove the  date and add it to the date published field in Instapaper\r
-strip: //div[@class="date"]\r
-date: //div[@class="date"]\r
-\r
-# There is no byline on The Daily Mash.\r
-\r
-convert_double_br_tags: yes\r
+# Remove duplicated title
+strip: //div[@id='content']/div[1][@class='full_intro']/h2
+
+# Remove links, ads etc.
+strip: //*[(@class= "aside")]
+
+# Remove the  date and add it to the date published field in Instapaper
+strip: //div[@class="date"]
+date: //div[@class="date"]
+
+# There is no byline on The Daily Mash.
+
+convert_double_br_tags: yes
 test_url: http://www.thedailymash.co.uk/index.php?option=com_content&task=view&id=4994&Itemid=81&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+thedailymash+%28The+Daily+Mash.+It%27s+news+to+us.%29
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thedisneyblog.com.txt b/inc/3rdparty/site_config/standard/thedisneyblog.com.txt
new file mode 100755 (executable)
index 0000000..57b3254
--- /dev/null
@@ -0,0 +1,7 @@
+title: //h1[contains(@class, 'entry-title')]
+author: //span[contains(@class, 'author vcard')]
+date: //span[@class = 'entry-date']
+body: //div[@class='entry-content']
+strip_id_or_class: bottomcontainerBox
+strip_id_or_class: lightsocial_container
+test_url: http://thedisneyblog.com/2012/11/17/videopolis-one-woman-disney-musical-beauty-and-the-beast/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/theeuropean-magazine.com.txt b/inc/3rdparty/site_config/standard/theeuropean-magazine.com.txt
new file mode 100755 (executable)
index 0000000..a19bae1
--- /dev/null
@@ -0,0 +1,17 @@
+# Tested on:
+# http://theeuropean-magazine.com/352-dyson-george/353-evolution-and-innovation
+# http://theeuropean-magazine.com/522-casertano-stefano/919-morsi-and-the-future-of-egypt
+
+title://h2[@class='article-title']
+author:substring-before(substring-after(//p[@class='article-meta'], 'by'), '&mdash;')
+date:substring-after(//p[@class='article-meta'], '&mdash;')
+body://div[@class='article']
+
+wrap_in(strong)://p[@class='article-teaser']
+move_into(//div[@class='article-head'])://li/img
+
+strip://h2[@class='article-title']
+strip://p[@class='article-meta']
+strip://div[@class='copyright']
+strip://div[@class='opinions-of-readers']
+test_url: http://theeuropean-magazine.com/522-casertano-stefano/919-morsi-and-the-future-of-egypt
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thegamedesignforum.com.txt b/inc/3rdparty/site_config/standard/thegamedesignforum.com.txt
new file mode 100755 (executable)
index 0000000..849ede7
--- /dev/null
@@ -0,0 +1,14 @@
+## ERROR: Removes all images. Please fix, have no idea why (bad HTML?)
+
+title: //h1[@class='featuretitle']
+body: //div[@id='nobordercontentarea']
+
+# remove Twitter badge
+strip: //img[@alt='Follow tgdfweb on Twitter']
+
+# fix for headers not showing for some reason
+wrap_in(h2): //h2[@class='sectionheader']
+dissolve: //h2[@class='sectionheader']
+
+tidy: yes
+test_url: http://thegamedesignforum.com/features/acceleration_flow_1.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index fae0fb2..da1c84f
@@ -1,41 +1,41 @@
-title: //h1[@id="headline"]\r
-author: //div[contains(@class, "editorial-byline-author")]/a\r
-date: substring-after(//div[contains(@class, "editorial-byline-meta")], " | ")\r
-\r
-# The article body contains a mix or article and non-article elements, so lot of manual tweaks are needed\r
-body: //div[@id="template"]\r
-strip_id_or_class: editorial-byline-pic\r
-strip_id_or_class: editorial-byline\r
-strip_id_or_class: headline\r
-\r
-# Include the leadin paragraph in the body text, but remove quotes because they're out of context\r
-dissolve: //div[contains(@id, "leadin")]\r
-strip_id_or_class: pullquote\r
-\r
-# Image captions removed because they're confusing in body text\r
-strip_id_or_class: image-caption-content\r
-\r
-# Remove header and footer\r
-strip_id_or_class: header\r
-strip_id_or_class: footer\r
-\r
-# Remove the hidden logo that seems to be used to cause Facebook to show the logo instead of a random article image\r
-strip: /html/body/span[contains(@style, "display: none")]\r
-\r
-# Remove search box\r
-strip_id_or_class: searchContainer\r
-strip: //div[contains(@class, "searchInstruction")]\r
-strip: //div[contains(@class, "searchResults")]/h4\r
-\r
-# Remove the 'Letters to the Editor' section\r
-strip_id_or_class: letter-text\r
-strip_id_or_class: letter-from\r
-strip_id_or_class: letter-date\r
-\r
-# Remove Like/Tweet links \r
-strip_id_or_class: social-tab\r
-\r
-# Remove 'divider' which causes an inexplicable slash to appear in the article body\r
-strip_id_or_class: divider\r
+title: //h1[@id="headline"]
+author: //div[contains(@class, "editorial-byline-author")]/a
+date: substring-after(//div[contains(@class, "editorial-byline-meta")], " | ")
+
+# The article body contains a mix of article and non-article elements, so a lot of manual tweaks are needed
+body: //div[@id="template"]
+strip_id_or_class: editorial-byline-pic
+strip_id_or_class: editorial-byline
+strip_id_or_class: headline
+
+# Include the leadin paragraph in the body text, but remove quotes because they're out of context
+dissolve: //div[contains(@id, "leadin")]
+strip_id_or_class: pullquote
+
+# Image captions removed because they're confusing in body text
+strip_id_or_class: image-caption-content
+
+# Remove header and footer
+strip_id_or_class: header
+strip_id_or_class: footer
+
+# Remove the hidden logo that seems to be used to cause Facebook to show the logo instead of a random article image
+strip: /html/body/span[contains(@style, "display: none")]
+
+# Remove search box
+strip_id_or_class: searchContainer
+strip: //div[contains(@class, "searchInstruction")]
+strip: //div[contains(@class, "searchResults")]/h4
+
+# Remove the 'Letters to the Editor' section
+strip_id_or_class: letter-text
+strip_id_or_class: letter-from
+strip_id_or_class: letter-date
+
+# Remove Like/Tweet links 
+strip_id_or_class: social-tab
+
+# Remove 'divider' which causes an inexplicable slash to appear in the article body
+strip_id_or_class: divider
 
 test_url: http://www.theglobalmail.org/feature/tiramisu-time-in-pyongyang/88/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 90634a0..750f847
@@ -1,5 +1,5 @@
-single_page_link: //div[contains(@class, 'pagination')]//a[contains(@title, 'ingle page')]\r
-tidy: no\r
-prune: no\r
+single_page_link: //div[contains(@class, 'pagination')]//a[contains(@title, 'ingle page')]
+tidy: no
+prune: no
 
 test_url: http://www.theglobeandmail.com/report-on-business/rob-magazine/how-a-novice-miner-survived-a-summer-in-the-klondike/article2345350/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thegreatdiscontent.com.txt b/inc/3rdparty/site_config/standard/thegreatdiscontent.com.txt
new file mode 100755 (executable)
index 0000000..12442b4
--- /dev/null
@@ -0,0 +1,6 @@
+title: //h1[@id='headline']
+author: substring-after(//section[@class="credits"]/ul/li[1],"Interview by ")
+date: //time[@pubdate]
+body: //article[@class='interview']
+strip: //article[@class='interview']/footer
+test_url: http://thegreatdiscontent.com/jeffrey-zeldman
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/theguardian.com.txt b/inc/3rdparty/site_config/standard/theguardian.com.txt
new file mode 100755 (executable)
index 0000000..c803e4e
--- /dev/null
@@ -0,0 +1,13 @@
+title: //div[@id='main-article-info']//h1
+body: //div[@id='article-wrapper']
+date: //li[@class='publication']//time[@pubdate] | //li[@class='publication']//data[@pubdate]
+strip: //div[contains(@class, 'email-subscription')]
+strip: //div[contains(@class, 'kindleWidget')]
+#strip: //a[not(text())]
+strip_id_or_class: pocket-btn
+author: //li[@class='byline']
+prune: no
+tidy: no
+test_url: http://www.theguardian.com/world/2013/oct/04/nsa-gchq-attack-tor-network-encryption
+test_url: http://www.theguardian.com/world/2013/oct/03/edward-snowden-files-john-lanchester
+test_url: http://www.theguardian.com/commentisfree/2014/jun/15/britishness-search-identity-my-part-in-camerons-odyssey
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3544f24..2cd865b
@@ -1,13 +1,13 @@
-title: //h1[@class="Headline"]\r
-date: substring-after(//div[@class="posted"], 'EDT ')\r
-body: //div[@class="storyBody"]\r
-\r
-strip: //td[@class="AssocContentTD"]\r
-strip: //div[@id="pageTitle"]\r
-strip: //div[@class="posted"]\r
-strip: //div[@class="updated"]\r
-strip: //div[@class="js-kit-disclaimer"]\r
-strip: //table[@class="row3table"]\r
-strip: //div[@class="container2"]\r
+title: //h1[@class="Headline"]
+date: substring-after(//div[@class="posted"], 'EDT ')
+body: //div[@class="storyBody"]
+
+strip: //td[@class="AssocContentTD"]
+strip: //div[@id="pageTitle"]
+strip: //div[@class="posted"]
+strip: //div[@class="updated"]
+strip: //div[@class="js-kit-disclaimer"]
+strip: //table[@class="row3table"]
+strip: //div[@class="container2"]
 strip: //div[@id="delta"]
 test_url: http://www.theindychannel.com/news/31050840/detail.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/themarker.com.txt b/inc/3rdparty/site_config/standard/themarker.com.txt
new file mode 100755 (executable)
index 0000000..141b1a3
--- /dev/null
@@ -0,0 +1,11 @@
+title: //h1[contains(@class, 'mainTitle')]
+author: //ul[@class='author']//a[@rel='author']
+body: //div[@id='article-box']
+prune: no
+tidy: no
+strip_id_or_class: head
+strip_id_or_class: social-nav
+strip_id_or_class: rate
+strip_id_or_class: video
+
+test_url: http://www.themarker.com/markerweek/1.2093167
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e3e57fe..4d46dae
@@ -1,10 +1,10 @@
-title: /html/body/div/div[2]/div/div/div/h3\r
-\r
-body: /html/body/div/div[2]/div/div/div/div[2]\r
-\r
-strip: /html/body/div/div[2]/div/div/div/div[6]/div[3]/div/div/div\r
-\r
-tidy: no\r
-\r
+title: /html/body/div/div[2]/div/div/div/h3
+
+body: /html/body/div/div[2]/div/div/div/div[2]
+
+strip: /html/body/div/div[2]/div/div/div/div[6]/div[3]/div/div/div
+
+tidy: no
+
 # any way to get rid of this word character garbage?
 test_url: http://www.themillions.com/2010/07/at-the-movies-with-david-mitchell-the-thousand-autumns-of-jacob-de-zoet.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 518bff9..80aba44
@@ -1,7 +1,7 @@
-body: single-review\r
-strip_id_or_class: featured-review\r
-strip_id_or_class: resources\r
-strip_id_or_class: rate-the-book\r
-strip_id_or_class: write-review\r
+body: single-review
+strip_id_or_class: featured-review
+strip_id_or_class: resources
+strip_id_or_class: rate-the-book
+strip_id_or_class: write-review
 
 test_url: http://themuseumofinnocence.com/review.php?id=1179
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d88bcdd..dab17f0
@@ -1,11 +1,13 @@
-title: //h1[@class='print-title']\r
-body: //div[@class='print-content']\r
-author: //a[contains(@href, '/authors')]\r
-author: substring-before(//div[@class='print-created'], '|')\r
-date: //span[@class='article-date']\r
-date: substring-after(//div[@class='print-created'], '|')\r
-prune: no\r
-\r
-single_page_link: //ul[contains(@class, 'article-actions-bar')]//a[contains(@href, '/print/article/')]\r
-\r
+title: //h2[@property='dc:title']
+#body: //div[@class='print-content']
+body: //div[@id='wysiwyg']
+author: //a[contains(@href, '/authors')]
+author: substring-before(//div[@class='print-created'], '|')
+date: //span[@class='article-date']
+date: substring-after(//div[@class='print-created'], '|')
+prune: no
+
+#single_page_link: //ul[contains(@class, 'article-actions-bar')]//a[contains(@href, '/print/article/')]
+single_page_link: //ul[contains(@class, 'article-actions-bar')]//a[contains(@href, '?page=full')]
+
 test_url: http://www.thenation.com/article/162331/hard-against-time-roy-fisher
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 846b8a8..b7f5f0f
@@ -1,4 +1,4 @@
-body: //div[@id="beta-inner"]\r
-title: //h3[@class="entry-header"]\r
+body: //div[@id="beta-inner"]
+title: //h3[@class="entry-header"]
 
 test_url: http://thenetworkgarden.blogs.com/weblog/2011/09/microsoft-metro-and-the-next-wave-in-computing.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thenextgeneration.org.txt b/inc/3rdparty/site_config/standard/thenextgeneration.org.txt
new file mode 100755 (executable)
index 0000000..dedd989
--- /dev/null
@@ -0,0 +1,8 @@
+title: //h1[@class='interior-page-title']
+author: //span[@class='author']/a
+date: //div[@class='byline']/time
+body: //div[@class='rich-text-body']
+
+strip: //div[@class='byline']
+strip: //div[@class='offscreen-menu']
+test_url: http://thenextgeneration.org/blog/post/rebrand-announce/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index fdc7000..684fe82
@@ -1,12 +1,12 @@
-body: //div[@class= 'article-body']\r
-author: //div[@class='featured mb-1']//a[starts-with(@href,'/author')]\r
-\r
-strip: //div[@class = 'bargo']\r
-strip: //div[@class = 'tf']\r
-strip: //div[@class = 'article']/div[@class = 'blue-box']\r
-strip_id_or_class: respond\r
-\r
-tidy: no\r
-next_page_link: //div[@class='pages-wrapper']//span/following-sibling::a/@href\r
-\r
+body: //div[@class= 'article-body']
+author: //div[@class='featured mb-1']//a[starts-with(@href,'/author')]
+
+strip: //div[@class = 'bargo']
+strip: //div[@class = 'tf']
+strip: //div[@class = 'article']/div[@class = 'blue-box']
+strip_id_or_class: respond
+
+tidy: no
+next_page_link: //div[@class='pages-wrapper']//span/following-sibling::a/@href
+
 test_url: http://thenextweb.com/apple/2011/10/12/tnw-review-a-complete-guide-to-apples-ios-5-with-icloud-an-os-14-years-in-the-making/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c713232..c9abda7
@@ -1,3 +1,3 @@
-body: //div[@id='fullstory']\r
+body: //div[@id='fullstory']
 strip: //div[@id='page_leftbar']
 test_url: http://theoaklandpress.com/articles/2011/04/25/news/doc4db5330e0bce9220005852.txt
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 12918b8..90e8d65
@@ -1,11 +1,11 @@
-title: //h2[@class='title']\r
-date: substring-before(//p[@class='meta'], '|')\r
-body: //div[@class='story']\r
-#body: //div[@class='article_body']\r
-\r
-strip: //h2[@class='title']\r
-strip: //p[@class='meta']\r
-strip: //div[@class='ga_section']\r
-strip: //div[@id='recent_slider']\r
+title: //h2[@class='title']
+date: substring-before(//p[@class='meta'], '|')
+body: //div[@class='story']
+#body: //div[@class='article_body']
+
+strip: //h2[@class='title']
+strip: //p[@class='meta']
+strip: //div[@class='ga_section']
+strip: //div[@id='recent_slider']
 
 test_url: http://www.theonion.com/articles/pathetic-bobcats-owner-again-regaling-players-with,27572/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f89f3a8..75583cd
@@ -1,11 +1,11 @@
-title: //h1[@class='post-title']\r
-body: //div[@class='post']\r
-author: //p[@class='posted-by']\r
-date: //div[@class='sprite post-date']\r
-\r
-# The body of the post doesn't have it's own div so we have to strip out the metadata\r
-strip: //div[@class='author_avatar']\r
-strip: //div[@class='sprite post-date']\r
-strip: //h1[@class='post-title']\r
+title: //h1[@class='post-title']
+body: //div[@class='post']
+author: //p[@class='posted-by']
+date: //div[@class='sprite post-date']
+
+# The body of the post doesn't have its own div so we have to strip out the metadata
+strip: //div[@class='author_avatar']
+strip: //div[@class='sprite post-date']
+strip: //h1[@class='post-title']
 strip: //p[@class='posted-by']
 test_url: http://thepioneerwoman.com/cooking/2011/08/pie-fats-a-comparison/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ebcc55d..5d30230
@@ -1,5 +1,8 @@
-title: //div[@id="article"]/h2\r
-author: //div[@id="article"]/p[@class="byline"]/a[1]\r
-date: //div[@id="article"]/p[@class="dateline"]/a[2]\r
-body: //div[@id="article"]/div[@id="body"]
-test_url: http://www.theregister.co.uk/2011/10/06/gas_bill_shocker/
\ No newline at end of file
+# Updated 25-Jan-2014
+single_page_link: //a[contains(@href, '/Print/')]
+
+title: //div[@id="article"]/h2
+author: //p[@class="byline"]/a
+date: //p[@class="dateline"]/a[last()]
+
+test_url: http://www.theregister.co.uk/2014/01/24/thirty_years_of_the_apple_macintosh_part_2/
old mode 100644 (file)
new mode 100755 (executable)
index ebff662..1f56316
@@ -1,3 +1,3 @@
-body: //div[@id='node-content']\r
+body: //div[@id='node-content']
 strip_id_or_class: pager
 test_url: http://www.theroot.com/views/why-i-am-male-feminist
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d01a89b..84d0e78
@@ -1,4 +1,4 @@
-title: /html/body/div/div[2]/div/div/h1\r
-\r
+title: /html/body/div/div[2]/div/div/h1
+
 body: /html/body/div/div[2]/div/div/div[2]
 test_url: http://therumpus.net/2010/07/the-rumpus-interview-with-david-means/?full=yes
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ab9a99e..68a8bc8
@@ -1,11 +1,11 @@
-#body: (//div[@class='ftr-yt-vid'])[1]\r
-body: (//blockquote[contains(@class, 'postcontent')])[1]\r
-body: (//div[starts-with(@id, 'post_message')])[1]\r
-\r
-prune: no\r
-tidy: no\r
-\r
-#replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player"\r
-#replace_string(</iframe>): </iframe>&nbsp;</div>\r
-\r
+#body: (//div[@class='ftr-yt-vid'])[1]
+body: (//blockquote[contains(@class, 'postcontent')])[1]
+body: (//div[starts-with(@id, 'post_message')])[1]
+
+prune: no
+tidy: no
+
+#replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player"
+#replace_string(</iframe>): </iframe>&nbsp;</div>
+
 test_url: http://www.thesiasat.com/showthread.php?19220-Dunya-News-HASB-E-HAAL-16-06-2012-Part-1-5
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d5c6c9e..dcdf257
@@ -1,4 +1,4 @@
-title: //h3[@class='post-title']/a[@class='post-title-link']\r
-body: //div[@class='post-content']\r
+title: //h3[@class='post-title']/a[@class='post-title-link']
+body: //div[@class='post-content']
 author: //div[@class='post-meta-under-title']/a
 test_url: http://www.thesimpledollar.com/2011/09/13/determining-the-size-of-your-emergency-fund/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e2ed1e6..ca98328
@@ -1,3 +1,3 @@
-strip: //*[(@id = "content")]/h2\r
+strip: //*[(@id = "content")]/h2
 strip: //*[(@class = "wp-notable-line")]
 test_url: http://www.thespoiler.co.uk/index.php/2010/10/21/wayne-rooney-tells-man-utd-its-not-me-its-you
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 409dc0c..f71cfb6
@@ -1,9 +1,9 @@
-title: //h1[contains(@class, 'cTitle')]\r
-body: //div[contains(@class, 'KonaBody') or @id='articleimageright']\r
-author: //meta[@name='Author']/@content\r
-date: //meta[@name='OriginalPublicationDate']/@content\r
-\r
-prune: no\r
-tidy: no\r
-\r
+title: //h1[contains(@class, 'cTitle')]
+body: //div[contains(@class, 'KonaBody') or @id='articleimageright']
+author: //meta[@name='Author']/@content
+date: //meta[@name='OriginalPublicationDate']/@content
+
+prune: no
+tidy: no
+
 test_url: http://www.thespoof.com/news/spoof.cfm?headline=s8i108389
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0f9855c..6fcf4fd
@@ -1,12 +1,12 @@
-# savage* filtering is for Savage Love, such as: http://www.thestranger.com/seattle/SavageLove?oid=5135029\r
-\r
-#other filtering are plain articles, such as: http://www.thestranger.com/seattle/the-stranger-election-control-board/Content?oid=5142885\r
-\r
-title: //div[@id='savageColumn_head']/h1\r
-title: //h1[@class="headlineLarge"]\r
-\r
-strip: //div[@id='savage_right'] | //div[@id='savageColumn_head'] | //div[@id='savageArticleRight'] | //div[@id='articleRight'] | //div[@class='savAppBanner']\r
-\r
-body: //div[@id='savageColumn']\r
+# savage* filtering is for Savage Love, such as: http://www.thestranger.com/seattle/SavageLove?oid=5135029
+
+# The other filtering is for plain articles, such as: http://www.thestranger.com/seattle/the-stranger-election-control-board/Content?oid=5142885
+
+title: //div[@id='savageColumn_head']/h1
+title: //h1[@class="headlineLarge"]
+
+strip: //div[@id='savage_right'] | //div[@id='savageColumn_head'] | //div[@id='savageArticleRight'] | //div[@id='articleRight'] | //div[@class='savAppBanner']
+
+body: //div[@id='savageColumn']
 body: //div[@id='story_text']
 test_url: http://www.thestranger.com/seattle/SavageLove?oid=5135029
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 5de7563..58eabf0
@@ -1,25 +1,25 @@
-title: //div[@id='storyHdr']/h1\r
-title: //div[@id='print']//h2\r
-body: //div[@class="virtualpage"]\r
-body: //div[@id='print']//div[@id='bd']\r
-author: //meta[@name="AUTHOR"]/@content\r
-author: (//div[@id='print']//div[@id='bd']/h4)[1]\r
-date: //meta[@name="DATE"]/@content\r
-date: //div[@id='print']//div[@id='dte']\r
-\r
-strip_id_or_class: articleFooter\r
-strip_id_or_class: sidebar\r
-strip_id_or_class: ie6PrintSubhead\r
-strip_id_or_class: subHdr\r
-\r
-\r
-replace_string(<P/>): </p><p>\r
-\r
-prune: no\r
-\r
-#TODO: redirects back - perhaps needs referer to work\r
-single_page_link: //div[@id='storyDetail']//a[contains(@href, '/print/')]\r
-\r
-test_url: http://www.thestreet.com/story/11386556/1/which-of-these-10-dividend-stocks-is-worth-the-risk.html\r
-# multi page\r
+title: //div[@id='storyHdr']/h1
+title: //div[@id='print']//h2
+body: //div[@class="virtualpage"]
+body: //div[@id='print']//div[@id='bd']
+author: //meta[@name="AUTHOR"]/@content
+author: (//div[@id='print']//div[@id='bd']/h4)[1]
+date: //meta[@name="DATE"]/@content
+date: //div[@id='print']//div[@id='dte']
+
+strip_id_or_class: articleFooter
+strip_id_or_class: sidebar
+strip_id_or_class: ie6PrintSubhead
+strip_id_or_class: subHdr
+
+
+replace_string(<P/>): </p><p>
+
+prune: no
+
+#TODO: redirects back - perhaps needs referer to work
+single_page_link: //div[@id='storyDetail']//a[contains(@href, '/print/')]
+
+test_url: http://www.thestreet.com/story/11386556/1/which-of-these-10-dividend-stocks-is-worth-the-risk.html
+# multi page
 test_url: http://www.thestreet.com/story/11387090/1/7-ubs-stock-picks-for-2012.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 11c5c15..1e1ce58
@@ -1,31 +1,48 @@
-title: //h1[contains(@class, "headline")]\r
-\r
-author: //p[contains(@class, "byline")]/a[contains(@class, "author")]\r
-\r
-date: substring-after(normalize-space(//p[contains(@class, "byline")]/span[contains(@class, "publish-date")]), "on ")\r
-\r
-body: //article[contains(@class, 'feature-entry')]\r
-body: //article\r
-prune: no\r
-tidy: no\r
-\r
-strip: //article/header\r
-strip: //*[@id='sticky-menu']\r
-strip: //aside\r
-strip: //nav\r
-\r
-strip_id_or_class: gallery\r
-strip_id_or_class: article-meta\r
-strip_id_or_class: story-navigation\r
-strip_id_or_class: slegend\r
-strip_id_or_class: related-product-meta\r
-strip_id_or_class: comments\r
-strip_id_or_class: ui-jump-list\r
-strip_id_or_class: pullquote\r
-\r
-strip: //q\r
-\r
-strip: //a[contains(@class, 'entry-section-title')]\r
-\r
-test_url: http://www.theverge.com/2012/2/29/2821763/lytro-review\r
-test_url: http://www.theverge.com/2011/11/3/2534861/nokia-lumia-800-review
\ No newline at end of file
+author: //p[contains(@class, "byline")]/a[contains(@class, "author")]
+
+date: //span[contains(@class, "publish-date")]/time[@pubdate]/@datetime
+
+body: //div[contains(@class, 'entry-content')]
+# for vergecasts, e.g. http://www.theverge.com/2013/8/22/4648566/the-vergecast-090-august-22th-2013-video
+body: //article
+prune: no
+#tidy: no
+
+strip: //article/header
+strip: //*[@id='sticky-menu']
+strip: //aside
+strip: //nav
+strip: //img[contains(@class, 'vox-lazy-load')]
+# deal with bad parsing
+strip: //div[contains(@class, 'story-image')]//div[contains(., 'function(')]
+
+strip_id_or_class: gallery
+strip_id_or_class: article-meta
+strip_id_or_class: story-navigation
+strip_id_or_class: slegend
+strip_id_or_class: related-product-meta
+strip_id_or_class: comments
+strip_id_or_class: ui-jump-list
+strip_id_or_class: pullquote
+strip_id_or_class: m-ad
+strip_id_or_class: social-sharing
+strip_id_or_class: m-video-entry__excerpt
+strip_id_or_class: hidden
+
+replace_string(<noscript>): <div>
+replace_string(</noscript>): </div>
+
+find_string: <script
+replace_string: <div style="display:none" 
+find_string: </script>
+replace_string: </div>
+
+strip: //q
+
+strip: //a[contains(@class, 'entry-section-title')]
+
+test_url: http://www.theverge.com/2012/2/29/2821763/lytro-review
+test_url: http://www.theverge.com/2011/11/3/2534861/nokia-lumia-800-review
+test_url: http://www.theverge.com/2013/2/24/4026114/barnes-noble-shifting-focus-away-from-nook-hardware
+test_url: http://www.theverge.com/2014/6/19/5824072/top-shelf-living-the-dream
+test_url: http://www.theverge.com/rss/frontpage
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 27281ce..f98749e
@@ -1,4 +1,4 @@
-body: //div[@class="briefingEntry"]\r
-prune: no\r
+body: //div[@class="briefingEntry"]
+prune: no
 
 test_url: http://theweek.com/article/index/215763/insider-trading-on-capitol-hill
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8934b68..1eec4e3
@@ -1,4 +1,4 @@
-author: //p[@class="byline"]/a\r
-body: //div[@class="post"]\r
+author: //p[@class="byline"]/a
+body: //div[@class="post"]
 
 test_url: http://thinkprogress.org/special/2011/11/12/367040/harvard-law-professor-criticizes-homeland-security-feel-of-overreaction-to-occupy-harvard/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 958d4b2..73b3c9e
@@ -1,2 +1,2 @@
-body: //div[@class='main-content-panel']/div[@class='img'] | //div[@id='page_content_Content9_oModuleContent_2_div_Body']\r
+body: //div[@class='main-content-panel']/div[@class='img'] | //div[@id='page_content_Content9_oModuleContent_2_div_Body']
 test_url: http://www.thisdaylive.com/articles/australia-pm-talks-human-rights-with-chinas-wen/90394/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6850b4b..70b5399
@@ -1,8 +1,8 @@
-author: //div[@class='meta clearfix']/a\r
-body: //div[@class='post']\r
-\r
-strip: //div[@class='metaCat']\r
-strip: //div[@class='post']/h1\r
-strip: //div[@class='post']/div[@class='meta clearfix']\r
+author: //div[@class='meta clearfix']/a
+body: //div[@class='post']
+
+strip: //div[@class='metaCat']
+strip: //div[@class='post']/h1
+strip: //div[@class='post']/div[@class='meta clearfix']
 strip: //div[@class='post']/div[@class='social-bar clearfix']
 test_url: http://thisismynext.com/2011/10/18/galaxy-nexus-android-ice-cream-sandwich-pictures-video-hands-on/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8bcf2ec..1950e58
@@ -1,3 +1,3 @@
-author: //span[@class='fn']\r
-date: substring-before(substring-after(//*[@id='center_ajax_sub']/div/div[3],'|'),'|')\r
+author: //span[@class='fn']
+date: substring-before(substring-after(//*[@id='center_ajax_sub']/div/div[3],'|'),'|')
 test_url: http://tidbits.com/article/12651
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index fd3fe08..f3f886b
@@ -1,14 +1,12 @@
-# 2011-10-25 - carlo@... - Initial setup.\r
-\r
-single_page_link: //li[@class='print']/a/@href\r
-\r
-title: //h1\r
-author: //meta[@name="byline"]/@content\r
-date: //meta[@name="date"]/@content\r
-\r
-strip: //span[@class="see"]\r
-strip: //div[@class="byline"]\r
-strip: //div[@id="date2"]\r
-strip: //h1\r
-\r
-test_url: http://www.time.com/time/specials/packages/article/0,28804,2094921_2094923_2094924,00.html
\ No newline at end of file
+title: //h1[contains(@class, 'article-title')]
+author: //article//span[contains(@class, 'byline')]
+date: //time[@pubdate]/@datetime
+body: //section[contains(@class, 'article-body')]
+prune: no
+tidy: no
+
+strip: //figcaption
+strip: //p[contains(., 'MORE:') and ./a]
+strip: //aside
+
+test_url: http://time.com/14478/emotions-may-not-be-so-universal-after-all/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 1729773..af1c23c
@@ -1,6 +1,6 @@
-title: //h1\r
-body: //div[@class="storytext"]\r
-strip: //div[@id="thelogin"]\r
-strip: //*[@class="hide"]\r
+title: //h1
+body: //div[@class="storytext"]
+strip: //div[@id="thelogin"]
+strip: //*[@class="hide"]
 strip: //div[@id="anchored"]
 test_url: http://www.timeshighereducation.co.uk/story.asp?sectioncode=26&storycode=416124&c=1
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9533eb0..b8474d9
@@ -1,9 +1,9 @@
-body: //div[@id='content']\r
-\r
-strip_id_or_class: featured-box\r
-strip_id_or_class: postmeta\r
-strip_id_or_class: respond\r
-\r
-author: //a[contains(@href, '/author/') and contains(@title, 'Posts by')]\r
-date: substring-before(//a[contains(@href, '/author/') and contains(@title, 'Posts by')]/.., ' by ')\r
+body: //div[@id='content']
+
+strip_id_or_class: featured-box
+strip_id_or_class: postmeta
+strip_id_or_class: respond
+
+author: //a[contains(@href, '/author/') and contains(@title, 'Posts by')]
+date: substring-before(//a[contains(@href, '/author/') and contains(@title, 'Posts by')]/.., ' by ')
 test_url: http://www.tipb.com/2011/10/17/iphone-4s-review/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 65a1899..199f5d1
@@ -1,17 +1,17 @@
-title: //div[contains(@class, 'article_detail')]/div[@class='entry_header']/h1\r
-title: //div[contains(@class, 'article_detail')]//h1\r
-title: //h1\r
-\r
-body: //div[contains(@class, 'article_detail')]\r
-\r
-author: //div[@class='article_detail']/div[@class='entry_header']/li/div[@class='author']//h3\r
-author: div[@class='author']//h3\r
-strip: //div[contains(@class, 'field-field-book-cover')]\r
-\r
-date: translate(//*[@class='post_date' and contains(., ' 20')], '|', '')\r
-\r
-prune: no\r
-\r
-single_page_link: //a[@class='print-page']\r
-\r
+title: //div[contains(@class, 'article_detail')]/div[@class='entry_header']/h1
+title: //div[contains(@class, 'article_detail')]//h1
+title: //h1
+
+body: //div[contains(@class, 'article_detail')]
+
+author: //div[@class='article_detail']/div[@class='entry_header']/li/div[@class='author']//h3
+author: div[@class='author']//h3
+strip: //div[contains(@class, 'field-field-book-cover')]
+
+date: translate(//*[@class='post_date' and contains(., ' 20')], '|', '')
+
+prune: no
+
+single_page_link: //a[@class='print-page']
+
 test_url: http://www.tnr.com/blog/jonathan-chait/92991/did-obama-get-rolled
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d8548c7..701a212
@@ -1,6 +1,6 @@
-title: //div[@id='maincontent']//div[@class='title']\r
-body: //div[@id='maincontent']//div[@class='byline'] | //div[@id='maincontent']//div[@class='meat']\r
-\r
-tidy: no\r
+title: //div[@id='maincontent']//div[@class='title']
+body: //div[@id='maincontent']//div[@class='byline'] | //div[@id='maincontent']//div[@class='meat']
+
+tidy: no
 
 test_url: http://www.tomdispatch.com/post/175436/tomgram:_noam_chomsky%2C_the_imperial_mentality_and_9_11/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 2bba6de..2b43757
@@ -1,8 +1,8 @@
-tidy: no\r
-title: //title\r
-author: //a[@itemprop = 'author']\r
-date: //time[@itemprop = 'datePublished']\r
-body: //div[@id = 'intelliTXT']\r
-\r
+tidy: no
+title: //title
+author: //a[@itemprop = 'author']
+date: //time[@itemprop = 'datePublished']
+body: //div[@id = 'intelliTXT']
+
 next_page_link: //li[@class="pagin next"]/a
 test_url: http://www.tomshardware.com/reviews/gaming-graphics-card-review,3107.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e910003..eee57cc
@@ -1,12 +1,12 @@
-body://div[@id="news-content"]/div[@id="intelliTXT"][1]\r
-\r
-author://div[@id="header-news-infos"]/a[1]\r
-\r
-date: //div[@id="header-news-infos"]/span[1]\r
-\r
-title://h1[@id="header-news-title" and @class="hardwareTitle"][1]\r
-\r
-strip://div[@id="news-content"]/div[@id="intelliTXT"]/table \r
-\r
+body://div[@id="news-content"]/div[@id="intelliTXT"][1]
+
+author://div[@id="header-news-infos"]/a[1]
+
+date: //div[@id="header-news-infos"]/span[1]
+
+title://h1[@id="header-news-title" and @class="hardwareTitle"][1]
+
+strip://div[@id="news-content"]/div[@id="intelliTXT"]/table 
+
 footnotes: no
 test_url: http://www.tomshardware.de/DDR4-DDR3-ISSCC-Samsung-Hynix,news-247133.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index dbe60b1..bb45d89
@@ -1,6 +1,6 @@
-body: //div[@class='post']\r
-\r
-strip: //div[@class='social']\r
-strip: //span[@class='next']\r
+body: //div[@class='post']
+
+strip: //div[@class='social']
+strip: //span[@class='next']
 strip: //span[@class='previous']
 test_url: http://toolsandtoys.net/noble-tonic-02/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tracks.ranea.org.txt b/inc/3rdparty/site_config/standard/tracks.ranea.org.txt
new file mode 100755 (executable)
index 0000000..5a38647
--- /dev/null
@@ -0,0 +1,14 @@
+# Metadata
+title: substring-after(//title, 'Coyote Tracks - ')
+author: //meta[@name="author"]/@content
+date: //div[@class="post_header"]/a
+
+# Content Pruning
+strip: //div[@class="column left"]
+strip: //div[@class="pages"]
+strip: //a[@class="text_title"]
+strip: //ol[@class="notes"]
+
+dissolve: //div[@class='column right']/ul
+dissolve: //li[@class='post']
+test_url: http://tracks.ranea.org/post/31431060205/the-next-big-uh-slightly-taller-thing
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/trailerzone.de.txt b/inc/3rdparty/site_config/standard/trailerzone.de.txt
new file mode 100755 (executable)
index 0000000..02151a6
--- /dev/null
@@ -0,0 +1,9 @@
+body: //div[@id='video' or @id='main']
+
+strip_id_or_class: socialshareprivacy2
+strip_id_or_class: wp_rp_first
+
+find_string: Genre</strong>
+replace_string: </strong></p><p><strong>Genre</strong>
+
+test_url: http://www.trailerzone.de/g-i-joe-2-die-abrechnung/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 96e491f..d6cfb6d
@@ -1,8 +1,8 @@
-title: //div[@class="Post-body"]//span[@class="PostHeader"]\r
-author: //div[@class="PostHeaderIcons metadata"]/a[@title="Author"]\r
-date: substring-before(//div[@class="PostHeaderIcons metadata"], '|')\r
-body: //div[@class="Post-body"]\r
-strip_id_or_class: print1\r
-strip_id_or_class: metadata\r
+title: //div[@class="Post-body"]//span[@class="PostHeader"]
+author: //div[@class="PostHeaderIcons metadata"]/a[@title="Author"]
+date: substring-before(//div[@class="PostHeaderIcons metadata"], '|')
+body: //div[@class="Post-body"]
+strip_id_or_class: print1
+strip_id_or_class: metadata
 strip_id_or_class: authorbox
 test_url: http://traningslara.se/skoinlagg-och-skador-finns-det-nagot-samband/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 82797db..663cafe
@@ -1,13 +1,13 @@
-title: //title\r
-author: //span/a\r
-date: substring-after(//small,'Published:')\r
-\r
-strip: //h1[@class='vert_class']\r
-strip: //h1[@class='headline']\r
-strip: //img[contains(@src,'logo_triblive.gif')]\r
-\r
-#strip: //h6\r
-#strip_img_src: logo_triblive.gif\r
-\r
-single_page_link: //a[@class='stprint']\r
+title: //title
+author: //span/a
+date: substring-after(//small,'Published:')
+
+strip: //h1[@class='vert_class']
+strip: //h1[@class='headline']
+strip: //img[contains(@src,'logo_triblive.gif')]
+
+#strip: //h6
+#strip_img_src: logo_triblive.gif
+
+single_page_link: //a[@class='stprint']
 test_url: http://triblive.com/sports/2819913-85/lemieux-deal-penguins-burkle-nhl-owners-team-mario-bettman-case
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e7c1a4b..9e0663b
@@ -1,10 +1,12 @@
-title: //div[@class='printbody']/h1\r
-body: //div[@class='printbody']\r
-prune: no\r
-\r
-strip: //div[@class='printbody']/a[@href='http://www.truthdig.com/']\r
-strip: //table[@class='footer']\r
-\r
-single_page_link: //div[@class='article_tools']//a[contains(@href, '/print/')]\r
-\r
-test_url: http://www.truthdig.com/report/item/the_election_march_of_the_trolls_20110829/
\ No newline at end of file
+title: //div[@class='printbody']/h1
+body: //div[@class='printbody']
+prune: no
+
+strip: //div[@class='printbody']/a[@href='http://www.truthdig.com/']
+strip: //table[@class='footer']
+strip: //h6[contains(., 'http://')]
+
+single_page_link: //a[contains(@href, '/print/')]
+
+test_url: http://www.truthdig.com/report/item/the_election_march_of_the_trolls_20110829/
+test_url: http://www.truthdig.com/dig/item/the_death_of_truth_20130505/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0dab5b0..63537c1
@@ -1,4 +1,4 @@
-title: //h2\r
-author: //a[starts-with(@href, '/AuthorStories')]\r
+title: //h2
+author: //a[starts-with(@href, '/AuthorStories')]
 body: //div[@id='storyinnerbody']
 test_url: http://www.tthfanfic.org/Story-6512/Kudra+Journeys.htm
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
old mode 100644 (file)
new mode 100755 (executable)
index b86f8cc..2af00c2
@@ -1,6 +1,6 @@
-title: //h1[@class='posttitle']\r
-author: //span[@class='author']/a\r
-date: //span[@class='timestamp']\r
-body: //div[@class='body']\r
+title: //h1[@class='posttitle']
+author: //span[@class='author']/a
+date: //span[@class='timestamp']
+body: //div[@class='body']
 
 test_url: http://www.tuaw.com/2011/10/19/apple-posts-fans-memories-of-steve-jobs/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a3946cb..6e18e3d
@@ -1,6 +1,6 @@
-title: //h1[@class='post-title']\r
-author: //div[@class='display-name']\r
-date: //div[@class='date']\r
-body: //div[@class='body']\r
-footnotes: no\r
+title: //h1[@class='post-title']
+author: //div[@class='display-name']
+date: //div[@class='date']
+body: //div[@class='body']
+footnotes: no
 test_url: http://tuckreview.com/2012/8/14/migrating-to-v6
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 08dbba5..3cc3a9c
@@ -1,20 +1,20 @@
-# Google Custom Search\r
-strip_id_or_class: google_branding_style\r
-\r
-# Avoid double title\r
-strip_id_or_class: pagetitle\r
-\r
-# external links are labelled\r
-strip_image_src: http://static.mediatropes.info/pmwiki/pub/external_link.gif\r
-\r
-title: //div[@class="pagetitle"]\r
-body: //div[@id="wikitext"]\r
-\r
-# don't get clever.\r
-strip_comments: no\r
-prune: no\r
-\r
-# navigation in footer lives inside the wikitext div, annoyingly.\r
-strip_id_or_class: pathholder\r
+# Google Custom Search
+strip_id_or_class: google_branding_style
+
+# Avoid double title
+strip_id_or_class: pagetitle
+
+# external links are labelled
+strip_image_src: http://static.mediatropes.info/pmwiki/pub/external_link.gif
+
+title: //div[@class="pagetitle"]
+body: //div[@id="wikitext"]
+
+# don't get clever.
+strip_comments: no
+prune: no
+
+# navigation in footer lives inside the wikitext div, annoyingly.
+strip_id_or_class: pathholder
 
 test_url: http://tvtropes.org/pmwiki/pmwiki.php/Main/WithinParameters
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 12ab154..520ebd8
@@ -1,9 +1,9 @@
-title: //title\r
-body: (//p[contains(@class, 'js-tweet-text')])[1]\r
-author: (//strong[contains(@class, 'fullname')])[1]\r
-date: //span[contains(@class, 'js-short-timestamp')]/@data-time\r
-\r
-prune: no\r
-tidy: no\r
-\r
+title: //title
+body: (//p[contains(@class, 'js-tweet-text')])[1]
+author: (//strong[contains(@class, 'fullname')])[1]
+date: //span[contains(@class, 'js-short-timestamp')]/@data-time
+
+prune: no
+tidy: no
+
 test_url: https://twitter.com/medialens/status/216883678582804480
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 088d658..3469be0
@@ -1,6 +1,6 @@
-body: //div[@class='d3cmsCBody']//div[@class='pubText pubDate' or @class='newsComment' or contains(@class, 'newsPhoto') or @class='newsText']\r
-strip: //div[contains(@class, 'mpindex')]\r
-prune: no\r
-tidy: no\r
-\r
+body: //div[@class='d3cmsCBody']//div[@class='pubText pubDate' or @class='newsComment' or contains(@class, 'newsPhoto') or @class='newsText']
+strip: //div[contains(@class, 'mpindex')]
+prune: no
+tidy: no
+
 test_url: http://www.uefa.com/uefaeuropaleague/news/newsid=1617320.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 29e1956..cd9c136
@@ -1,23 +1,23 @@
-# applies to uk.ds.ign.com, uk.wii.ign.com etc.\r
-# possibly to non-UK versions, but I can&rsquo;t test that\r
-\r
-title: //h1[@class="headline"]\r
-author: //div[@class="hdr-sub byline"]/a\r
-date: //h2[@class="publish-date"]/span\r
-body: //div[@id="main-article-content"]\r
-\r
-strip: //ul[@class="lnks-readmore"]\r
-\r
-strip: //div[@class="inlineImageCaption"]\r
-# can&rsquo;t make the images appear, so remove the captions\r
-\r
-strip: //div[@style="width:468px"]\r
-# video caption links\r
-\r
-convert_double_br_tags: yes\r
-\r
-strip_comments: no\r
-# otherwise the &lsquo;Closing Comments&rsquo; are removed\r
-\r
+# applies to uk.ds.ign.com, uk.wii.ign.com etc.
+# possibly to non-UK versions, but I can&rsquo;t test that
+
+title: //h1[@class="headline"]
+author: //div[@class="hdr-sub byline"]/a
+date: //h2[@class="publish-date"]/span
+body: //div[@id="main-article-content"]
+
+strip: //ul[@class="lnks-readmore"]
+
+strip: //div[@class="inlineImageCaption"]
+# can&rsquo;t make the images appear, so remove the captions
+
+strip: //div[@style="width:468px"]
+# video caption links
+
+convert_double_br_tags: yes
+
+strip_comments: no
+# otherwise the &lsquo;Closing Comments&rsquo; are removed
+
 # Ratings box could do with some rearranging, but it&rsquo;s tricky
 test_url: http://uk.xbox360.ign.com/articles/121/1210717p1.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index cbe87d1..4a5ae34
@@ -1,17 +1,17 @@
-author: substring-before(substring-after(//div[@class='post-byline'], 'By '), ', on')\r
-date: substring-after(//div[@class='post-byline'], ', on')\r
-\r
-# for some reason, the following is producing a "no text [48]" error\r
-#title: //div[@class='post-headline']\r
-\r
-# for some reason, the following doesn't appear to isolate just the body copy\r
-body: //div[@class='post-bodycopy']\r
-\r
-# we solve the above issue by stripping out everything else we don't want\r
-# these can probably all be removed if the body: command above worked\r
-strip_id_or_class: reply\r
-strip_id_or_class: left\r
-strip_id_or_class: post-headline\r
-strip_id_or_class: post-byline\r
+author: substring-before(substring-after(//div[@class='post-byline'], 'By '), ', on')
+date: substring-after(//div[@class='post-byline'], ', on')
+
+# for some reason, the following is producing a "no text [48]" error
+#title: //div[@class='post-headline']
+
+# for some reason, the following doesn't appear to isolate just the body copy
+body: //div[@class='post-bodycopy']
+
+# we solve the above issue by stripping out everything else we don't want
+# these can probably all be removed if the body: command above worked
+strip_id_or_class: reply
+strip_id_or_class: left
+strip_id_or_class: post-headline
+strip_id_or_class: post-byline
 strip_id_or_class: footer
 test_url: http://www.uni-watch.com/2011/10/18/the-curious-case-of-steve-debergs-microphone-and-speaker/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/unwinnable.com.txt b/inc/3rdparty/site_config/standard/unwinnable.com.txt
new file mode 100755 (executable)
index 0000000..05ad86a
--- /dev/null
@@ -0,0 +1,9 @@
+title: //h1[@class='postTitle']
+author: //a[@rel='author']
+date: substring-before(//h4[@class='postAuthor'], '|')
+body: //div[@class='postContent']
+
+strip: //div[@class='simplePullQuote']
+
+wrap_in(figure): //img
+test_url: http://www.unwinnable.com/2013/04/23/gratifying-play/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/uppsalafria.se.txt b/inc/3rdparty/site_config/standard/uppsalafria.se.txt
new file mode 100755 (executable)
index 0000000..79c59ec
--- /dev/null
@@ -0,0 +1,7 @@
+body: //div[contains(@class, 'layout__inner')]//div[contains(@class, 'file-image') or contains(@class, 'node__content')]
+author: //article//div[contains(@class, 'field-byline')]
+strip_id_or_class: rekommenderade
+strip_id_or_class: disqus
+strip_id_or_class: annonser
+
+test_url: http://www.uppsalafria.se/artikel/97167
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 86061f7..385c95c
@@ -1,3 +1,3 @@
-title: //title\r
-body: //td[@id='content']
-test_url: http://www.urbandictionary.com/define.php?term=Grown-Ass
\ No newline at end of file
+title: //title
+body: //table[@id='entries']
+test_url: http://www.urbandictionary.com/define.php?term=Grown-Ass
diff --git a/inc/3rdparty/site_config/standard/usatoday.com.txt b/inc/3rdparty/site_config/standard/usatoday.com.txt
new file mode 100755 (executable)
index 0000000..710a7b3
--- /dev/null
@@ -0,0 +1,8 @@
+date: //meta[@itemprop="datePublished"]/@content
+author: //div[@itemprop="author"]
+body: //div[@itemprop='articleBody']
+
+strip_id_or_class: share-tools
+
+test_url: http://www.usatoday.com/story/news/world/2014/03/18/malaysia-plane-search/6552429/
+test_url: http://rssfeeds.usatoday.com/usatoday-NewsTopStories
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index eb10a48..30c2882
@@ -1,6 +1,6 @@
-body: //div[@id='CS_Element_maincontent']\r
-\r
-tidy: no\r
-prune: no\r
+body: //div[@id='CS_Element_maincontent']
+
+tidy: no
+prune: no
 
 test_url: http://www.usccb.org/bible/readings/072412.cfm
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f6be84c..b8511c7
@@ -1,8 +1,8 @@
-title: //h1\r
-\r
-date: substring-after(//p[@class='overline']/strong, ',')\r
-body: //div[@class="maintext"]\r
-strip: //p[@class='overline']\r
-strip: //h1\r
+title: //h1
+
+date: substring-after(//p[@class='overline']/strong, ',')
+body: //div[@class="maintext"]
+strip: //p[@class='overline']
+strip: //h1
 tidy: no
 test_url: http://www.useit.com/alertbox/mobile-startup-screen.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/usfirst.org.txt b/inc/3rdparty/site_config/standard/usfirst.org.txt
new file mode 100755 (executable)
index 0000000..f02b2d3
--- /dev/null
@@ -0,0 +1,6 @@
+title: //meta[@property='dc:title']/@content
+date: //div[@class='content']//span[@property='dc:date']/@content
+body: //div[@property='content:encoded']
+prune: no
+
+test_url: http://www.usfirst.org/roboticsprograms/frc/Photo-From-Kickoff-Filming
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/utdailybeacon.com.txt b/inc/3rdparty/site_config/standard/utdailybeacon.com.txt
new file mode 100755 (executable)
index 0000000..d37911b
--- /dev/null
@@ -0,0 +1,5 @@
+title: //h1
+author: //*[@class='byline']
+date: substring-after(//*[@class='pubdatetime'], 'Published: ')
+body: //*[@class='body-block']
+test_url: http://utdailybeacon.com/news/2012/oct/8/energy-forum-continues/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a893bda..c69f2df
@@ -1,7 +1,7 @@
-author: ("Arturo Toledo")\r
-title: //div[@class="post"]/h2\r
-body: //div[@class="entry"]\r
-\r
-# Remove Twitter button\r
+author: ("Arturo Toledo")
+title: //div[@class="post"]/h2
+body: //div[@class="entry"]
+
+# Remove Twitter button
 strip: //div[@class="entry"]/p[2]/a/img
 test_url: http://ux.artu.tv/?p=192
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index bfc47d1..efa3822
@@ -1,30 +1,30 @@
-title: //meta[@property="og:title"]/@content\r
-author: //div[contains(@class, 'byline')]//span[contains(@class, 'name')]\r
-date: //div[contains(@class, 'cn_date_time')]\r
-body: //div[contains(@class, 'pageContainers')]\r
-body: //article[@id='items-container']\r
-#body: //h2[@class='sub-header'] | //div[contains(@class, 'contributor-type') or @class='display-date' or @class='content-container']\r
-\r
-strip_id_or_class: bc\r
-strip_id_or_class: utilities\r
-strip_id_or_class: list-supporting\r
-strip_id_or_class: yrail\r
-strip_id_or_class: urail\r
-\r
-prune: no\r
-#tidy: no\r
-\r
-strip_id_or_class: super-rubric-section\r
-strip_id_or_class: cn_date_time\r
-strip_id_or_class: cn_contributors\r
-strip_id_or_class: cn_pagination_controls\r
-strip_id_or_class: cn_features_container\r
-strip_id_or_class: global-footer\r
-strip_id_or_class: cn_ecom_placement\r
-strip: //li[@class='blogNavPrev']\r
-\r
-single_page_link: //a[@title='Print this page']\r
-\r
-test_url: http://www.vanityfair.com/politics/features/2011/05/egypt-revolutionaries-201105\r
-test_url: http://www.vanityfair.com/politics/features/2008/08/hitchens200808\r
+title: //meta[@property="og:title"]/@content
+author: //div[contains(@class, 'byline')]//span[contains(@class, 'name')]
+date: //div[contains(@class, 'cn_date_time')]
+body: //div[contains(@class, 'pageContainers')]
+body: //article[@id='items-container']
+#body: //h2[@class='sub-header'] | //div[contains(@class, 'contributor-type') or @class='display-date' or @class='content-container']
+
+strip_id_or_class: bc
+strip_id_or_class: utilities
+strip_id_or_class: list-supporting
+strip_id_or_class: yrail
+strip_id_or_class: urail
+
+prune: no
+#tidy: no
+
+strip_id_or_class: super-rubric-section
+strip_id_or_class: cn_date_time
+strip_id_or_class: cn_contributors
+strip_id_or_class: cn_pagination_controls
+strip_id_or_class: cn_features_container
+strip_id_or_class: global-footer
+strip_id_or_class: cn_ecom_placement
+strip: //li[@class='blogNavPrev']
+
+single_page_link: //a[@title='Print this page']
+
+test_url: http://www.vanityfair.com/politics/features/2011/05/egypt-revolutionaries-201105
+test_url: http://www.vanityfair.com/politics/features/2008/08/hitchens200808
 test_url: http://www.vanityfair.com/style/2012/01/prisoners-of-style-201201
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6b5e0ae..c0133c9
@@ -1,5 +1,5 @@
-title: //div[@class='ArticleHeadlineDetailedView']\r
-date:  //span[@class='ArticlePublicationDateTimeDetailedView']\r
-author://span[@class='ArticleBylineDetailedView']\r
+title: //div[@class='ArticleHeadlineDetailedView']
+date:  //span[@class='ArticlePublicationDateTimeDetailedView']
+author://span[@class='ArticleBylineDetailedView']
 body: //div[@class='ArticleTextDetailedView']
 test_url: http://www.varingen.no/Nyheter/tabid/392/Default.aspx?ModuleId=56651&articleView=true
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b1db4c3..dfbf69c
@@ -1,4 +1,4 @@
-# FB comments are inside an h2. Weird. Without this, the line 'Comments' is preserved by the text parser\r
-\r
+# FB comments are inside an h2. Weird. Without this, the line 'Comments' is preserved by the text parser
+
 strip: //h2
 test_url: http://www.varsity.co.uk/reviews/2662
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/vea.gov.vn.txt b/inc/3rdparty/site_config/standard/vea.gov.vn.txt
new file mode 100755 (executable)
index 0000000..9c8420c
--- /dev/null
@@ -0,0 +1,7 @@
+title://div[@class="detail-new-title"]
+body://div[@class="innerpad"]
+strip://div[@class="ArticleUtility"]
+strip://div[@class="commentPost"]
+strip://div[@class="comment-box"]
+strip://div[@id="TinLienQuan"]
+test_url: http://vea.gov.vn/vn/tintuc/tintuchangngay/Pages/T%C4%83ng-c%C6%B0%E1%BB%9Dng-b%E1%BA%A3o-t%E1%BB%93n-%C4%91%E1%BB%99ng-v%E1%BA%ADt-hoang-d%C3%A3-%E1%BB%9F-Vi%E1%BB%87t-Nam.aspx
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ba99917..265f9fc
@@ -1,3 +1,3 @@
-title: //td[@class='second_content']/h1\r
+title: //td[@class='second_content']/h1
 body: //td[@class='second_content']/div[@class='article_text']
 test_url: http://www.vedomosti.ru/newspaper/article/259377/rasprodazha_mailru
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 14144c0..2a44c31
@@ -1,5 +1,5 @@
-author: //div[@class="blogginnleggForfatter"]\r
-date: concat(//div[@class='blogginnleggDatoDag'],' ',//div[@class='blogginnleggDatoMnd'])\r
-strip: //div[contains(@id,"bloggDelingslenker")]\r
+author: //div[@class="blogginnleggForfatter"]
+date: concat(//div[@class='blogginnleggDatoDag'],' ',//div[@class='blogginnleggDatoMnd'])
+strip: //div[contains(@id,"bloggDelingslenker")]
 strip: //div[contains(@id,"bloggDelingslenker")]
 test_url: http://veggbilder.no/blogginnlegg/fristelser
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 294ace9..d22fc5c
@@ -1,6 +1,6 @@
-title: //h2\r
-date: substring-before(//small," &bull; Permalink")\r
-author:string('Martin Hering')\r
-\r
+title: //h2
+date: substring-before(//small," &bull; Permalink")
+author:string('Martin Hering')
+
 Strip: //p/small
 test_url: http://vemedio.com/blog/posts/state-of-support-and-icloud
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 41bfa8c..d6321d7
@@ -1,6 +1,6 @@
-title: //h1[@class="entry-title"]\r
-author: //div[@class="author-name"]\r
-date: //span[@class="the-time"]\r
-body: //div[@class="entry-content"]\r
+title: //h1[@class="entry-title"]
+author: //div[@class="author-name"]
+date: //span[@class="the-time"]
+body: //div[@class="entry-content"]
 strip: //div[@class="vb-gallery"]
 test_url: http://venturebeat.com/2012/07/17/marissa-mayer-yahoo/#s:mayer-1
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 74203ca..418b83a
@@ -1,12 +1,12 @@
-title: //article/header/h1\r
-\r
-author: //article/header/section[@class='byline']/span[contains(@class, 'author')]/a\r
-date: //article/header/section[@class='byline']/span[@class='published']/span\r
-\r
-body: //article/section[@class='body']\r
-\r
-convert_double_br_tags: yes\r
-\r
-# This is required, because Tidy chokes on the HTML5 tags...\r
+title: //article/header/h1
+
+author: //article/header/section[@class='byline']/span[contains(@class, 'author')]/a
+date: //article/header/section[@class='byline']/span[@class='published']/span
+
+body: //article/section[@class='body']
+
+convert_double_br_tags: yes
+
+# This is required, because Tidy chokes on the HTML5 tags...
 tidy: no
 test_url: http://www.version2.dk/artikel/17069-amerikansk-hit-investor-er-vild-med-danske-net-ivaerksaettere
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4cdd0c0..ad0fec6
@@ -1,7 +1,7 @@
-title: //title\r
-body: //div[contains(@class, 'printRecipe')]\r
-strip: //div[@class='recipeHeader']\r
-prune: no\r
-tidy: no\r
+title: //title
+body: //div[contains(@class, 'printRecipe')]
+strip: //div[@class='recipeHeader']
+prune: no
+tidy: no
 single_page_link: //ul[@class='printOptions']//a[contains(@href, 'detail.aspx?p=1&showphoto=true')]
 test_url: http://www.verybestbaking.com/recipes/143190/Penne-Pasta-with-Sun-dried-Tomato-Cream-Sauce/detail.aspx
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index fceeea0..bfadb4a
@@ -1,3 +1,3 @@
-body: //div[@id='artikkelspalte']\r
+body: //div[@id='artikkelspalte']
 strip_id_or_class: 'breadcrumb'
 test_url: http://www.vg.no/spill/artikkel.php?artid=10003628
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 1dca55a..5db7746
@@ -1,9 +1,9 @@
-title: concat("Video: ", //div[@id='currentVideoTitleDivId'])\r
-body: //div[@id='currentVideoDescriptionId']\r
-author: //meta[@name='author']/@content\r
-\r
-replace_string(<div id="currentVideoDescriptionId" style="display): <div id="currentVideoDescriptionId" style="displayitplease\r
-\r
-replace_string(<div id="currentVideoTitleDivId" style="display): <div id="currentVideoTitleDivId" style="displayitplease\r
-\r
+title: concat("Video: ", //div[@id='currentVideoTitleDivId'])
+body: //div[@id='currentVideoDescriptionId']
+author: //meta[@name='author']/@content
+
+replace_string(<div id="currentVideoDescriptionId" style="display): <div id="currentVideoDescriptionId" style="displayitplease
+
+replace_string(<div id="currentVideoTitleDivId" style="display): <div id="currentVideoTitleDivId" style="displayitplease
+
 test_url: http://video.forbes.com/fvn/business/wells-fargo-inside-the-bank-that-works
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a166381..d93780c
@@ -1,6 +1,6 @@
-title: //h2[@class='posttitle']\r
-date: substring-before(substring-after(//span[@class='postdate'], 'on '), ' by')\r
-date: //span[@class='postdate']\r
-author: //span[@class='postdate']/a\r
+title: //h2[@class='posttitle']
+date: substring-before(substring-after(//span[@class='postdate'], 'on '), ' by')
+date: //span[@class='postdate']
+author: //span[@class='postdate']/a
 body: //div[@class='entry line_top']
 test_url: http://videogum.com/395042/here-are-some-afternoon-links-92/list/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index df37460..36e4a2f
@@ -1,9 +1,9 @@
-title: //h2[@class='headline']\r
-\r
+title: //h2[@class='headline']
+
 body: //div[@class='ContentPrint']
-\r
-prune: no\r
-\r
-single_page_link: //a[contains(@href, '/printVersion/')]\r
-\r
+
+prune: no
+
+single_page_link: //a[contains(@href, '/printVersion/')]
+
 test_url: http://www.villagevoice.com/2010-03-16/news/new-york-s-ten-worst-landlords/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d6c6701..f36c9c5
@@ -1,17 +1,17 @@
-title: //title\r
-body: //iframe\r
-\r
-find_string: <html>&lt;iframe \r
-replace_string: <iframe id="video" \r
-\r
-find_string: &gt;&lt;/iframe&gt;</html>\r
-replace_string: ></iframe>\r
-\r
-replace_string(&quot;): "\r
-\r
-single_page_link: //link[@type='text/xml+oembed']\r
-\r
-prune: no\r
-tidy: no\r
-\r
+title: //title
+body: //iframe
+
+find_string: <html>&lt;iframe 
+replace_string: <iframe id="video" 
+
+find_string: &gt;&lt;/iframe&gt;</html>
+replace_string: ></iframe>
+
+replace_string(&quot;): "
+
+single_page_link: //link[@type='text/xml+oembed']
+
+prune: no
+tidy: no
+
 test_url: http://vimeo.com/35941909
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/viply.de.txt b/inc/3rdparty/site_config/standard/viply.de.txt
new file mode 100755 (executable)
index 0000000..e3599c9
--- /dev/null
@@ -0,0 +1,12 @@
+title: //div[@id='singletext']//h1
+body: //div[contains(@class, 'mypictureborder')] | //div[@id='singletext']
+prune: no
+
+strip_id_or_class: singletostart
+strip_id_or_class: navigation
+strip_id_or_class: social
+strip_id_or_class: single_topwrapper
+strip: //a[contains(., 'Nächster Artikel')]
+
+test_url: http://www.viply.de/?p=87973
+test_url: http://www.viply.de/?feed=rss2
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0f03198..04e0910
@@ -1,14 +1,14 @@
-# Author's name, when present, has 'skrifar:' ('writes:') appended to it.\r
-# In case of multiple authors, this would be 'skrifa:', hence only 7 characters\r
-# are stripped off.\r
-author: substring(//div[@class='paragraph']/div[@class='meta'], 0, string-length(//div[@class='paragraph']/div[@class='meta']) - 7)\r
-\r
-date: //span[@class='date']\r
-title: //h1\r
-body: //div[@class='paragraph']\r
-\r
-# Strip out author string when present\r
-strip: //div[@class='paragraph']/div[@class='meta']\r
-\r
+# Author's name, when present, has 'skrifar:' ('writes:') appended to it.
+# In case of multiple authors, this would be 'skrifa:', hence only 7 characters
+# are stripped off.
+author: substring(//div[@class='paragraph']/div[@class='meta'], 0, string-length(//div[@class='paragraph']/div[@class='meta']) - 7)
+
+date: //span[@class='date']
+title: //h1
+body: //div[@class='paragraph']
+
+# Strip out author string when present
+strip: //div[@class='paragraph']/div[@class='meta']
+
 convert_double_br_tags: yes
 test_url: http://visir.is/esb,-ipa,-bhm-og-bsrb/article/2012701319997
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8b2a300..f2d11c7
@@ -1,6 +1,6 @@
-strip: //*[(@id = "ja-search")]\r
-body: //*[(@id = "ja-mainbody")]\r
-body: //*[(@id = "content-mass-bottom")]\r
-strip://h3[contains(span,'Related Posts')]\r
+strip: //*[(@id = "ja-search")]
+body: //*[(@id = "ja-mainbody")]
+body: //*[(@id = "content-mass-bottom")]
+strip://h3[contains(span,'Related Posts')]
 strip://img
 test_url: http://vitispr.com/blog/coventry-is-a-technology-hotspot
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 23c928b..e5ebc43
@@ -1,8 +1,8 @@
-body: //div[@cpms_content]//h2[@class='Lead'] | //div[@cpms_content]//p[@class='Normal'] | //div[@cpms_content]//table\r
-strip://div[@class="box-item"]\r
-strip://div[@id="ARTICLE_BANNER"]\r
-strip://a\r
-strip://div[@class="tag-parent"]\r
-strip://div[@class="email-print txtr"]\r
-\r
+body: //div[@cpms_content]//h2[@class='Lead'] | //div[@cpms_content]//p[@class='Normal'] | //div[@cpms_content]//table
+strip://div[@class="box-item"]
+strip://div[@id="ARTICLE_BANNER"]
+strip://a
+strip://div[@class="tag-parent"]
+strip://div[@class="email-print txtr"]
+
 test_url: http://vnexpress.net/gl/xa-hoi/2011/04/tim-thay-nan-nhan-cuoi-cung-vu-sap-mo-da-o-len-co/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6bd0e85..b754aeb
@@ -1,3 +1,3 @@
-title: //h1\r
+title: //h1
 body: //div[@class='entrytext']
 test_url: http://voices.washingtonpost.com/ezra-klein/2010/10/why_isnt_monetary_policy_discr.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a39c9f4..cfb9ea1
@@ -1,3 +1,3 @@
-body: //div[contains(@class, 'KonaBody')]\r
+body: //div[contains(@class, 'KonaBody')]
 
 test_url: http://www.vworker.com/RentACoder/misc/BidRequests/ShowBidRequest.asp?lngBidRequestId=1634186
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index afcba0f..e92757d
@@ -1,4 +1,4 @@
-title: //h2[@class="title"]\r
-body: //div[@class="post"]\r
+title: //h2[@class="title"]
+body: //div[@class="post"]
 
 test_url: http://waffle.wootest.net/2011/06/22/on-reading-news/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3ab2217..c53eb0d
@@ -1,14 +1,14 @@
-title: //div[@id='pr']/h3\r
-author: //div[@class='dateline']//a[contains(@href, '/author/')]\r
-\r
-# print page\r
-body: //div[@id='prbody']\r
-# standard page\r
-body: //div[@id='pgbody']\r
-\r
-# for multi-page articles\r
-single_page_link: //div[@class='tipjar']//a[contains(@href, '/printerFriendly.php?')]\r
-\r
-prune: no\r
+title: //div[@id='pr']/h3
+author: //div[@class='dateline']//a[contains(@href, '/author/')]
+
+# print page
+body: //div[@id='prbody']
+# standard page
+body: //div[@id='pgbody']
+
+# for multi-page articles
+single_page_link: //div[@class='tipjar']//a[contains(@href, '/printerFriendly.php?')]
+
+prune: no
 
 test_url: http://www.walrusmagazine.com/articles/2011.12-memoir-kidnapped
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a41a351..21f5635
@@ -1,3 +1,3 @@
-title: //h3\r
+title: //h3
 body: //div[@class="content_wysiwyg"]
 test_url: http://www.warnerbros.fr/game-of-thrones-un-junket-vu-de-l-interieur-268.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/washingtoninstitute.org.txt b/inc/3rdparty/site_config/standard/washingtoninstitute.org.txt
new file mode 100755 (executable)
index 0000000..17f4567
--- /dev/null
@@ -0,0 +1,6 @@
+body: //div[@class='main']//article
+
+prune: no
+
+test_url: http://www.washingtoninstitute.org/policy-analysis/view/striking-syria-lessons-from-the-israeli-experience?goback=.gde_3822158_member_273623672
+test_url: http://www.washingtoninstitute.org/rss/11/10
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index edf1642..8f8902a
@@ -1,10 +1,10 @@
-title://a[@class = 'headline-article']\r
-\r
-author: substring-after(//div[@class = 'article']/p[@class = 'author'], 'By ')\r
-date://div[@class = 'article']/span[@class = 'date']\r
-body://div[@class = 'article']\r
-single_page_link://a[@class = 'print']\r
-strip://p[@class = 'author']\r
-strip://a[@class = 'headline-article']\r
+title://a[@class = 'headline-article']
+
+author: substring-after(//div[@class = 'article']/p[@class = 'author'], 'By ')
+date://div[@class = 'article']/span[@class = 'date']
+body://div[@class = 'article']
+single_page_link://a[@class = 'print']
+strip://p[@class = 'author']
+strip://a[@class = 'headline-article']
 strip://span[@class = 'date']
 test_url: http://www.washingtonmonthly.com/magazine/julyaugust_2011/features/the_trinity_sisters030380.php
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 2931ca5..0aa9f1d
@@ -1,21 +1,32 @@
-body: //div[@class="article_body"]\r
-author://meta[@name='DC.creator']/@content\r
-title://meta[@name='title']/@content\r
-date://div[contains(@class,'byline')]//span[contains(@class,'published')]/@title\r
-date://meta[@name="DC.date.issued"]/@content\r
-strip://div[@class="relative primary-slot padding-top img-border gallery-container photo-wrapper"]\r
-strip://div[@id="wp-column six end"]\r
-strip://div[contains(@class,'hidden')]\r
-strip://div[@id='article-side-rail']\r
-strip://div[@class="module component todays-paper-module curved"]\r
-strip://div[@class="module component live-qa curved img-border"]\r
-strip://div[@class="module component newsletter-signup curved"]\r
-strip://div[@class="module featured-stories component curved img-border"]\r
-\r
-strip_id_or_class: carousel\r
-strip_id_or_class: toolbar\r
-strip_id_or_class: module\r
-\r
-test_url: http://www.washingtonpost.com/world/europe/in-europe-new-fears-of-german-might/2011/10/19/gIQA3baZ7L_story.html?hpid=z1\r
-test_url: http://www.washingtonpost.com/national/health-science/radical-theory-of-first-americans-places-stone-age-europeans-in-delmarva-20000-years-ago/2012/02/28/gIQA4mriiR_story.html\r
+# Seems to be redirecting to articles.washingtonpost.com for many users
+
+body: //div[contains(@class, "article_body")]
+# print view
+body: //div[@id='print_facet']//div[@id='body']
+
+author://meta[@name='DC.creator']/@content
+title://meta[@name='title']/@content
+date://div[contains(@class,'byline')]//span[contains(@class,'published')]/@title
+date://meta[@name="DC.date.issued"]/@content
+strip://div[@class="relative primary-slot padding-top img-border gallery-container photo-wrapper"]
+strip://div[@id="wp-column six end"]
+strip://div[contains(@class,'hidden')]
+strip://div[@id='article-side-rail']
+strip://div[@class="module component todays-paper-module curved"]
+strip://div[@class="module component live-qa curved img-border"]
+strip://div[@class="module component newsletter-signup curved"]
+strip://div[@class="module featured-stories component curved img-border"]
+
+strip_id_or_class: carousel
+strip_id_or_class: toolbar
+strip_id_or_class: module
+
+# Change gJQAwdJG4U_story.html to gJQAwdJG4U_print.html
+single_page_link: concat(substring-before(//link[@rel="canonical"]/@href, "_story.html"), "_print.html")
+
+# [OLD] Change gJQAwdJG4U_story.html to gJQAwdJG4U_story_print.html
+#single_page_link: concat(substring-before(//link[@rel="canonical"]/@href, "_story.html"), "_story_print.html")
+
+test_url: http://www.washingtonpost.com/world/europe/in-europe-new-fears-of-german-might/2011/10/19/gIQA3baZ7L_story.html?hpid=z1
+test_url: http://www.washingtonpost.com/national/health-science/radical-theory-of-first-americans-places-stone-age-europeans-in-delmarva-20000-years-ago/2012/02/28/gIQA4mriiR_story.html
 test_url: http://www.washingtonpost.com/lifestyle/magazine/the-sorry-fate-of-a-tech-pioneer-halsey-minor-and-historic-virginia-estate-carters-grove/2012/05/30/gJQAwdJG4U_story.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index dfcd008..9ed43a2
@@ -1,6 +1,6 @@
-body: //div[@id='template_article']\r
-\r
-strip_id_or_class: article_more\r
-strip: //hr\r
+body: //div[@id='template_article']
+
+strip_id_or_class: article_more
+strip: //hr
 
 test_url: http://www.web-libre.org/dossiers/jacuzzi-gonflable,8493.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9e75a8a..578ba52
@@ -1,5 +1,5 @@
-title://div[@class="post"]/h2\r
-author://p[@class="postinfo"]/a\r
-date:substring-before(substring-after(//p[@class="postinfo"],' on '),' under ')\r
+title://div[@class="post"]/h2
+author://p[@class="postinfo"]/a
+date:substring-before(substring-after(//p[@class="postinfo"],' on '),' under ')
 body://div[@class="contenttext"]
 test_url: http://weblog.bignerdranch.com/?p=304
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3fabda0..7cfa49d
@@ -1,9 +1,9 @@
-title: //h2[@class="pageTitle"]\r
-strip: //div[@class="postfoot"]\r
-strip: //h2[@class="pageTitle"]\r
-strip: //h3[@class="pageTitle"]\r
-body: //div[@class="post"]\r
-author: substring-before(substring-after(//div[@class="postfoot"], 'by'), 'Filed')\r
-date: substring-before(substring-after(//div[@class="postfoot"], 'Published'), 'by')\r
+title: //h2[@class="pageTitle"]
+strip: //div[@class="postfoot"]
+strip: //h2[@class="pageTitle"]
+strip: //h3[@class="pageTitle"]
+body: //div[@class="post"]
+author: substring-before(substring-after(//div[@class="postfoot"], 'by'), 'Filed')
+date: substring-before(substring-after(//div[@class="postfoot"], 'Published'), 'by')
 
 test_url: http://weblogs.asp.net/scottgu/archive/2011/08/31/html-editor-smart-tasks-and-event-handler-generation-asp-net-vnext-series.aspx
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8922b02..cea1014
@@ -1,8 +1,8 @@
-tidy: no\r
-dissolve: //div[@id="content"]/div/article/header\r
-body: //div[@id="content"]/div/article \r
-title: //div[@id="content"]/div/article/h1\r
-date: //div[@id="content"]/div/article/header/div[@id="issueSelectTrigger"]\r
-strip: //div[@id="content"]/div/article/h1\r
+tidy: no
+dissolve: //div[@id="content"]/div/article/header
+body: //div[@id="content"]/div/article 
+title: //div[@id="content"]/div/article/h1
+date: //div[@id="content"]/div/article/header/div[@id="issueSelectTrigger"]
+strip: //div[@id="content"]/div/article/h1
 
 test_url: http://webpaper.nzz.ch/2012/06/23/front/JJKMS/aphrodite-und-die-kommunisten?guest_pass=24a3ca5b6d%3AJJKMS%3Ad30e1be8628c099669671d4da56cdce4187790ba
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/webwereld.nl.txt b/inc/3rdparty/site_config/standard/webwereld.nl.txt
new file mode 100755 (executable)
index 0000000..40a5aa3
--- /dev/null
@@ -0,0 +1,8 @@
+strip: //*[@class="paginator"]
+body: //*[@id="articleText"]
+next_page_link: //a[@class="next"]
+
+# No author detection
+# No publishing date detection
+# No author and intro deduplication over multiple pages
+test_url: http://webwereld.nl/analyse/111452/de-code-van-dorifel-nader-bekeken.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6e4f828..42e65e9
@@ -1,22 +1,22 @@
-# set body\r
-tidy: no\r
-body: //div[contains(@class, 'articleContent')]\r
-\r
-# remove clutter\r
-strip: //div[@class='advertising']\r
-strip: //div[@class='themenalarm']\r
-strip: //div[contains(@class, 'inTextTeaser')]\r
-\r
-# remove captions\r
-strip: //span[@class='copyRight']\r
-\r
-# remove photo galleries and extras\r
-strip: //div[contains(@class, 'textGallery')]\r
-strip: //div[contains(@class, 'videoGallery')]\r
-strip: //div[contains(@class, 'imageGallery')]\r
-strip: //div[contains(@class, 'openContent')]\r
-\r
-# remove comments\r
-strip: //div[@id = 'writeComment']\r
-\r
+# set body
+tidy: no
+body: //div[contains(@class, 'articleContent')]
+
+# remove clutter
+strip: //div[@class='advertising']
+strip: //div[@class='themenalarm']
+strip: //div[contains(@class, 'inTextTeaser')]
+
+# remove captions
+strip: //span[@class='copyRight']
+
+# remove photo galleries and extras
+strip: //div[contains(@class, 'textGallery')]
+strip: //div[contains(@class, 'videoGallery')]
+strip: //div[contains(@class, 'imageGallery')]
+strip: //div[contains(@class, 'openContent')]
+
+# remove comments
+strip: //div[@id = 'writeComment']
+
 test_url: http://www.welt.de/vermischtes/weltgeschehen/article11050589/27-Bergleute-in-neuseelaendischer-Mine-vermisst.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b934302..3132e98
@@ -1,6 +1,6 @@
-title: substring-before(//title, '«')\r
-\r
-body: //div[@class='entry']\r
-strip: //div[@class='sharing_label']\r
+title: substring-before(//title, '«')
+
+body: //div[@class='entry']
+strip: //div[@class='sharing_label']
 strip: //div[@class='snap_nopreview sharing robots-nocontent']
 test_url: http://www.westhamtillidie.com/2012/03/11/twelve-things-we-learned-from-the-doncaster-game/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 52c5cf1..100a8c8
@@ -1,7 +1,7 @@
-strip: //div[@class="navigation"]\r
-strip: //div[@id="sidebar"]\r
-strip: //div[@id="post-extra-content"]\r
-strip: //div[@id="footer"]\r
-strip: //div[contains(@class, "sharing")]\r
+strip: //div[@class="navigation"]
+strip: //div[@id="sidebar"]
+strip: //div[@id="post-extra-content"]
+strip: //div[@id="footer"]
+strip: //div[contains(@class, "sharing")]
 
 test_url: http://whatever.scalzi.com/2011/01/09/quick-giffords-follow-up/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index aa9783c..b9eeaa0
@@ -1,11 +1,11 @@
-body://div[contains(@class,'oAndtLyrics')]\r
-strip://div[contains(@class,'info')]\r
-strip://div[contains(@id,'romanization')]\r
-strip://div[contains(@id,'youtube')]\r
-strip://div[contains(@id,'romanizationSelector')]\r
-strip://div[contains(@id,'langSelectWrap')]\r
-strip://div[contains(@id,'requestTranslationWrap')]\r
-strip://div[contains(@id,'viewMore')]\r
-strip://div[contains(@class,'lyricsListInMainContent')]\r
+body://div[contains(@class,'oAndtLyrics')]
+strip://div[contains(@class,'info')]
+strip://div[contains(@id,'romanization')]
+strip://div[contains(@id,'youtube')]
+strip://div[contains(@id,'romanizationSelector')]
+strip://div[contains(@id,'langSelectWrap')]
+strip://div[contains(@id,'requestTranslationWrap')]
+strip://div[contains(@id,'viewMore')]
+strip://div[contains(@class,'lyricsListInMainContent')]
 strip://div[contains(@class,'descIpNoti')]
 test_url: http://wheelyric.com/lyrics/121#2
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 1f262a0..b80fe5d
@@ -1,8 +1,8 @@
-title: //h1\r
-body: //div[@id='content']\r
-strip_id_or_class: editsection\r
-strip_id_or_class: toc\r
-strip: //div[@id='siteNotice']\r
-strip: //div[@id='content']//table[last()]\r
+title: //h1
+body: //div[@id='content']
+strip_id_or_class: editsection
+strip_id_or_class: toc
+strip: //div[@id='siteNotice']
+strip: //div[@id='content']//table[last()]
 prune: no
 test_url: http://wiki.guildwars.com/wiki/Monk
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e176907..e923399
@@ -1,8 +1,8 @@
-title: //h1\r
-body: //div[@id='content']\r
-strip_id_or_class: editsection\r
-strip_id_or_class: toc\r
-strip: //div[@id='siteNotice']\r
-strip: //div[@id='content']//table[last()]\r
+title: //h1
+body: //div[@id='content']
+strip_id_or_class: editsection
+strip_id_or_class: toc
+strip: //div[@id='siteNotice']
+strip: //div[@id='content']//table[last()]
 prune: no
 test_url: http://wiki.guildwars2.com/wiki/Guardian
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/wikihow.com.txt b/inc/3rdparty/site_config/standard/wikihow.com.txt
new file mode 100755 (executable)
index 0000000..fe95d3f
--- /dev/null
@@ -0,0 +1,15 @@
+# ...&printable=yes
+body: //div[@id='bodycontents']
+prune: no
+tidy: no
+strip_id_or_class: gatEditSection
+strip_id_or_class: relatedwikihows
+#strip: //div[contains(@class, 'step_num')]
+
+replace_string(<script ): <div style="display: none" 
+replace_string(</script>): </div>
+
+single_page_link: //a[@id='gatPrintView']
+single_page_link: concat(//link[@rel='canonical']/@href, '?printable=yes')
+
+test_url: http://www.wikihow.com/Start-Your-Own-Country
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index da5bd0b..1f32a37
@@ -1,14 +1,14 @@
-# copied from .wikipedia.org.txt\r
-title: //h1[@id='firstHeading' or @class='firstHeading']\r
-body: //div[@id = 'bodyContent']\r
-strip_id_or_class: editsection\r
-#strip_id_or_class: toc\r
-strip_id_or_class: vertical-navbox\r
-strip: //table[@id='toc'] | //div[@id='p-toc']\r
-strip: //div[@id='catlinks' or @id='contentSub']\r
-strip: //div[@id='jump-to-nav']\r
-strip: //div[@class='thumbcaption']//div[@class='magnify']\r
-strip: //table[@class='navbox']\r
-prune: no\r
+# copied from .wikipedia.org.txt
+title: //h1[@id='firstHeading' or @class='firstHeading']
+body: //div[@id = 'bodyContent']
+strip_id_or_class: editsection
+#strip_id_or_class: toc
+strip_id_or_class: vertical-navbox
+strip: //table[@id='toc'] | //div[@id='p-toc']
+strip: //div[@id='catlinks' or @id='contentSub']
+strip: //div[@id='jump-to-nav']
+strip: //div[@class='thumbcaption']//div[@class='magnify']
+strip: //table[@class='navbox']
+prune: no
 tidy: no
 test_url: http://wikitravel.org/wiki/en/index.php?title=Bangkok&printable=yes
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 24467c2..394f9ca
@@ -1,4 +1,4 @@
-strip: //div[@class="widget-area"]\r
-title: //*[@class="entry-title"]\r
+strip: //div[@class="widget-area"]
+title: //*[@class="entry-title"]
 date: //time[@class="entry-date"]
 test_url: http://will-self.com/2012/02/01/real-meals-dominos-pizza/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index fb5f92e..cefabec
@@ -1,3 +1,3 @@
-title: substring-after(//span[@class='itemTitle'], ':') \r
+title: substring-after(//span[@class='itemTitle'], ':') 
 body: //div[@id='content']
 test_url: http://www.williampfaff.com/modules/news/article.php?storyid=491
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index bc93637..dddc6f9
@@ -1,12 +1,12 @@
-title: //h1/span\r
-\r
-body: //div[@id="news_content"]\r
-\r
-author: //div[@class="bookmarks_btm"]/p[1]/a[1]/text()\r
-\r
-date: //span[@class='date']\r
-\r
-# Rubrikenbild entfernen\r
-strip: //div[@id="news_content"]/a[1]\r
+title: //h1/span
+
+body: //div[@id="news_content"]
+
+author: //div[@class="bookmarks_btm"]/p[1]/a[1]/text()
+
+date: //span[@class='date']
+
+# Rubrikenbild entfernen
+strip: //div[@id="news_content"]/a[1]
 
 test_url: http://winfuture.de/news,69672.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index cedb439..f25f9c9
@@ -1,6 +1,6 @@
-title: //h1[@class='page-heading']\r
-author: //small/strong/a\r
-#their date string is relative, so if you save the page 2 hours after it is posted it may say 'two hours ago, instead of providing a useful date/time'\r
-date: substring-before(substring-after(//small,'on'),'with')\r
-body: //div[@class='entry']\r
+title: //h1[@class='page-heading']
+author: //small/strong/a
+#their date string is relative, so if you save the page 2 hours after it is posted it may say 'two hours ago' instead of providing a useful date/time
+date: substring-before(substring-after(//small,'on'),'with')
+body: //div[@class='entry']
 test_url: http://www.winrumors.com/chinese-windows-phone-launch-still-on-track-for-early-2012/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index db6a6fc..f725b67
@@ -1,3 +1,3 @@
-date: //*[@class='kicker']\r
-body: //*[@class='KonaBody']\r
+date: //*[@class='kicker']
+body: //*[@class='KonaBody']
 test_url: http://www.winsupersite.com/article/paul-thurrotts-wininfo/android-malware-surges-separate-studies-141364
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 69bbf5b..f5a72d1
@@ -1,22 +1,25 @@
-title: //meta[@property="og:title"]/@content \r
-title: //h1\r
-title: //*[@class='posttitle']\r
-author: //*[@class='entryAuthor']/a[1]\r
-author://*[@class='member-title']\r
-author://li[@class='author']/a[contains(@href, '/author/')]\r
-date: substring-after(//div[@class='entryAuthor'], '·')\r
-date: substring-before(//*[@class='entryDate'], '|')\r
-body: //div[@class='entry']\r
-strip: //span[contains(@class, 'nextprev')]\r
-#strip_id_or_class: ngg-galleryoverview \r
-# ngg-galleryoverview is the whole content sometimes, e.g. http://www.wired.com/underwire/2011/12/best-mixtapes-of-2011/?pid=5736&viewall=true\r
-\r
-strip: //p[span[contains(@class, 'contentjump')]]\r
-strip: //text()[contains(., 'nextpage')]\r
-\r
-prune: no\r
-\r
-single_page_link: //a[contains(@href, '/all/1') and contains(@class, 'contentjumpall')]\r
-\r
-test_url: http://www.wired.com/cloudline/2011/10/meet-arms-cortex-a15-the-future-of-the-ipad-and-possibly-the-macbook-air/\r
-test_url: http://www.wired.com/threatlevel/2012/05/ff_counterfeiter/all/1
\ No newline at end of file
+title: //meta[@name='Title']/@content
+author: //meta[@name='Author']/@content
+date: //meta[@name='DisplayDate']/@content
+body: //div[@class='entry']
+strip: //p[contains(., 'Pages:') and contains(., 'View All')]
+strip: //p[@class='caption']
+strip: //div[@class='desc' or @class='slide' or @id='slide-info']
+
+strip_id_or_class: pullquote
+strip_id_or_class: left_rail
+strip_id_or_class: related-container
+strip_id_or_class: radvert-caption-wrap
+
+# Remove gallery?
+strip_id_or_class: wpgallery
+
+#strip: //text()[contains(., 'nextpage')]
+
+prune: no
+
+single_page_link: //a[.='View All' and contains(@href, '/all/')]
+
+test_url: http://www.wired.com/cloudline/2011/10/meet-arms-cortex-a15-the-future-of-the-ipad-and-possibly-the-macbook-air/
+test_url: http://www.wired.com/wiredenterprise/2013/09/docker/
+test_url: http://www.wired.com/threatlevel/2012/05/ff_counterfeiter/all/
old mode 100644 (file)
new mode 100755 (executable)
index ffb6b2d..1d403a9
@@ -1,13 +1,13 @@
-title: //div[@class="bodyText"]/h1/text()\r
-body: //div[@class="bodyText"]\r
-\r
-# author and date are separated by only a newline\r
-# can't figure out how to tokenize that yet\r
-author: //div[@class="bodyText"]/span[@class="info"]/text()\r
-date: //div[@class="bodyText"]/span[@class="info"]/text()\r
-\r
-# strip metdata from body text\r
-strip: //div[@class="bodyText"]/h1/text()\r
-strip: //div[@class="bodyText"]/span[@class="info"]\r
+title: //div[@class="bodyText"]/h1/text()
+body: //div[@class="bodyText"]
+
+# author and date are separated by only a newline
+# can't figure out how to tokenize that yet
+author: //div[@class="bodyText"]/span[@class="info"]/text()
+date: //div[@class="bodyText"]/span[@class="info"]/text()
+
+# strip metadata from body text
+strip: //div[@class="bodyText"]/h1/text()
+strip: //div[@class="bodyText"]/span[@class="info"]
 strip: //div[@class="bodyText"]/span[@class="info"]
 test_url: http://www.wmnf.org/news_stories/light-rail-advocates-join-forces-to-combat-opposition-in-pinellas
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d9011d2..70168fb
@@ -1,4 +1,4 @@
-date://*[@class="entry-date"]\r
-author://*[@class="author vcard"]\r
+date://*[@class="entry-date"]
+author://*[@class="author vcard"]
 strip://*[@style="position:relative;left:72px;top:2px;"]|//*[@id="authorbox"]
 test_url: http://wmpoweruser.com/breaking-nokia-announces-nfc-support-in-lumia-610-windows-phone-device/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0e42ca5..b88f927
@@ -1,5 +1,5 @@
-title: //div[@class="content article"]/h1\r
-date: substring-after(//*[@class='date'], '//')\r
-body: //*[@class='article-content']\r
+title: //div[@class="content article"]/h1
+date: substring-after(//*[@class='date'], '//')
+body: //*[@class='article-content']
 strip: //*[@id='nomodal']
 test_url: http://www.worldpoultry.net/news/kyrgyzstan-restricts-poultry-imports-from-russia-and-kazakhstan-9332.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 733d607..4682e0d
@@ -1,4 +1,4 @@
-title: //p[@id='content']\r
-\r
+title: //p[@id='content']
+
 body: //div[@class='contentblock']
 test_url: http://www.worldwidewords.org/weirdwords/ww-gro1.htm
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 759fb81..44add9c
@@ -1,6 +1,6 @@
-title: //h2[@class="posttitle"]\r
-body: //div[@class="post"]\r
-strip: //h2[@class="posttitle"]\r
-strip: //p[@class="filed-under"]\r
+title: //h2[@class="posttitle"]
+body: //div[@class="post"]
+strip: //h2[@class="posttitle"]
+strip: //p[@class="filed-under"]
 convert_double_br_tags: yes
 test_url: http://wow.joystiq.com/2011/06/20/the-overachiever-guide-to-midsummer-festival-2011-achievements/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/wpmayor.com.txt b/inc/3rdparty/site_config/standard/wpmayor.com.txt
new file mode 100755 (executable)
index 0000000..bb4fffc
--- /dev/null
@@ -0,0 +1,8 @@
+body: //div[@id='nrelate_flyout_placeholder']
+
+strip_id_or_class: share
+
+prune: no
+
+test_url: http://www.wpmayor.com/themes/wordpress-portfolio-resume-themes/
+test_url: http://www.wpmayor.com/feed/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/wtatennis.com.txt b/inc/3rdparty/site_config/standard/wtatennis.com.txt
new file mode 100755 (executable)
index 0000000..1000ab2
--- /dev/null
@@ -0,0 +1,7 @@
+title: //h1[contains(@class, 'header-2')]
+body: //article//*[contains(@class, 'teaserText') or contains(@class, 'lastUpdated') or contains(@class, 'image') or contains(@class, 'body')]
+strip_id_or_class: articleIndex
+prune: no
+
+test_url: http://www.wtatennis.com/news/article/3190914
+test_url: http://www.wtatennis.com/news/article/3190244
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0846be2..97a5c19
@@ -1,15 +1,15 @@
-body://div[@id='articleNew']\r
-strip://div[@id='articleBy']\r
-strip://div[@id='articleDate']\r
-strip://td[@class='articleGraphicCredit']\r
-strip://h1\r
-strip://div[@id='articleEnd']\r
-strip://p[@class='tagline']\r
-strip://div[@class='openBox adslibraryArticle']\r
-strip_id_or_class:ad-180x150-1\r
-\r
-\r
-title: //div[@id="articleNew"]/h1\r
-author: //div[@id="articleBy"]/p/b\r
-date: substring-before(//div[@id="articleDate"], "-")\r
+body://div[@id='articleNew']
+strip://div[@id='articleBy']
+strip://div[@id='articleDate']
+strip://td[@class='articleGraphicCredit']
+strip://h1
+strip://div[@id='articleEnd']
+strip://p[@class='tagline']
+strip://div[@class='openBox adslibraryArticle']
+strip_id_or_class:ad-180x150-1
+
+
+title: //div[@id="articleNew"]/h1
+author: //div[@id="articleBy"]/p/b
+date: substring-before(//div[@id="articleDate"], "-")
 test_url: http://www1.folha.uol.com.br/mundo/1115805-ex-ditador-argentino-videla-e-condenado-a-50-anos-de-prisao.shtml
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f99467c..638583d
@@ -1,11 +1,9 @@
-title:h1
-author: //*[@class = 'author']
-date: //*[@class = 'date']
-body: //*[@id = 'art']
-next_page_link: //*[@id='Str']/a[contains(text(), 'nastepne')]
-strip: //*[@class = 'rel_zdjTOP']
-strip: //*[@id = 'rel']
-strip: //*[@class = 'txt_upl']
-strip: //*[@id='Str']
-strip: //*[@id='source']
-test_url: http://wyborcza.pl/1,123455,11536088,Gdy_peknie_fejs__obryzga_wszystko.html?as=1&startsz=x
\ No newline at end of file
+body: //div[@id='article']
+strip: //div[@class='head']
+
+strip_id_or_class: txt_upl
+
+single_page_link: //div[@id='gazeta_article_tools']//a[contains(@class, 'print')]
+
+test_url: http://wyborcza.pl/1,123455,11536088,Gdy_peknie_fejs__obryzga_wszystko.html?as=1&startsz=x
+test_url: http://wyborcza.pl/1,75478,14880255,Biskup_Dydycz_o_pedofilii_i_tajemnicy_spowiedzi__Zamiast.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d8c8713..bd7ecf2
@@ -1,3 +1,3 @@
-body: //div[@class='article-body']\r
+body: //div[@class='article-body']
 title: //h1
 test_url: http://wyctim.com/icloud-sync-regebbi-rendszereken/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index fbc1d3d..5b2be74
@@ -1,5 +1,5 @@
-title://h1\r
-\r
-date://p[@class='articleDate']\r
+title://h1
+
+date://p[@class='articleDate']
 body://div[@class='articleBody wzStandardArticle']
 test_url: http://www.wz-newsline.de/home/sport/tennis/federer-zum-vierten-mal-sieger-in-indian-wells-1.938050
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/xfgjls.com.txt b/inc/3rdparty/site_config/standard/xfgjls.com.txt
new file mode 100755 (executable)
index 0000000..2dc247a
--- /dev/null
@@ -0,0 +1,11 @@
+# This filter is tested on:
+# http://www.xfgjls.com/magazine/html/?131.html
+# http://www.xfgjls.com/magazine/html/?170.html
+
+body://h3/following-sibling::div
+title: //h3
+date: substring-before(//h3/following-sibling::div/p, ' ')
+author: substring-before(substring-after(//h3/following-sibling::div/p, '作者:'), '来源')
+wrap_in(strong)://span[contains(@style, "FONT-WEIGHT: bold")]
+dissolve://span[@style="FONT-FAMILY: '宋体'; FONT-SIZE: 10.5pt; FONT-WEIGHT: bold; mso-spacerun: 'yes'"]
+test_url: http://www.xfgjls.com/magazine/html/?170.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e02960e..c09fa4d
@@ -1,4 +1,4 @@
-title: //h1[@class="entry-title"]\r
-author: //span[@class="fn"]\r
+title: //h1[@class="entry-title"]
+author: //span[@class="fn"]
 date: //p[@class="meta"]
 test_url: http://xoeb.us/blog/2012/03/16/my-mistakes-with-our-first-release/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
diff --git a/inc/3rdparty/site_config/standard/ynet.co.il.txt b/inc/3rdparty/site_config/standard/ynet.co.il.txt
new file mode 100755 (executable)
index 0000000..aa86566
--- /dev/null
@@ -0,0 +1,26 @@
+body: //span[@id='article_content' or @class='text16g']
+
+# ads
+strip: //div[.//div[contains(@id, 'ads.')]]
+# related content heading
+strip: //p[contains(., 'עוד בערוץ החדשות של ynet:')]
+strip: //p[contains(., 'כותרות אחרונות מהעולם בחדשות ynet:')]
+strip: //div[contains(., 'אינציקלופדיית ynet:')]
+# related content links
+strip: //a[@class='bluelink']
+# strip image bullets
+strip_image_src: ynet_manual_bullet.png
+
+prune: no
+tidy: no
+
+# prevent JS issues
+find_string: <script type='text/javascript'>
+replace_string: <div style="display:none;">
+find_string: </script>
+replace_string: </div>
+
+test_url: http://www.ynet.co.il/articles/0,7340,L-4354266,00.html
+test_url: http://www.ynet.co.il/articles/0,7340,L-4354268,00.html
+#feed
+test_url: http://www.ynet.co.il/Integration/StoryRss2.xml
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9e24db3..2aeb7e0
@@ -1,5 +1,5 @@
-title://div[@class='entry-title']\r
-body://div[@class='entry-content']\r
-strip_comments:yes\r
+title://div[@class='entry-title']
+body://div[@class='entry-content']
+strip_comments:yes
 convert_double_br_tags:yes
 test_url: http://www.yostivanich.com/2010/07/11/wired-com-with-world-watching-wikileaks-falls-into-disrepair/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/yourerie.com.txt b/inc/3rdparty/site_config/standard/yourerie.com.txt
new file mode 100755 (executable)
index 0000000..b46b09e
--- /dev/null
@@ -0,0 +1,2 @@
+body: //div[@class="nxFullTextData"]
+test_url: http://yourerie.com/fulltext?nxd_id=306552
old mode 100644 (file)
new mode 100755 (executable)
index d52b735..b0d95f1
@@ -1,15 +1,15 @@
-title: //title\r
-body: //iframe\r
-\r
-find_string: <html>&lt;iframe \r
-replace_string: <iframe id="video" \r
-\r
-find_string: &gt;&lt;/iframe&gt;</html>\r
-replace_string: ></iframe>\r
-\r
-single_page_link: //link[@type='text/xml+oembed']\r
-\r
-prune: no\r
-tidy: no\r
-\r
+title: //title
+body: //iframe
+
+find_string: <html>&lt;iframe 
+replace_string: <iframe id="video" 
+
+find_string: &gt;&lt;/iframe&gt;</html>
+replace_string: ></iframe>
+
+single_page_link: //link[@type='text/xml+oembed']
+
+prune: no
+tidy: no
+
 test_url: http://www.youtube.com/watch?v=F6gLH0r3iVU
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/zcommunications.org.txt b/inc/3rdparty/site_config/standard/zcommunications.org.txt
new file mode 100755 (executable)
index 0000000..4deb49b
--- /dev/null
@@ -0,0 +1,7 @@
+title: //h1[@id='view_title']
+author: //div[contains(@class, 'content_authors')]//a
+body: //div[@id='view_body']
+
+prune: no
+
+test_url: http://www.zcommunications.org/orwellian-language-update-by-edward-s-herman.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b244b22..939fb0e
@@ -1,10 +1,10 @@
-title: //h1[@class="h s-1"]\r
-author: substring-before(substring-after(//p[@class="meta s-10"], 'By'), '|')\r
-author: substring-after(//div[@class="bio"]//h3, 'About ')\r
-date: substring-after(//p[@class="meta s-10"], '|')\r
-date: substring-after(//p[@class="meta"], '|')\r
-body: //div[@class="content-1 entry space-1 clear"]\r
-body: //div[@class="storyBody"]\r
-\r
-test_url: http://www.zdnet.com/blog/microsoft/the-bing-back-end-more-on-cosmos-tiger-and-scope/10920\r
+title: //h1[@class="h s-1"]
+author: substring-before(substring-after(//p[@class="meta s-10"], 'By'), '|')
+author: substring-after(//div[@class="bio"]//h3, 'About ')
+date: substring-after(//p[@class="meta s-10"], '|')
+date: substring-after(//p[@class="meta"], '|')
+body: //div[@class="content-1 entry space-1 clear"]
+body: //div[@class="storyBody"]
+
+test_url: http://www.zdnet.com/blog/microsoft/the-bing-back-end-more-on-cosmos-tiger-and-scope/10920
 test_url: http://www.zdnet.com/researchers-find-web-tracking-up-privacy-down-7000000358/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 66a7f1a..9815d47
@@ -1,44 +1,45 @@
-# 2012-12-23 [carlo@...] fixed half-assed headlines in articles, removed inline author profiles, adjusted picture captions\r
-# 2012-03-17 [dkless@...] Cut metadata parts in the beginning and the ends of the content block; copyright entries for pictures removed; Author fixed, not sure if old entries still valid (I left them); Weird problems with some pages addressed (see last section for removing hidden section)\r
-# 2011-12-09 [carlo@...] Removed "related articles" block\r
-# 2011-08-23 [carlo@...] changed single page link to use print version: page works better, less ambiguity. Related cleanups and simplifications.\r
-# 2011-08-20 [carlo@...] added author, fixed date\r
-\r
-\r
-single_page_link: //a[@title='Druckversion']\r
-tidy: no\r
-\r
-title: //title\r
-date: substring-before( //li[@class="date"], " " )\r
-author: //li[@class="author"]/a/text() | //li[@class="author first"]/a/text()\r
-author: substring-after(//li[@class='source first '], 'Quelle: ')\r
-\r
-strip_id_or_class: articleheader\r
-strip: //div[@id="comments"] | //div[@class="pagination block"] | //p[@class="ressortbacklink"] | //div[@id="relatedArticles"]  |  // div[@class="inline portrait"]\r
-\r
-#Removes author and date from the start\r
-strip: //ul[@class="tools"]\r
-#Removes copyright statement - often disturb as first line of the news\r
-strip: //p[@class="copyright"]\r
-strip: //div[@class="copyright"]\r
-#Removes pagination links at the end\r
-strip: //div[@class="pagination"]\r
-\r
-# Fix picture captions\r
-wrap_in(small): //p[@class="caption"]/text()\r
-\r
-# Fix sub-headlines\r
-wrap_in(h2): //p/strong\r
-dissolve: //h2/strong\r
-\r
-#Sometimes things are embedded in the print version that are not displayed on the web, but will be displayed in the mobilized versions and lead even to problems. These sections are removed here.\r
-strip_id_or_class:"informatives"\r
-strip_id_or_class:"bottom"\r
-strip_id_or_class:"teasermosaic"\r
-strip_id_or_class:"comments"\r
-strip_id_or_class:"articlefooter af"\r
-strip_id_or_class:"relateds"\r
-strip_id_or_class:"pagination"\r
-\r
-footnotes: no\r
-test_url: http://www.zeit.de/kultur/film/2012-12/Kurzfilmtag
\ No newline at end of file
+# 2013.10.30 [rezor92] fixed single_page_link
+# 2012-12-23 [carlo@...] fixed half-assed headlines in articles, removed inline author profiles, adjusted picture captions
+# 2012-03-17 [dkless@...] Cut metadata parts in the beginning and the ends of the content block; copyright entries for pictures removed; Author fixed, not sure if old entries still valid (I left them); Weird problems with some pages addressed (see last section for removing hidden section)
+# 2011-12-09 [carlo@...] Removed "related articles" block
+# 2011-08-23 [carlo@...] changed single page link to use print version: page works better, less ambiguity. Related cleanups and simplifications.
+# 2011-08-20 [carlo@...] added author, fixed date
+
+
+single_page_link: //a[@title='Auf einer Seite']
+tidy: no
+
+title: //title
+date: substring-before( //li[@class="date"], " " )
+author: //li[@class="author"]/a/text() | //li[@class="author first"]/a/text()
+author: substring-after(//li[@class='source first '], 'Quelle: ')
+
+strip_id_or_class: articleheader
+strip: //div[@id="comments"] | //div[@class="pagination block"] | //p[@class="ressortbacklink"] | //div[@id="relatedArticles"]  |  // div[@class="inline portrait"]
+
+#Removes author and date from the start
+strip: //ul[@class="tools"]
+#Removes copyright statement - it is often a distracting first line of the article
+strip: //p[@class="copyright"]
+strip: //div[@class="copyright"]
+#Removes pagination links at the end
+strip: //div[@class="pagination"]
+
+# Fix picture captions
+wrap_in(small): //p[@class="caption"]/text()
+
+# Fix sub-headlines
+wrap_in(h2): //p/strong
+dissolve: //h2/strong
+
+#Sometimes things are embedded in the print version that are not displayed on the web, but will be displayed in the mobilized versions and lead even to problems. These sections are removed here.
+strip_id_or_class:"informatives"
+strip_id_or_class:"bottom"
+strip_id_or_class:"teasermosaic"
+strip_id_or_class:"comments"
+strip_id_or_class:"articlefooter af"
+strip_id_or_class:"relateds"
+strip_id_or_class:"pagination"
+
+footnotes: no
+test_url: http://www.zeit.de/kultur/film/2012-12/Kurzfilmtag
diff --git a/inc/3rdparty/site_config/standard/zerohedge.com.txt b/inc/3rdparty/site_config/standard/zerohedge.com.txt
new file mode 100755 (executable)
index 0000000..7e76aee
--- /dev/null
@@ -0,0 +1,10 @@
+author: //span[@class='submitted']/a
+strip: //div[@class='clear-block clr']
+strip: //div[@class='picture']
+strip: //span[@class='submitted']
+strip: //div[@class='breadcrumb']
+strip: //div[@class='fivestar-static-form-item']
+strip: //div[@class='js-links']
+strip: //div[@class='links clear-block clear']
+strip: //div[@class='block block-block']
+test_url: http://www.zerohedge.com/news/bernankes-columbus-voyage-end-monetary-policy-world
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ea9132a..afa964d
@@ -1,3 +1,3 @@
-title: //h1\r
+title: //h1
 body: //div[@id="primarycontent"]
 test_url: http://zerokspot.com/weblog/2011/06/26/europython2011/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/zhihu.com.txt b/inc/3rdparty/site_config/standard/zhihu.com.txt
new file mode 100755 (executable)
index 0000000..3c9d8c1
--- /dev/null
@@ -0,0 +1,19 @@
+# This filter is tested on:
+# http://www.zhihu.com/question/19587406
+# http://www.zhihu.com/question/20649035
+# http://www.zhihu.com/question/20637942
+
+author: //h3[@class='zm-item-answer-author-wrap']
+title://h2[@class='zm-item-title']
+date://a[@class='answer-date-link meta-item']
+convert_double_br_tags: yes
+
+wrap_in(blockquote)://div[@class='zm-editable-content']
+wrap_in(blockquote)://sup/text()
+dissolve://sup
+
+strip://div[@class='zh-answers-title']
+strip:///div[@class='zm-item-vote-info ']
+strip://div[@class='zm-item-answer-author-info']
+strip://div[@class='zu-blue-info-board zg-r3px']
+test_url: http://www.zhihu.com/question/20637942
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 2a2f58a..188d4dd
@@ -1,3 +1,3 @@
-title: substring-after(id, 'post')/h2\r
+title: substring-after(id, 'post')/h2
 body://div[@class = 'entry']
 test_url: http://www.zingtrain.com/category/ontrack/january-2007/
\ No newline at end of file
index 8d74f2ff0d4d25db92fd48b980790b4beb7a562d..2c80b64b5dff24687777be87754134057be01091 100755 (executable)
 class Database {
 
     var $handle;
-    private $order = array(
-      'ia' => 'ORDER BY entries.id',
-      'id' => 'ORDER BY entries.id DESC',
-      'ta' => 'ORDER BY lower(entries.title)',
-      'td' => 'ORDER BY lower(entries.title) DESC',
-      'default' => 'ORDER BY entries.id'
+    private $order = array (
+        'ia' => 'ORDER BY entries.id',
+        'id' => 'ORDER BY entries.id DESC',
+        'ta' => 'ORDER BY lower(entries.title)',
+        'td' => 'ORDER BY lower(entries.title) DESC',
+        'default' => 'ORDER BY entries.id'
     );
 
     function __construct()
@@ -170,11 +170,11 @@ class Database {
     public function login($username, $password, $isauthenticated = FALSE)
     {
         if ($isauthenticated) {
-          $sql = "SELECT * FROM users WHERE username=?";
-          $query = $this->executeQuery($sql, array($username));
+            $sql = "SELECT * FROM users WHERE username=?";
+            $query = $this->executeQuery($sql, array($username));
         } else {
-          $sql = "SELECT * FROM users WHERE username=? AND password=?";
-          $query = $this->executeQuery($sql, array($username, $password));
+            $sql = "SELECT * FROM users WHERE username=? AND password=?";
+            $query = $this->executeQuery($sql, array($username, $password));
         }
         $login = $query->fetchAll();