]> git.immae.eu Git - github/wallabag/wallabag.git/commitdiff
Merge branch 'dev' into data-for-mysql
authortcit <tcit@tcit.fr>
Wed, 8 Oct 2014 17:26:26 +0000 (19:26 +0200)
committertcit <tcit@tcit.fr>
Wed, 8 Oct 2014 17:26:26 +0000 (19:26 +0200)
1002 files changed:
CONTRIBUTING.md
COPYING.md
CREDITS.md
GUIDELINES.md [new file with mode: 0644]
README.md
TRANSLATION.md
check_setup.php [changed mode: 0644->0755]
inc/3rdparty/FlattrItem.class.php
inc/3rdparty/Session.class.php
inc/3rdparty/site_config/custom/blogs.faz.net.txt [new file with mode: 0644]
inc/3rdparty/site_config/standard/24ways.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/36kr.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/37signals.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/3quarksdaily.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/3voor12.vpro.nl.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/43folders.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/500px.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/512pixels.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/5by5.tv.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/7newsbelize.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/944.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/README.md [new file with mode: 0755]
inc/3rdparty/site_config/standard/aachener-nachrichten.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/aachener-zeitung.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/abc.es.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/abc.net.au.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/abcnews.go.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/accesstoinsight.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/acidcow.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/acquia.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/acroswing.fr.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/aftenposten.no.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/aftonbladet.se.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/aht.seriouseats.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/albayan.ae.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/alex.mullr.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/alexduner.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/alexduner.squarespace.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/alistapart.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/aljazeera.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/allrecipes.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/allthingsd.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/allyou.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/alphabeta.argaam.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/alriyadh.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/alseraj.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/alt1040.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/alternet.org.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/altfoto.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/alumni.stanford.edu.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/amandala.com.bz.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/amazon.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/americandrink.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/americascup.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/americastestkitchenfeed.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/amptoons.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/anandtech.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/androidpolice.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/andyrutledge.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/annatravelling.wordpress.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/applature.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/apple.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/appledaily.com.tw.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/appleinsider.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/appleweblog.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/archdaily.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/archiveofourown.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/arstechnica.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/articles.boston.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/articles.courant.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/articles.washingtonpost.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/asahi.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ascarter.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/astronews.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/asymco.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/autoblog.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/avclub.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/baltimoresun.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/baseballprospectus.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/basicthinking.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/bb.is.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/bbc.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/bbcgoodfood.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/benoitmaison.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/berlingske.dk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/bernama.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/betabeat.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/betanews.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/biography.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/bitelia.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/bizjournals.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/bjango.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blog.arsln.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blog.asmartbear.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blog.cloudflare.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blog.fefe.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blog.instagram.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blog.instapaper.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/blog.jaysalvat.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blog.kaelig.fr.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blog.naver.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blog.pchome.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blog.pinboard.in.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blog.renren.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/blog.sina.com.cn.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blog.spu.edu.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blog.wells.ee.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blogs.aljazeera.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blogs.forbes.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blogs.hbr.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blogs.msdn.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blogs.reuters.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blogs.scientificamerican.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blogs.smithsonianmag.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/blogs.technet.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/bluetouff.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/boagworld.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/boingboing.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/boldizsar.palotas.eu.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/book.douban.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/bookforum.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/borderhouseblog.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/bostonglobe.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/bostonreview.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/boundlessline.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/bowdoinorient.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/brainfacts.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/brandeins.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/brandingstrategyinsider.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/brasil.elpais.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/brettterpstra.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/brisbanetimes.com.au.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/brookings.edu.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/brooksreview.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/bt.no.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/buffed.de.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/buquad.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/business2community.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/businessinsider.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/businessnews.com.tn.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/businessweek.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/buzzfeed.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/bygonebureau.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/cable.co.uk.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/cardboardconnection.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/carpeaqua.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/cars.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/catb.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/cbc.ca.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/cbn.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/cbsnews.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/cedarrepublican.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/chareidi.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/chinamining.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/chomsky.info.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/chrisltd.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/christianitytoday.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/christianpf.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/christies.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/chrome.google.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/chronicle.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ciaosamin.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/cicero.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ciperchile.cl.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/cjr.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/classyllama.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/clientk.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/clubic.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/cmswire.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/cn.engadget.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/cn.reuters.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/cnet.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/cnn.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/cnnsi.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/code.activestate.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/code.fivefilters.org.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/code.google.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/codeproject.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/codinghorror.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/collegehumor.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/communities-dominate.blogs.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/community.service-now.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/computer.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/computerbase.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/computerworld.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/computerworld.dk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/contemporist.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/conversaciones.nokia.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/cooper.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/core77.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/counterpunch.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/crazybutable.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/crimemagazine.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/crimethinc.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/crn.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/csmonitor.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/csnbayarea.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/csnphilly.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/css-tricks.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/cucharasonica.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/cw.com.tw.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/da.feedsportal.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/dagogtid.no.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/dailydot.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/dailykos.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/dailymail.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/dailystar.com.lb.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/danleech.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/dansdata.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/dantri.com.vn.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/daringfireball.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/datanami.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/dcurt.is.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/defomicron.net.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/delong.typepad.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/democracynow.org.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/derstandard.at.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/designtagebuch.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/desitvforum.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/details.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/developers.facebook.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/devlinsangle.blogspot.co.at.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/dictionary.reference.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/diepresse.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/digiphoto.techbang.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/digital-photography-school.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/digitalspy.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/dilbert.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/dinamalar.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/dn.se.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/dobreprogramy.pl.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/doctac.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/domusweb.it.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/dou.ua.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/douban.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/dpreview.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/dr.dk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/dramasonline.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/drdobbs.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/drive2.ru.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/dropbox.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/drupal.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/dukebasketballreport.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/dushumashang.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/dvice.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/eamesinerudition.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/eandt.theiet.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/eastoftheweb.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ebay.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ecetia.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/econlog.econlib.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/economia.estadao.com.br.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/economist.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/edge-online.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/edge.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/edition.channel5belize.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/edition.cnn.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/eetimes.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/ekultura.hu.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/elance.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/elderscrollsonline.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/elektroniknet.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/elmalpensante.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/elpais.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/emaratalyoum.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/en.espnf1.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/engadget.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/engineering.tumblr.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/english.aljazeera.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/enikos.gr.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/entertainment.timesonline.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ericsuh.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/es.hu.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/escapistmagazine.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/espn.go.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/esquire.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/essentialpublicradio.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/etc.se.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/eternabuenosaires.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/eurogamer.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/evo.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/expressen.se.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/extracine.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/f1actual.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/facebook.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/facta.co.jp.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/falter.at.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/fanfiction.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/fastcompany.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/faz.net.txt
inc/3rdparty/site_config/standard/fertigung.de.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/fictionpress.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ficwad.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/finance.yahoo.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/findtheswagger.tumblr.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/firstthings.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/fivechapters.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/fivefilters.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/fivethirtyeight.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/flyingmachinestudios.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/fm4.orf.at.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/fnal.gov.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/focus.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/folklore.org.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/food.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/fool.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/forbes.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/foreignaffairs.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/foreignpolicy.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/forsvaret.no.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/foxnews.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/freelancer.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/freytag-film.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/fria.nu.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/friatidningen.se.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/friendskorner.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ft.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ftchinese.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/ftd.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/fubiz.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/futurezone.at.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/gamasutra.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/gameblog.fr.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/gamechurch.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/gamer.no.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/gamereactor.no.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/garythink.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/gasteroprod.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/gatopardo.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/gawker.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/geeksofdoom.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/geenstijl.nl.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/getnews.jp.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/giantbomb.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/giga.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/gigaom.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/gihyo.jp.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/gist.github.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/givemesomethingtoread.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/gizmodo.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/gizmodo.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/gizmodo.uol.com.br.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/gizmologia.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/gizmovil.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/global.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/globalissues.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/globoesporte.globo.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/gloswielkopolski.pl.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/goal.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/golem.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/good.is.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/goodfil.ms.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/gossip-tv.gr.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/goteborgsfria.se.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/gothamist.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/gotomanager.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/gov.ky.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/gp.se.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/gq.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/grantland.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/greatergreaterwashington.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/groups.drupal.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/gulfnews.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/guokr.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/haberler.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/habrahabr.ru.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/hackmake.org.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/halo.bungie.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/hammers.theoffside.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/handelsblatt.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/hanselman.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/hardware.fr.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/hardware.no.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/hbr.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/headrush.typepad.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/heise-online.mobi.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/heise.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/hemmings.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/heroturko.me.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/hespress.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/hiamag.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/highscalability.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/hiperpop.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/hiphopleeft.nl.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/historytoday.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/hmercer.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/hollywoodlife.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/hometheaterreview.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/hosted.ap.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/howtogeek.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/hs.fi.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ht.ly.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/huffingtonpost.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/humantransit.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/hurriyet.com.tr.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/hvg.hu.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/hypebeast.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/icannabis.tumblr.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/idealog.co.nz.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/idlewords.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/igeneration.fr.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ignoredbydinosaurs.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ilounge.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ilyabirman.ru.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/inc.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/independent.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/indiatimes.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/inessential.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/info.abril.com.br.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/infoq.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/informador.com.mx.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/information.dk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/informationarchitects.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/informationclearinghouse.info.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/informit.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/infoworld.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/infzm.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/inhabitat.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/instagr.am.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/interest.co.nz.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/iolanguage.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ipadclub.nl.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ipadplanet.nl.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/iphoneclub.nl.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/iphonehacks.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/iplaysoft.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/isource.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/itavisen.no.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/itmedia.co.jp.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/itstactical.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/itwire.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/itworld.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/izismile.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/jalopnik.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/jandan.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/jetzt.sueddeutsche.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/jjahnke.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/jobbank.gc.ca.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/joelonsoftware.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/jouire.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/joystiq.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/juedische-allgemeine.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/juppy.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/kachestvo.ru.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/kachiblog.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/kathimerini.gr.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/kenrockwell.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/kicker.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/kickstarter.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/kingarthurflour.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/kotaku.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/kottke.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/kumailplus.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/kumb.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/kwerfeldein.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/landetsfria.se.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/laphamsquarterly.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/laprensagrafica.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/laquadrature.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/lareviewofbooks.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/latimes.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/laughingsquid.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/leancrew.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/lefigaro.fr.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/lemonde.fr.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/lesnumeriques.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/letemps.ch.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/libcom.org.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/lifeandculture.fr.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/lifehacker.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/lifestyle.inquirer.net.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/lifeweek.com.cn.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/linkedin.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/livescience.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/longform.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/loopinsight.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/lostgarden.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/lovefm.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/lovetv.com.bz.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/lrb.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/luminous-landscape.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/luxuo.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/m.bbc.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/m.douban.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/m.vanityfair.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/m00natic.github.io.txt [new file with mode: 0644]
inc/3rdparty/site_config/standard/mac4ever.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/macdrifter.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/macformat.techradar.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/macgeneration.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/macmagazine.com.br.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/macrumors.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/macstories.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/mactalk.com.au.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/mactechnews.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/macworld.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/mainichi.jp.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/mainpost.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/makeuseof.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/manager.co.th.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/marco.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/marksdailyapple.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/martinfowler.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/mashable.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/matt.might.net.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/mattcutts.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/mbl.is.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/medialens.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/medium.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/megamp3.eu.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/menshealth.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/metafilter.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/mforum.cari.com.my.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/mikeash.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/mikeindustries.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/minnesota.publicradio.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/minnpost.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/mirrorfootball.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/mises.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/mlb.mlb.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/mlb.sbnation.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/mlssoccer.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/mmo-champion.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/mnn.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/mno.hu.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/mobile.nytimes.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/mobile.slate.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/mobileopportunity.blogspot.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/modernghana.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/money.cnn.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/monkeyzen.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/moonsault.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/moreintelligentlife.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/motherboard.vice.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/mothering.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/motherjones.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/motorfull.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/movie.douban.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/msdn.microsoft.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/msnbc.msn.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/myfoxatlanta.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/myfoxboston.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/myrecipes.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/narenji.ir.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/nasa.gov.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/nbweekly.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/neh.gov.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/neomoney.co.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/net-security.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/netmagazine.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/netzpolitik.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/newleftproject.org.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/newmatilda.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/newrepublic.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/news-gazette.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/news.cnet.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/news.detik.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/news.kanaloco.jp.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/news.mynavi.jp.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/news.orf.at.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/news.rambler.ru.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/news.techmeme.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/news.yahoo.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/news.ycombinator.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/news.zing.vn.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/news247.gr.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/newsbomb.gr.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/newsle.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/newsmill.se.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/newsunspun.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/newsweek.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/newswise.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/newyorker.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/next-gen.biz.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/nfl.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ngm.nationalgeographic.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/nhk.or.jp.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/nintendoworldreport.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/nojesguiden.se.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/northumberlandview.ca.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/nosalty.hu.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/nplusonemag.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/npr.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/nybooks.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/nymag.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/nyteknik.se.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/nytimes.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/nzz.ch.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/observer.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/off.net.mk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/omaha.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/omiliya.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/on.net.mk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/online.wsj.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/onlinewelten.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/onstartups.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ontologicalgeek.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/opensource.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/openthemagazine.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/openwebx.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/orf.at.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/origo.hu.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/oschina.net.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/pakistantvdekho.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/pakmedia.tv.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/pandagon.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/pandodaily.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/panic.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/papodehomem.com.br.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/parislemon.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/parliament.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/pastebin.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/pastepad.fivefilters.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/pathawks.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/pcast.me.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/pcmag.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/pcworld.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/penny-arcade.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/pentaxforums.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/philadelphiaeagles.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/philly.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/photo.tutsplus.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/php.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/physicstoday.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/pinterest.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/pitchfork.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/pittnews.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/pittsburgh.pirates.mlb.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/pittsburghlive.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/pittsburghmagazine.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/pittsburghpanthers.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/pittscriptblog.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/planetvita.de.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/playboy.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/plus.google.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/plzkthxbai.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/pogue.blogs.nytimes.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/politico.com.txt
inc/3rdparty/site_config/standard/politifact.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/politiken.dk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/polygon.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/popularmechanics.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/portertech.ca.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/positioningmag.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/post-gazette.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/posta.com.tr.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/prb.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/prog21.dadgum.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/prolost.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/propublica.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/prosa.dk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/prospectmagazine.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/protothema.gr.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/psychologytoday.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/publications.parliament.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/publico.pt.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/purpleplanetmedia.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/qctimes.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/quantumdiaries.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/queerty.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/quepasa.cl.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/quora.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/racjonalista.pl.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/radar.oreilly.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/radionz.co.nz.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/randsinrepose.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/readability.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/readwriteweb.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/real.gr.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/recipe.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/red-hot-girls.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/reddit.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/redmondpie.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/redtape.msnbc.msn.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/reflets.info.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/renenekuda.cz.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/resume.se.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/retrieverweekly.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/reuters.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/revistapiaui.estadao.com.br.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/rezeptwelt.de.txt [new file with mode: 0644]
inc/3rdparty/site_config/standard/richardmuscat.wordpress.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ritemail.blogspot.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ritholtz.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/robertsspaceindustries.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/robots.thoughtbot.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/rockpapershotgun.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/rodrigo.sharpcube.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/rogerebert.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/rolfinjapan.nl.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/rollingstone.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/rottentomatoes.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/roughtype.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/roy.gbiv.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/rpgsite.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/rubysfera.pl.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ruhlman.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ruttloff.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/salon.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/salzburg.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sanpedrosun.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/saveyourself.ca.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sayidaty.net.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/sbnation.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/schneier.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/science.orf.at.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/scienceblogs.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/scienceticker.info.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/scientificamerican.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/scilogs.de.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/scotusblog.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/scraplab.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/scripting.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sct.temple.edu.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/searchenginejournal.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/searchengineland.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/seattletransitblog.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sebbo.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/select.yeeyan.org.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/seriouseats.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sf.curbed.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sf.eater.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sfgate.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sfweekly.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/shabayek.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/shawnblanc.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/shifteleven.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/siasat.pk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/signalscv.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/simonwillison.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/singaporeanstocksinvestor.blogspot.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/singularityhub.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sintagoulis.gr.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sivers.org.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/skanesfria.se.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/slashfilm.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/slate.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/slice.seriouseats.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/slog.thestranger.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/smartinvestor.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sme.sk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/smithsonianmag.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/smokingapples.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/somethingawful.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/songshuhui.net.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/sourcebooks.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/spectator.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/spectrum.ieee.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/speirs.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/spiegel.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/spiked-online.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/spin.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/splatf.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/splitsider.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sport.detik.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sport.orf.at.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sport365.fr.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/sports.espn.go.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sports.yahoo.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sportschau.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sportsillustrated.cnn.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sprengsatz.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sqlite.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/squashed.tumblr.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/stackoverflow.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/stalbansreview.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/standard.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/staradvertiser.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/stephenfry.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/stlbeacon.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/stockholm.etc.se.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/stockholmsfria.nu.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/straightdope.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/streetsblog.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/stuff.co.nz.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/stumbleupon.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/subtraction.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sueddeutsche.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/summify.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/suntimes.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/svd.se.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/svt.se.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/sydsvenskan.se.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/symmetrymagazine.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sz-magazin.sueddeutsche.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/sz.de.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/tagesschau.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/tampabay.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/taptaptap.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/tasteofhome.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/taz.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/tbray.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/tcmanila.tk.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/tcng.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/tech.fortune.cnn.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/tech.gilt.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/tech.sina.com.cn.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/techcrunch.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/techdirt.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/techhive.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/techmeme.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/technicallyjordan.tumblr.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/technologizer.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/technologyreview.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/techpinions.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/techradar.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/telegraaf.nl.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/telegraph.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thanhnien.com.vn.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/the-magazine.org.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/theage.com.au.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/theamericanscholar.org.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/theappleblog.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/theatlantic.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/theatlanticcities.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/thebostonchannel.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thebrowser.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thecarton.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thedaily.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thedailybeast.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thedailymash.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thedisneyblog.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/theeuropean-magazine.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/thefilmexperience.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thegamedesignforum.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/theglobalmail.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/theglobeandmail.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thegreatdiscontent.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/theguardian.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/theindychannel.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/themarker.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/themillions.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/themuseumofinnocence.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thenation.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thenetworkgarden.blogs.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thenextgeneration.org.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/thenextweb.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/theoaklandpress.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/theonion.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thepioneerwoman.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/theregister.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/theroot.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/therumpus.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thesiasat.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thesimpledollar.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thespoiler.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thespoof.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thestranger.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thestreet.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thethaovanhoa.vn.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/theverge.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/theweek.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thinkprogress.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thisdaylive.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/thisismynext.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/tidbits.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/time.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/timeshighereducation.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/tipb.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/tnr.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/tomdispatch.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/tomshardware.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/tomshardware.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/toolsandtoys.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/tracks.ranea.org.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/trailer.web-view.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/trailerzone.de.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/traningslara.se.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/triblive.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/truthdig.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/tthfanfic.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/tthor.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/tuaw.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/tuckreview.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/tuhdo.github.io.txt [new file with mode: 0644]
inc/3rdparty/site_config/standard/tvtropes.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/twitter.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/uefa.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/uk.xbox360.ign.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/uni-watch.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/unwinnable.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/uppsalafria.se.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/urbandictionary.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/usatoday.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/usccb.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/useit.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/usfirst.org.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/utdailybeacon.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/ux.artu.tv.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/uzivatelsketestovani.cz.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/vanityfair.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/varingen.no.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/varsity.co.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/vea.gov.vn.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/vedomosti.ru.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/veggbilder.no.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/vemedio.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/venturebeat.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/version2.dk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/verybestbaking.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/vg.no.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/video.forbes.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/videogum.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/villagevoice.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/vimeo.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/viply.de.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/visir.is.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/vitispr.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/vivirmexico.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/vnexpress.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/voices.washingtonpost.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/vworker.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/waffle.wootest.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/walrusmagazine.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/warnerbros.fr.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/washingtoninstitute.org.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/washingtonmonthly.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/washingtonpost.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/web-libre.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/weblog.bignerdranch.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/weblogs.asp.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/webpaper.nzz.ch.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/webwereld.nl.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/welt.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/westhamtillidie.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/what-if.xkcd.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/whatever.scalzi.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/wheelyric.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/wiki.guildwars.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/wiki.guildwars2.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/wikihow.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/wikitravel.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/will-self.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/williampfaff.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/winfuture.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/winrumors.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/winsupersite.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/wired.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/wmnf.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/wmpoweruser.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/wordyard.com.txt [new file with mode: 0644]
inc/3rdparty/site_config/standard/worldpoultry.net.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/worldwidewords.org.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/wow.joystiq.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/wpmayor.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/wtatennis.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/www1.folha.uol.com.br.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/www3.imperial.ac.uk.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/wyborcza.pl.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/wyctim.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/wz-newsline.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/xfgjls.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/xoeb.us.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/yated.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/ynet.co.il.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/yostivanich.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/yourerie.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/youtube.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/zcommunications.org.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/zdnet.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/zeit.de.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/zerohedge.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/zerokspot.com.txt [changed mode: 0644->0755]
inc/3rdparty/site_config/standard/zhihu.com.txt [new file with mode: 0755]
inc/3rdparty/site_config/standard/zingtrain.com.txt [changed mode: 0644->0755]
inc/poche/Database.class.php
inc/poche/Language.class.php [new file with mode: 0644]
inc/poche/Poche.class.php
inc/poche/Routing.class.php [new file with mode: 0755]
inc/poche/Template.class.php [new file with mode: 0644]
inc/poche/Tools.class.php
inc/poche/Url.class.php
inc/poche/User.class.php
inc/poche/WallabagEpub.class.php [new file with mode: 0644]
inc/poche/config.inc.default.php
inc/poche/global.inc.php
inc/poche/pochePictures.php
index.php
install/index.php
locale/en_EN.utf8/LC_MESSAGES/en_EN.utf8.mo
locale/en_EN.utf8/LC_MESSAGES/en_EN.utf8.po
locale/fr_FR.utf8/LC_MESSAGES/fr_FR.utf8.mo
locale/fr_FR.utf8/LC_MESSAGES/fr_FR.utf8.po
themes/baggy/_menu.twig
themes/baggy/_pocheit-form.twig
themes/baggy/_search-form.twig
themes/baggy/config.twig
themes/baggy/css/main.css
themes/baggy/home.twig
themes/baggy/view.twig
themes/courgette/_menu.twig
themes/courgette/_view.twig
themes/courgette/config.twig
themes/courgette/home.twig
themes/default/_menu.twig
themes/default/_search-form.twig
themes/default/config.twig
themes/default/css/messages.css
themes/default/home.twig
themes/default/js/popupForm.js
themes/default/view.twig
wallabag_compatibility_test.php

index 9ccb0b149d791a4c9745575e88ca8df7264ba637..58bc7134545a15b10f765e033320b83216c35c7d 100644 (file)
@@ -26,3 +26,5 @@ Note : If you have large portions of text, use [Github's Gist service](https://g
 
 ## You want to fix a bug or to add a feature
 Please fork wallabag and work with **the dev branch** only. **Do not work on master branch**.
+
+[Don't forget to read our guidelines](https://github.com/wallabag/wallabag/blob/dev/GUIDELINES.md).
\ No newline at end of file
index ee7d6a54e91479b8b51312c8f603a2c10d53d1ee..c43f619a3a99389db14326c4a3f14f0d34e6270d 100644 (file)
@@ -1,14 +1,19 @@
-            DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE\r
-                    Version 2, December 2004\r
+Copyright (c) 2013-2014 Nicolas Lœuillet\r
 \r
- Copyright (C) 2004 Sam Hocevar <sam@hocevar.net>\r
+Permission is hereby granted, free of charge, to any person obtaining a copy\r
+of this software and associated documentation files (the "Software"), to deal\r
+in the Software without restriction, including without limitation the rights\r
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\r
+copies of the Software, and to permit persons to whom the Software is furnished\r
+to do so, subject to the following conditions:\r
 \r
- Everyone is permitted to copy and distribute verbatim or modified\r
- copies of this license document, and changing it is allowed as long\r
- as the name is changed.\r
-\r
-            DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE\r
-   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION\r
-\r
-  0. You just DO WHAT THE FUCK YOU WANT TO.\r
+The above copyright notice and this permission notice shall be included in all\r
+copies or substantial portions of the Software.\r
 \r
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\r
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\r
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\r
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\r
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\r
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\r
+THE SOFTWARE.
\ No newline at end of file
index c892336dc8a7394fc82ae20d22ce12a213ca0894..f1e9d7af4a13c1dd8aa1671ee1931fb4a7fb6951 100644 (file)
@@ -1,7 +1,6 @@
 wallabag is based on :
 * PHP Readability https://bitbucket.org/fivefilters/php-readability
 * Full Text RSS http://code.fivefilters.org/full-text-rss/src
-* Encoding https://github.com/neitanod/forceutf8
 * logo by Maylis Agniel https://github.com/wallabag/logo
 * icons http://icomoon.io
 * PHP Simple HTML DOM Parser (for Pocket import) http://simplehtmldom.sourceforge.net/
@@ -10,6 +9,8 @@ wallabag is based on :
 * Flash messages https://github.com/plasticbrain/PHP-Flash-Messages
 * Pagination https://github.com/daveismyname/pagination
 
-wallabag is developed by Nicolas Lœuillet under the Do What the Fuck You Want to Public License
+wallabag is mainly developed by Nicolas Lœuillet under the MIT License
+
+Thank you so much to @tcitworld and @mariroz.
 
 Contributors : https://github.com/wallabag/wallabag/graphs/contributors
\ No newline at end of file
diff --git a/GUIDELINES.md b/GUIDELINES.md
new file mode 100644 (file)
index 0000000..51e0de9
--- /dev/null
@@ -0,0 +1,53 @@
+# Guidelines for wallabag
+
+If you want to contribute to wallabag, there are some rules to follow. These rules were defined by the [PHP Framework Interop Group](http://www.php-fig.org).
+
+## Basic Coding Standard (PSR-1)
+
+This section of the standard comprises what should be considered the standard coding elements that are required to ensure a high level of technical interoperability between shared PHP code.
+
+* Files MUST use only `<?php` and `<?=` tags.
+
+* Files MUST use only UTF-8 without BOM for PHP code.
+
+* Files SHOULD either declare symbols (classes, functions, constants, etc.) or cause side-effects (e.g. generate output, change .ini settings, etc.) but SHOULD NOT do both.
+
+* Namespaces and classes MUST follow [PSR-0](https://github.com/php-fig/fig-standards/blob/master/accepted/PSR-0.md).
+
+* Class names MUST be declared in `StudlyCaps`.
+
+* Class constants MUST be declared in all upper case with underscore separators.
+
+* Method names MUST be declared in `camelCase`.
+
+You can read details on [PHP FIG website](http://www.php-fig.org/psr/psr-1/).
+
+## Coding Style Guide (PSR-2)
+
+This guide extends and expands on PSR-1, the basic coding standard.
+
+The intent of this guide is to reduce cognitive friction when scanning code from different authors. It does so by enumerating a shared set of rules and expectations about how to format PHP code.
+
+The style rules herein are derived from commonalities among the various member projects. When various authors collaborate across multiple projects, it helps to have one set of guidelines to be used among all those projects. Thus, the benefit of this guide is not in the rules themselves, but in the sharing of those rules.
+
+* Code MUST follow PSR-1.
+
+* Code MUST use 4 spaces for indenting, not tabs.
+
+* There MUST NOT be a hard limit on line length; the soft limit MUST be 120 characters; lines SHOULD be 80 characters or less.
+
+* There MUST be one blank line after the `namespace` declaration, and there MUST be one blank line after the block of `use` declarations.
+
+* Opening braces for classes MUST go on the next line, and closing braces MUST go on the next line after the body.
+
+* Opening braces for methods MUST go on the next line, and closing braces MUST go on the next line after the body.
+
+* Visibility MUST be declared on all properties and methods; `abstract` and `final` MUST be declared before the visibility; `static` MUST be declared after the visibility.
+
+* Control structure keywords MUST have one space after them; method and function calls MUST NOT.
+
+* Opening braces for control structures MUST go on the same line, and closing braces MUST go on the next line after the body.
+
+* Opening parentheses for control structures MUST NOT have a space after them, and closing parentheses for control structures MUST NOT have a space before.
+
+You can read details on [PHP FIG website](http://www.php-fig.org/psr/psr-2/).
\ No newline at end of file
index 0b54dff45a19a639c6a64fbbef09be07fcada90b..38866f7af91a91cd25cc5da81c877fb96a458f24 100644 (file)
--- a/README.md
+++ b/README.md
@@ -4,7 +4,6 @@ wallabag is a self hostable application allowing you to not miss any content any
 More information on our website: [wallabag.org](http://wallabag.org)
 
 ## License
-Copyright © 2010-2014 Nicolas Lœuillet <nicolas@loeuillet.org>
+Copyright © 2013-2014 Nicolas Lœuillet <nicolas@loeuillet.org>
 This work is free. You can redistribute it and/or modify it under the
-terms of the Do What The Fuck You Want To Public License, Version 2,
-as published by Sam Hocevar. See the COPYING file for more details.
+terms of the MIT License. See the COPYING file for more details.
index 2e38d5ccbd44c9ffbc3978153c05a8869f92b918..a033662c47134cfdb4ba622c54ffdd18e363a22c 100755 (executable)
@@ -1,10 +1,10 @@
-# How to manage translations of wallabag
+# How to manage translations for wallabag
 
-This guide will describe procedure of translation management of wallabag web application.
+This guide will describe the procedure of translation management of the wallabag web application.
 
-All translation are made using [gettext](http://en.wikipedia.org/wiki/Gettext) system and tools. 
+All translations are made using [gettext](http://en.wikipedia.org/wiki/Gettext) system and tools. 
 
-You will need [Poedit](http://www.poedit.net/download.php) editor to update, edit and create your translation files comfortably. In general, you can handle translations also without it: all can be done using gettext tools and your favorite plain text editor only. This guide, however, describes editing with Poedit. If you want to use gettext only, pls refer to xgettext manual page to update po files from sources (see also how it is used by Poedit below) and use msgunfmt tool to compile .mo files manually.  
+You will need the [Poedit](http://www.poedit.net/download.php) editor to update, edit and create your translation files easily. However, you can also handle translations without it: all can be done using gettext tools and your favorite plain text editor only. This guide, however, describes editing with Poedit. If you want to use gettext only, please refer to the xgettext manual page to update po files from sources (see also how it is used by Poedit below) and use the msgfmt tool to compile .mo files manually.  
 
 You need to know that translation phrases are stored in **".po"** files (for example: `locale/pl_PL.utf8/LC_MESSAGES/pl_PL.utf8.po`), which are then compiled into **".mo"** files using the **msgfmt** gettext tool or by Poedit, which will run msgfmt for you in the background. 
 
@@ -22,7 +22,7 @@ go to root of your installation of wallabag project and run next command:
 
 `rm -rf ./cache/*`
 
-(this may require root privileges if you run, for example Apatche web server with mod_php)
+(this may require root privileges if you run, for example Apache web server with mod_php)
 
 ### 2. Generate php files from all twig templates
 Do this using next command:
@@ -31,37 +31,37 @@ Do this using next command:
 
 OR
 
-from your browser: **http://your-wallabag-host.com/locale/tools/fillCache.php** (this may require removal of .htacces file in locale/ directory).
+from your browser: **http://your-wallabag-host.com/locale/tools/fillCache.php** (this may require removal of .htaccess file in locale/ directory).
 
 ### 3. Configure your Poedit
 Open Poedit editor, open Edit->Preferences. Go to "Parsers" tab, click on PHP and press "Edit" button. Make sure your "Parser command:" looks like
 
 `xgettext --no-location --force-po -o %o %C %K %F`
 
-Usualy it is required to add "--no-location" to default value. 
+Usually it is required to add "--no-location" to default value. 
 
-### 4. Open .po file you want to edit in Poedit and change it's settings
+### 4. Open .po file you want to edit in Poedit and change its settings
 Open, for example `locale/pl_PL.utf8/LC_MESSAGES/pl_PL.utf8.po` file in your Poedit.
 
-Go to "Catalog"->"Settings..." menu. Go to "Path" tab and add path to wallabag installaion in your local file system. This step can't be ommited as you will not be able to update phrases otherwise.
+Go to "Catalog"->"Settings..." menu. Then go to "Path" tab and add path to wallabag installation in your local file system. This step can't be omitted as you will not be able to update phrases otherwise.
 
 You can also check "project into" tab to be sure, that "Language" is set correctly (this will allow you to spell check your translation).
 
 ### 5. Update opened .po file from sources
 Once you have set your path correctly, you are able to update phrases from sources. Press "Update catalog - synchronize it with sources" button or go to "Catalog"->"Update from sources" menu.
 
+As a result you will see a confirmation popup with two tabs: "New strings" and "Obsolete strings". Please review and accept the changes (or press "Undo" if you see too many obsolete strings, as Poedit will remove them all - in this case please make sure all previous steps were performed without errors).
+As a result you will see confirmation popup with two tabs: "New strings" and "Obsolete strings". Please review and accept changes (or press "Undo" if you see too many obsolete strings, as Poedit will remove them all - in this case please make sure all previous steps are performed w/o errors).
 
 ### 6. Translate and save your .po file
-If you have any dificulties on this step, please consult with Poedit manual.
-Every time you save your .po file, Poedit will also comple appropriate .mo file by default (of course, if not disabled in preferences).
+If you have any difficulties with this step, please consult the Poedit manual.
+Every time you save your .po file, Poedit will also compile the appropriate .mo file by default (unless this is disabled in preferences).
 
-So, you are almost done.
+You are now almost done.
 
 ### 7. Clear cache again
 This step may be required if your web server runs php scripts in name of, say, www user (i.e. Apache with mod_php, not cgi).
 
 
-##To create new translation 
-Please simple create appropriate directories in locale folder and perform all steps, described above. Instead of opening an existing file just create new one.
+##To create new translation
+Simply copy the folder corresponding to the language you want to translate from, rename the folder and its files for the new language, and change the language in the project settings. Then start replacing all existing translations with your own.
 
old mode 100644 (file)
new mode 100755 (executable)
index 7e378b1..cf02c34
@@ -5,11 +5,6 @@ if (! is_writable('cache')) {
     die('The directory "cache" must be writeable by your web server user');
 }
 
-// Check if /db is writeable
-if (! is_writable('db') && STORAGE === 'sqlite') {
-    die('The directory "db" must be writeable by your web server user');
-}
-
 // install folder still present, need to install wallabag
 if (is_dir('install')) {
     require('install/index.php');
index 711b4ee0787aa6f906419bf642ab7969a4e06581..ef8c62f7f5606a8f72e2bedd05a5658580c23836 100644 (file)
@@ -1,28 +1,35 @@
 <?php
-/* 
-* Class for Flattr querying
-*/
-class FlattrItem {
+/**
+ * wallabag, self hostable application allowing you to not miss any content anymore
+ *
+ * @category   wallabag
+ * @author     Nicolas Lœuillet <nicolas@loeuillet.org>
+ * @copyright  2013
+ * @license    http://opensource.org/licenses/MIT see COPYING file
+ */
 
+class FlattrItem
+{
     public $status;
-    public $urltoflattr;
+    public $urlToFlattr;
     public $flattrItemURL;
-    public $numflattrs;
+    public $numFlattrs;
 
-    public function checkItem($urltoflattr,$id) {
-        $this->cacheflattrfile($urltoflattr, $id);
+    public function checkItem($urlToFlattr, $id)
+    {
+        $this->_cacheFlattrFile($urlToFlattr, $id);
         $flattrResponse = file_get_contents(CACHE . "/flattr/".$id.".cache");
         if($flattrResponse != FALSE) {
             $result = json_decode($flattrResponse);
-                       if (isset($result->message)){
+            if (isset($result->message)) {
                 if ($result->message == "flattrable") {
                     $this->status = FLATTRABLE;
                 }
             } 
-               elseif (is_object($result) && $result->link) {
+            elseif (is_object($result) && $result->link) {
                 $this->status = FLATTRED;
                 $this->flattrItemURL = $result->link;
-                $this->numflattrs = $result->flattrs;
+                $this->numFlattrs = $result->flattrs;
             }
             else {
                 $this->status = NOT_FLATTRABLE;
@@ -33,17 +40,18 @@ class FlattrItem {
         }
     }
 
-    private function cacheflattrfile($urltoflattr, $id) {
+    private function _cacheFlattrFile($urlToFlattr, $id)
+    {
         if (!is_dir(CACHE . '/flattr')) {
             mkdir(CACHE . '/flattr', 0777);
         }
 
         // if a cache flattr file for this url already exists and it's been less than one day than it have been updated, see in /cache
         if ((!file_exists(CACHE . "/flattr/".$id.".cache")) || (time() - filemtime(CACHE . "/flattr/".$id.".cache") > 86400)) {
-            $askForFlattr = Tools::getFile(FLATTR_API . $urltoflattr);
+            $askForFlattr = Tools::getFile(FLATTR_API . $urlToFlattr);
             $flattrCacheFile = fopen(CACHE . "/flattr/".$id.".cache", 'w+');
             fwrite($flattrCacheFile, $askForFlattr);
             fclose($flattrCacheFile);
         }
     }
-}
\ No newline at end of file
+}
index 59dfbe67009dd6bf7f09163f3c6eb34782ccb781..b56e4c545b23fa815d53c63b7c8e2a66e33cd0b7 100644 (file)
@@ -309,4 +309,38 @@ class Session
 
         return true; // User is not banned.
     }
+
+
+    /**
+     * Tells if a param exists in session
+     *
+     * @param $name name of the param to test
+     * @return bool
+     */
+    public static function isInSession($name)
+    {
+        return (isset($_SESSION[$name]) ? : FALSE);
+    }
+
+    /**
+     * Returns param in session
+     *
+     * @param $name name of the param to return
+     * @return mixed param or null
+     */
+    public static function getParam($name)
+    {
+        return (self::isInSession($name) ? $_SESSION[$name] : NULL);
+    }
+
+    /**
+     * Store value in session
+     *
+     * @param $name     name of the variable to store
+     * @param $value    value to store
+     */
+    public static function setParam($name, $value)
+    {
+        $_SESSION[$name] = $value;
+    }
 }
diff --git a/inc/3rdparty/site_config/custom/blogs.faz.net.txt b/inc/3rdparty/site_config/custom/blogs.faz.net.txt
new file mode 100644 (file)
index 0000000..4f2626f
--- /dev/null
@@ -0,0 +1,45 @@
+# Author: zinnober
+
+tidy: no
+prune: no
+
+# Set author
+author: //a[@rel='author']
+
+# Set date
+date: //span[@class='Datum']
+
+# Content is here
+body: //div[@class='Artikel']
+
+# Tidy up before article
+strip: //div[@id='FAZHeaderNeu']
+strip: //h2[@itemprop='headline']
+strip: //span[@class='Datum']
+strip: //span[@class='Autor']
+strip_id_or_class: ArticlePagerTop
+strip: //div[@class='FAZArtikelEinleitung']/h2
+
+# General cleanup
+strip: //div[@class='clear']
+strip: //span[@class='Bildnachweis']
+strip: //iframe
+strip_id_or_class: Community
+strip: ' ·  '
+
+# Remove tracking and ads
+strip_image_src: /l.gif?
+strip: //img[@width='1']
+strip_id_or_class: invisible
+strip_id_or_class: Anzeige
+strip_id_or_class: billboard
+
+# Remove clutter after article
+strip_id_or_class: Tagline
+strip_id_or_class: ArtikelAbbinder
+strip_id_or_class: FAZArtikelKommentare
+strip_id_or_class: ArtikelKommentieren
+strip_id_or_class: FAZContentRight
+
+# Try it yourself
+test_url: http://blogs.faz.net/wost/2014/08/17/viel-fuck-und-wenig-guter-sex-1239/
old mode 100644 (file)
new mode 100755 (executable)
index 03bd195..86c9e07
@@ -1,6 +1,6 @@
-title: //div[@class='meta']/h2/a\r
-author: //div[@class='meta']/h2/following-sibling::p/a/text()\r
-date://div[@class='meta']/h2/strong\r
-body: //div[@id='article']\r
+title: //div[@class='meta']/h2/a
+author: //div[@class='meta']/h2/following-sibling::p/a/text()
+date://div[@class='meta']/h2/strong
+body: //div[@id='article']
 strip: //div[@class='domore']
 test_url: http://24ways.org/2011/composing-the-new-canon
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/36kr.com.txt b/inc/3rdparty/site_config/standard/36kr.com.txt
new file mode 100755 (executable)
index 0000000..d73d7de
--- /dev/null
@@ -0,0 +1,8 @@
+title: //h1[contains(@class, 'entry-title')]
+date: //meta[@name='weibo: article:create_at']/@content
+body: //div[contains(@class, 'mainContent')]
+strip_id_or_class: related_topics
+
+prune: no
+
+test_url: http://www.36kr.com/p/207879.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 43a10ae..531cac1
@@ -1,6 +1,6 @@
-title: //div[@class='post_header']//h2/a\r
-author: //span[@class='author']\r
-date: //span[@class='date']\r
-body: //div[@id='Content']\r
+title: //div[@class='post_header']//h2/a
+author: //span[@class='author']
+date: //span[@class='date']
+body: //div[@id='Content']
 
 test_url: http://37signals.com/svn/posts/2785-the-end-of-the-it-department
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c4e7940..80a3958
@@ -1,9 +1,9 @@
-body: //div[@class='content']\r
-date: //div[@class='content']/h2\r
-strip: //div[@class='content']/h2\r
-title: //div[@class='content']/h3\r
-\r
-strip: //div[@id='postmenu']\r
-strip: //div[@class='trackback']\r
-tidy: no\r
+body: //div[@class='content']
+date: //div[@class='content']/h2
+strip: //div[@class='content']/h2
+title: //div[@class='content']/h3
+
+strip: //div[@id='postmenu']
+strip: //div[@class='trackback']
+tidy: no
 test_url: http://www.3quarksdaily.com/3quarksdaily/2012/01/martin-luther-king-i-have-a-dream.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e8073f6..3777c66
@@ -1,4 +1,4 @@
-body: //*[@class = 'content']\r
-author: //*[@class = 'submitted']/a\r
+body: //*[@class = 'content']
+author: //*[@class = 'submitted']/a
 date: substring-after(//*[@class = 'submitted']/text(), '|')
 test_url: http://www.43folders.com/2011/04/22/cranking
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 68e6b2d..b9b7e9d
@@ -1,27 +1,27 @@
-# very loose setup for both 500px.com/photo/* and 500px.com/blog/*\r
-# photo page example: http://500px.com/photo/4181666\r
-# blog page example: http://500px.com/blog/110\r
-\r
-# avoid "no text" error\r
-tidy:no\r
-prune:no\r
-\r
-# reorganize photo page elements\r
-#body://div[contains(@class,'container')]\r
-move_into(body)://div[contains(@id,'thephoto')]\r
-move_into(body)://div[contains(@id,'description')]\r
-move_into(body)://div[contains(@id,'tags')]\r
-move_into(body)://div[contains(@id,'photo-info')]\r
-\r
-# clean photo page info\r
-strip://span[contains(@id,'copyright')]\r
-strip://*[contains(@id,'store')]\r
-strip://*[contains(@id,'user-info')]\r
-strip://*[contains(@id,'photo-stats')]\r
-strip://*[contains(@id,'voting_controls_container')]\r
-strip://*[contains(@id,'more-photos')]\r
-strip://*[contains(@id,'embed-photo')]\r
-\r
-# clean blog page side bar\r
+# very loose setup for both 500px.com/photo/* and 500px.com/blog/*
+# photo page example: http://500px.com/photo/4181666
+# blog page example: http://500px.com/blog/110
+
+# avoid "no text" error
+tidy:no
+prune:no
+
+# reorganize photo page elements
+#body://div[contains(@class,'container')]
+move_into(body)://div[contains(@id,'thephoto')]
+move_into(body)://div[contains(@id,'description')]
+move_into(body)://div[contains(@id,'tags')]
+move_into(body)://div[contains(@id,'photo-info')]
+
+# clean photo page info
+strip://span[contains(@id,'copyright')]
+strip://*[contains(@id,'store')]
+strip://*[contains(@id,'user-info')]
+strip://*[contains(@id,'photo-stats')]
+strip://*[contains(@id,'voting_controls_container')]
+strip://*[contains(@id,'more-photos')]
+strip://*[contains(@id,'embed-photo')]
+
+# clean blog page side bar
 strip://*[contains(@class,'col d3 clearafter')]
 test_url: http://500px.com/photo/3641041?from=editors
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index dce0df4..59b70a9
@@ -1,9 +1,9 @@
-body: //*[@id="episode"]\r
-prune: no\r
-tidy: no\r
-\r
-autodetect_next_page: no\r
-strip_id_or_class: player\r
-\r
+body: //*[@id="episode"]
+prune: no
+tidy: no
+
+autodetect_next_page: no
+strip_id_or_class: player
+
 strip://*[@id="header"]
 test_url: http://5by5.tv/buildanalyze/60
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/7newsbelize.com.txt b/inc/3rdparty/site_config/standard/7newsbelize.com.txt
new file mode 100755 (executable)
index 0000000..46d09f8
--- /dev/null
@@ -0,0 +1,7 @@
+title: //*[@id='sstitle']
+body: //div[@id='sstory']
+strip_id_or_class: newsoptions
+prune: no
+
+test_url: http://www.7newsbelize.com/sstory.php?nid=25654
+test_url: http://www.7newsbelize.com/7news.xml
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 84380e7..8bf6a4c
@@ -1,9 +1,9 @@
-title: //h2[@class='border']\r
-body: //div[@class='padding']\r
-\r
-convert_double_br_tags: yes\r
-\r
-strip: //div[@id='social_sharing']\r
-strip: //div[@class='socialLinks']\r
+title: //h2[@class='border']
+body: //div[@class='padding']
+
+convert_double_br_tags: yes
+
+strip: //div[@id='social_sharing']
+strip: //div[@class='socialLinks']
 
 test_url: http://www.944.com/articles/mild-obsessions-frock-la-get-to-know-victoria-tik-s-haute-sustainable-fashion-line/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/README.md b/inc/3rdparty/site_config/standard/README.md
new file mode 100755 (executable)
index 0000000..9040ba8
--- /dev/null
@@ -0,0 +1,38 @@
+Full-Text RSS site config files
+================
+
+[Full-Text RSS](http://fivefilters.org/content-only/), our article extraction tool, makes use of site-specific extraction rules to improve results. Each time a URL is processed, it checks to see if there are extraction rules for the site being processed. If there are no site patterns, it tries to detect the content block automatically.
+
+This repository contains the site config files we use in Full-Text RSS.
+
+### Contributing changes
+
+We chose GitHub for this set of files because they offer one feature which we hope will make contributing changes easier: [file editing](https://github.com/blog/844-forking-with-the-edit-button) through the web interface. 
+
+You can now make changes to any of our site config files and request that your changes be pulled into the main set we maintain. This is what GitHub calls the Fork and Pull model:
+
+> The Fork & Pull Model lets anyone fork an existing repository and push changes to their personal fork without requiring access be granted to the source repository. The changes must then be pulled into the source repository by the project maintainer. This model reduces the amount of friction for new contributors and is popular with open source projects because it allows people to work independently without upfront coordination.
+
+When we receive a pull request we'll review the changes and if everything's okay we'll update our copy.
+
+If a site is not in our set, you can create a file for it in the same way. See [Creating files on GitHub](https://github.com/blog/1327-creating-files-on-github).
+
+### How to write a site config file
+
+The quickest and simplest way is to use our [point-and-click interface](http://siteconfig.fivefilters.org). It's a simple tool only intended to create a rule to extract the correct content block. 
+
+For further refinements, e.g. selecting the title, stripping elements, dealing with multi-page articles, please see our [help page](http://help.fivefilters.org/customer/portal/articles/223153-site-patterns).
+
+### Instapaper
+
+When we introduced site patterns, we chose to adopt the [same format](http://blog.instapaper.com/post/730281947) used by Instapaper. This allows us to make use of the existing extraction rules contributed by Instapaper users. 
+
+Marco, Instapaper's creator, graciously opened up the database of contributions to everyone:
+
+> And, recognizing that your efforts could be useful to a wide range of other tools and services, I'll make the list of all of these site-specific configurations available to the public, free, with no strings attached.
+
+Most of the extraction rules in our set are borrowed from Instapaper. You can see the list maintained by Instapaper at [instapaper.com/bodytext/](http://instapaper.com/bodytext/) (login required).
+
+### Testing site config files
+
+Currently you need a copy of Full-Text RSS to test changes to the site config files. In the future we will try to make this process easier.
old mode 100644 (file)
new mode 100755 (executable)
index 379592e..b60c15d
@@ -1,10 +1,10 @@
-title: //meta[@property='og:title']/@content\r
-body: //*[@class='fliesstext_detail' or @class='detail_fliesstext'] | //img[@itemprop="image" and starts-with(@src, "/sixcms/media.php/")]\r
-\r
-strip_id_or_class: socialshareprivacy1\r
-strip_id_or_class: zvaFacebookButton\r
-\r
-tidy: no\r
-prune: no\r
-\r
+title: //meta[@property='og:title']/@content
+body: //*[@class='fliesstext_detail' or @class='detail_fliesstext'] | //img[@itemprop="image" and starts-with(@src, "/sixcms/media.php/")]
+
+strip_id_or_class: socialshareprivacy1
+strip_id_or_class: zvaFacebookButton
+
+tidy: no
+prune: no
+
 test_url: http://www.aachener-nachrichten.de/lokales/aachen-detail-an/2517757
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4d76fac..013afa4
@@ -1,10 +1,10 @@
-title: //meta[@property='og:title']/@content\r
-body: //*[@class='fliesstext_detail' or @class='detail_fliesstext'] | //img[@itemprop="image" and starts-with(@src, "/sixcms/media.php/")]\r
-\r
-strip_id_or_class: socialshareprivacy1\r
-strip_id_or_class: zvaFacebookButton\r
-\r
-tidy: no\r
-prune: no\r
-\r
+title: //meta[@property='og:title']/@content
+body: //*[@class='fliesstext_detail' or @class='detail_fliesstext'] | //img[@itemprop="image" and starts-with(@src, "/sixcms/media.php/")]
+
+strip_id_or_class: socialshareprivacy1
+strip_id_or_class: zvaFacebookButton
+
+tidy: no
+prune: no
+
 test_url: http://www.aachener-zeitung.de/sixcms/detail.php?template=az_detail&id=2552718
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a99833d..43aadc4
@@ -1,7 +1,7 @@
-title: //meta[@property='og:title']/@content\r
-body: //div[@class='datosi' or @class='date' or @class='photo-alt1' or @class='text']\r
-strip_id_or_class: colB\r
-\r
-prune: no\r
+title: //meta[@property='og:title']/@content
+body: //div[@class='datosi' or @class='date' or @class='photo-alt1' or @class='text' or @itemprop='articleBody']
+strip_id_or_class: colB
+
+prune: no
 
 test_url: http://www.abc.es/20120209/tv-series/abci-house-ultima-temporada-201202090936.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 5e6269c..22b3a0f
@@ -1,10 +1,18 @@
-title: //h1\r
-author: //div[@class="byline"]/a\r
-date: //span[@class="timestamp"]\r
-\r
-strip: //p[@class="topics"]\r
-strip: //h1\r
-strip: //div[@class="byline"]\r
-strip: //p[@class="published"]\r
+title: //div[@class='article section']//h1
+author: //div[@class="byline"]/a
+date: //span[@class="timestamp"]
+body: //div[@class="page section"]
+
+strip: //a[@class="inline-caption"]
+strip: //p[@class="ticker section noprint"]
+strip: //p[@class="topics"]
+strip: //h1
+strip: //div[@class="byline"]
+strip: //p[@class="published"]
 strip: //div[contains(@class,"featured-scroller")]
-test_url: http://www.abc.net.au/news/2011-11-08/crabb-carbon-legislation-abbott-demolition/3652544
\ No newline at end of file
+strip_id_or_class: footer
+
+tidy: no
+
+test_url: http://www.abc.net.au/news/2013-03-27/open-speed-highways-change-clp-giles/4597892
+test_url: http://www.abc.net.au/news/2013-04-30/credit-growth-remains-subdued/4660054?section=business
old mode 100644 (file)
new mode 100755 (executable)
index c515d3e..8d36735
@@ -1,27 +1,27 @@
-title: //h1[@class='headline']\r
-body: //div[@id='storyText']\r
-# for video entries\r
-body: //img[@id='ff-img'] | //div[@id='meta']//div[contains(@class, 'overview')]\r
-author: //div[@class='byline']\r
-date: //div[@class='date']\r
-strip: //*[@id='date_partner']\r
-\r
-strip: //div[@class='breadcrumb']\r
-strip: //div[contains(@class,'show_tools')]\r
-strip: //div[@id='sponsoredByAd']\r
-strip: //div[contains(@class,'rel_container')]\r
-strip: //p[a[starts-with(@href, 'http://www.twitter.com')]]\r
-strip: //p[a[starts-with(@href, 'http://www.facebook.com')]]\r
-strip: //p[contains(., 'Click here to return to')]\r
-#strip_id_or_class: media\r
-strip_id_or_class: mediaplayer\r
-\r
-replace_string(<link rel="image_src" href="http): <img id="ff-img" src="http\r
-\r
-prune: no\r
-\r
-single_page_link: concat(//li[@class='pager']//a/@href, '&singlePage=true')\r
-\r
-test_url: http://abcnews.go.com/Politics/newt-gingrich-rocky-rollout-presidential-campaign-recover/story?id=13632744\r
-# multi-page\r
+title: //h1[@class='headline']
+body: //div[@id='storyText']
+# for video entries
+body: //img[@id='ff-img'] | //div[@id='meta']//div[contains(@class, 'overview')]
+author: //div[@class='byline']
+date: //div[@class='date']
+strip: //*[@id='date_partner']
+
+strip: //div[@class='breadcrumb']
+strip: //div[contains(@class,'show_tools')]
+strip: //div[@id='sponsoredByAd']
+strip: //div[contains(@class,'rel_container')]
+strip: //p[a[starts-with(@href, 'http://www.twitter.com')]]
+strip: //p[a[starts-with(@href, 'http://www.facebook.com')]]
+strip: //p[contains(., 'Click here to return to')]
+#strip_id_or_class: media
+strip_id_or_class: mediaplayer
+
+replace_string(<link rel="image_src" href="http): <img id="ff-img" src="http
+
+prune: no
+
+single_page_link: concat(//li[@class='pager']//a/@href, '&singlePage=true')
+
+test_url: http://abcnews.go.com/Politics/newt-gingrich-rocky-rollout-presidential-campaign-recover/story?id=13632744
+# multi-page
 test_url: http://abcnews.go.com/Blotter/family-freed-american-hostage-somalia-seals-obama/story?id=15439544
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b5d8507..45d6653
@@ -1,9 +1,9 @@
-title: //div[@id='H_docTitle']\r
-\r
-body: //div[@id='H_meta' or @id='H_content' or @id='F_footer']\r
-\r
-strip_id_or_class: F_toenail\r
-\r
-prune: no\r
-\r
+title: //div[@id='H_docTitle']
+
+body: //div[@id='H_meta' or @id='H_content' or @id='F_footer']
+
+strip_id_or_class: F_toenail
+
+prune: no
+
 test_url: http://www.accesstoinsight.org/lib/authors/nyanaponika/wheel026.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 60ede6a..2195865
@@ -1,3 +1,3 @@
-body: //div[starts-with(@id, 'news-id-')]\r
-\r
+body: //div[starts-with(@id, 'news-id-')]
+
 test_url: http://acidcow.com/fun/20933-acid-picdump-83-pics.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 5ddf542..2803611
@@ -1,9 +1,9 @@
-title://h1[@class="title"]\r
-author://div[@class="submitted"]/span/a\r
-date://div[@class="submitted"]/span\r
-body://div[@class="content-wrapper"]\r
-\r
-strip://div[@id="skip-link"]\r
-strip://div[@id="region-content-3-3"]\r
+title://h1[@class="title"]
+author://div[@class="submitted"]/span/a
+date://div[@class="submitted"]/span
+body://div[@class="content-wrapper"]
+
+strip://div[@id="skip-link"]
+strip://div[@id="region-content-3-3"]
 strip://div[@id="section-footer"]
 test_url: https://www.acquia.com/blog/drupals-long-warmth-toward-third-party-code
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 57d86d2..6b1d67f
@@ -1,5 +1,5 @@
-tidy:no\r
-date: //time[@class='updated']\r
-dissolve: //ul[@class='video-gallery']/li\r
+tidy:no
+date: //time[@class='updated']
+dissolve: //ul[@class='video-gallery']/li
 dissolve: //ul[@class='video-gallery']
 test_url: http://www.acroswing.fr/actualites/competition_rock/selectif_bellegarde_sur_valserine__2012-02-26.php
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/aftenposten.no.txt b/inc/3rdparty/site_config/standard/aftenposten.no.txt
new file mode 100755 (executable)
index 0000000..8a69c35
--- /dev/null
@@ -0,0 +1,5 @@
+title: //h1[@class='articleTitle ']
+body: //div[@class='bodyText widget storyContent']
+strip: //p/span[@class='quote']/..
+strip_id_or_class: 'pull1'
+test_url: https://www.aftenposten.no/meninger/spaltister/Portrett-av-scenekunstneren-som-ung-mann-7167959.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/aftonbladet.se.txt b/inc/3rdparty/site_config/standard/aftonbladet.se.txt
new file mode 100755 (executable)
index 0000000..b6c576a
--- /dev/null
@@ -0,0 +1,13 @@
+author: //article//address[contains(@class, 'author')]
+body: //article[.//div[contains(@class, 'abBodyText')]]//*[contains(@class, 'abLeadText') or contains(@class, 'abBodyText') or contains(@class, 'abImageBlock') or contains(@class, 'abIGSatellite')]
+
+strip: //address//img
+strip: //footer
+strip_id_or_class: abSticky
+
+prune: no
+
+test_url: http://www.aftonbladet.se/sportbladet/hockey/sverige/allsvenskan/article17498194.ab
+test_url: http://www.aftonbladet.se/debatt/article16207536.ab
+test_url: http://www.aftonbladet.se/debatt/debattamnen/politik/article17483377.ab
+test_url: http://www.aftonbladet.se/rss.xml
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 408e909..b2d88a0
@@ -1,15 +1,15 @@
-body: //div[@id='content']\r
-\r
-# clean up recipe pages\r
-strip: //h2[@class='fn'] | //h2[@class='double-lined'] | //h3 | //div[@id='threeColumn2'] | //div[@id='threeColumn3']\r
-\r
-#recipe pages\r
-strip_id_or_class: "recipe-feedback"\r
-strip_id_or_class: "comments"\r
-strip_id_or_class: "procedure-number"\r
-strip_id_or_class: "more-with-author"\r
-\r
-#slice\r
-strip_id_or_class: "inner"\r
+body: //div[@id='content']
+
+# clean up recipe pages
+strip: //h2[@class='fn'] | //h2[@class='double-lined'] | //h3 | //div[@id='threeColumn2'] | //div[@id='threeColumn3']
+
+#recipe pages
+strip_id_or_class: "recipe-feedback"
+strip_id_or_class: "comments"
+strip_id_or_class: "procedure-number"
+strip_id_or_class: "more-with-author"
+
+#slice
+strip_id_or_class: "inner"
 
 test_url: http://aht.seriouseats.com/archives/2009/12/the-burger-lab-salting-ground-beef.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/albayan.ae.txt b/inc/3rdparty/site_config/standard/albayan.ae.txt
new file mode 100755 (executable)
index 0000000..f6c093d
--- /dev/null
@@ -0,0 +1,6 @@
+body: //div[@id='main-column']//div[@class='content']
+
+prune: no
+
+test_url: http://www.albayan.ae/across-the-uae/education/2013-08-29-1.1949645
+test_url: http://www.albayan.ae/1.448?ot=ot.AjaxPageLayout
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/alexduner.com.txt b/inc/3rdparty/site_config/standard/alexduner.com.txt
new file mode 100755 (executable)
index 0000000..bd9de9d
--- /dev/null
@@ -0,0 +1,4 @@
+body: //section[@class='content']
+date: //span[1]
+author: //h1[@id='sitetitle']
+test_url: https://alexduner.com/blog/2013/1/something-i-learned-today
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/alexduner.squarespace.com.txt b/inc/3rdparty/site_config/standard/alexduner.squarespace.com.txt
new file mode 100755 (executable)
index 0000000..875405e
--- /dev/null
@@ -0,0 +1,4 @@
+body: //section[@class='content']
+date: //span[1]
+author: //h1[@id='sitetitle']
+test_url: https://alexduner.squarespace.com/blog/2013/1/tech-culture-from-the-outside-looking-in
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 090f7eb..7a7096e
@@ -1,12 +1,12 @@
-title: //h1[@class='title']\r
-author: //h3[@class='byline']/a\r
-date: //div[@class='ishinfo']\r
-\r
-body: //*[@id='articletext']\r
-strip_id_or_class: 'ishinfo'\r
-strip_id_or_class: 'metastuff'\r
-strip_id_or_class: 'learnmore'\r
-strip_id_or_class: 'discuss'\r
-\r
+title: //h1[@class='title']
+author: //h3[@class='byline']/a
+date: //div[@class='ishinfo']
+
+body: //*[@id='articletext']
+strip_id_or_class: 'ishinfo'
+strip_id_or_class: 'metastuff'
+strip_id_or_class: 'learnmore'
+strip_id_or_class: 'discuss'
+
 prune: no
 test_url: http://www.alistapart.com/articles/organizing-mobile/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4f0148f..d3bf401
@@ -1,8 +1,8 @@
-title: //span[@id='DetailedTitle']\r
-body: //td[@id='tdTextContent']\r
-strip_id_or_class: Skyscrapper_Body\r
-date: //span[@id='ctl00_cphBody_lblDate']\r
-author: //div[@id="dvAuthorInfo"]//a/text()\r
-strip: //table[ tbody/tr/td/object ]\r
-prune: no\r
+title: //span[@id='DetailedTitle']
+body: //td[@id='tdTextContent']
+strip_id_or_class: Skyscrapper_Body
+date: //span[@id='ctl00_cphBody_lblDate']
+author: //div[@id="dvAuthorInfo"]//a/text()
+strip: //table[ tbody/tr/td/object ]
+prune: no
 test_url: http://www.aljazeera.com/indepth/opinion/2012/01/2012114121925380575.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e9767bd..85dc2a5
@@ -1,14 +1,14 @@
-title: //h1[@id='itemTitle']\r
-body: //img[@id="ctl00_CenterColumnPlaceHolder_recipe_photoStuff_imgPhoto"] | //div[@id='ctl00_CenterColumnPlaceHolder_recipe_divSubmitter'] | //div[contains(@class, 'recipe-details-content')]\r
-strip: //div[@class='top-left' or @class='top-right' or @class='bot-left' or @class='bot-right']\r
-strip: //div[contains(@class, 'rightcoltoolsdiv')]\r
-strip: //div[contains(@class, 'servings-form')]\r
-strip: //p[@class='nutritional-information']\r
-strip: //a[contains(@class, 'nutritional-information') or contains(@class, 'nutritionanchor')]\r
-strip: //div[@id='nutri-info']/div[contains(@class, 'title')]\r
-strip: //img[@id='ctl00_CenterColumnPlaceHolder_recipe_imgSubmitter']\r
-strip_id_or_class: eshaAttribute\r
-strip_id_or_class: eshaParagraph\r
-prune: no\r
+title: //h1[@id='itemTitle']
+body: //img[@id="ctl00_CenterColumnPlaceHolder_recipe_photoStuff_imgPhoto"] | //div[@id='ctl00_CenterColumnPlaceHolder_recipe_divSubmitter'] | //div[contains(@class, 'recipe-details-content')]
+strip: //div[@class='top-left' or @class='top-right' or @class='bot-left' or @class='bot-right']
+strip: //div[contains(@class, 'rightcoltoolsdiv')]
+strip: //div[contains(@class, 'servings-form')]
+strip: //p[@class='nutritional-information']
+strip: //a[contains(@class, 'nutritional-information') or contains(@class, 'nutritionanchor')]
+strip: //div[@id='nutri-info']/div[contains(@class, 'title')]
+strip: //img[@id='ctl00_CenterColumnPlaceHolder_recipe_imgSubmitter']
+strip_id_or_class: eshaAttribute
+strip_id_or_class: eshaParagraph
+prune: no
 
 test_url: http://allrecipes.com/Recipe/Taco-Pie/Detail.aspx?src=rotd
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index cd52498..f8c67d0
@@ -1,10 +1,13 @@
-title://div[@class="article-title"]/h1[@class="title"]\r
-date: //p[@class="article-date"]\r
-body://*[@class="article-body article-text"]\r
-# Trim out related posts at bottom of article\r
-strip://blockquote[@class="memo"]\r
-\r
-# Yup, no idea why author won't work...\r
-author://div[@class="page-header article-header clearfix"]/p[@class="title"]\r
+title://div[@class="article-title"]/h1[@class="title"]
+date: //p[@class="article-date"]
+body://div[contains(@class, "article-body")]
+# Trim out related posts at bottom of article
+strip://blockquote[@class="memo"]
+
+tidy: no
+
+# Yup, no idea why author won't work...
+author://div[@class="page-header article-header clearfix"]/p[@class="title"]
 # [Marco:] Author won't work here because the page defines the "home" link under the author's name as rel="author", which always gets priority if the page has defined it.
-test_url: http://allthingsd.com/20120513/exclusive-yahoos-thompson-out-levinsohn-in-board-settlement-with-loeb-nears-completion/
\ No newline at end of file
+test_url: http://allthingsd.com/20120513/exclusive-yahoos-thompson-out-levinsohn-in-board-settlement-with-loeb-nears-completion/
+test_url: http://allthingsd.com/20131010/google-cio-ben-fried-on-how-google-works/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3c26c68..a13a725
@@ -1,8 +1,8 @@
-title: //div[@id='pageHdr']//h1\r
-body: //div[@id='pageHdr']/*[@class='dek'] | //div[@id='printArticle' or @id='slideShowPrint']\r
-strip: //div[contains(@class, 'infoBox') or @id='infoBox']\r
-single_page_link: //li[@id='print']/a\r
-\r
+title: //div[@id='pageHdr']//h1
+body: //div[@id='pageHdr']/*[@class='dek'] | //div[@id='printArticle' or @id='slideShowPrint']
+strip: //div[contains(@class, 'infoBox') or @id='infoBox']
+single_page_link: //li[@id='print']/a
+
 prune: no
-\r
+
 test_url: http://www.allyou.com/budget-home/money-shopping/freebies-online-00400000066392/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f5865f8..da1a67b
@@ -1,11 +1,11 @@
-body: //div[@class = 'entry']\r
-date: substring-after(//p[@class="date"],'بتاريخ ')\r
-strip_id_or_class: date\r
-strip_id_or_class: follow-single\r
-strip_id_or_class: ratingblock\r
-strip_id_or_class: newRatingHolder\r
-strip_id_or_class: postmetadata\r
-strip_id_or_class: addthis_toolbox\r
-strip_id_or_class: addthis_default_style\r
+body: //div[@class = 'entry']
+date: substring-after(//p[@class="date"],'بتاريخ ')
+strip_id_or_class: date
+strip_id_or_class: follow-single
+strip_id_or_class: ratingblock
+strip_id_or_class: newRatingHolder
+strip_id_or_class: postmetadata
+strip_id_or_class: addthis_toolbox
+strip_id_or_class: addthis_default_style
 strip_id_or_class: size-full
 test_url: http://alphabeta.argaam.com/?p=35657
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d006000..be7c43d
@@ -1,9 +1,9 @@
-body: //div[@id = "article-view"]\r
-body: //div[contains(@class, 'article')]//div[contains(@class, 'photo_bg')]\r
-author: //p[@class = "author"]\r
-strip: //h1\r
-strip: //h2\r
-strip_id_or_class: author\r
-prune: no\r
-test_url: http://www.alriyadh.com/2011/10/10/article674357.html\r
+body: //div[@id = "article-view"]
+body: //div[contains(@class, 'article')]//div[contains(@class, 'photo_bg')]
+author: //p[@class = "author"]
+strip: //h1
+strip: //h2
+strip_id_or_class: author
+prune: no
+test_url: http://www.alriyadh.com/2011/10/10/article674357.html
 test_url: http://www.alriyadh.com/net/article/780935
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/alternet.org.txt b/inc/3rdparty/site_config/standard/alternet.org.txt
new file mode 100755 (executable)
index 0000000..e92252e
--- /dev/null
@@ -0,0 +1,4 @@
+single_page_link: //div[contains(@class, 'story_tools')]//a[contains(@href, '/print/')]
+
+test_url: http://www.alternet.org/civil-liberties/noam-chomsky-surveillance-state-beyond-imagination-being-created-one-freest
+test_url: http://feeds.feedblitz.com/alternet
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7fd4719..a5bd03b
@@ -1,10 +1,10 @@
-title: //h1\r
-\r
-author: substring-after(//div[@class="enableBullets"]/preceding-sibling::p[1], "By ")\r
-\r
-date: //div/a[contains (@href, "issue")]\r
-\r
-move_into(//div[@class="enableBullets"]/p): (//div[@id="content"]//img)[1]\r
-\r
+title: //h1
+
+author: substring-after(//div[@class="enableBullets"]/preceding-sibling::p[1], "By ")
+
+date: //div/a[contains (@href, "issue")]
+
+move_into(//div[@class="enableBullets"]/p): (//div[@id="content"]//img)[1]
+
 body: //div[@class="enableBullets"]
 test_url: http://alumni.stanford.edu/get/page/magazine/article/?article_id=54819
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/amandala.com.bz.txt b/inc/3rdparty/site_config/standard/amandala.com.bz.txt
new file mode 100755 (executable)
index 0000000..fb0e21b
--- /dev/null
@@ -0,0 +1,6 @@
+body: //div[@id='content']//div[contains(@class, 'content')]
+strip_id_or_class: widget
+strip: //a[contains(@href, 'upm_export=')]
+
+test_url: http://amandala.com.bz/news/feed/
+test_url: http://amandala.com.bz/news/poor-pse-results-30-raise/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 1a23c4b..cd7ad15
@@ -1,19 +1,19 @@
-title: //span[@id = 'btAsinTitle']\r
-body: (//*[@id='prodImageCell']//a)[1] | //div[@id = 'ps-content'] | //span[@id='actualPriceValue'] | //h2[.='Product Details']/following-sibling::div | //div[@class='h2' and .='Product Description']/following-sibling::div\r
-#strip_id_or_class: quantityDropdownDiv\r
-#strip_id_or_class: addToCartSpan\r
-#strip_id_or_class: oneClickDiv\r
-strip_id_or_class: nocontent\r
-strip_id_or_class: masDynamicConten\r
-strip_id_or_class: dynamic-content\r
-prune: no\r
-\r
-find_string: <span id="actualPriceValue">\r
-replace_string: <span id="actualPriceValue"><br />Price: \r
-\r
-strip_id_or_class: collapsePS\r
-strip_id_or_class: expandPS\r
-strip_id_or_class: psPlaceHolde\r
-strip: //li[contains(., 'update product info') or contains(., 'give feedback on images')]\r
-\r
+title: //span[@id = 'btAsinTitle']
+body: (//*[@id='prodImageCell']//a)[1] | //div[@id = 'ps-content'] | //span[@id='actualPriceValue'] | //h2[.='Product Details']/following-sibling::div | //div[@class='h2' and .='Product Description']/following-sibling::div
+#strip_id_or_class: quantityDropdownDiv
+#strip_id_or_class: addToCartSpan
+#strip_id_or_class: oneClickDiv
+strip_id_or_class: nocontent
+strip_id_or_class: masDynamicConten
+strip_id_or_class: dynamic-content
+prune: no
+
+find_string: <span id="actualPriceValue">
+replace_string: <span id="actualPriceValue"><br />Price: 
+
+strip_id_or_class: collapsePS
+strip_id_or_class: expandPS
+strip_id_or_class: psPlaceHolde
+strip: //li[contains(., 'update product info') or contains(., 'give feedback on images')]
+
 test_url: http://www.amazon.com/Common-Sense-Forestry-Living-Mother/dp/1931498210/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index dee0e86..7145f3f
@@ -1,6 +1,6 @@
-title: //div[@class='head']/h2/a\r
-author: //div[@class='head']/a\r
-date: //div[@class='head']/p[@class='date']/a\r
-body: //div[@class='copy']\r
+title: //div[@class='head']/h2/a
+author: //div[@class='head']/a
+date: //div[@class='head']/p[@class='date']/a
+body: //div[@class='copy']
 strip: //p[@class='meta']
 test_url: http://americandrink.net/post/10567188712/free-the-hooch
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b1673b6..31723f8
@@ -1,10 +1,10 @@
-title: //div[@class="editorial-content"]/h3\r
-body: //div[@class="hero-image" or @class="editorial-content"]\r
-\r
-strip: //ul[@class="hero-caption"]\r
-strip_id_or_class: footer\r
-\r
-prune: no\r
-tidy: no\r
-\r
+title: //div[@class="editorial-content"]/h3
+body: //div[@class="hero-image" or @class="editorial-content"]
+
+strip: //ul[@class="hero-caption"]
+strip_id_or_class: footer
+
+prune: no
+tidy: no
+
 test_url: http://www.americascup.com/en/Latest/News/2012/3/Coutts-and-Peyron-tell-transformative-tale-at-Global-Sports-Forum/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8bf31ec..c2b62b5
@@ -1,5 +1,5 @@
-title: //h1[@class="post-title"]\r
-author: //span[@class="author"]/a\r
-date: //span[@class="date"]\r
+title: //h1[@class="post-title"]
+author: //span[@class="author"]/a
+date: //span[@class="date"]
 body: //div[@class="post-content main"]
 test_url: http://www.americastestkitchenfeed.com/gadgets-and-gear/2012/07/chill-out-with-tovolos-king-cube-silicone-ice-cube-tray/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/amptoons.com.txt b/inc/3rdparty/site_config/standard/amptoons.com.txt
new file mode 100755 (executable)
index 0000000..87547c6
--- /dev/null
@@ -0,0 +1,8 @@
+title: //title
+
+body: //div[@class="entry-content"]
+
+author: //span[@class="author vcard"]
+
+date: //span[@class="entry-date"]
+test_url: http://www.amptoons.com/blog/2013/03/14/open-thread-and-link-farm-i-hate-being-sick-edition/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8067e03..7d80491
@@ -1,11 +1,11 @@
-author: //a[@class='b'][1]\r
-date: substring-after(substring-before(//div, 'Posted in'), ' on ')\r
-strip_image_src: /content/images/globals/\r
-strip: //h2[. = 'Page 1']/preceding::p\r
-strip: //h2\r
-\r
-prune: no\r
-\r
-single_page_link: concat('http://www.anandtech.com/print/', substring-after(//meta[@property='og:url']/@content, '/show/'))\r
-\r
+author: //a[@class='b'][1]
+date: substring-after(substring-before(//div, 'Posted in'), ' on ')
+strip_image_src: /content/images/globals/
+strip: //h2[. = 'Page 1']/preceding::p
+strip: //h2
+
+prune: no
+
+single_page_link: concat('http://www.anandtech.com/print/', substring-after(//meta[@property='og:url']/@content, '/show/'))
+
 test_url: http://www.anandtech.com/show/5812/eurocom-monster-10-clevos-little-monster/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/androidpolice.com.txt b/inc/3rdparty/site_config/standard/androidpolice.com.txt
new file mode 100755 (executable)
index 0000000..8f9b1a2
--- /dev/null
@@ -0,0 +1,5 @@
+body: //div[@class='post_content']
+date: //div[@class='date_day'] | //div[@class='date_month']
+
+test_url: http://www.androidpolice.com/2014/03/30/music-boss-for-pebble-can-now-control-playback-and-volume-on-chromecast-content-from-your-smartwatch/
+
old mode 100644 (file)
new mode 100755 (executable)
index f9ffd3c..ce31fcf
@@ -1,9 +1,9 @@
-title: //h2\r
-author: string('Andy Rutledge')\r
-date: //div[@class='articledate']\r
-body: //div[@class='copybody']\r
-\r
-strip: //*[@class='space']\r
-strip: //*[@class='articleFoot']\r
-\r
+title: //h2
+author: string('Andy Rutledge')
+date: //div[@class='articledate']
+body: //div[@class='copybody']
+
+strip: //*[@class='space']
+strip: //*[@class='articleFoot']
+
 test_url: http://www.andyrutledge.com/hungry-for-a-better-menu.php
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a5c7c08..2d8937f
@@ -1,9 +1,9 @@
-title: //h1[@class="title"]\r
-\r
-author: ("Anna Manasova")\r
-# is ignored, unfortunately\r
-\r
-date: //p[@class="date"]\r
-\r
+title: //h1[@class="title"]
+
+author: ("Anna Manasova")
+# is ignored, unfortunately
+
+date: //p[@class="date"]
+
 body: //div[@class="entry"]
 test_url: http://annatravelling.wordpress.com/2011/11/07/a-day-of-cooking-thai/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a78a615..a820bba
@@ -1,18 +1,18 @@
-title: //h1[contains(@class, 'title')#\r
-body: //div[@id='mainContent']//div[contains(@class, 'section_content')] | //ul[@class='section_footer']\r
-date: //div[@class='date']\r
-\r
-strip_id_or_class: sharethis\r
-strip_id_or_class: stats\r
-strip_id_or_class: apply_form\r
-strip_id_or_class: job_map\r
-strip_id_or_class: respond\r
-strip: //h1//span[@class='type']\r
-strip: //li[@class='print' or @class='map']\r
-\r
-replace_string(<ul class="section_footer" style="display): <ul class="section_footer" style="display-bla\r
-\r
-prune: no\r
-tidy: no\r
-\r
+title: //h1[contains(@class, 'title')]
+body: //div[@id='mainContent']//div[contains(@class, 'section_content')] | //ul[@class='section_footer']
+date: //div[@class='date']
+
+strip_id_or_class: sharethis
+strip_id_or_class: stats
+strip_id_or_class: apply_form
+strip_id_or_class: job_map
+strip_id_or_class: respond
+strip: //h1//span[@class='type']
+strip: //li[@class='print' or @class='map']
+
+replace_string(<ul class="section_footer" style="display): <ul class="section_footer" style="display-bla
+
+prune: no
+tidy: no
+
 test_url: http://applature.com/mining-jobs/jobs/nickel-west-leinster-analytical-laboratory-technician/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4c48395..a54dccc
@@ -1,7 +1,7 @@
-strip: //p[@class='sosumi']\r
-# Aren't they witty?\r
-\r
-# I can't work out what causes the  before the title. \r
-title: //h1[@class='title']\r
-strip: //h1[@class='title']\r
+strip: //p[@class='sosumi']
+# Aren't they witty?
+
+# I can't work out what causes the  before the title. 
+title: //h1[@class='title']
+strip: //h1[@class='title']
 test_url: http://www.apple.com/pr/library/2011/02/15appstore.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/appledaily.com.tw.txt b/inc/3rdparty/site_config/standard/appledaily.com.tw.txt
new file mode 100755 (executable)
index 0000000..82d6f37
--- /dev/null
@@ -0,0 +1,4 @@
+body: //div[contains(@class, 'articulum')]
+
+test_url: http://www.appledaily.com.tw/realtimenews/article/new/20140120/330479
+test_url: http://www.appledaily.com.tw/rss/create/kind/rnews/type/new/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 279fbce..5ae1050
@@ -1,11 +1,23 @@
-title: //p[@class='title']\r
-\r
-author: //p[text() = 'By ']/a/text()\r
-strip: //p[text() = 'By ']\r
-\r
-body: //td[@class='bod']\r
-strip_id_or_class: title\r
-strip_id_or_class: minor\r
-\r
-strip_id_or_class: multipagefooter\r
-test_url: http://www.appleinsider.com/articles/12/02/29/inside_os_x_108_mountain_lion_safari_52_gets_a_simplified_user_interface_with_new_sharing_features.html
\ No newline at end of file
+title: //h1[@class="art-head"]
+
+author: //p[contains(@class, 'byline')]/a
+#author: //p[text() = 'By ']/a/text()
+#strip: //p[text() = 'By ']
+
+date: //p[contains(@class, 'date-header')]
+
+body: //div[@class="article"]
+strip_id_or_class: lazy
+#strip_id_or_class: minor
+strip_id_or_class: multipagefooter
+strip_id_or_class: date-header
+strip_id_or_class: byline
+
+find_string: <noscript>
+replace_string: <div>
+find_string: </noscript>
+replace_string: </div>
+
+test_url: http://www.appleinsider.com/articles/12/02/29/inside_os_x_108_mountain_lion_safari_52_gets_a_simplified_user_interface_with_new_sharing_features.html
+test_url: http://appleinsider.com/articles/13/10/03/goldee-companion-app-for-philips-hue-bulbs-offers-shifting-dynamic-light-scenes
+test_url: http://appleinsider.com/appleinsider.rss
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9476cf5..0178639
@@ -1,5 +1,5 @@
-date: //div[@class='post_date']\r
-\r
-body: //div[@class='post_content']\r
+date: //div[@class='post_date']
+
+body: //div[@class='post_content']
 
 test_url: http://www.archdaily.com/185325/p10-mixed-use-building-studio-up
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 50ff632..579de51
@@ -1,18 +1,22 @@
-# Description: Fix XPaths to include ALL chapters on 'view_full_work' pages.\r
-# Include: work meta, summary, chapter information, and notes which Instapaper strips out on default.\r
-# Exclude: header, footer, navigation, comments.\r
-# Notes: User is a newbie with XPaths.\r
-\r
-title: //h2[@class='title']\r
-author: //h3[@class='byline']\r
-author: //a[@class='login author']\r
-\r
-strip_id_or_class:header\r
-strip_id_or_class:navigation\r
-strip_id_or_class:feedback\r
-strip_id_or_class:kudos\r
-strip_id_or_class:add_comment_placeholder\r
-strip_id_or_class:add_comment\r
-strip_id_or_class:globalize\r
+# Description: Fix XPaths to include ALL chapters on 'view_full_work' pages.
+# Include: work meta, summary, chapter information, and notes which Instapaper strips out on default.
+# Exclude: header, footer, navigation, comments.
+# Notes: User is a newbie with XPaths.
+
+title: //h2[@class='title']
+author: //h3[@class='byline']
+author: //a[@class='login author']
+
+strip_id_or_class:header
+strip_id_or_class:navigation
+strip_id_or_class:feedback
+strip_id_or_class:kudos
+strip_id_or_class:add_comment_placeholder
+strip_id_or_class:add_comment
+strip_id_or_class:globalize
 strip_id_or_class:footer
-test_url: http://archiveofourown.org/works/229402?view_full_work=true
\ No newline at end of file
+
+single_page_link: //div[@id='main']//a[contains(@href, 'view_adult=true')]
+
+test_url: http://archiveofourown.org/works/229402?view_full_work=true
+test_url: http://archiveofourown.org/works/750111/chapters/1399929
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 49bb3db..767f680
@@ -1,16 +1,17 @@
-author: //p[@class='byline']/a\r
-body: //div[contains(@class,'article-content')]\r
-strip: //h2[@class='title']\r
-strip_id_or_class: byline\r
-prune: no\r
-\r
-date: //div[@class='byline']/span[@class='posted']//abbr/@original-title\r
-date: //div[@class='byline']/span[@class='posted']//abbr\r
-\r
-title: //div[@id='story']//h2[@class='title']\r
-\r
-strip: //div[@class='pager']\r
-next_page_link: //nav//a[span/@class='next']/@href\r
-\r
-test_url: http://arstechnica.com/tech-policy/news/2012/02/gigabit-internet-for-80-the-unlikely-success-of-californias-sonicnet.ars\r
-test_url: http://arstechnica.com/apple/2005/04/macosx-10-4/
\ No newline at end of file
+author: //p[@class='byline']/a
+body: //div[contains(@class,'article-content')]
+strip: //h2[@class='title']
+strip_id_or_class: byline
+strip_id_or_class: story-sidebar
+prune: no
+
+date: //div[@class='byline']/span[@class='posted']//abbr/@original-title
+date: //div[@class='byline']/span[@class='posted']//abbr
+
+title: //div[@id='story']//h2[@class='title']
+
+strip: //div[@class='pager']
+next_page_link: //nav//a[span/@class='next']/@href
+
+test_url: http://arstechnica.com/tech-policy/news/2012/02/gigabit-internet-for-80-the-unlikely-success-of-californias-sonicnet.ars
+test_url: http://arstechnica.com/apple/2005/04/macosx-10-4/
old mode 100644 (file)
new mode 100755 (executable)
index e54423b..73bcdb4
@@ -1,6 +1,6 @@
-title: //div[@class="mod-bostonarticleheader mod-articleheader"]/h1\r
-author: substring-after(//div[@class="mod-bostonarticlebyline mod-articlebyline"]/span[3],"By ")\r
-date: //div[@class="mod-bostonarticlebyline mod-articlebyline"]/span[@class="pubdate"]\r
-\r
+title: //div[@class="mod-bostonarticleheader mod-articleheader"]/h1
+author: substring-after(//div[@class="mod-bostonarticlebyline mod-articlebyline"]/span[3],"By ")
+date: //div[@class="mod-bostonarticlebyline mod-articlebyline"]/span[@class="pubdate"]
+
 strip_id_or_class: mod-pagination
 test_url: http://articles.boston.com/2011-10-23/news/30313691_1_bigfoot-free-speech-monadnock-state-park
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a08f204..984d81d
@@ -1,11 +1,11 @@
-title: //div[@class="mod-courantarticleheader mod-articleheader"]/h1\r
-date: //div[@class="mod-courantarticlebyline mod-articlebyline"]/span[@class="pubdate"]\r
-author: //div[@class="mod-courantarticlebyline mod-articlebyline"]/span[3]\r
-\r
-strip_id_or_class: mod-article-byline\r
-strip_id_or_class: mod-article-header\r
-strip_id_or_class: mod-article-subtitle\r
-#This leaves some crud after the article, but it's better than nothing.\r
-#It would be ideal if we could set the body to every element matching //div[contains(@class, "mod-articletext")]/p, but it seems like body only takes the first matching element.\r
+title: //div[@class="mod-courantarticleheader mod-articleheader"]/h1
+date: //div[@class="mod-courantarticlebyline mod-articlebyline"]/span[@class="pubdate"]
+author: //div[@class="mod-courantarticlebyline mod-articlebyline"]/span[3]
+
+strip_id_or_class: mod-article-byline
+strip_id_or_class: mod-article-header
+strip_id_or_class: mod-article-subtitle
+#This leaves some crud after the article, but it's better than nothing.
+#It would be ideal if we could set the body to every element matching //div[contains(@class, "mod-articletext")]/p, but it seems like body only takes the first matching element.
 
 test_url: http://articles.courant.com/2011-10-22/news/hc-green-drugsearch--1022-20111022_1_drugs-in-student-lockers-police-dogs-lockdown
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/articles.washingtonpost.com.txt b/inc/3rdparty/site_config/standard/articles.washingtonpost.com.txt
new file mode 100755 (executable)
index 0000000..a76c2d0
--- /dev/null
@@ -0,0 +1,11 @@
+body: //div[contains(@class, "article_body")]
+# print view
+body: //div[@id='print_facet']//div[@id='body']
+
+tidy: no
+prune: no
+
+single_page_link: concat(substring-before(//div[@id="echo_container_a"]/@guid, '_story.html'), '_print.html')
+
+test_url: http://articles.washingtonpost.com/2011-10-22/world/35279694_1_germany-acts-german-leaders-chancellor-angela-merkel
+test_url: http://articles.washingtonpost.com/2013-05-31/opinions/39658000_1_chemical-weapons-mass-destruction-cartels
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 2562edb..b4eec7b
@@ -1,3 +1,3 @@
-body: //div[@id='HeadLine']\r
+body: //div[@id='HeadLine']
 strip: //div[@id='utility_right']
 test_url: http://www.asahi.com/culture/update/0520/TKY201105200321.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 5236d09..0327e84
@@ -1,5 +1,5 @@
-title: //h1[@class='article_title']\r
-author: //span[@class='author']\r
-date: //h2[@class='dateline']\r
+title: //h1[@class='article_title']
+author: //span[@class='author']
+date: //h2[@class='dateline']
 body: //div[@class='article_body']
 test_url: http://ascarter.net/2012/02/20/enough-is-enough.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 33e8153..8de2227
@@ -1,7 +1,7 @@
-title: //span[@class='titel']\r
-author: //span[@class='metadaten_C']/a//span[@class='metadaten_C']\r
-date: substring-after(//span[@class='metadaten_C'],'astronews.com')\r
-strip: //span[@class='bu']\r
-strip_image_src: '/_images/'\r
+title: //span[@class='titel']
+author: //span[@class='metadaten_C']/a//span[@class='metadaten_C']
+date: substring-after(//span[@class='metadaten_C'],'astronews.com')
+strip: //span[@class='bu']
+strip_image_src: '/_images/'
 
 test_url: http://www.astronews.com/news/artikel/2011/10/1110-021.shtml
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index adad5f1..f639b04
@@ -1,8 +1,8 @@
-# Johannes Stühler\r
-\r
-title://h2\r
-author://span[@class='meta-content']\r
-date://abbr[@class='date published']/@title\r
-body://div[@class='entry-content']\r
+# Johannes Stühler
+
+title://h2
+author://span[@class='meta-content']
+date://abbr[@class='date published']/@title
+body://div[@class='entry-content']
 
 test_url: http://www.asymco.com/2011/01/14/is-android-more-efficient-than-ios-at-generating-search-revenue/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 58681bf..291db99
@@ -1,6 +1,6 @@
-prune: no\r
-body: //div[@class='post-body']\r
-author: //p[@class='byline']//a\r
-date: substring-after(//div[@class='about']/p[2], 'Posted')\r
+prune: no
+body: //div[@class='post-body']
+author: //p[@class='byline']//a
+date: substring-after(//div[@class='about']/p[2], 'Posted')
 strip: //div[@class='body']/div[@class='meta']
 test_url: http://www.autoblog.com/2012/01/17/next-gen-bmw-x5-caught-again/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 776ee10..c365a7a
@@ -1,4 +1,4 @@
-author: //*[@id="article_wrapper"]/div[1]/a[1]\r
-body: //*[@id="article_wrapper"]/div[2]\r
+author: //*[@id="article_wrapper"]/div[1]/a[1]
+body: //*[@id="article_wrapper"]/div[2]
 date: //*[@id="article_wrapper"]/div[1]/text()[2]
 test_url: http://www.avclub.com/articles/forgetmenot,70904
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 32adff8..35b6242
@@ -1,12 +1,12 @@
-single_page_link: //div[@class='toppaginate']//a[@rel='nofollow']\r
-convert_double_br_tags: yes\r
-\r
-title: //div[@class="story"]/h1\r
-body: //div[@id="story-body-text"]\r
-author: //span[@class="byline"]\r
-date: //p[@class="date"]\r
-\r
-strip: //*[@class='all']\r
-strip: //*[@class='articlerail']\r
+single_page_link: //div[@class='toppaginate']//a[@rel='nofollow']
+convert_double_br_tags: yes
+
+title: //div[@class="story"]/h1
+body: //div[@id="story-body-text"]
+author: //span[@class="byline"]
+date: //p[@class="date"]
+
+strip: //*[@class='all']
+strip: //*[@class='articlerail']
 
 test_url: http://www.baltimoresun.com/news/maryland/bs-md-omalley-budget-2-20120116,0,5340585.story
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/baseballprospectus.com.txt b/inc/3rdparty/site_config/standard/baseballprospectus.com.txt
new file mode 100755 (executable)
index 0000000..1207b34
--- /dev/null
@@ -0,0 +1,13 @@
+title: //h1[@class='title']
+author: //p[@class="author"]/a[1]
+body: //div[@class="article"]
+date: //p[@class="date"]
+
+# remove user tools
+strip: //div[@class='tools']
+strip: //h1
+strip: //h2[@class='subtitle']
+strip: //p[@class='author']
+strip: //p[@class='date']
+
+test_url: http://www.baseballprospectus.com/article.php?articleid=18463
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ab58314..f08c1f2
@@ -1,7 +1,7 @@
-title: //h2\r
-date: //span[@class='date']\r
-body: //div[@class='entry']\r
-\r
-strip: //div[@class='zusatz']\r
+title: //h2
+date: //span[@class='date']
+body: //div[@class='entry']
+
+strip: //div[@class='zusatz']
 
 test_url: http://www.basicthinking.de/blog/2011/12/13/sagt-social-networks-adieu-begrust-private-networks/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index eaafaf1..57f7fdf
@@ -1,13 +1,13 @@
-author: substring(//h3[@class='headlines']/span[@class='dates'],0,string-length(//h3[@class='headlines']/span[@class='dates'])-20)\r
-\r
-\r
-date: substring((//h3[@class='headlines']/span[@class='dates']),string-length(//h3[@class='headlines']/span[@class='dates'])-18,12)\r
-\r
-\r
-body:  //div[@class='first-article-big']\r
-strip: //table[@class='newsimagecontainer']\r
-strip: //h3[@class='headlines']\r
-strip: //iframe[@class='headlines']\r
-strip: //a[@class='newslink']\r
+author: substring(//h3[@class='headlines']/span[@class='dates'],0,string-length(//h3[@class='headlines']/span[@class='dates'])-20)
+
+
+date: substring((//h3[@class='headlines']/span[@class='dates']),string-length(//h3[@class='headlines']/span[@class='dates'])-18,12)
+
+
+body:  //div[@class='first-article-big']
+strip: //table[@class='newsimagecontainer']
+strip: //h3[@class='headlines']
+strip: //iframe[@class='headlines']
+strip: //a[@class='newslink']
 convert_double_br_tags: yes
 test_url: http://bb.is/Pages/82?NewsID=174119
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9c5c341..ef1f491
@@ -1,32 +1,42 @@
-body: //div[@class="story-body"]\r
-title: //h1[@class="story-header"]\r
-date: //span[@class="story-date"]/span[@class='date']\r
-\r
-# recipes, e.g. http://www.bbc.co.uk/food/recipes/mymincepies_71055\r
-body: //div[contains(@class, 'hrecipe')]//div[@id='subcolumn-1']\r
-\r
-#strip: //div[@class="story-feature narrow"]\r
-#strip: //div[@class="story-feature wide"]\r
-#strip: //div[@class="story-feature dslideshow-enclosure"]\r
-strip: //div[contains(@class, "story-feature")]\r
-strip: //span[@class="story-date"]\r
-#strip: //div[@class="caption body-narrow-width"]\r
-strip: //div[@class="warning"]//p\r
-strip: //div[@id='page-bookmark-links-head']\r
-strip: //object\r
-strip: //div[contains(@class, "bbccom_advert_placeholder")]\r
-strip: //div[contains(@class, "embedded-hyper")]\r
-strip: //div[contains(@class, 'market-data')]\r
-strip: //a[contains(@class, 'hidden')]\r
-strip: //div[contains(@class, 'hypertabs')]\r
-strip: //div[contains(@class, 'related')]\r
-strip: //form[@id='comment-form']\r
-strip: //div[contains(@class, 'comment-introduction')]\r
-\r
-replace_string(<noscript>): <div>\r
-replace_string(</noscript>): </div>\r
-\r
-prune: no\r
-\r
-dissolve: //h2\r
-test_url: http://www.bbc.co.uk/news/business-15060862
\ No newline at end of file
+body: //div[@class="story-body"]
+# for video entries
+body: //div[contains(@class, "videoInStory") or @id="meta-information"]
+title: //h1[@class="story-header"]
+date: //span[@class="story-date"]/span[@class='date']
+# for sport site
+date: //meta[@name='DCTERMS.created']/@content
+author: //div[@id='headline']//span[@class='byline-name']
+
+# recipes, e.g. http://www.bbc.co.uk/food/recipes/mymincepies_71055
+body: //div[contains(@class, 'hrecipe')]//div[@id='subcolumn-1']
+
+#strip: //div[@class="story-feature narrow"]
+#strip: //div[@class="story-feature wide"]
+#strip: //div[@class="story-feature dslideshow-enclosure"]
+strip: //div[contains(@class, "story-feature")]
+strip: //span[@class="story-date"]
+#strip: //div[@class="caption body-narrow-width"]
+strip: //div[@class="warning"]//p
+strip: //div[@id='page-bookmark-links-head']
+strip: //object
+strip: //div[contains(@class, "bbccom_advert_placeholder")]
+strip: //div[contains(@class, "embedded-hyper")]
+strip: //div[contains(@class, 'market-data')]
+strip: //a[contains(@class, 'hidden')]
+strip: //div[contains(@class, 'hypertabs')]
+strip: //div[contains(@class, 'related')]
+strip: //form[@id='comment-form']
+strip: //div[contains(@class, 'comment-introduction')]
+strip: //div[contains(@class, 'share-tools')]
+strip: //div[@id='also-related-links']
+
+replace_string(<noscript>): <div>
+replace_string(</noscript>): </div>
+
+prune: no
+
+dissolve: //h2
+test_url: http://www.bbc.co.uk/sport/0/football/23224017
+test_url: http://www.bbc.co.uk/news/business-15060862
+# video entry
+test_url: http://www.bbc.co.uk/news/world-asia-22056933
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bbcgoodfood.com.txt b/inc/3rdparty/site_config/standard/bbcgoodfood.com.txt
new file mode 100755 (executable)
index 0000000..1547d62
--- /dev/null
@@ -0,0 +1,16 @@
+title: //header//h1
+#body: //article[contains(@class, 'node-full')]
+body: //div[contains(@class, 'recipe-details') or contains(@class, 'tips-carousel')] | //section[@id='recipe-ingredients' or @id='recipe-method']
+
+strip_id_or_class: recipe-rating-wrapper
+strip_id_or_class: magazine-subcribe-header
+strip_id_or_class: hide
+strip_id_or_class: recipe-actions
+strip_id_or_class: buy-ingredients
+strip_id_or_class: related-content
+strip_id_or_class: recipe-magazine-ad
+strip_id_or_class: copy-right
+
+prune: no
+
+test_url: http://www.bbcgoodfood.com/recipes/1131634/minced-beef-wellington
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f341d59..72c1bae
@@ -1,16 +1,16 @@
-body: //div[@class="entry-content"]\r
-\r
-# Remove text &lsquo;Tweet&rsquo;\r
-strip: //div[@class="entry-content"]/div[last()]\r
-\r
-title: h1[@class="entry-title"]\r
-\r
-# If the Instapaper text parser worked with HTML5 tags, we would use:\r
-date: //time[@class="entry-date"]\r
-\r
-# But since it does not, use this more complicated rule:\r
-date: //div[@class="entry-meta"]/a[@rel="bookmark"]\r
-\r
-# Unfortunately, the following rule is overridden by the automatically found author.\r
+body: //div[@class="entry-content"]
+
+# Remove text &lsquo;Tweet&rsquo;
+strip: //div[@class="entry-content"]/div[last()]
+
+title: //h1[@class="entry-title"]
+
+# If the Instapaper text parser worked with HTML5 tags, we would use:
+date: //time[@class="entry-date"]
+
+# But since it does not, use this more complicated rule:
+date: //div[@class="entry-meta"]/a[@rel="bookmark"]
+
+# Unfortunately, the following rule is overridden by the automatically found author.
 author: ("Benoit Maison")
 test_url: http://www.benoitmaison.org/2011/12/06/why-siri-had-to-start-in-beta/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 607c998..9f8c41c
@@ -1,3 +1,3 @@
-title: //h1[@class='headline']\r
+title: //h1[@class='headline']
 body: //div[contains(@class, 'article-wrapper')]
 test_url: http://www.berlingske.dk/danmark/festen-er-flyttet-nordpaa
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bernama.com.txt b/inc/3rdparty/site_config/standard/bernama.com.txt
new file mode 100755 (executable)
index 0000000..fdc04b7
--- /dev/null
@@ -0,0 +1,5 @@
+body: //div[contains(@class, "NewsText")]
+prune: no
+
+test_url: http://www.bernama.com/bernama/v7/rss/english.php
+test_url: http://www.bernama.com/bernama/v7/newsindex.php?id=943513
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0eaf085..90a54a2
@@ -1,7 +1,7 @@
-# some articles at this site like this one doesn't\r
-# seem to pick up the article body via normal \r
-# processing, other articles come through fine\r
-# http://www.betanews.com/joewilcox/article\r
-#  /Google-is-a-marketing-sensation/1309708375\r
+# some articles at this site like this one doesn't
+# seem to pick up the article body via normal 
+# processing, other articles come through fine
+# http://www.betanews.com/joewilcox/article
+#  /Google-is-a-marketing-sensation/1309708375
 body: //*[@id="article"]
 test_url: http://www.betanews.com/joewilcox/article/Google-is-a-marketing-sensation/1309708375
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index dc07129..e431037
@@ -1,8 +1,8 @@
-title: //div[contains(@class, 'main-content')]//h1\r
-body: //div[@class='summary-column'] | //div[contains(@class, 'main-content')]\r
-\r
-prune: no\r
-\r
-single_page_link: //div[@id='biography-action-links']//a[contains(@href, '/print/')]\r
+title: //div[contains(@class, 'main-content')]//h1
+body: //div[@class='summary-column'] | //div[contains(@class, 'main-content')]
+
+prune: no
+
+single_page_link: //div[@id='biography-action-links']//a[contains(@href, '/print/')]
 
 test_url: http://www.biography.com/print/profile/martin-luther-9389283
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bizjournals.com.txt b/inc/3rdparty/site_config/standard/bizjournals.com.txt
new file mode 100755 (executable)
index 0000000..cfba766
--- /dev/null
@@ -0,0 +1,13 @@
+date: //meta[@name='publish-date']/@content
+body: //div[contains(@class, 'articleContentWrapper')]
+prune: no
+
+strip: //div[contains(@class, 'staff_info')]//dd[contains(., 'Twitter')]
+
+strip_id_or_class: related_content
+strip_id_or_class: enlarge
+strip_id_or_class: photoBy
+strip_id_or_class: older
+
+test_url: http://www.bizjournals.com/cincinnati/news/2013/10/03/harris-teeter-shareholders-vote-on.html
+test_url: http://feeds.bizjournals.com/industry_20?format=xml
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6cb0463..0fed552
@@ -1,7 +1,7 @@
-title: //h1[@class='articlehead']\r
-body: //div[@class='column']\r
-strip: //h1\r
-strip: //div[@class='help']\r
-\r
+title: //h1[@class='articlehead']
+body: //div[@class='column']
+strip: //h1
+strip: //div[@class='help']
+
 #no author or date/time provided in current layout
 test_url: http://bjango.com/articles/actions/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 1f43f49..7ac8cc1
@@ -1,8 +1,8 @@
-tidy: no\r
-prune: no\r
-date: //article/header/h6/time\r
-title: //article/header/h3\r
-author: //meta[@name='author']/@content\r
-body: //article//post\r
+tidy: no
+prune: no
+date: //article/header/h6/time
+title: //article/header/h3
+author: //meta[@name='author']/@content
+body: //article//post
 
 test_url: http://blog.arsln.org/aska-ayip-oluyor/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 81c3bda..78d7f51
@@ -1,7 +1,7 @@
-title: //title\r
-author: //span[@class='author vcard']/a\r
-date: //p[@class='headline_meta']/abbr[@class='published']\r
-body: //div[@class='format_text entry-content']\r
-\r
+title: //title
+author: //span[@class='author vcard']/a
+date: //p[@class='headline_meta']/abbr[@class='published']
+body: //div[@class='format_text entry-content']
+
 strip: //div[@id='dd_ajax_float']
 test_url: http://blog.asmartbear.com/how-to-get-quality-freelance-graphics-design-work-on-a-budget.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a4c5aae..db80a35
@@ -1,9 +1,9 @@
-# Instapaper gets this back to front and only gets the blog title instead of the article title.\r
-title: substring-before(//title, '-')\r
-\r
-author: //a[ contains(@href, '/people') ]\r
-\r
-body: //div[ @class='post' ]\r
-\r
+# Instapaper gets this back to front and only gets the blog title instead of the article title.
+title: substring-before(//title, '-')
+
+author: //a[ contains(@href, '/people') ]
+
+body: //div[ @class='post' ]
+
 # Date is impossible to retrieve since they use those stupid "fuzzy" dates, inserted through javascript, at posterous.
 test_url: http://blog.cloudflare.com/understanding-analytics-when-is-a-page-view-n
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 92272b7..97e48e6
@@ -1,5 +1,5 @@
-title: //h2\r
-date: //h3\r
-body: //ul\r
+title: //h2
+date: //h3
+body: //ul
 
 test_url: http://blog.fefe.de/?ts=b063bf55
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3065dd8..13d1d44
@@ -1,11 +1,11 @@
-# clean Instagram blog a little bit\r
-\r
-tidy:no\r
-prune:no\r
-\r
-body://div[contains(@id,'content')]\r
-\r
-strip_id_or_class:meta\r
-strip_id_or_class:notes\r
+# clean Instagram blog a little bit
+
+tidy:no
+prune:no
+
+body://div[contains(@id,'content')]
+
+strip_id_or_class:meta
+strip_id_or_class:notes
 strip_id_or_class:pagination
 test_url: http://blog.instagram.com/post/8757832007/fromwhereistand
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.instapaper.com.txt b/inc/3rdparty/site_config/standard/blog.instapaper.com.txt
new file mode 100755 (executable)
index 0000000..fda01b1
--- /dev/null
@@ -0,0 +1,9 @@
+author: //a[@href="http://www.marco.org/about"]
+date: //span[@class="date"]
+
+# Remove the date from article body.
+strip: //span[@class="date"]
+
+# Remove pagination links from article body.
+strip: //div[@id="pagination"]
+test_url: http://blog.instapaper.com/post/31303984531
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4e467fe..e89ad3a
@@ -1,4 +1,4 @@
-date: //span[contains(@class, 'date-links')]\r
-author: //span[contains(@class, 'author-links')]\r
+date: //span[contains(@class, 'date-links')]
+author: //span[contains(@class, 'author-links')]
 body: //div[contains(@class, 'entry-content')] 
 test_url: http://blog.jaysalvat.com/article/celui-qui-avait-refait-son-site-web
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ac18ad1..bcd3bdc
@@ -1,5 +1,5 @@
-body: //*[contains(@class, 'post_content')]\r
-author: string('Kaelig Deloumeau-Prigent')\r
-title: //h1[@class='title']\r
+body: //*[contains(@class, 'post_content')]
+author: string('Kaelig Deloumeau-Prigent')
+title: //h1[@class='title']
 date: //span[@class='date']
 test_url: http://blog.kaelig.fr/post/24877648508/preprocesseurs-css-renoncer-par-choix-ou-par
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 702789a..73c30c4
@@ -1,6 +1,6 @@
-title: //span[@class='pcol1 itemSubjectBoldfont']\r
-body: //div[@id='postListBody']\r
-date: //p[@class='date fil5 pcol2']\r
-single_page_link: /html/frameset/frame[1]/attribute::src\r
+title: //span[@class='pcol1 itemSubjectBoldfont']
+body: //div[@id='postListBody']
+date: //p[@class='date fil5 pcol2']
+single_page_link: /html/frameset/frame[1]/attribute::src
 strip: //div[@class='post-btn']
 test_url: http://blog.naver.com/how2invest/110135068757
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3089001..de81beb
@@ -1,12 +1,12 @@
-# PCHOME blog, a popular Chinese blog host\r
-# Oct 15, 2011\r
-#  \r
-\r
-title://*[contains(@class,'imp')]/h2\r
-\r
-date://*[contains(@class,'imp')]/span\r
-body://div[contains(@id,'blog_content')]\r
-\r
-\r
+# PCHOME blog, a popular Chinese blog host
+# Oct 15, 2011
+#  
+
+title://*[contains(@class,'imp')]/h2
+
+date://*[contains(@class,'imp')]/span
+body://div[contains(@id,'blog_content')]
+
+
 
 test_url: http://blog.pchome.net/article/462502.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b7afe45..40f0c56
@@ -1,6 +1,6 @@
-title: //a[@class="blog_title"]\r
-date: //p[@class="when"]/a\r
-body: //div[@class="blog_entry"]\r
-strip_id_or_class:blog_title\r
+title: //a[@class="blog_title"]
+date: //p[@class="when"]/a
+body: //div[@class="blog_entry"]
+strip_id_or_class:blog_title
 strip_id_or_class:when
 test_url: http://blog.pinboard.in/2011/11/the_social_graph_is_neither/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.renren.com.txt b/inc/3rdparty/site_config/standard/blog.renren.com.txt
new file mode 100755 (executable)
index 0000000..401d31e
--- /dev/null
@@ -0,0 +1,11 @@
+# This filter is tested on:
+# http://blog.renren.com/share/224959024/14260739544
+# http://blog.renren.com/share/231323504/14261768898
+# http://blog.renren.com/share/230305019/1502806705
+
+title://h1[contains(@class, 'title-article')]
+author://span[contains(@class, 'name')]
+body://div[contains(@class, 'content-body')]
+
+convert_double_br_tags:yes
+test_url: http://blog.renren.com/share/230305019/1502806705
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index acb9ce8..4895272
@@ -1,26 +1,26 @@
-# Sina blog, the most popular blog host in China.\r
-# Its source code is horrible.\r
-#  \r
-# Issue:\r
-# Only the first image in the article is displayed.\r
-# The rest images are replace by a 1x1 transparent gif by sina blog host.\r
-# \r
-\r
-title://*[contains(@class,'titName SG_txta')]\r
-author://*[contains(@id,'ownernick')]\r
-date://*[contains(@class,'time SG_txtc')]\r
-body://div[contains(@class,'articalContent')]\r
-\r
-# Remove redundant content which has span class start with "MASS"\r
-# Example <span class="MASSf21674ffeef7"></span>\r
-strip://span[contains(@class,'MASS')]\r
-\r
-# Remove comment\r
-strip://div[contains(@class,'allComm')]\r
-\r
-# Remove hiden text and link\r
-strip://ins\r
-\r
-tidy:no\r
-convert_double_br_tags:yes\r
+# Sina blog, the most popular blog host in China.
+# Its source code is horrible.
+#  
+# Issue:
+# Only the first image in the article is displayed.
+# The remaining images are replaced with a 1x1 transparent GIF by the Sina blog host.
+# 
+
+title://*[contains(@class,'titName SG_txta')]
+author://*[contains(@id,'ownernick')]
+date://*[contains(@class,'time SG_txtc')]
+body://div[contains(@class,'articalContent')]
+
+# Remove redundant spans whose class starts with "MASS"
+# Example <span class="MASSf21674ffeef7"></span>
+strip://span[contains(@class,'MASS')]
+
+# Remove comment
+strip://div[contains(@class,'allComm')]
+
+# Remove hidden text and links
+strip://ins
+
+tidy:no
+convert_double_br_tags:yes
 test_url: http://blog.sina.com.cn/s/blog_5054769e0102dtja.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8c8b383..eae6982
@@ -1,6 +1,6 @@
-title: //h2/a[@class="no-link title"]\r
-author: //h2[@id="blog_owner"]\r
-date: //time\r
-strip: //h2/a[@class="no-link title"]\r
-test_url: http://blog.wells.ee/retina\r
+title: //h2/a[@class="no-link title"]
+author: //h2[@id="blog_owner"]
+date: //time
+strip: //h2/a[@class="no-link title"]
+test_url: http://blog.wells.ee/retina
 test_url: http://blog.wells.ee/skeuomorphism
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f630127..2a66952
@@ -1,8 +1,8 @@
-# 2011-08-23 [carlo@...] Initial version.\r
-\r
-author: //div[@id="blogauthordatebox-node"]//a[@title="View user profile."]/text()\r
-\r
-# why yes, I do feel a bit dirty\r
-date: substring-before( substring-after( substring-after( //div[@id="blogauthordatebox-node"]//td[3], "on " ), ", "), " " )\r
+# 2011-08-23 [carlo@...] Initial version.
+
+author: //div[@id="blogauthordatebox-node"]//a[@title="View user profile."]/text()
+
+# why yes, I do feel a bit dirty
+date: substring-before( substring-after( substring-after( //div[@id="blogauthordatebox-node"]//td[3], "on " ), ", "), " " )
 
 test_url: http://blogs.aljazeera.net/asia/2011/08/22/peoples-hero
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3664d16..d47c352
@@ -1,4 +1,4 @@
-title: //div[@id='pageFeature']/h1\r
-body: //div[@id='articleBody']\r
-strip: //div[@class='module wide']\r
+title: //div[@id='pageFeature']/h1
+body: //div[@id='articleBody']
+strip: //div[@class='module wide']
 test_url: http://blogs.hbr.org/bregman/2011/04/the-1-killer-of-meetings-and-w.html?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+harvardbusiness+%28HBR.org%29
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3d3ec02..b2ff833
@@ -1,6 +1,6 @@
-title: //h3[@class="post-name"]\r
-author: //span[@class="user-name"]\r
-date: //div[@class="post-date"]\r
-body: //div[@class="post-content user-defined-markup"]\r
+title: //h3[@class="post-name"]
+author: //span[@class="user-name"]
+date: //div[@class="post-date"]
+body: //div[@class="post-content user-defined-markup"]
 footnotes: no
 test_url: http://blogs.msdn.com/b/b8/archive/2011/10/04/designing-the-start-screen.aspx
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6907bcb..d3eb996
@@ -1,3 +1,3 @@
-title: //div[@id='single']/h1\r
+title: //div[@id='single']/h1
 body: //div[@id='postcontent']
 test_url: http://blogs.reuters.com/felix-salmon/2010/07/16/the-value-of-a-strong-brand-apple-edition/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a7d1508..2102015
@@ -1,16 +1,16 @@
-# meta data\r
-title://h1[@class = 'postTitle']\r
-author:substring-before(substring-after(//span[@class = 'byline'],'By '),'|')\r
-date://span[@class = 'datestamp']\r
-\r
-#body content\r
-body://div[@id = 'singleBlogPost']\r
-\r
-#reclaim author info\r
-move_into(//div[@id = 'singleBlogPost'])://div[@id = 'aboutAuthorDiv']\r
-strip://p[@class = 'moreLink mobileHide']\r
-\r
-#cleanup comments, there might be some open <div> sections\r
-strip://div[@id = 'comments2']\r
+# meta data
+title://h1[@class = 'postTitle']
+author:substring-before(substring-after(//span[@class = 'byline'],'By '),'|')
+date://span[@class = 'datestamp']
+
+#body content
+body://div[@id = 'singleBlogPost']
+
+#reclaim author info
+move_into(//div[@id = 'singleBlogPost'])://div[@id = 'aboutAuthorDiv']
+strip://p[@class = 'moreLink mobileHide']
+
+#cleanup comments, there might be some open <div> sections
+strip://div[@id = 'comments2']
 strip://h3[a[@href = '#add-comment']]
 test_url: http://blogs.scientificamerican.com/a-blog-around-the-clock/2012/07/10/science-blogs-definition-and-a-history/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ba8bc6e..1bc65e7
@@ -1,15 +1,15 @@
-# metadata\r
-author://div[@class = 'post']/div[@class='meta']/a[1]\r
-date://div[@id = 'rap']/h2[1]\r
-body://div[@class = 'post']\r
-\r
-# wrapping caption and image\r
-wrap_in(fieldset)://div[contains(@class, 'wp-caption')]\r
-\r
-\r
-# clean up\r
-strip://div[@class = 'post']/h3[@class = 'storytitle']\r
-strip://div[@class = 'post']/div[@class = 'social']\r
-strip://img[@style = 'display:none;']\r
+# metadata
+author://div[@class = 'post']/div[@class='meta']/a[1]
+date://div[@id = 'rap']/h2[1]
+body://div[@class = 'post']
+
+# wrapping caption and image
+wrap_in(fieldset)://div[contains(@class, 'wp-caption')]
+
+
+# clean up
+strip://div[@class = 'post']/h3[@class = 'storytitle']
+strip://div[@class = 'post']/div[@class = 'social']
+strip://img[@style = 'display:none;']
 strip://img[@height='0' and @width='0']
 test_url: http://blogs.smithsonianmag.com/adventure/2011/10/tips-for-women-traveling-in-turkey/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a2909fd..3d0fbad
@@ -1,6 +1,9 @@
-title: //h3[@class="post-name"]\r
-author: //span[@class="user-name"]\r
-date: //div[@class="post-date"]\r
-body: //div[@class="post-content user-defined-markup"]\r
+title: //h3[@class="post-name"]
+author: //span[@class="user-name"]
+date: //div[@class="post-date"]
+body: //div[@class="post-content user-defined-markup"]
+strip_id_or_class: log-feedback-list
+tidy: no
 footnotes: no
-test_url: http://blogs.technet.com/b/dlemson/archive/2004/03/03/83304.aspx
\ No newline at end of file
+test_url: http://blogs.technet.com/b/dlemson/archive/2004/03/03/83304.aspx
+test_url: http://blogs.technet.com/b/isablog/archive/2009/01/07/a-pptp-client-might-fail-to-connect-to-a-vpn-server-on-the-internet-through-an-isa-server-2006.aspx
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index fbe7a5c..543d392
@@ -1,4 +1,4 @@
-body://div[@class='entry']\r
-date://div[@class='meta']\r
+body://div[@class='entry']
+date://div[@class='meta']
 strip://a[@class='FlattrButton']
 test_url: http://bluetouff.com/2012/03/02/polemique-google-vie-privee/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 91e48fd..3b3da99
@@ -1,8 +1,8 @@
-title: //h1[@class="entry-title"][2]\r
-author: string("Paul Boag")\r
-date: substring(//span[@class="meta"], 11)\r
-body: //article\r
-strip: //h2\r
-strip: //h1\r
+title: //h1[@class="entry-title"][2]
+author: string("Paul Boag")
+date: substring(//span[@class="meta"], 11)
+body: //article
+strip: //h2
+strip: //h1
 strip: //div[@id="callsToAction"]
 test_url: http://boagworld.com/working-in-web-design/dealing-with-the-dickheads/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9169e8f..4f39661
@@ -1,11 +1,11 @@
-# This is far from perfect, but so is BoingBoing's markup\r
-title: //h2[@class="headline"]\r
-single_page_link: //h2[@class="headline"]/a\r
-#date: //p[@class="byline"]\r
-body: //div[@class="post"]\r
-\r
-strip_id_or_class: shareMe\r
-strip_id_or_class: authorbox\r
-strip_id_or_class: byline\r
+# This is far from perfect, but so is BoingBoing's markup
+title: //h2[@class="headline"]
+single_page_link: //h2[@class="headline"]/a
+#date: //p[@class="byline"]
+body: //div[@class="post"]
+
+strip_id_or_class: shareMe
+strip_id_or_class: authorbox
+strip_id_or_class: byline
 
 test_url: http://boingboing.net/2011/10/23/understanding-the-hyperrich-through-the-lens-of-tomorrows-history.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4cc4904..3616bbf
@@ -1,3 +1,3 @@
-title: //h2[@class='entry-title']\r
+title: //h2[@class='entry-title']
 body: //div[@class='entry-content']
 test_url: http://boldizsar.palotas.eu/blog/?p=1394
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8b95856..fe2d2cb
@@ -1,6 +1,6 @@
-body: //span[@property='v:description']\r
-date: //span[@property='v:dtreviewed']\r
-author: //span[@property='v:reviewer']\r
-prune: no\r
+body: //span[@property='v:description']
+date: //span[@property='v:dtreviewed']
+author: //span[@property='v:reviewer']
+prune: no
 
 test_url: http://book.douban.com/review/2422662/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 331f415..03b6003
@@ -1,19 +1,19 @@
-#metadata\r
-title://div[@class = 'Topper']/h1\r
-author://div[@class = 'Topper']/h3\r
-date://div[@class = 'Topper']/h6\r
-body://div[@class = 'Core']\r
-\r
-\r
-\r
-# clean up\r
-strip://div[@class = 'Topper']/h1\r
-strip://div[@class = 'Topper']/h3\r
-strip://div[@class = 'Topper']/h4\r
-strip://div[@class = 'Topper']/h5\r
-strip://div[@class = 'Topper']/h6\r
-strip://br[@clear = 'all']\r
-strip://div[@class = 'adCore']\r
-strip://div[@class = 'BookR']\r
+#metadata
+title://div[@class = 'Topper']/h1
+author://div[@class = 'Topper']/h3
+date://div[@class = 'Topper']/h6
+body://div[@class = 'Core']
+
+
+
+# clean up
+strip://div[@class = 'Topper']/h1
+strip://div[@class = 'Topper']/h3
+strip://div[@class = 'Topper']/h4
+strip://div[@class = 'Topper']/h5
+strip://div[@class = 'Topper']/h6
+strip://br[@clear = 'all']
+strip://div[@class = 'adCore']
+strip://div[@class = 'BookR']
 strip://div[@class = 'InfoBox']
 test_url: http://bookforum.com/inprint/018_04/8595
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 190738d..b4e116f
@@ -1,7 +1,7 @@
-title://h1\r
-author://div[@class="meta"]/span/a\r
-date://div[@class="date"]\r
-body://div[@class="content article"]\r
-strip://div[@class="content article"]/h1\r
+title://h1
+author://div[@class="meta"]/span/a
+date://div[@class="date"]
+body://div[@class="content article"]
+strip://div[@class="content article"]/h1
 
 test_url: http://borderhouseblog.com/?p=7832
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d3e6f43..4c74a34
@@ -1,16 +1,16 @@
-# NOTE:  If testing this configuration yields bad results, including junk text like "Try BostonGlobe.com today" and "THIS STORY APPEARED IN", please replace the Test URL with a current-day headline link from bostonglobe.com.\r
-\r
-title: //div[@class="header"]/h1\r
-author: substring-after(//div[@class="byline"]/h2[@class="author"],"By ")\r
-date: //div[@class="byline"]/p[last()]\r
-body: //div[@class="article-body"]\r
-\r
-strip_id_or_class: aside\r
-strip_id_or_class: promo\r
-strip_id_or_class: skip-nav\r
-strip_id_or_class: article-more\r
-strip_id_or_class: article-bar\r
-\r
-# This removes image captions.  If the parser starts saving images from bostonglobe.com (currently, it does not), then this directive should be removed.\r
+# NOTE:  If testing this configuration yields bad results, including junk text like "Try BostonGlobe.com today" and "THIS STORY APPEARED IN", please replace the Test URL with a current-day headline link from bostonglobe.com.
+
+title: //div[@class="header"]/h1
+author: substring-after(//div[@class="byline"]/h2[@class="author"],"By ")
+date: //div[@class="byline"]/p[last()]
+body: //div[@class="article-body"]
+
+strip_id_or_class: aside
+strip_id_or_class: promo
+strip_id_or_class: skip-nav
+strip_id_or_class: article-more
+strip_id_or_class: article-bar
+
+# This removes image captions.  If the parser starts saving images from bostonglobe.com (currently, it does not), then this directive should be removed.
 strip_id_or_class: figure
 test_url: http://bostonglobe.com/news/nation/2012/03/17/illinois-primary-could-pivotal/PsDzFZqvhEYyXbOcF9FOkO/story.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6856701..64e04a1
@@ -1,15 +1,15 @@
-#basics\r
-title://h3[@class = 'article_title']\r
-date://span[@class = 'article_date']\r
-body://div[@id = 'center_column_article']\r
-#correct, but author not being picked up in preview\r
-author://span[@class = 'article_author']\r
-\r
-#strips basics from article\r
-strip_id_or_class:article_title\r
-strip_id_or_class:article_date\r
-strip_id_or_class:article_author\r
-\r
-#strips pull quotes\r
+#basics
+title://h3[@class = 'article_title']
+date://span[@class = 'article_date']
+body://div[@id = 'center_column_article']
+#correct, but author not being picked up in preview
+author://span[@class = 'article_author']
+
+#strips basics from article
+strip_id_or_class:article_title
+strip_id_or_class:article_date
+strip_id_or_class:article_author
+
+#strips pull quotes
 strip_id_or_class:pull_quote
 test_url: http://www.bostonreview.net/BR36.4/megan_pugh_agnes_de_mille_dance.php
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index bfc3f3d..a836e1e
@@ -1,5 +1,5 @@
-title: substring-before(//title, '|')\r
-body: //div[@class="entry"]\r
-# Remove the author's picture\r
+title: substring-before(//title, '|')
+body: //div[@class="entry"]
+# Remove the author's picture
 strip: //div[@class="entry"]/a[1]
 test_url: http://www.boundlessline.org/2011/06/the-nyts-on-gender-over-the-weekend.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bowdoinorient.com.txt b/inc/3rdparty/site_config/standard/bowdoinorient.com.txt
new file mode 100755 (executable)
index 0000000..932143d
--- /dev/null
@@ -0,0 +1,6 @@
+title: //*[@class='articletitle']
+body: //*[(@id='articlebody')]
+date: //*[(@class='articledate')]
+author: //*[(@class='articleauthor')]
+autodetect_next_page: no
+test_url: http://bowdoinorient.com/article/8045
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 94b0f56..9705f62
@@ -1,10 +1,10 @@
-title: //div[@class="standard"]/h1\r
-author: string("BrainFacts.org")\r
-date: //div[@class="meta"]/strong\r
-\r
-strip: //p[@class="skip"]\r
-strip: //div[@class="meta"]\r
-strip: //div[@class="standard"]/h1\r
-strip: //div[@class="modal"]\r
+title: //div[@class="standard"]/h1
+author: string("BrainFacts.org")
+date: //div[@class="meta"]/strong
+
+strip: //p[@class="skip"]
+strip: //div[@class="meta"]
+strip: //div[@class="standard"]/h1
+strip: //div[@class="modal"]
 strip: //div[@class="columnRight"]
 test_url: http://brainfacts.org/diseases-disorders/childhood-disorders/articles/2011/autism-the-pervasive-developmental-disorder/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3753ce6..36aa2ef
@@ -1,7 +1,7 @@
-# set body\r
-body: //div[@id='theContent']\r
-\r
-# set title\r
-title: //div[@id='theContent']/h3\r
+# set body
+body: //div[@id='theContent']
+
+# set title
+title: //div[@id='theContent']/h3
 strip: //div[@id='theContent']/h3
 test_url: http://www.brandeins.de/archiv/magazin/gegessen-wird-immer/artikel/hunger.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 1950484..fc02053
@@ -1,3 +1,3 @@
-date://h2[@class="date-header"]\r
+date://h2[@class="date-header"]
 body://div[@class="entry-content"]
 test_url: http://www.brandingstrategyinsider.com/2011/12/top-twelve-branding-keys-for-2012.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/brasil.elpais.com.txt b/inc/3rdparty/site_config/standard/brasil.elpais.com.txt
new file mode 100755 (executable)
index 0000000..0b8feb6
--- /dev/null
@@ -0,0 +1,23 @@
+title: //meta[@name='DC.title']/@content
+title: //div[contains(@class, 'cabecera_noticia')]//h1
+date: //meta[@name='DC.date']/@content
+date: //meta[@name='date']/@content
+body: //div[@class='columna_texto']
+body: //div[@id='cuerpo_noticia']
+body: //div[@class='estructura_2col_1zq']//div[@class='margen_n']
+
+prune: no
+
+strip_id_or_class: disposicion_vertical
+strip_id_or_class: ampliar_foto
+strip_id_or_class: utilidades
+strip_id_or_class: info_relacionada
+strip_id_or_class: m-kiosko
+strip_id_or_class: info_complementa
+
+strip: //p[@class='nota_pie']
+strip: //div[starts-with(@id, 'sumario') and contains(., 'más información')]
+strip: //div[@id='coment' or @id='foros_not']
+
+test_url: http://elpais.com/elpais/2012/02/06/gente/1328526783_491687.html
+test_url: http://www.elpais.com/articulo/cultura/mano/retrato/materia/elpepicul/20120207elpepicul_2/Tes
old mode 100644 (file)
new mode 100755 (executable)
index f6f7377..55da178
@@ -1,5 +1,5 @@
-body: //div[@class='post full']\r
-title: //h1\r
-author: substring-after(//title, '- ')\r
+body: //div[@class='post full']
+title: //h1
+author: substring-after(//title, '- ')
 date: //span[@class='date']
 test_url: http://brettterpstra.com/byword-for-ios/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9f4fc4e..17a4760
@@ -1,13 +1,13 @@
-title: //div[@id='contentheader']/h1\r
-author: //p[@class='attribution']/span[@class='author']/*\r
-# Is there a way to pull multiple authors? My XPath here is just grabbing the first\r
-\r
-date: /html/head/meta[@name="date"]/@content\r
-body: //div[@class='main-content']\r
-\r
-strip: //p[@class='byline']\r
-strip: //div[@class='img-gallery']\r
-strip: //div[@class='callout']\r
-strip: //div[@class='add-your-view']\r
+title: //div[@id='contentheader']/h1
+author: //p[@class='attribution']/span[@class='author']/*
+# Is there a way to pull multiple authors? My XPath here is just grabbing the first
+
+date: /html/head/meta[@name="date"]/@content
+body: //div[@class='main-content']
+
+strip: //p[@class='byline']
+strip: //div[@class='img-gallery']
+strip: //div[@class='callout']
+strip: //div[@class='add-your-view']
 convert_double_br_tags: yes
 test_url: http://www.brookings.edu/opinions/2011/1018_cyberattack_libya_goldsmith.aspx
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 71cafcd..d33d7d4
@@ -1,6 +1,6 @@
-title: //h1\r
-body: //div[@class='article']\r
-body: //div[@class='post']\r
-date: //*[@id='single']/span\r
-prune: no\r
+title: //h1
+body: //div[@class='article']
+body: //div[@class='post']
+date: //*[@id='single']/span
+prune: no
 test_url: http://brooksreview.net/2011/11/readability-agency/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bt.no.txt b/inc/3rdparty/site_config/standard/bt.no.txt
new file mode 100755 (executable)
index 0000000..200c2e4
--- /dev/null
@@ -0,0 +1,12 @@
+title: //h1[contains(@class,'articleTitle')]
+author: //span[@itemprop='name']
+date: //time[@class='published']
+body: //div[contains(@class,'bodyText')]
+
+strip_id_or_class: 'pull1'
+strip_id_or_class: 'relationArticle'
+strip: //span[@class='quote']
+
+# strip h2 if at end of article (typically a request for comments)
+strip: //div[contains(@class,'bodyText')]/node()[last()-1]/self::h2
+test_url: http://www.bt.no/meninger/debatt/Typisk-norsk-a-vare-god-nok-2884108.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/buffed.de.txt b/inc/3rdparty/site_config/standard/buffed.de.txt
new file mode 100755 (executable)
index 0000000..3dd36ce
--- /dev/null
@@ -0,0 +1,14 @@
+date: //meta[@itemProp='datePublished']/@content
+body: //div[@class='intro' or contains(@class, 'article_text')]
+prune: no
+strip_id_or_class: embedcode
+strip_id_or_class: EmbedSwitch
+strip_id_or_class: EmbedText
+strip_id_or_class: bildergalerie
+strip_id_or_class: subline_seohour_image
+strip_id_or_class: ova-player
+strip_id_or_class: jcarouseloutput
+strip_id_or_class: cbox_embedded
+
+test_url: http://www.buffed.de/SWTOR-Star-Wars-The-Old-Republic-PC-218697/News/SWTOR-Ab-Patch-24-Lore-Klamotten-faerben-1090051/
+test_url: http://www.buffed.de/feed.cfm?menu_alias=home
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a75fa04..f0fd08d
@@ -1,8 +1,8 @@
-title: //h1\r
-author: //h2/a\r
-date: substring-after(//h2, '|')\r
-strip_id_or_class: 'attachment'\r
-strip: //h3\r
-\r
+title: //h1
+author: //h2/a
+date: substring-after(//h2, '|')
+strip_id_or_class: 'attachment'
+strip: //h3
+
 body: //div[@class='entry']
 test_url: http://buquad.com/2012/04/09/paul-ryan/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/business2community.com.txt b/inc/3rdparty/site_config/standard/business2community.com.txt
new file mode 100755 (executable)
index 0000000..0dcc7ff
--- /dev/null
@@ -0,0 +1,5 @@
+date: substring-after(//p[@class='byline'],'Published')
+
+strip: //div[@class='article-meta']
+
+test_url: http://www.business2community.com/social-media/funky-ways-to-print-instagram-photos-0485340
old mode 100644 (file)
new mode 100755 (executable)
index c773db8..39eb742
@@ -1,12 +1,16 @@
-title://div[@class="sl-layout-post"]/h1\r
-body: //div[contains(@class, 'post-content') or contains(@class, 'KonaBody')]\r
-strip: //div[contains(@class, "post-sidebar")]\r
-strip: //div[@id='related-links']\r
-author://div[@class="byline"]/a\r
-date://div[@class="byline"]/span[@class="date"]\r
-prune: no\r
-\r
-strip://*[contains(@class,'sponsored-text')]\r
-strip: //div[@id='post_footer']\r
-\r
-test_url: http://www.businessinsider.com/microsoft-just-put-one-of-its-hardcore-technical-geniuses-on-xbox-2012-1
\ No newline at end of file
+title://div[@class="sl-layout-post"]/h1
+body: //div[contains(@class, 'post-content') or contains(@class, 'slide-module') or contains(@class, 'KonaBody')]
+strip: //div[contains(@class, "post-sidebar")]
+strip: //div[@id='related-links']
+strip: //div[@class='related-links-container']
+strip: //p[@class='source']
+author://div[@class="byline"]/a
+date://div[@class="byline"]/span[@class="date"]
+prune: no
+
+single_page_link: //a[contains(text(), 'View as one page')]
+
+strip://*[contains(@class,'sponsored-text')]
+strip: //div[@id='post_footer']
+
+test_url: http://www.businessinsider.com/microsoft-just-put-one-of-its-hardcore-technical-geniuses-on-xbox-2012-1
old mode 100644 (file)
new mode 100755 (executable)
index 714cfc9..6502b8e
@@ -1,12 +1,12 @@
-body: //div[@id='article_detail']\r
-title: //meta[@property='og:title']/@content\r
-date: //div[@id='date_com_art']//a[@class='date']\r
-author: //div[@id='article_detail']//font[@class='auteur']\r
-\r
-strip_id_or_class: porte_titre_theme\r
-strip_id_or_class: cont_param\r
-strip_id_or_class: date_com_art\r
-\r
-prune: no\r
-\r
+body: //div[@id='article_detail']
+title: //meta[@property='og:title']/@content
+date: //div[@id='date_com_art']//a[@class='date']
+author: //div[@id='article_detail']//font[@class='auteur']
+
+strip_id_or_class: porte_titre_theme
+strip_id_or_class: cont_param
+strip_id_or_class: date_com_art
+
+prune: no
+
 test_url: http://www.businessnews.com.tn/details_article.php?a=31073&t=522&lang=fr&temp=1
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7b3d063..0308559
@@ -1,30 +1,30 @@
-# story has several pages, should be detected\r
-body: //div[@id='storyBody']\r
-body: //div[@id='article_body']\r
-body: //div[@id='story_body']\r
-\r
-title://h1[@id='article_headline']\r
-\r
-# article author\r
-author: //p[@class='author']/a\r
-# story author(s)\r
-author: substring-after(//p[@class='byline'], 'By ')\r
-\r
-# article date\r
-date: //span[@class='published_date']\r
-# story date\r
-date: //span[@class='date']\r
-\r
-date: substring-after(//div[contains(@class,'attributor')],'on')\r
-strip_id_or_class: inset\r
-strip: //p/span[@class='photoCredit']\r
-strip: //h1\r
-\r
-strip_id_or_class: page_count\r
-strip_id_or_class: tools\r
-strip_id_or_class: pagination\r
-\r
-single_page_link: //li[@id='stPrint']/a\r
-\r
-test_url: http://www.businessweek.com/magazine/buyback-insurance-a-good-deal-for-retailers-07282011.html\r
+# story has several pages, should be detected
+body: //div[@id='storyBody']
+body: //div[@id='article_body']
+body: //div[@id='story_body']
+
+title://h1[@id='article_headline']
+
+# article author
+author: //p[@class='author']/a
+# story author(s)
+author: substring-after(//p[@class='byline'], 'By ')
+
+# article date
+date: //span[@class='published_date']
+# story date
+date: //span[@class='date']
+
+date: substring-after(//div[contains(@class,'attributor')],'on')
+strip_id_or_class: inset
+strip: //p/span[@class='photoCredit']
+strip: //h1
+
+strip_id_or_class: page_count
+strip_id_or_class: tools
+strip_id_or_class: pagination
+
+single_page_link: //li[@id='stPrint']/a
+
+test_url: http://www.businessweek.com/magazine/buyback-insurance-a-good-deal-for-retailers-07282011.html
 test_url: http://www.businessweek.com/articles/2012-06-06/american-pain-the-largest-u-dot-s-dot-pill-mills-rise-and-fall
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6df8bc4..97dddae
@@ -1,15 +1,15 @@
-# Creator: Greg Leuch <greg@...>\r
-\r
-# It can be messy.\r
-tidy:no\r
-\r
-# The basic template.\r
-title: //h1[@data-print='title']\r
-author: //a[@data-print='author']\r
-date: //time[@data-print='date']\r
-body: //div[@data-print='body']\r
-body: //section[@data-print='body']\r
-\r
-# For various things...\r
+# Creator: Greg Leuch <greg@...>
+
+# It can be messy.
+tidy:no
+
+# The basic template.
+title: //h1[@data-print='title']
+author: //a[@data-print='author']
+date: //time[@data-print='date']
+body: //div[@data-print='body']
+body: //section[@data-print='body']
+
+# For various things...
 strip: *[@data-print="ignore"]
 test_url: http://www.buzzfeed.com/hgrant/35-reasons-why-dogs-hate-the-holidays
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0abb643..63c8213
@@ -1,6 +1,6 @@
-title: //h1\r
-author: //a[contains(@href, '/author/')]\r
-date: //*[@class='post-date']\r
-strip: //*[@class='post-date']\r
+title: //h1
+author: //a[contains(@href, '/author/')]
+date: //*[@class='post-date']
+strip: //*[@class='post-date']
 strip: //h1
 test_url: http://bygonebureau.com/2011/06/20/an-existential-psychoanalysis/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cable.co.uk.txt b/inc/3rdparty/site_config/standard/cable.co.uk.txt
new file mode 100755 (executable)
index 0000000..435bf3b
--- /dev/null
@@ -0,0 +1,11 @@
+title: //div[@class='page-content']//h1
+body: //div[@class='page-content']
+strip_id_or_class: editorial-bar-top
+strip_id_or_class: social-bottom
+strip_id_or_class: comment-form
+strip_id_or_class: pc-why
+
+prune: no
+tidy: no
+
+test_url: http://www.cable.co.uk/news/bt-vision-unveils-interactive-guide-application-800734218/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3adc7a3..49f3430
@@ -1,8 +1,8 @@
-title: //h1[@class='producttabbed-title']\r
-body: //div[@class='postTabs_divs postTabs_curr_div']\r
-strip: //div[@class='ratingblock2']\r
-strip: //p[@id='breadcrumbs']\r
-strip: //div[@style='display: none']\r
-\r
+title: //h1[@class='producttabbed-title']
+body: //div[@class='postTabs_divs postTabs_curr_div']
+strip: //div[@class='ratingblock2']
+strip: //p[@id='breadcrumbs']
+strip: //div[@style='display: none']
+
 
 test_url: http://www.cardboardconnection.com/2012-topps-archives-baseball-cards
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7ba1ed7..5ea302e
@@ -1,6 +1,6 @@
-title: //h2\r
-body: //div[@class='entry']\r
-\r
-prune: no\r
+title: //h2
+body: //div[@class='entry']
+
+prune: no
 # otherwise the footnotes are removed
 test_url: http://carpeaqua.com/2011/03/27/the-intersection-of-power-and-portability/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cars.com.txt b/inc/3rdparty/site_config/standard/cars.com.txt
new file mode 100755 (executable)
index 0000000..71c5c05
--- /dev/null
@@ -0,0 +1,7 @@
+title: //div[contains(@class, 'basicInfo')]//h1
+
+body: //img[@id='chosenPhotoIMG'] | //div[@id='aboutThisVehicleBox']
+
+prune: no
+
+test_url: http://www.cars.com/go/search/detail.jsp?listingId=115364779
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8908292..2cd197f
@@ -1,7 +1,7 @@
-body: //div[@class='article']\r
-strip: //div[@class='revhistory']\r
-strip: //div[@class='toc']\r
-tidy: no\r
-prune: no\r
+body: //div[@class='article']
+strip: //div[@class='revhistory']
+strip: //div[@class='toc']
+tidy: no
+prune: no
 
 test_url: http://catb.org/~esr/faqs/smart-questions.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 2530510..ba5faf3
@@ -1,5 +1,5 @@
-title: //div[contains(@class, 'headline')]/h1\r
-author: //h5[contains(@class, 'byline')]\r
-date: substring-after(//h4[contains(@class, 'posted')], 'Posted: ')\r
+title: //div[contains(@class, 'headline')]/h1
+author: //h5[contains(@class, 'byline')]
+date: substring-after(//h4[contains(@class, 'posted')], 'Posted: ')
 body: //div[@id="storyboard"]
 test_url: http://www.cbc.ca/news/world/story/2012/01/16/cruise-ship-monday.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cbn.com.txt b/inc/3rdparty/site_config/standard/cbn.com.txt
new file mode 100755 (executable)
index 0000000..de8d883
--- /dev/null
@@ -0,0 +1,8 @@
+body: //div[contains(@class, 'articleText')]
+date: //div[contains(@class, 'articleDate')]
+author: //a[contains(@id, 'articleDetails_lnkByLine')]
+prune: no
+
+test_url: http://www.cbn.com/cbnnews/world/2013/June/Chilly-G-8-Obama-Putin-Agree-to-Disagree-on-Syria/
+test_url: http://www.cbn.com/cbnnews/world/2013/June/UK-Agency-Accused-of-Hacking-Foreign-Diplomats/
+test_url: http://www.cbn.com/cbnnews/feed/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4ba3da1..04d2023
@@ -1,14 +1,15 @@
-date: //meta[@name="published"]/@content\r
-date: //div[@class="timeLine"]\r
-title: //div[@id='contentBody']//h1\r
-author: //dl[@class="storyBlogByline"]/dd/a\r
-body: //div[@id='storyMediaBox'] | //div[contains(@class, 'storyText')]\r
-\r
-# Content Pruning\r
-strip: //div[@class="scrollingArrows"]\r
-strip: //div[@class="timeLine"]\r
-strip: //dl[@class="storyBlogByline"]\r
-\r
-prune: no\r
-\r
-test_url: http://www.cbsnews.com/8301-201_162-57366361/rescued-americans-dad-proud-of-the-u.s/
\ No newline at end of file
+date: //meta[@name="published"]/@content
+date: //div[@class="timeLine"]
+title: //div[@id='contentBody']//h1
+author: //dl[@class="storyBlogByline"]/dd/a
+body: //div[@id='storyMediaBox'] | //div[contains(@class, 'storyText')]
+
+# Content Pruning
+strip: //div[@class="scrollingArrows"]
+strip: //div[@class="timeLine"]
+strip: //dl[@class="storyBlogByline"]
+strip: //span[@class='image-credit']
+
+prune: no
+
+test_url: http://www.cbsnews.com/8301-201_162-57366361/rescued-americans-dad-proud-of-the-u.s/
diff --git a/inc/3rdparty/site_config/standard/cedarrepublican.com.txt b/inc/3rdparty/site_config/standard/cedarrepublican.com.txt
new file mode 100755 (executable)
index 0000000..42faa52
--- /dev/null
@@ -0,0 +1,2 @@
+body: //div[@class='frame']//img[@class='horizontal'] | //div[@class='content']
+test_url: http://cedarrepublican.com/online_features/gift_ideas/sending-mother-s-day-flowers-how-to-be-sure-they/article_b69af9b8-1f05-5352-8621-16ce007e5623.html
old mode 100644 (file)
new mode 100755 (executable)
index ea0df2a..d00d65d
@@ -1,10 +1,10 @@
-title: //*[@id='Content']/span[1]\r
-author: substring-after(substring-before(//*[@id='Content']/span[2], ')'), '(')\r
-date: substring-before(substring-after(//*[@id='Content']/span[2], 'Updated: '), 'Counter')\r
-\r
-strip: //*[@id='Content']/span[1]\r
-strip: //*[@id='Content']/span[2]\r
-\r
-body: //*[@id='Content']\r
+title: //*[@id='Content']/span[1]
+author: substring-after(substring-before(//*[@id='Content']/span[2], ')'), '(')
+date: substring-before(substring-after(//*[@id='Content']/span[2], 'Updated: '), 'Counter')
+
+strip: //*[@id='Content']/span[1]
+strip: //*[@id='Content']/span[2]
+
+body: //*[@id='Content']
 
 test_url: http://www.chinamining.org/News/2011-07-22/1311319069d48087.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 1d29410..3144053
@@ -1,5 +1,5 @@
-title: //div[@class='title']\r
-author: //div[@class='author']\r
-prune: no\r
-\r
+title: //div[@class='title']
+author: //div[@class='author']
+prune: no
+
 test_url: http://www.chomsky.info/onchomsky/2002----.htm
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/chrisltd.com.txt b/inc/3rdparty/site_config/standard/chrisltd.com.txt
new file mode 100755 (executable)
index 0000000..86d0f5d
--- /dev/null
@@ -0,0 +1,6 @@
+title: //header/h1/b[contains(@class, 'title')]
+author: substring-after(//article/header/div, 'By ')
+date: //header/h1/span[contains(@class, 'date')]
+body: //div[@id='main']/article
+strip: //header
+test_url: http://chrisltd.com/blog/2012/03/fix-widows-indesign/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 44288a4..86be14c
@@ -1,13 +1,13 @@
-title://div[@class='title']\r
-author://div[@class='byline']/b\r
-date:substring-after(//div[@class='byline'], 'posted')\r
-body://div[@id='body']\r
-wrap_in(h2)://span[@class='subhead']\r
-wrap_in(i)://p[@class='bio']\r
-wrap_in(i)://p[@class='copyright']\r
-strip://div[@class='title']\r
-strip://div[@class='deck']\r
-strip://div[@class='byline']\r
-strip://div[@class='copyright']\r
+title://div[@class='title']
+author://div[@class='byline']/b
+date:substring-after(//div[@class='byline'], 'posted')
+body://div[@id='body']
+wrap_in(h2)://span[@class='subhead']
+wrap_in(i)://p[@class='bio']
+wrap_in(i)://p[@class='copyright']
+strip://div[@class='title']
+strip://div[@class='deck']
+strip://div[@class='byline']
+strip://div[@class='copyright']
 strip://br
 test_url: http://www.christianitytoday.com/ct/2012/aprilweb-only/my-god-forsaken-me.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7f089c5..fb5f342
@@ -1,5 +1,5 @@
-title: //h1[@class="entry-title"]\r
-author: //*[@class="author vcard fn"]\r
-date: //*[@class="published"]\r
+title: //h1[@class="entry-title"]
+author: //*[@class="author vcard fn"]
+date: //*[@class="published"]
 body: //div[(@class = "dd_content_wrap")]
 test_url: http://christianpf.com/do-ibuys-lead-to-more-buying/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 5c5889a..b3c7651
@@ -1,6 +1,6 @@
-tidy: no\r
-prune: no\r
-date: //article//time[@pubdate]\r
-title: //article/header/h2\r
+tidy: no
+prune: no
+date: //article//time[@pubdate]
+title: //article/header/h2
 body: //article
 test_url: http://www.christies.com/LotFinder/custom/lot_details_MultiLanguage.aspx?from=salesummary&intObjectID=5556662&sid=e536ed1a-b763-41c4-afcf-c94815ec6eee&LID=3
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d4cc858..5a1d043
@@ -1,9 +1,9 @@
-body: //pre[@id='cx-desc-text']\r
-body: //div[contains(@class, 'overview-tab-right-bar-info')]\r
-title: //h1[contains(@class, 'detail-dialog-title')]\r
-tidy: no\r
-prune: no\r
-replace_string(<noscript>): <div>\r
-replace_string(</noscript>): </div>\r
+body: //pre[@id='cx-desc-text']
+body: //div[contains(@class, 'overview-tab-right-bar-info')]
+title: //h1[contains(@class, 'detail-dialog-title')]
+tidy: no
+prune: no
+replace_string(<noscript>): <div>
+replace_string(</noscript>): </div>
 
 test_url: https://chrome.google.com/webstore/detail/pnaiinchjaonopoejhknmgjingcnaloc
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0c6c11e..e86d3ec
@@ -1,17 +1,17 @@
-title: //h1[contains(@class, "entry-title")]\r
-author: //p[contains(@class, "byline")]\r
-\r
-# blog articles (chronicle.com/blogs/*)\r
-body: //div[contains(@class, "abstract")]\r
-date: //p[contains(@class, "time")]\r
-\r
-# all (?) other articles\r
-body: //div[@id="article-body"]\r
-date: //p[contains(@class, "dateline")]\r
-\r
-# remove sidebars containing images (I assume this is desired for Instapaper)\r
-strip: //div[@id="related"]\r
-strip: //div[contains(@class, "image")]\r
-\r
+title: //h1[contains(@class, "entry-title")]
+author: //p[contains(@class, "byline")]
+
+# blog articles (chronicle.com/blogs/*)
+body: //div[contains(@class, "abstract")]
+date: //p[contains(@class, "time")]
+
+# all (?) other articles
+body: //div[@id="article-body"]
+date: //p[contains(@class, "dateline")]
+
+# remove sidebars containing images (I assume this is desired for Instapaper)
+strip: //div[@id="related"]
+strip: //div[contains(@class, "image")]
+
 # note that if you're not a Chronicle subscriber (personally or institutionally), you'll only see the first couple of paragraphs of the article, and Instapaper will display that with some crap above and below. thank goodness for that bookmarklet
 test_url: http://chronicle.com/article/In-a-Land-of-Second-Chances/128375/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ciaosamin.com.txt b/inc/3rdparty/site_config/standard/ciaosamin.com.txt
new file mode 100755 (executable)
index 0000000..02fd343
--- /dev/null
@@ -0,0 +1,4 @@
+body://div[contains(@class, 'entry-content')]
+date://h2[contains(@class, 'date-header')]
+title://h3[contains(@class, 'post-title')]
+test_url: http://www.ciaosamin.com/2013/04/how-this-happened.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b9f9a12..b891363
@@ -1,33 +1,33 @@
-# fforst@...\r
-\r
-# Use link to print article for single page view\r
-single_page_link: //a[@class="print"]\r
-\r
-# set body\r
-tidy: no\r
-body: //div[@class='artikel-content']\r
-\r
-# strip title and subtitle since we got it already\r
-strip: //div[@class='issue']\r
-strip: //div[@class='artikel-content']/h2\r
-\r
-# some authors are known and have a link, others don't\r
-author: //a[contains(@href, 'autor?')]\r
-\r
-#date\r
-date: //span[@class='article-date']\r
-\r
-# Strip author since we got him\r
-strip_id_or_class: author\r
-\r
-#strip captions\r
-strip_id_or_class: field-name-field-image-credit\r
-strip_id_or_class: field-name-field-article-image-subtitle\r
-\r
-# remove community functions\r
-strip: //div[@class='meta']\r
-strip: //div[@id='comments']\r
-\r
-# remove "continue on the next page" text\r
+# fforst@...
+
+# Use link to print article for single page view
+single_page_link: //a[@class="print"]
+
+# set body
+tidy: no
+body: //div[@class='artikel-content']
+
+# strip title and subtitle since we got it already
+strip: //div[@class='issue']
+strip: //div[@class='artikel-content']/h2
+
+# some authors are known and have a link, others don't
+author: //a[contains(@href, 'autor?')]
+
+#date
+date: //span[@class='article-date']
+
+# Strip author since we got him
+strip_id_or_class: author
+
+#strip captions
+strip_id_or_class: field-name-field-image-credit
+strip_id_or_class: field-name-field-article-image-subtitle
+
+# remove community functions
+strip: //div[@class='meta']
+strip: //div[@id='comments']
+
+# remove "continue on the next page" text
 strip: //p[text()="[SEITE]"]
 test_url: http://www.cicero.de/weltbuehne/ihre-wut-ist-global-krise-jugend-revolten-aufstaende-zelte/43049
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4d3ac80..d7e9b76
@@ -1,4 +1,4 @@
-body: //*[(@id = "articlebody")]\r
-strip_id_or_class: rotulo\r
+body: //*[(@id = "articlebody")]
+strip_id_or_class: rotulo
 
 test_url: http://ciperchile.cl/2011/04/18/las-operaciones-secretas-que-ordenaba-karadima-para-aniquilar-a-su-competencia/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a0c3ea5..df4c7cc
@@ -1,6 +1,6 @@
-body: //p[@class='subhead' or @class='attribution'] | //div[@class='article-body']\r
-prune: no\r
-\r
-single_page_link: //li[@class='print']/a\r
-\r
+body: //p[@class='subhead' or @class='attribution'] | //div[@class='article-body']
+prune: no
+
+single_page_link: //li[@class='print']/a
+
 test_url: http://www.cjr.org/behind_the_news/from_breaking_news_to_baseless.php
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/classyllama.com.txt b/inc/3rdparty/site_config/standard/classyllama.com.txt
new file mode 100755 (executable)
index 0000000..1864eee
--- /dev/null
@@ -0,0 +1,6 @@
+date: //div[@id='content']//p[contains(@class, 'date')]/span
+author: substring-after(//div[@id='content']//div[contains(@class, 'over-under-bars')]/p[last()]/text(), 'Posted by ')
+body: //div[@id='content']//div[@class='pane-content']
+strip_id_or_class: trackback-url
+strip_id_or_class: over-under-bars
+test_url: http://www.classyllama.com/content/layout-caching
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 369e88a..d5a22cc
@@ -1,6 +1,6 @@
-title://div[@class="entrytitle"]/a\r
-author:substring-after(substring-before(//div[@class="entrytime"], "|"), "By ")\r
-date:substring-before(substring-after(//div[@class="entrytime"], "|"), "- Posted")\r
-body://div[@class="entrybody"]\r
+title://div[@class="entrytitle"]/a
+author:substring-after(substring-before(//div[@class="entrytime"], "|"), "By ")
+date:substring-before(substring-after(//div[@class="entrytime"], "|"), "- Posted")
+body://div[@class="entrybody"]
 strip://div[@class="entrybody"]//p[@class="singleinfo"]
 test_url: http://clientk.com/2011/12/19/the-impact-of-more/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b356bbd..0148e54
@@ -1,11 +1,11 @@
-title: //h1\r
-author: //a[@class='auteur']\r
-body: //div[@class='editorial']\r
-next_page_link: //a[contains(text(),'Page suivante')]\r
-strip: //a[contains(text(),'Page suivante')]\r
-strip: //a[contains(text(),'Page précédente')]\r
-strip_id_or_class: slideshow\r
-\r
-prune: no\r
-\r
+title: //h1
+author: //a[@class='auteur']
+body: //div[@class='editorial']
+next_page_link: //a[contains(text(),'Page suivante')]
+strip: //a[contains(text(),'Page suivante')]
+strip: //a[contains(text(),'Page précédente')]
+strip_id_or_class: slideshow
+
+prune: no
+
 test_url: http://www.clubic.com/carte-graphique/carte-graphique-amd/radeon-hd-7770/article-478936-1-radeon-hd-7750-7770.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 2bc96d2..0b76377
@@ -1,6 +1,6 @@
-body: //div[contains(@id,'article-body')]\r
-strip://div[contains(@id,'disqus_count_block')]\r
-strip://div[contains(@id,'col-left')]\r
-strip://div[contains(@id,'col-right')]\r
+body: //div[contains(@id,'article-body')]
+strip://div[contains(@id,'disqus_count_block')]
+strip://div[contains(@id,'col-left')]
+strip://div[contains(@id,'col-right')]
 
 test_url: http://www.cmswire.com/cms/customer-experience/for-apps-and-appstores-the-singularity-is-approaching-014888.php
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cn.engadget.com.txt b/inc/3rdparty/site_config/standard/cn.engadget.com.txt
new file mode 100755 (executable)
index 0000000..63f6f7e
--- /dev/null
@@ -0,0 +1,5 @@
+title: //h2[@class="posttitle"]
+body: //div[@class="postbody"]
+prune: no
+
+test_url: http://cn.engadget.com/2013/06/29/google-play-music-all-access/
diff --git a/inc/3rdparty/site_config/standard/cn.reuters.com.txt b/inc/3rdparty/site_config/standard/cn.reuters.com.txt
new file mode 100755 (executable)
index 0000000..b387866
--- /dev/null
@@ -0,0 +1,5 @@
+title: //div[@id='maincontent']//h1
+body: //div[@id='resizeableText']
+
+test_url: http://cn.reuters.com/article/CNAnalysesNews/idCNKBS0FF0NM20140710
+test_url: http://cn.reuters.feedsportal.com/CNAnalysesNews
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 74f46ba..eac08aa
@@ -1,16 +1,16 @@
-title: //meta[@property="og:title"]/@content\r
-body: //div[contains(@class, 'postBody')]\r
-date: //div[@id='nameAndTime']/time\r
-author: //div[@id='nameAndTime']/span[@class='author']\r
-\r
-strip_id_or_class: image-credit\r
-strip_id_or_class: noAutolink\r
-strip_id_or_class: related\r
-\r
-prune: no\r
-tidy: no\r
-\r
-# early end\r
-replace_string(Download today's podcast</a>): Download today's podcast</a></div></body></html>\r
-\r
+title: //meta[@property="og:title"]/@content
+body: //div[contains(@class, 'postBody')]
+date: //div[@id='nameAndTime']/time
+author: //div[@id='nameAndTime']/span[@class='author']
+
+strip_id_or_class: image-credit
+strip_id_or_class: noAutolink
+strip_id_or_class: related
+
+prune: no
+tidy: no
+
+# early end
+replace_string(Download today's podcast</a>): Download today's podcast</a></div></body></html>
+
 test_url: http://www.cnet.com/8301-13952_1-57367607-81/the-404-981-where-the-world-is-a-vampire-podcast/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 995e2c7..6f69e4e
@@ -1,19 +1,23 @@
-title: //div[@class="cnn_storyarea"]/h1\r
-author: //div[@class="cnnByline"]/strong\r
-date: substring-after(//div[@class="cnn_strytmstmp"], 'Sun')\r
-date: substring-after(//div[@class="cnn_strytmstmp"], 'Mon')\r
-date: substring-after(//div[@class="cnn_strytmstmp"], 'Tue')\r
-date: substring-after(//div[@class="cnn_strytmstmp"], 'Wed')\r
-date: substring-after(//div[@class="cnn_strytmstmp"], 'Thu')\r
-date: substring-after(//div[@class="cnn_strytmstmp"], 'Fri')\r
-date: substring-after(//div[@class="cnn_strytmstmp"], 'Sat')\r
-strip: //div[@class="cnn_storyarea"]/h1\r
-strip_id_or_class: cnnByline\r
-strip_id_or_class: cnn_strytmstmp\r
-strip_id_or_class: cnn_strycaptiontxt\r
-strip_id_or_class: cnn_strybtntoolsbttm\r
-strip_id_or_class: cnn_strybtntools\r
-strip_id_or_class: cnn_strybtmcntnt\r
-strip_id_or_class: cnn_containerwht\r
-strip_id_or_class: cnn_stryathrtmp\r
-test_url: http://www.cnn.com/2012/05/13/us/new-york-police-policy/index.html?eref=rss_topstories
\ No newline at end of file
+body: //div[@id='cnnContentContainer']//div[contains(@class, 'cnn_strycntntlft')]
+title: //div[@class="cnn_storyarea"]/h1
+author: //div[@class="cnnByline"]/strong
+date: substring-after(//div[@class="cnn_strytmstmp"], 'Sun')
+date: substring-after(//div[@class="cnn_strytmstmp"], 'Mon')
+date: substring-after(//div[@class="cnn_strytmstmp"], 'Tue')
+date: substring-after(//div[@class="cnn_strytmstmp"], 'Wed')
+date: substring-after(//div[@class="cnn_strytmstmp"], 'Thu')
+date: substring-after(//div[@class="cnn_strytmstmp"], 'Fri')
+date: substring-after(//div[@class="cnn_strytmstmp"], 'Sat')
+strip: //div[@class="cnn_storyarea"]/h1
+strip_id_or_class: cnnByline
+strip_id_or_class: cnn_strytmstmp
+strip_id_or_class: cnn_strycaptiontxt
+strip_id_or_class: cnn_strybtntoolsbttm
+strip_id_or_class: cnn_strybtntools
+strip_id_or_class: cnn_strybtmcntnt
+strip_id_or_class: sharebar
+#strip_id_or_class: cnn_containerwht
+strip_id_or_class: cnn_stryathrtmp
+replace_string(<a name="em0"></a>): <!-- a name -->
+test_url: http://www.cnn.com/2012/05/13/us/new-york-police-policy/index.html?eref=rss_topstories
+test_url: http://rss.cnn.com/rss/edition.rss
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6a2c2b8..ac49aef
@@ -1,26 +1,26 @@
-# main sportsillustrated.com articles\r
-\r
-body: //div[@id="cnnStoryContent"]\r
-title: //div[@id="cnnStoryHeadline"]//h1\r
-author: //div[@id="cnnSubBanner"]//strong\r
-date: substring-after(//div[@id="cnnTimeStamp"], "Updated: ")\r
-date: substring-after(//div[@id="cnnTimeStamp"], "Posted: ")\r
-\r
-# kill ugly font buttons\r
-strip: //div[@id="cnnSCFontButtons"]\r
-\r
-# kill misc filler videos & etc\r
-strip: //div[@class="cnnDivideContent"]\r
-strip: //*[@class="cnnTMbox"]\r
-\r
-# si vault articles\r
-# -------------\r
-body: //div[@class="siv_artPara"]\r
-title: //div[@class="siv_artHeader"]//h1\r
-author: //div[@class="byline"]\r
-date: //div[@class="date"]\r
-\r
-next_page_link: //div[@id='cnnStoryContinue']/a\r
-strip_id_or_class: cnnstorypagination\r
-\r
+# main sportsillustrated.com articles
+
+body: //div[@id="cnnStoryContent"]
+title: //div[@id="cnnStoryHeadline"]//h1
+author: //div[@id="cnnSubBanner"]//strong
+date: substring-after(//div[@id="cnnTimeStamp"], "Updated: ")
+date: substring-after(//div[@id="cnnTimeStamp"], "Posted: ")
+
+# kill ugly font buttons
+strip: //div[@id="cnnSCFontButtons"]
+
+# kill misc filler videos & etc
+strip: //div[@class="cnnDivideContent"]
+strip: //*[@class="cnnTMbox"]
+
+# si vault articles
+# -------------
+body: //div[@class="siv_artPara"]
+title: //div[@class="siv_artHeader"]//h1
+author: //div[@class="byline"]
+date: //div[@class="date"]
+
+next_page_link: //div[@id='cnnStoryContinue']/a
+strip_id_or_class: cnnstorypagination
+
 test_url: http://cnnsi.com/2012/writers/peter_king/01/08/wild.card.round/index.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6cf72e2..83a21e1
@@ -1,10 +1,10 @@
-body: //div[@id='content']\r
-title: //div[@id='page_header']/h1\r
-\r
-strip_id_or_class: 'lineno'\r
-strip_id_or_class: 'block-toolbar-button'\r
-strip_id_or_class: 'recipe_score'\r
-strip: //div[@id='recipe_tools']\r
-strip: //div[@id='addcomment']\r
-\r
+body: //div[@id='content']
+title: //div[@id='page_header']/h1
+
+strip_id_or_class: 'lineno'
+strip_id_or_class: 'block-toolbar-button'
+strip_id_or_class: 'recipe_score'
+strip: //div[@id='recipe_tools']
+strip: //div[@id='addcomment']
+
 test_url: http://code.activestate.com/recipes/500261-named-tuples/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/code.fivefilters.org.txt b/inc/3rdparty/site_config/standard/code.fivefilters.org.txt
new file mode 100755 (executable)
index 0000000..269fb54
--- /dev/null
@@ -0,0 +1 @@
+body: //div[@id='content']
old mode 100644 (file)
new mode 100755 (executable)
index 40a1620..6e9c00a
@@ -1,5 +1,5 @@
-body: //div[@id="gc-pagecontent"]\r
-strip: //a[@class="backtotop"]\r
-prune: no\r
-\r
+body: //div[@id="gc-pagecontent"]
+strip: //a[@class="backtotop"]
+prune: no
+
 test_url: http://code.google.com/apis/analytics/docs/tracking/gaTrackingEcommerce.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/codeproject.com.txt b/inc/3rdparty/site_config/standard/codeproject.com.txt
new file mode 100755 (executable)
index 0000000..d1191ac
--- /dev/null
@@ -0,0 +1,3 @@
+body: //div[@id="contentdiv"]
+date: //span[@class="date"]
+test_url: http://www.codeproject.com/Articles/499902/Profiling-Entity-Framework-5-in-code
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9c95f10..adf6e5a
@@ -1,15 +1,15 @@
-body: //div[@class='blogbody']\r
-strip: //h3[@class='title']\r
-date: //h2[@class='date']\r
-#Should Atwood just be a literal?\r
-author: substring-before( substring-after(//div[@class='posted'], 'y'), 'V')\r
-\r
-# tim.kingman@... 2011-07-26\r
-# Prune:no to retain all-link ULs that are part of the body content like\r
-# http://www.codinghorror.com/blog/2011/07/building-a-pc-part-vii-rebooting.html\r
-# Then explicitly strip the "Posted By" and prev/next links that Prune:yes would have removed.\r
-\r
-prune: no\r
-strip: //div[@class='posted']/following-sibling::*\r
+body: //div[@class='blogbody']
+strip: //h3[@class='title']
+date: //h2[@class='date']
+#Should Atwood just be a literal?
+author: substring-before( substring-after(//div[@class='posted'], 'y'), 'V')
+
+# tim.kingman@... 2011-07-26
+# Prune:no to retain all-link ULs that are part of the body content like
+# http://www.codinghorror.com/blog/2011/07/building-a-pc-part-vii-rebooting.html
+# Then explicitly strip the "Posted By" and prev/next links that Prune:yes would have removed.
+
+prune: no
+strip: //div[@class='posted']/following-sibling::*
 strip: //div[@class='posted']
 test_url: http://www.codinghorror.com/blog/2011/07/building-a-pc-part-vii-rebooting.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9d75d64..318e6ff
@@ -1,14 +1,14 @@
-title: //h1[@class='title']\r
-author: //p[@class='byline']/a[1]\r
-date: //*[@class='date']\r
-\r
-body: //div[@class='article_body']\r
-strip: //p[@class='ca_intro']\r
-strip: //div[@id='action_bar']\r
-strip: //div[@class='below_content']\r
-strip: //div[@id='announcement']\r
-strip: //div[@id='leftovers']\r
-strip: //div[@class='form']\r
-strip: //div[@id='email_overlay']\r
+title: //h1[@class='title']
+author: //p[@class='byline']/a[1]
+date: //*[@class='date']
+
+body: //div[@class='article_body']
+strip: //p[@class='ca_intro']
+strip: //div[@id='action_bar']
+strip: //div[@class='below_content']
+strip: //div[@id='announcement']
+strip: //div[@id='leftovers']
+strip: //div[@class='form']
+strip: //div[@id='email_overlay']
 strip: //a[@class='close']
 test_url: http://www.collegehumor.com/article/6599562/how-it-happened-the-necktie
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 10fd251..c9854b4
@@ -1,8 +1,8 @@
-body: //div[@id="center"]//div[@class="node"]\r
-title: //div[@id="center"]//h2\r
-author: substring-after(//div[@id="center"]//div[@class="node"]//span[@class="submitted"], "&mdash;")\r
-date: substring-before(//div[@id="center"]//div[@class="node"]//span[@class="submitted"], "&mdash;")\r
-strip: //div[@id="center"]//h2[1]\r
-strip: //span[@class="submitted"][1]\r
+body: //div[@id="center"]//div[@class="node"]
+title: //div[@id="center"]//h2
+author: substring-after(//div[@id="center"]//div[@class="node"]//span[@class="submitted"], "&mdash;")
+date: substring-before(//div[@id="center"]//div[@class="node"]//span[@class="submitted"], "&mdash;")
+strip: //div[@id="center"]//h2[1]
+strip: //span[@class="submitted"][1]
 move_into(//div[@class="node"])://div[@class="breadcrumb"]
 test_url: http://community.service-now.com/blog/lawrenceeng/seasons-greetings-servicenow-team
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 00e6fdd..8345cf5
@@ -1,5 +1,5 @@
-strip_id_or_class:column-3\r
-strip_id_or_class:portlet-boundary\r
-strip_id_or_class:banner\r
+strip_id_or_class:column-3
+strip_id_or_class:portlet-boundary
+strip_id_or_class:banner
 
 test_url: http://www.computer.org/portal/web/buildyourcareer/careerwatch/jt19
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 2919924..5973c50
@@ -1,18 +1,18 @@
-title://h1\r
-\r
-author://div[@id="news-meta"]/a\r
-\r
-body://*[@id="main"]/div[1]\r
-\r
-strip://*[@id="main"]/div[2]\r
-strip://*[@id="main"]/div[3]\r
-strip://*[@id="page"]//footer\r
-\r
-#date: didn't manage to parse it\r
-\r
-#Images have to be stripped because the page does it with overlay\r
-strip://img\r
-\r
-#figures are not displayed in instapaper...\r
-strip://figure | //figcaption\r
+title://h1
+
+author://div[@id="news-meta"]/a
+
+body://*[@id="main"]/div[1]
+
+strip://*[@id="main"]/div[2]
+strip://*[@id="main"]/div[3]
+strip://*[@id="page"]//footer
+
+#date: didn't manage to parse it
+
+#Images have to be stripped because the page does it with overlay
+strip://img
+
+#figures are not displayed in instapaper...
+strip://figure | //figcaption
 test_url: http://www.computerbase.de/news/2012-06/verbraucherzentrale-mahnt-blizzard-fuer-diablo-3-ab/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8e1f3e1..7f20a4d
@@ -1,22 +1,22 @@
-title: //meta[@name='headline']/@content\r
-date: //meta[@name='date']/@content\r
-author: //meta[@name='author']/@content\r
-body: //div[contains(@class, 'article')]\r
-body://div[@id="article_body"]\r
-\r
-strip_id_or_class: banner\r
-strip: //noscript\r
-strip: //div[@style='width:1px;height:130px;float:right;']\r
-strip: //div[@class='storyby']\r
-strip_image_src: twitter_icon\r
-strip_image_src: rss_bug\r
-\r
-tidy: no\r
-prune: no\r
-\r
-next_page_link://div[@id="next_page"]/a\r
-\r
-single_page_link: concat('http://www.computerworld.com/s/article/print/', substring-after(//link[@rel='canonical']/@href, '/s/article/'))\r
-\r
-test_url: http://www.computerworld.com/s/article/9224348/Apple_s_new_OS_X_tightens_screws_on_some_malware\r
+title: //meta[@name='headline']/@content
+date: //meta[@name='date']/@content
+author: //meta[@name='author']/@content
+body: //div[contains(@class, 'article')]
+body://div[@id="article_body"]
+
+strip_id_or_class: banner
+strip: //noscript
+strip: //div[@style='width:1px;height:130px;float:right;']
+strip: //div[@class='storyby']
+strip_image_src: twitter_icon
+strip_image_src: rss_bug
+
+tidy: no
+prune: no
+
+next_page_link://div[@id="next_page"]/a
+
+single_page_link: concat('http://www.computerworld.com/s/article/print/', substring-after(//link[@rel='canonical']/@href, '/s/article/'))
+
+test_url: http://www.computerworld.com/s/article/9224348/Apple_s_new_OS_X_tightens_screws_on_some_malware
 test_url: http://www.computerworld.com/s/article/9227679/Windows_8_Release_Preview_Updated_but_still_uneasy
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a83f366..d819109
@@ -1,5 +1,5 @@
-strip: //div[contains(@class, 'articleAdtechAd')]\r
-title: //div[@id='article']/h1\r
-title: //div[contains(@class, 'article')]/h1\r
-body: //div[@id='articleText']\r
+strip: //div[contains(@class, 'articleAdtechAd')]
+title: //div[@id='article']/h1
+title: //div[contains(@class, 'article')]/h1
+body: //div[@id='articleText']
 test_url: http://www.computerworld.dk/art/56748/test-din-viden-med-computerworlds-store-sommerquiz?a=fp_1&i=0
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d2b289a..c3120fe
@@ -1,9 +1,9 @@
-# get author from string like "Posted by <author> on <date>"\r
-author: substring-before(substring-after(//div[@class='post']/p[@class='post-meta'], 'by'), 'on')\r
-\r
-# get date from string like "Posted by <author> on <date>"\r
-date: substring-after(//div[@class='post']/p[@class='post-meta'], 'on')\r
-\r
-# this keeps thumbnail images\r
+# get author from string like "Posted by <author> on <date>"
+author: substring-before(substring-after(//div[@class='post']/p[@class='post-meta'], 'by'), 'on')
+
+# get date from string like "Posted by <author> on <date>"
+date: substring-after(//div[@class='post']/p[@class='post-meta'], 'on')
+
+# this keeps thumbnail images
 prune: no
 test_url: http://www.contemporist.com/2011/11/02/landing-200-lamp-by-kim-hyunjoo
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9bad2c8..966cc86
@@ -1,7 +1,7 @@
-title: //div[@class='article_header']/h1\r
-body: //div[@class='article_header']/p | //div[@class='article_body']\r
-strip_id_or_class: share_this\r
-strip_id_or_class: sociable\r
-prune: no\r
-\r
+title: //div[@class='article_header']/h1
+body: //div[@class='article_header']/p | //div[@class='article_body']
+strip_id_or_class: share_this
+strip_id_or_class: sociable
+prune: no
+
 test_url: http://conversaciones.nokia.com/2011/10/07/cinco-atajos-en-el-nokia-n8/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cooper.com.txt b/inc/3rdparty/site_config/standard/cooper.com.txt
new file mode 100755 (executable)
index 0000000..a424409
--- /dev/null
@@ -0,0 +1,4 @@
+body: //*[contains(@class,'body')]
+date: //abbr[@class='published']
+
+test_url: http://www.cooper.com/journal/2012/08/2-weeks-left-to-win-your-way-to-the-woodstock-of-ux-coopers-ux-boot-camp.html/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a24374d..cf1fa93
@@ -1,7 +1,7 @@
-body: //div[@id="permalink"]/div[@class="post"]\r
-\r
-strip: //div[@id='backArrow']\r
-strip: //div[@id='fwdArrow']\r
-strip: //div[@class="post-title"]\r
+body: //div[@id="permalink"]/div[@class="post"]
+
+strip: //div[@id='backArrow']
+strip: //div[@id='fwdArrow']
+strip: //div[@class="post-title"]
 strip: //div[@class="sharing"]
 test_url: http://www.core77.com/blog/columns/why_design_education_must_change_17993.asp
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c9e9228..b6bd8be
@@ -1,6 +1,6 @@
-title: //div[@class='main']//h1[contains(@class, 'article-title')]\r
-author: //div[@class='mainauthorstyle']\r
-body: //div[@class='main']//div[@class='main-text']\r
-strip: //td[@width='140']\r
-\r
+title: //div[@class='main']//h1[contains(@class, 'article-title')]
+author: //div[@class='mainauthorstyle']
+body: //div[@class='main']//div[@class='main-text']
+strip: //td[@width='140']
+
 test_url: http://www.counterpunch.org/johnstone05172011.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d25cd05..037cd17
@@ -1,3 +1,3 @@
-title://h2\r
+title://h2
 body://div[contains(@class, 'entrytext')]
 test_url: http://www.crazybutable.com/weblog/archives/2010/07/01/house-ideas-that-worked/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 74bc6db..b5a8018
@@ -1,3 +1,3 @@
-body: //div[@class="readingtext"]\r
+body: //div[@class="readingtext"]
 title: substring-after(substring-after(//title, ':'), ':')
 test_url: http://www.crimethinc.com/texts/recentfeatures/nightmares.php
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7fa950a..61d5d6a
@@ -1,3 +1,3 @@
-author: //p[contains(@class,'author')]/a\r
+author: //p[contains(@class,'author')]/a
 date: //div[contains(@class,'date')]
 test_url: http://www.crn.de/netzwerke-tk/artikel-93103.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d4dbc5c..b482e34
@@ -1,18 +1,18 @@
-title: //h1[contains(@class, 'head')]\r
-\r
-# standard page\r
-body: //div[@id='mainColumn']//div[contains(@class, 'list-article-full')]\r
-# print page\r
-body: //div[@id='mainColumn']\r
-\r
-author: //a[contains(@class, 'ui-author')]\r
-\r
-single_page_link: //div[@class='storyToolbar']//a[contains(@href, '/print/')]\r
-\r
-strip_id_or_class: storyToolbar\r
-strip_id_or_class: promotion-tag\r
-\r
-tidy: no\r
-prune: no\r
+title: //h1[contains(@class, 'head')]
+
+# standard page
+body: //div[@id='mainColumn']//div[contains(@class, 'list-article-full')]
+# print page
+body: //div[@id='mainColumn']
+
+author: //a[contains(@class, 'ui-author')]
+
+single_page_link: //div[@class='storyToolbar']//a[contains(@href, '/print/')]
+
+strip_id_or_class: storyToolbar
+strip_id_or_class: promotion-tag
+
+tidy: no
+prune: no
 
 test_url: www.csmonitor.com/World/Middle-East/2011/1108/Imminent-Iran-nuclear-threat-A-timeline-of-warnings-since-1979/Earliest-warnings-1979-84
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 131a923..1da60b4
@@ -1,7 +1,7 @@
-title: //div[@id='csn_blogST_headline']/h1\r
-\r
-body: //div[@id='csn_blogST_main']\r
-strip_id_or_class: ipfootnotes\r
-strip: //div[@id='csn_blogST_main']/p[1]/img\r
+title: //div[@id='csn_blogST_headline']/h1
+
+body: //div[@id='csn_blogST_main']
+strip_id_or_class: ipfootnotes
+strip: //div[@id='csn_blogST_main']/p[1]/img
 strip: //div[@id='csn_blogST_sidebar']
 test_url: http://www.csnbayarea.com/blog/giants-talk/post/-?blog%2Fgiants-talk%2Fpost%2F-=&blockID=578902&feedID=5987
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0df72c3..c14a934
@@ -1,22 +1,22 @@
-# author's name is not isolated as a tag.... ugh\r
-convert_double_br_tags: yes\r
-body: //csn_blogST_main\r
-\r
-#junk above and around the article\r
-strip: /html/body/div[4]/div[3]/div/div/div/section/div/div/div/div/div/div\r
-strip: /html/body/div[4]/header\r
-strip_id_or_class: article-right-sidebar\r
-strip_id_or_class: rsn-gigya-sharebar-container\r
-strip_id_or_class: article-bottom\r
-strip_id_or_class: hider\r
-strip_id_or_class: footer\r
-strip_id_or_class: masthead\r
-strip_id_or_class: block-menu-menu-rsn-login-or-register\r
-strip_id_or_class: block-menu-menu-header-links\r
-strip_id_or_class: block-rsn-follow-bar-follow-bar\r
-strip_id_or_class: block-rsn-weather-rsn-weather-scoreboard\r
-strip_id_or_class: logo\r
-strip_id_or_class: element-invisible\r
-strip_id_or_class: site-name\r
-strip: //div[contains(@style, 'none')]\r
+# author's name is not isolated as a tag.... ugh
+convert_double_br_tags: yes
+body: //csn_blogST_main
+
+#junk above and around the article
+strip: /html/body/div[4]/div[3]/div/div/div/section/div/div/div/div/div/div
+strip: /html/body/div[4]/header
+strip_id_or_class: article-right-sidebar
+strip_id_or_class: rsn-gigya-sharebar-container
+strip_id_or_class: article-bottom
+strip_id_or_class: hider
+strip_id_or_class: footer
+strip_id_or_class: masthead
+strip_id_or_class: block-menu-menu-rsn-login-or-register
+strip_id_or_class: block-menu-menu-header-links
+strip_id_or_class: block-rsn-follow-bar-follow-bar
+strip_id_or_class: block-rsn-weather-rsn-weather-scoreboard
+strip_id_or_class: logo
+strip_id_or_class: element-invisible
+strip_id_or_class: site-name
+strip: //div[contains(@style, 'none')]
 test_url: http://www.csnphilly.com/eagles/can-stoutland-save-danny-watkins-career
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/css-tricks.com.txt b/inc/3rdparty/site_config/standard/css-tricks.com.txt
new file mode 100755 (executable)
index 0000000..3d8174a
--- /dev/null
@@ -0,0 +1,6 @@
+title://article[contains(@id, "post-")]/h1
+date://article[contains(@id, "post-")]/p[@class="time"]/time
+body://article[contains(@id, "post-")]
+strip://article[contains(@id, "post-")]/p[@class="time"]/time
+prune:yes
+test_url: http://css-tricks.com/off-canvas-menu-with-css-target/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cw.com.tw.txt b/inc/3rdparty/site_config/standard/cw.com.tw.txt
new file mode 100755 (executable)
index 0000000..6e3a91e
--- /dev/null
@@ -0,0 +1,14 @@
+author://span[contains(@class,'reporter')]
+
+date://span[contains(@class,'date')]
+
+body://div[contains(@class,'mainContaner')]
+
+strip://div[contains(@class,'mainHeaer')]
+strip://div[contains(@class,'keyW')]
+strip://div[contains(@class,'wonderful')]
+strip://div[contains(@class,'pages')]
+strip://div[contains(@class,'Topics TopicsW3')]
+
+next_page_link://li[@class='pageNext']/a[contains(.,'下一頁')]
+test_url: http://www.cw.com.tw/article/article.action?id=5032848
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4a00ef4..381446e
@@ -1,5 +1,5 @@
-single_page_link: //a\r
-tidy: no\r
-prune: no\r
+single_page_link: //a
+tidy: no
+prune: no
 
 test_url: da.feedsportal.com/c/585/f/413794/s/17037b5a/l/0L0Stelegraaf0Bnl0Cbinnenland0C10A2757860C0I0IKlacht0Itegen0Idr0B0IFrank0Iniet0I0Eontvankelijk0I0I0Bhtml0Dcid0Frss/ia1.htm
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dagogtid.no.txt b/inc/3rdparty/site_config/standard/dagogtid.no.txt
new file mode 100755 (executable)
index 0000000..1531472
--- /dev/null
@@ -0,0 +1,4 @@
+title: //span[@class = 'overskriftEkstrastor']
+author: //em/a
+
+test_url: http://dagogtid.no/nyhet.cfm?nyhetid=2414
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6101399..978ed1c
@@ -1,4 +1,4 @@
-tidy: no\r
-body: //article\r
+tidy: no
+body: //article
 
 test_url: http://www.dailydot.com/entertainment/tumblr-christopher-price-topherchris/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 124675c..6d4cb82
@@ -1,10 +1,10 @@
-body: //div[@id='article-1']//div[contains(@class, 'article-body')]\r
-title: //div[@class='meta']//a[@id='titleHref']\r
-date: //div[@class='meta']//p[@class='date']\r
-\r
-strip_id_or_class: invisible\r
-strip_id_or_class: divider-doodle\r
-\r
-prune: no\r
-\r
-test_url: http://www.dailykos.com/story/2012/01/26/1058790/-Newt-Gingrichs-campaign-admits-he-lied-during-debate-about-ABC-News-interview-with-his ex-wife
\ No newline at end of file
+body: //div[@id='article-1']//div[contains(@class, 'article-body')]
+title: //div[@class='meta']//a[@id='titleHref']
+date: //div[@class='meta']//p[@class='date']
+
+strip_id_or_class: invisible
+strip_id_or_class: divider-doodle
+
+prune: no
+
+test_url: http://www.dailykos.com/story/2012/01/26/1058790/-Newt-Gingrich-s-campaign-admits-he-lied-during-debate-about-ABC-News-interview-with-his-ex-wife
old mode 100644 (file)
new mode 100755 (executable)
index c83dbdb..cd29a4d
@@ -1,12 +1,12 @@
-body: //div[@id='js-article-text']\r
-strip: //div[@class='explore-links']\r
-strip: //div[@id='js-article-text']/br[position()=1]\r
-strip_id_or_class: print-or-mail-links\r
-strip_id_or_class: shareArticles\r
-strip_id_or_class: googleAds\r
-strip_id_or_class: digg-button\r
-strip_id_or_class: article-icon-links-container\r
-strip_id_or_class: clickToEnlarge\r
-tidy: no\r
-\r
+body: //div[@id='js-article-text']
+strip: //div[@class='explore-links']
+strip: //div[@id='js-article-text']/br[position()=1]
+strip_id_or_class: print-or-mail-links
+strip_id_or_class: shareArticles
+strip_id_or_class: googleAds
+strip_id_or_class: digg-button
+strip_id_or_class: article-icon-links-container
+strip_id_or_class: clickToEnlarge
+tidy: no
+
 test_url: http://www.dailymail.co.uk/news/article-1375423/Royal-wedding-Texan-billionaire-Joe-Albritton-invited-Prince-Charles.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dailystar.com.lb.txt b/inc/3rdparty/site_config/standard/dailystar.com.lb.txt
new file mode 100755 (executable)
index 0000000..3b15304
--- /dev/null
@@ -0,0 +1,6 @@
+title: //div[@class='ec-blog-headline']
+body: //*[@id="divDetails"]
+date: //*[@id="ctl00_ContentPlaceHolder1_tdDate"]
+author: //*[@id="ctl00_ContentPlaceHolder1_anchorAuthor"]/a
+autodetect_next_page: no
+test_url: http://dailystar.com.lb/Opinion/Columnist/2012/Oct-10/190803-americas-new-modesty-in-the-mideast.ashx#axzz2928JP5xE
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/danleech.com.txt b/inc/3rdparty/site_config/standard/danleech.com.txt
new file mode 100755 (executable)
index 0000000..1d4cec7
--- /dev/null
@@ -0,0 +1,6 @@
+tidy: no
+prune: no
+date: //article//time[@pubdate]
+title: //article/h1//span[contains(@class, 'entry-title')]
+body: //article/div[contains(@class, 'entry-content')]
+test_url: http://danleech.com/post/36822126876/simple-icons
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 96a2bc4..6066948
@@ -1,5 +1,5 @@
-autodetect_next_page: no\r
-tidy: no\r
-prune: no\r
+autodetect_next_page: no
+tidy: no
+prune: no
 body: //div[@class='NoOverflow']
 test_url: http://www.dansdata.com/gz129.htm
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dantri.com.vn.txt b/inc/3rdparty/site_config/standard/dantri.com.vn.txt
new file mode 100755 (executable)
index 0000000..f19fee7
--- /dev/null
@@ -0,0 +1,7 @@
+title: //h1[contains(@class, 'fon31 mt2')]
+body: //h2[contains(@class, 'fon33 mt1')] | //div[contains(@class, 'fon34 mt3')]
+
+prune: no
+
+test_url: http://dantri.com.vn/su-kien/chang-trai-mot-minh-dap-xe-vuot-450km-de-vieng-mo-dai-tuong-869763.htm
+test_url: http://dantri.com.vn/trangchu.rss
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index dca8ade..251cc67
@@ -1,7 +1,7 @@
-title: //div[@class="article"]/h1\r
-author: //div[@id="Sidebar"]/p/strong\r
-date: //h6[@class="dateline"]\r
-body: //div[@class="article"]\r
-strip: //h6[@class="dateline"]\r
-strip: //div[@class="article"]/h1\r
+title: //div[@class="article"]/h1
+author: //div[@id="Sidebar"]/p/strong
+date: //h6[@class="dateline"]
+body: //div[@class="article"]
+strip: //h6[@class="dateline"]
+strip: //div[@class="article"]/h1
 test_url: http://daringfireball.net/2011/10/apps_are_the_new_channels
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3534002..e9111a4
@@ -1,4 +1,4 @@
-body: //div[@id="article"]\r
-date: //p[@class="date"]\r
+body: //div[@id="article"]
+date: //p[@class="date"]
 author: //p[@class="byline"]
 test_url: http://www.datanami.com/datanami/2011-12-07/new_path_for_sap:_in_memory_computing,_predictive_analysis_converge.html?featured=top
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7d11c6e..524c4bf
@@ -1,8 +1,8 @@
-title: (//article//h2)[1]\r
-body: //article[contains(@class, 'post')]\r
-date: //time[@id='top_time']/@datetime\r
-\r
-prune: no\r
-tidy: no\r
-\r
+title: (//article//h2)[1]
+body: //article[contains(@class, 'post')]
+date: //time[@id='top_time']/@datetime
+
+prune: no
+tidy: no
+
 test_url: http://dcurt.is/predictions-txt
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/defomicron.net.txt b/inc/3rdparty/site_config/standard/defomicron.net.txt
new file mode 100755 (executable)
index 0000000..9f11258
--- /dev/null
@@ -0,0 +1,9 @@
+title: //article/h1
+author: //hgroup/h3/a
+date: //time
+body: //article
+strip: //aside
+footnotes: yes
+prune: no
+tidy: no
+test_url: https://defomicron.net/2012/09/ios-6/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 84fd4f7..c4b922e
@@ -1,4 +1,4 @@
-strip_id_or_class: banner\r
-strip_id_or_class: gamma\r
+strip_id_or_class: banner
+strip_id_or_class: gamma
 strip_id_or_class: module-list
 test_url: http://delong.typepad.com/sdj/2011/02/in-which-suresh-naidu-visits-the-new-jerusalem.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/democracynow.org.txt b/inc/3rdparty/site_config/standard/democracynow.org.txt
new file mode 100755 (executable)
index 0000000..b0050b4
--- /dev/null
@@ -0,0 +1,5 @@
+body: //div[contains(@class, 'blog_body')]
+
+prune: no
+
+test_url: http://www.democracynow.org/blog/2014/1/9/the_fbi_the_nsa_and_a
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 48722eb..07db352
@@ -1,13 +1,13 @@
-title: //div[@id='artikelHeader']/h1\r
-author: //span[@class='author']\r
-date: //span[@class='date']\r
-body: //div[@class='copytext']\r
-strip: //ul[@class='lookupLinksArtikel']\r
-\r
-strip: //div[@id='pageTop']\r
-strip: //div[@id='toolbar']\r
-strip: //div[@id='articleTools']\r
-strip: //div[@id='weiterlesen']\r
-strip: //div[@id='communityCanvas']\r
+title: //div[@id='artikelHeader']/h1
+author: //span[@class='author']
+date: //span[@class='date']
+body: //div[@class='copytext']
+strip: //ul[@class='lookupLinksArtikel']
+
+strip: //div[@id='pageTop']
+strip: //div[@id='toolbar']
+strip: //div[@id='articleTools']
+strip: //div[@id='weiterlesen']
+strip: //div[@id='communityCanvas']
 
 test_url: http://derstandard.at/1318726018343/Breitband-LTE-Was-bringt-die-neue-Mobilfunk-Generation
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6096db0..9020847
@@ -1,11 +1,11 @@
-tidy: no\r
-body: //div[@class='main']\r
-\r
-author: substring-before(substring-after(//div[@class='meta-single'], 'erstellt von '), ' am')\r
-date: substring-before(substring-after(//div[@class='meta-single'], ' am '), ' | ')\r
-\r
-strip_id_or_class: pagelink\r
-strip_id_or_class: wp-polls \r
-\r
+tidy: no
+body: //div[@class='main']
+
+author: substring-before(substring-after(//div[@class='meta-single'], 'erstellt von '), ' am')
+date: substring-before(substring-after(//div[@class='meta-single'], ' am '), ' | ')
+
+strip_id_or_class: pagelink
+strip_id_or_class: wp-polls 
+
 next_page_link: //div[@class='post-page-next']/a
 test_url: http://www.designtagebuch.de/die-gefuehlte-lesbarkeit/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a6dac5f..efa85f7
@@ -1,5 +1,5 @@
-body: (//blockquote[contains(@class, 'postcontent')])[1]\r
-body: (//div[starts-with(@id, 'post_message')])[1]\r
-\r
-prune: no\r
+body: (//blockquote[contains(@class, 'postcontent')])[1]
+body: (//div[starts-with(@id, 'post_message')])[1]
+
+prune: no
 tidy: no
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 548caba..d1d8a29
@@ -1,8 +1,8 @@
-title: //h1[@class="content-headline"]\r
-body: //div[@class="headers-container"] | //div[@class="content-container"]\r
-prune: no\r
-tidy: no\r
-\r
-single_page_link: //li[@class='utility-print']/a\r
-\r
+title: //h1[@class="content-headline"]
+body: //div[@class="headers-container"] | //div[@class="content-container"]
+prune: no
+tidy: no
+
+single_page_link: //li[@class='utility-print']/a
+
 test_url: http://www.details.com/culture-trends/critical-eye/201108/best-new-designers-innovations
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 43a8f0a..7609b72
@@ -1,3 +1,3 @@
-title: //div[@class="bodyText"]/h1\r
+title: //div[@class="bodyText"]/h1
 author: //div[@class="picture"]/a/img/@alt
 test_url: https://developers.facebook.com/blog/post/2012/03/22/developer-spotlight--foodspotting/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b960b37..6f1d4e2
@@ -1,6 +1,6 @@
-date: //h2[@class='date-header']\r
-body: //div[@class='post hentry']\r
-title: //h3\r
-strip: //div[@class='post-footer']\r
+date: //h2[@class='date-header']
+body: //div[@class='post hentry']
+title: //h3
+strip: //div[@class='post-footer']
 
 test_url: http://devlinsangle.blogspot.co.at/2012/03/difference-between-teaching-and_01.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a117202..f8b79c8
@@ -1,8 +1,8 @@
-title: //h1[@id='query_h1']\r
-body: //div[contains(@class, 'lunatext results_content')]\r
-strip_id_or_class: spl_unshd\r
-#replace_string(<div class="dicTl">): <div class="dicTl">------------------<br />\r
-\r
-prune: no\r
+title: //h1[@id='query_h1']
+body: //div[contains(@class, 'lunatext results_content')]
+strip_id_or_class: spl_unshd
+#replace_string(<div class="dicTl">): <div class="dicTl">------------------<br />
+
+prune: no
 
 test_url: http://www.wired.com/cloudline/2011/10/meet-arms-cortex-a15-the-future-of-the-ipad-and-possibly-the-macbook-air/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7e825a9..ced189c
@@ -1,6 +1,6 @@
-title: //div[@class='article']/h1\r
-date: substring-before(//p[@class='articletime'],'|')\r
-body: //div[@id='articletext']\r
-strip: //div[@class='inlineDiashow']\r
+title: //div[@class='article']/h1
+date: substring-before(//p[@class='articletime'],'|')
+body: //div[@id='articletext']
+strip: //div[@class='inlineDiashow']
 
 test_url: http://diepresse.com/home/politik/aussenpolitik/701905/TibeterProteste_Nonne-verbrennt-sich-selbst?_vl_backlink=/home/politik/index.do
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 2d2ae2c..80ce5ff
@@ -1,8 +1,8 @@
-# default parser works great\r
-# only add "author" and "next page link" reference\r
-# 2012-04-13\r
-\r
-next_page_link: //div[@class = 'pagination']/a[@class = 'next_page']\r
-\r
+# default parser works great
+# only add "author" and "next page link" reference
+# 2012-04-13
+
+next_page_link: //div[@class = 'pagination']/a[@class = 'next_page']
+
 author: //*[@class = 'author metadata']/a
 test_url: http://digiphoto.techbang.com/posts/2433--commercial-photography-communication-is-the-key-to-a-good-work
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 37192ac..18ce370
@@ -1,6 +1,6 @@
-title: //div[@class='post-title']/h1\r
-author: //a[@href='#author']\r
-body: //div[@class='post-content']\r
-strip: //div[@class='post-meta']\r
-\r
+title: //div[@class='post-title']/h1
+author: //a[@href='#author']
+body: //div[@class='post-content']
+strip: //div[@class='post-meta']
+
 test_url: http://www.digital-photography-school.com/10-ways-to-develop-yourself-photographically
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b21431d..f48bdfd
@@ -1,5 +1,5 @@
-title: //div[@class="article_header"]/h1\r
-date: //div[@class="article_pub"]/span[@class="time"]\r
-author: //div[@class="article_pub"]/span[@class="editors"]/a/text()\r
+title: //div[@class="article_header"]/h1
+date: //div[@class="article_pub"]/span[@class="time"]
+author: //div[@class="article_pub"]/span[@class="editors"]/a/text()
 body: //div[@class="article_body clear_left"]
 test_url: http://www.digitalspy.co.uk/movies/at-the-movies/a364066/top-5-super-bowl-movie-trailers-the-avengers-battleship-more.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 413e550..85cc78e
@@ -1,8 +1,11 @@
-convert_double_br_tags: yes\r
-\r
-title: substring(substring-after(//title, ':'), 1, string-length(substring-after(//title, ':')) - 10)\r
-body: //*[contains(@class, 'SB_Content')]\r
-author: string('Scott Adams')\r
-date: //*[contains(@class, 'SB_Detail')]/text()[1]\r
+#title: substring(substring-after(//title, ':'), 1, string-length(substring-after(//title, ':')) - 10)
+title: //div[contains(@class, 'SB_Title')]//a
+body: //div[contains(@class, 'STR_Image')]
+body: //*[contains(@class, 'SB_Content')]
+author: string('Scott Adams')
+date: //*[contains(@class, 'SB_Detail')]/text()[1]
 
-test_url: http://dilbert.com/blog/entry/death_by_hypnosis_or_not/
\ No newline at end of file
+
+test_url: http://dilbert.com/blog/entry/death_by_hypnosis_or_not/
+test_url: http://dilbert.com/strips/comic/2013-10-22
+test_url: http://feed.dilbert.com/dilbert/daily_strip
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9ef198c..bc315cf
@@ -1,19 +1,19 @@
-title: //div[@class='newsdetbd']\r
-body: //div[@id='innerleft'] \r
-#//p[@class = 'plnht']\r
-strip_image_src: /albums/\r
-strip: //div[@class='mrrt']\r
-prune: yes\r
-strip_id_or_class: 'fdpd'\r
-strip_id_or_class: 'epapt' \r
-strip_id_or_class: 'newsrtwd'\r
-strip_id_or_class: 'padtp'\r
-strip_id_or_class: 'newdt'\r
-strip_id_or_class: 'newdlt'\r
-strip: //div[@id='selNotes']\r
-strip_id_or_class: 'clsNotes'\r
-strip_id_or_class: 'clear'\r
-strip_id_or_class: 'cmtwrap'\r
-strip_id_or_class: 'sess'\r
+title: //div[@class='newsdetbd']
+body: //div[@id='innerleft'] 
+#//p[@class = 'plnht']
+strip_image_src: /albums/
+strip: //div[@class='mrrt']
+prune: yes
+strip_id_or_class: 'fdpd'
+strip_id_or_class: 'epapt' 
+strip_id_or_class: 'newsrtwd'
+strip_id_or_class: 'padtp'
+strip_id_or_class: 'newdt'
+strip_id_or_class: 'newdlt'
+strip: //div[@id='selNotes']
+strip_id_or_class: 'clsNotes'
+strip_id_or_class: 'clear'
+strip_id_or_class: 'cmtwrap'
+strip_id_or_class: 'sess'
 strip_id_or_class: 'parents'
 test_url: http://www.dinamalar.com/News_Detail.asp?Id=295725
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 86bb3b8..5283a0c
@@ -1,26 +1,28 @@
-# Since this element has class="clear", the Instapaper stylesheets (at least this text parser preview), will render it unreadable, with a 1px font size and line height.\r
-\r
-body: //div[@id="article-content"]\r
-\r
-\r
-# Ads\r
-strip_id_or_class: advert-space\r
-\r
-# Read more, recommend, comments etc\r
-strip_id_or_class: fbc-recommend\r
-strip_id_or_class: recommend\r
-strip_id_or_class: article-readers\r
-strip_id_or_class: article-addons\r
-strip_id_or_class: hook\r
-strip_id_or_class: right\r
-strip_id_or_class: footer\r
-\r
-# Other news\r
-strip: //div[@id="mirrors"]\r
-\r
-# Author\r
-author: //div[@id="byline"]/div/p/strong\r
-\r
-# Date\r
-date: substring(substring-after(//p[@class="published"], 'Publicerad '), 0, 11)\r
-test_url: http://www.dn.se/nyheter/varlden/landade-flygplan-mitt-i-villaomrade
\ No newline at end of file
+# Since this element has class="clear", the Instapaper stylesheets (at least this text parser preview), will render it unreadable, with a 1px font size and line height.
+
+body: //div[@id="article-content"]
+
+
+# Ads
+strip_id_or_class: advert-space
+
+# Read more, recommend, comments etc
+strip_id_or_class: fbc-recommend
+strip_id_or_class: recommend
+strip_id_or_class: article-readers
+strip_id_or_class: article-addons
+strip_id_or_class: hook
+strip_id_or_class: right
+strip_id_or_class: footer
+
+# Other news
+strip: //div[@id="mirrors"]
+
+# Author
+author: //div[@id="byline"]/div/p/strong
+
+# Date
+date: substring(substring-after(//p[@class="published"], 'Publicerad '), 0, 11)
+
+test_url: http://www.dn.se/nyheter/varlden/landade-flygplan-mitt-i-villaomrade
+test_url: http://www.dn.se/m/rss/senaste-nytt
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dobreprogramy.pl.txt b/inc/3rdparty/site_config/standard/dobreprogramy.pl.txt
new file mode 100755 (executable)
index 0000000..972293b
--- /dev/null
@@ -0,0 +1,6 @@
+title: //*[@class="news"]//h1[@class="title"]
+author: //*[@class="news"]//*[@class="newsInfo"]/a
+date: substring-before(//*[@class="news"]//*[@class="newsInfo"]/text(), ',')
+body: //*[@class="news"]//*[@class="newsContent"]
+footnotes: no
+test_url: http://www.dobreprogramy.pl/Sony-konczy-z-Foldinghome-na-PS3,Aktualnosc,36899.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9f65ea9..1c518a9
@@ -1,8 +1,8 @@
-strip: //*[(@id = "featured")]\r
-\r
-author:substring-after( //div[@class='posttitle']/h2[@class='author'],'by ')\r
-\r
-date: concat(//div[@class='month'],' ',//div[@class='day'])\r
-\r
+strip: //*[(@id = "featured")]
+
+author:substring-after( //div[@class='posttitle']/h2[@class='author'],'by ')
+
+date: concat(//div[@class='month'],' ',//div[@class='day'])
+
 #doctac doesn't provide a year, but month/day is better than nothing
 test_url: http://www.doctac.com/mac/iphone/instapaper-update-app/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 81683f0..20566ee
@@ -1,21 +1,21 @@
-# TODO: clean up the extra junk at the end of articles\r
-\r
-# general text formatting\r
-prune: no\r
-convert_double_br_tags:yes\r
-\r
-# where to find the basic metadata\r
-author://a[@class='articleauthor']\r
-date://a[starts-with(@href,'/en/search/published/')]\r
-title:substring-before(//h2[@class='title'],'&mdash;')\r
-body://div[@id='maincontainer']\r
-\r
-dissolve://div[starts-with(@id,'commentableblock')]\r
-\r
-# clean up the crap\r
-strip://div[contains(@class,'domusnetwork')]\r
-strip://div[contains(@class,'relative_wrapper')]\r
-\r
-strip://div[contains(@class,'captionsubimage')]/img[contains(@class,'arrow')]\r
+# TODO: clean up the extra junk at the end of articles
+
+# general text formatting
+prune: no
+convert_double_br_tags:yes
+
+# where to find the basic metadata
+author://a[@class='articleauthor']
+date://a[starts-with(@href,'/en/search/published/')]
+title:substring-before(//h2[@class='title'],'&mdash;')
+body://div[@id='maincontainer']
+
+dissolve://div[starts-with(@id,'commentableblock')]
+
+# clean up the crap
+strip://div[contains(@class,'domusnetwork')]
+strip://div[contains(@class,'relative_wrapper')]
+
+strip://div[contains(@class,'captionsubimage')]/img[contains(@class,'arrow')]
 wrap_in(em): //div[contains(@class,'captionsubimage')]/span
 test_url: http://www.domusweb.it/en/design/in-praise-of-lost-time/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 22907c2..0f98311
@@ -1,8 +1,8 @@
-title: //h1[@itemprop="name"]\r
-\r
-author: //div[contains(@class, 'author')]//div[contains(@class, 'name')]/a\r
-\r
-date: //div[contains(@class, 'b-info')]//span[contains(@class, 'date')]\r
-\r
+title: //h1[@itemprop="name"]
+
+author: //div[contains(@class, 'author')]//div[contains(@class, 'name')]/a
+
+date: //div[contains(@class, 'b-info')]//span[contains(@class, 'date')]
+
 body: //div[contains(@class, 'b-typo')]
 test_url: http://dou.ua/lenta/interviews/andrej-havryuchenko/?from=sb_mostcomm
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 99d7e5d..d72a222
@@ -1,21 +1,21 @@
-# This filter is tested on:\r
-# http://www.douban.com/note/215003067/\r
-# http://www.douban.com/note/213540049/\r
-# http://www.douban.com/group/topic/31140104/\r
-\r
-title: //div[@class='note-header']/h1\r
-title: //div[@id='content']/h1\r
-\r
-author: //div[@class='info']/ul/li/a\r
-author: //h3/span/a\r
-\r
-date://div[@class='note-header']/div/span\r
-date://h3/span[contains(@class, 'color-green')]\r
-\r
-body://div[contains(@class, 'note')]\r
-body://div[contains(@class, 'topic-content')]\r
-\r
-strip://h3\r
-\r
-convert_double_br_tags: yes\r
+# This filter is tested on:
+# http://www.douban.com/note/215003067/
+# http://www.douban.com/note/213540049/
+# http://www.douban.com/group/topic/31140104/
+
+title: //div[@class='note-header']/h1
+title: //div[@id='content']/h1
+
+author: //div[@class='info']/ul/li/a
+author: //h3/span/a
+
+date://div[@class='note-header']/div/span
+date://h3/span[contains(@class, 'color-green')]
+
+body://div[contains(@class, 'note')]
+body://div[contains(@class, 'topic-content')]
+
+strip://h3
+
+convert_double_br_tags: yes
 test_url: http://www.douban.com/group/topic/31140104/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 30179a3..001c810
@@ -1,9 +1,9 @@
-# next_page_link for product review\r
-# example: http://www.dpreview.com/reviews/lytro/\r
-next_page_link: //img[@alt = 'Next page']/../@href\r
-\r
-# next_page_link for other articles\r
-# example: http://www.dpreview.com/articles/6126592906/first-impressions-using-the-fujifilm-x-pro1\r
-next_page_link: //*[@class = 'pages']/*/td[@class = 'next enabled']/a\r
+# next_page_link for product review
+# example: http://www.dpreview.com/reviews/lytro/
+next_page_link: //img[@alt = 'Next page']/../@href
+
+# next_page_link for other articles
+# example: http://www.dpreview.com/articles/6126592906/first-impressions-using-the-fujifilm-x-pro1
+next_page_link: //*[@class = 'pages']/*/td[@class = 'next enabled']/a
 single_page_link: //a[contains(.,'Print view')]
 test_url: http://www.dpreview.com/articles/6126592906/first-impressions-using-the-fujifilm-x-pro1
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7e46b0d..d8ec1ac
@@ -1,9 +1,9 @@
-title: //meta[@property='og:title']/@content\r
-author: //div[@class='articleFunctions']//a\r
-date: //meta[@name='pubdate']/@content\r
-\r
-# Can you strip elements from the body only? It is required here (`//div[@class='articleContent']/p` breaks for some reason)\r
-body: //div[@class='articleContent']\r
-\r
+title: //meta[@property='og:title']/@content
+author: //div[@class='articleFunctions']//a
+date: //meta[@name='pubdate']/@content
+
+# Can you strip elements from the body only? It is required here (`//div[@class='articleContent']/p` breaks for some reason)
+body: //div[@class='articleContent']
+
 tidy: no
 test_url: http://www.dr.dk/Nyheder/Udland/2011/10/24/150115.htm
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 659d044..4898353
@@ -1,10 +1,10 @@
-body: //div[@class='postext']\r
-\r
-strip_id_or_class: ratingblock\r
-strip_id_or_class: hreview-aggregate\r
-strip: //div[contains(@style, 'display: none;')]\r
-\r
-tidy: no\r
-prune: no\r
-\r
+body: //div[@class='postext']
+
+strip_id_or_class: ratingblock
+strip_id_or_class: hreview-aggregate
+strip: //div[contains(@style, 'display: none;')]
+
+tidy: no
+prune: no
+
 test_url: http://www.dramasonline.com/jago-pakistan-jago-7th-december-2012-ali-gul-pir/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6125ce7..d500cb8
@@ -1,12 +1,12 @@
-body: //div[@class = "description"]\r
-body: //div[@id = "post"]\r
-\r
-strip_id_or_class: vcard\r
-strip_id_or_class: journallist\r
-strip_id_or_class: infobox\r
-strip_id_or_class: terms\r
-strip_id_or_class: replieslist\r
-strip_id_or_class: communityside\r
-\r
+body: //div[@class = "description"]
+body: //div[@id = "post"]
+
+strip_id_or_class: vcard
+strip_id_or_class: journallist
+strip_id_or_class: infobox
+strip_id_or_class: terms
+strip_id_or_class: replieslist
+strip_id_or_class: communityside
+
 
 test_url: http://www.drive2.ru/cars/audi/a6/a6_c5/elysey/journal/288230376151836654/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dropbox.com.txt b/inc/3rdparty/site_config/standard/dropbox.com.txt
new file mode 100755 (executable)
index 0000000..92ae31b
--- /dev/null
@@ -0,0 +1 @@
+single_page_link: //a[@id='download_button_link']
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ffb77e4..2da3eb1
@@ -1,8 +1,8 @@
-title://h1\r
-author://div[@class="submitted"]/a\r
-date:substring-after(//div[@class="meta"],'modified: ')\r
-date:substring-after(//div[@class="submitted"],'on ')\r
-body://div[@class="node-content"]\r
-strip://div[@class="meta"]\r
+title://h1
+author://div[@class="submitted"]/a
+date:substring-after(//div[@class="meta"],'modified: ')
+date:substring-after(//div[@class="submitted"],'on ')
+body://div[@class="node-content"]
+strip://div[@class="meta"]
 strip_id_or_class:book-navigation
 test_url: http://drupal.org/node/1327354
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 418c9f6..2978797
@@ -1,11 +1,11 @@
-title: //h2/a\r
-author: substring-before(substring-after(//span[@class='byline'], 'by'), ',')\r
-date: substring-before(substring-after(//span[@class='byline'], ','), '|')\r
-body: //div[@class='entry']\r
-\r
-\r
-# strip out auction stuff at the end of posts\r
-# tidy kills the center tag, so disable it\r
-tidy: no\r
+title: //h2/a
+author: substring-before(substring-after(//span[@class='byline'], 'by'), ',')
+date: substring-before(substring-after(//span[@class='byline'], ','), '|')
+body: //div[@class='entry']
+
+
+# strip out auction stuff at the end of posts
+# tidy kills the center tag, so disable it
+tidy: no
 strip: //center//table
 test_url: http://www.dukebasketballreport.com/articles/?p=42660
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/dushumashang.com.txt b/inc/3rdparty/site_config/standard/dushumashang.com.txt
new file mode 100755 (executable)
index 0000000..6a50a77
--- /dev/null
@@ -0,0 +1,17 @@
+# This filter is tested on:
+# http://www.dushumashang.com/2389
+# http://www.dushumashang.com/2415
+# http://www.dushumashang.com/2355
+
+body://div[@class='main_content']
+#body://section[@class='entry_content fl']
+title://h2
+author://span[@class='article_author']/a
+date://span[@class='pub_date']/time
+
+strip://span[@class='article_author']
+strip://span[@class='pub_date']
+strip://div[@class='page_turn']
+strip://span[@class='source_link']/em
+wrap_in(strong)://span[@class='source_link']/a
+test_url: http://www.dushumashang.com/2355
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c816368..1a1990e
@@ -1,9 +1,9 @@
-strip://*[@id = 'blog_top_stories']\r
-strip://*[@id = 'takeover_off']\r
-strip://*[@id = 'right_gray_box']\r
-strip://*[@class = 'blog_topics']\r
-strip://*[@class = 'section_titles']\r
-\r
-author://div[@class = 'post_author_info']/a\r
+strip://*[@id = 'blog_top_stories']
+strip://*[@id = 'takeover_off']
+strip://*[@id = 'right_gray_box']
+strip://*[@class = 'blog_topics']
+strip://*[@class = 'section_titles']
+
+author://div[@class = 'post_author_info']/a
 date://div[@class = 'post_date_info']
 test_url: http://dvice.com/archives/2012/05/is-nfc-and-smar.php
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 908a1b5..89a68bc
@@ -1,8 +1,8 @@
-title: //div [@class="post contain"]/h1\r
-strip: //div [@class="post contain"]/h1\r
-body: //div [@class="post contain"]\r
-author: substring-before(//title, ':')\r
-author: substring-before(//title, ' ')\r
-\r
+title: //div [@class="post contain"]/h1
+strip: //div [@class="post contain"]/h1
+body: //div [@class="post contain"]
+author: substring-before(//title, ':')
+author: substring-before(//title, ' ')
+
 
 test_url: http://eamesinerudition.com/2012/03/hospital-numbers-are-bad-for-you
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c4c38f2..ba9d312
@@ -1,8 +1,8 @@
-title: //h1\r
-date: //div[@class="et_dateUnderTitle"]\r
-author: substring-after(//div[@class="et_authorUnderTitle"], 'By ')\r
-body: //div[@id="et_leftCol640split"]\r
-\r
-strip: //div[@id="et_leftCol640splitRight"]\r
+title: //h1
+date: //div[@class="et_dateUnderTitle"]
+author: substring-after(//div[@class="et_authorUnderTitle"], 'By ')
+body: //div[@id="et_leftCol640split"]
+
+strip: //div[@id="et_leftCol640splitRight"]
 strip: //div[@class="et_light_greybgboxlower"]
 test_url: http://eandt.theiet.org/magazine/2011/12/this-festive-waste.cfm
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d762091..36708da
@@ -1,18 +1,18 @@
-title: //div[@class='title_text']\r
-\r
-author: //div[@class='author_text']\r
-\r
-body: //div[@class='story_text']/..\r
-\r
-strip: //b\r
-\r
-strip_id_or_class: back_to_top\r
-strip_id_or_class: author_text\r
-strip_id_or_class: title_text\r
-\r
-wrap_in(center): //a\r
-\r
-dissolve: //a\r
\r
+title: //div[@class='title_text']
+
+author: //div[@class='author_text']
+
+body: //div[@class='story_text']/..
+
+strip: //b
+
+strip_id_or_class: back_to_top
+strip_id_or_class: author_text
+strip_id_or_class: title_text
+
+wrap_in(center): //a
+
+dissolve: //a
 footnotes: no
 test_url: http://www.eastoftheweb.com/short-stories/UBooks/Horl.shtml
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 5fa18ff..f17e1f7
@@ -1,5 +1,5 @@
-body: //h1[@class='it-ttl'] | //div[@id='mainImgHldr'] | //span[@id='prcIsum']\r
-\r
-strip_image_src: imgLoading_30x30.gif\r
-\r
+body: //h1[@class='it-ttl'] | //div[@id='mainImgHldr'] | //span[@id='prcIsum']
+
+strip_image_src: imgLoading_30x30.gif
+
 test_url: http://www.ebay.com/itm/BRAND-NEW-FM-Transmitter-Ca-r-Charger-iPhone-4S-4-4G-3GS-3G-2G-iPod-Touch-/190657497204
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ebafc19..729affd
@@ -1,6 +1,6 @@
-title: //h1[@class="title"]\r
-author: //div[@class="hosted"]/a\r
-date: substring-after(//div[@class="dateline"]/text(), '|')\r
-\r
+title: //h1[@class="title"]
+author: //div[@class="hosted"]/a
+date: substring-after(//div[@class="dateline"]/text(), '|')
+
 strip: //a[@class="top" and @href="#"]
 test_url: http://econlog.econlib.org/archives/2012/04/blinder_on_heal.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b59f554..936a191
@@ -1,7 +1,7 @@
-date: //div[@class="bb-md-noticia-fecha"]\r
-body: //div[@class="corpo"]\r
-dissolve: //div[@class="bb-md-noticia-extras"]\r
-strip: //strong\r
-strip_id_or_class: bb-md-noticia-foto-autor\r
+date: //div[@class="bb-md-noticia-fecha"]
+body: //div[@class="corpo"]
+dissolve: //div[@class="bb-md-noticia-extras"]
+strip: //strong
+strip_id_or_class: bb-md-noticia-foto-autor
 strip_id_or_class: bb-md-noticia-foto-bajada
 test_url: http://economia.estadao.com.br/noticias/economia,cmn-aprova-r-67-bi-em-credito-para-20-setores-da-economia,118501,0.htm
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 71dd62f..16c9ed6
@@ -1,10 +1,8 @@
-title: //div[@class='ec-blog-headline']\r
-body: //div[@class='ec-blog-body']\r
-body: //div[@class='ec-article-content clear']\r
-strip: //div[@class='related-items']\r
-date: substring-before(//p[@class='ec-article-info'], '|')\r
-prune: no\r
-\r
-autodetect_next_page: no\r
-\r
+body: //div[@class='main-content']
+date: //time[@class='date-created']
+strip: //aside
+prune: no
+
+autodetect_next_page: no
+
 test_url: http://www.economist.com/node/21528429
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 461d909..cf58581
@@ -1,13 +1,13 @@
-title: //meta[@property="og:title"]/@content\r
-body: //h2[@class='strapline'] | //article[contains(@class, 'node-article')]\r
-date: //time[@pubdate]/@datetime\r
-author: //span[@class='author-name']\r
-prune: no\r
-tidy: no\r
-strip: //footer\r
-\r
-replace_string(<p>[ pagebreak ]</p>): <!-- pagebreak -->\r
-\r
-single_page_link: //a[contains(@href, '?page=show')]\r
-\r
+title: //meta[@property="og:title"]/@content
+body: //h2[@class='strapline'] | //article[contains(@class, 'node-article')]
+date: //time[@pubdate]/@datetime
+author: //span[@class='author-name']
+prune: no
+tidy: no
+strip: //footer
+
+replace_string(<p>[ pagebreak ]</p>): <!-- pagebreak -->
+
+single_page_link: //a[contains(@href, '?page=show')]
+
 test_url: http://www.edge-online.com/features/telling-modern-warfares-story
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9980000..95805f6
@@ -1,5 +1,5 @@
-title: //div[@class='HomeLeftPannel IMGCTRL']/h2\r
-body: //div[@class='HomeLeftPannel IMGCTRL']//div[@class='Brownalink' or @id='shortdesc']\r
-tidy: no\r
-\r
+title: //div[@class='HomeLeftPannel IMGCTRL']/h2
+body: //div[@class='HomeLeftPannel IMGCTRL']//div[@class='Brownalink' or @id='shortdesc']
+tidy: no
+
 test_url: http://edge.org/print/conversation.php?cid=the-argumentative-theory
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/edition.channel5belize.com.txt b/inc/3rdparty/site_config/standard/edition.channel5belize.com.txt
new file mode 100755 (executable)
index 0000000..6d5f170
--- /dev/null
@@ -0,0 +1,9 @@
+title: //div[@id='singlePage']//h2
+body: //div[@id='singlePage']//div[contains(@class, 'post')]
+strip: //a[@title='Email This Story']
+strip_id_or_class: sociable
+
+prune: no
+
+test_url: http://edition.channel5belize.com/archives/86016
+test_url: http://edition.channel5belize.com/feed
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index dc8ebe1..6fc82d2
@@ -1,9 +1,18 @@
-body: //div[@id='cnnContentContainer']//div[contains(@class, 'cnn_strycntntlft')]\r
-strip: //div[@id='cnnCVP2']\r
-strip_id_or_class: cnn_strylftcexpbx\r
-strip_id_or_class: cnn_strylctcqrelt\r
-strip_id_or_class: cnn_strybtntoolsbttm\r
-strip_id_or_class: cnn_stryftsbttm\r
-strip_id_or_class: cnn_strybtmcntnt\r
+body: //div[@id='cnnContentContainer']//div[contains(@class, 'cnn_strycntntlft')]
+strip: //a[starts-with(@name, 'em')]
+strip: //div[@id='cnnCVP2']
+strip_id_or_class: cnn_strylftcexpbx
+strip_id_or_class: cnn_strylctcqrelt
+strip_id_or_class: cnn_strybtntoolsbttm
+strip_id_or_class: cnn_stryftsbttm
+strip_id_or_class: cnn_strybtmcntnt
+strip_id_or_class: cnn_stryshrwdgtbtm
+strip_id_or_class: cnnGalleryContainer
+strip_id_or_class: cnn_strycrcntr
+strip_id_or_class: cnn_html_slideshow
 prune: no
-test_url: http://edition.cnn.com/2011/US/04/29/severe.weather/index.html
\ No newline at end of file
+
+test_url: http://edition.cnn.com/2011/US/04/29/severe.weather/index.html
+test_url: http://edition.cnn.com/2013/08/15/world/africa/nigeria-boko-haram-commander-killed/index.html?eref=edition
+test_url: http://rss.cnn.com/rss/edition.rss
+test_url: http://rss.cnn.com/rss/edition_technology.rss
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/eetimes.com.txt b/inc/3rdparty/site_config/standard/eetimes.com.txt
new file mode 100755 (executable)
index 0000000..300db30
--- /dev/null
@@ -0,0 +1,8 @@
+body: //div[contains(@class, 'grayshowlinks')]
+
+next_page_link: //div[@id='sitecontentcol']//a[.='Next >']
+# Doesn't work (site doesn't always load full content in print view)
+#single_page_link: //div[@id='sitecontentcol']//a[contains(@href, 'print=yes')]
+
+test_url: http://www.eetimes.com/document.asp?doc_id=1319966&
+test_url: http://www.eetimes.com/rss_simple.asp
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 59f6a71..3756027
@@ -1,11 +1,11 @@
-title: //h1[@class='style6 nevek']\r
-\r
-body: //div[@class='bal3']\r
-\r
-\r
-prune: yes\r
-\r
-tidy: yes\r
-convert_double_br_tags: yes\r
+title: //h1[@class='style6 nevek']
+
+body: //div[@class='bal3']
+
+
+prune: yes
+
+tidy: yes
+convert_double_br_tags: yes
 
 test_url: http://ekultura.hu/olvasnivalo/egyeb/cikk/2010-12-15/interju-galvolgyi-judit-2010-december
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 52ffe2d..d4b0a9b
@@ -1,3 +1,3 @@
-body: //div[@id='jobDesc-bd']/p\r
+body: //div[@id='jobDesc-bd']/p
 
 test_url: http://www.elance.com/j/xml-technical-intergration/23687172/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/elderscrollsonline.com.txt b/inc/3rdparty/site_config/standard/elderscrollsonline.com.txt
new file mode 100755 (executable)
index 0000000..fa3892c
--- /dev/null
@@ -0,0 +1,22 @@
+date: //time
+title: //h1[contains(@class, "alpha")]
+body: //article[contains(@class, "news-post")]
+
+# fix dates - dates as they are won't work as strtotime doesn't understand format (03.28.2013)
+replace_string(<time class="gamma">01.): <time class="gamma">January.
+replace_string(<time class="gamma">02.): <time class="gamma">February.
+replace_string(<time class="gamma">03.): <time class="gamma">March.
+replace_string(<time class="gamma">04.): <time class="gamma">April.
+replace_string(<time class="gamma">05.): <time class="gamma">May.
+replace_string(<time class="gamma">06.): <time class="gamma">June.
+replace_string(<time class="gamma">07.): <time class="gamma">July.
+replace_string(<time class="gamma">08.): <time class="gamma">August.
+replace_string(<time class="gamma">09.): <time class="gamma">September.
+replace_string(<time class="gamma">10.): <time class="gamma">October.
+replace_string(<time class="gamma">11.): <time class="gamma">November.
+replace_string(<time class="gamma">12.): <time class="gamma">December.
+
+prune: no
+
+test_url: http://elderscrollsonline.com/en/rss
+test_url: http://elderscrollsonline.com/en/news/post/2013/03/27/developer-question-of-the-week-17
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0766471..56fba5f
@@ -1,27 +1,27 @@
-title: //h1\r
-date: //div[@class='datum']\r
-single_page_link: //a[contains(@href, '?type=99')]\r
-\r
-# this hack preserves the intro text, because it would be striped otherwise if the title is set to //h1\r
-dissolve: //div[@class='artikelMeldung']\r
-\r
-\r
-strip_id_or_class: anzeige\r
-strip_id_or_class: top_page_navigation\r
-strip_id_or_class: cr_image_container\r
-strip_id_or_class: cr_image_reference\r
-strip_id_or_class: cr_image_icon\r
-strip_id_or_class: _close_txt\r
-strip_id_or_class: _close_ico\r
-strip_id_or_class: clearer\r
-\r
-strip://h1\r
-strip://h6\r
-strip://div[contains(@id, 'plista')]\r
-strip://img[contains(@id,'tiny')]\r
-strip://img[@class='cr_image']\r
-\r
-# strip url at the top\r
-strip: //p[@style='font-size: 10px;']\r
+title: //h1
+date: //div[@class='datum']
+single_page_link: //a[contains(@href, '?type=99')]
+
+# this hack preserves the intro text, because it would be striped otherwise if the title is set to //h1
+dissolve: //div[@class='artikelMeldung']
+
+
+strip_id_or_class: anzeige
+strip_id_or_class: top_page_navigation
+strip_id_or_class: cr_image_container
+strip_id_or_class: cr_image_reference
+strip_id_or_class: cr_image_icon
+strip_id_or_class: _close_txt
+strip_id_or_class: _close_ico
+strip_id_or_class: clearer
+
+strip://h1
+strip://h6
+strip://div[contains(@id, 'plista')]
+strip://img[contains(@id,'tiny')]
+strip://img[@class='cr_image']
+
+# strip url at the top
+strip: //p[@style='font-size: 10px;']
 
 test_url: http://www.elektroniknet.de/automotive/technik-know-how/sicherheitselektronik/article/87717/0/Besser_als_die_Wirklichkeit/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9fecd66..435c6c2
@@ -1,4 +1,4 @@
-single_page_link: //a[contains(@href, 'print_contenido')]\r
-title: //h2\r
+single_page_link: //a[contains(@href, 'print_contenido')]
+title: //h2
 author: //div[@class="autor"]
 test_url: http://www.elmalpensante.com/index.php?doc=display_contenido&id=668
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 32f9fc3..c6f9787
@@ -1,22 +1,22 @@
-title: //meta[@name='DC.title']/@content\r
-title: //div[contains(@class, 'cabecera_noticia')]//h1\r
-date: //meta[@name='DC.date']/@content\r
-date: //meta[@name='date']/@content\r
-body: //div[@class='columna_texto']\r
-body: //div[@id='cuerpo_noticia']\r
-body: //div[@class='estructura_2col_1zq']//div[@class='margen_n']\r
-\r
-prune: no\r
-\r
-strip_id_or_class: disposicion_vertical\r
-strip_id_or_class: ampliar_foto\r
-strip_id_or_class: utilidades\r
-strip_id_or_class: info_relacionada\r
-strip_id_or_class: m-kiosko\r
-strip_id_or_class: info_complementa\r
-\r
-strip: //div[starts-with(@id, 'sumario') and contains(., 'más información')]\r
-strip: //div[@id='coment' or @id='foros_not']\r
+title: //meta[@name='DC.title']/@content
+title: //div[contains(@class, 'cabecera_noticia')]//h1
+date: //meta[@name='DC.date']/@content
+date: //meta[@name='date']/@content
+body: //div[@class='columna_texto']
+body: //div[@id='cuerpo_noticia']
+body: //div[@class='estructura_2col_1zq']//div[@class='margen_n']
 
-test_url: http://elpais.com/elpais/2012/02/06/gente/1328526783_491687.html\r
+prune: no
+
+strip_id_or_class: disposicion_vertical
+strip_id_or_class: ampliar_foto
+strip_id_or_class: utilidades
+strip_id_or_class: info_relacionada
+strip_id_or_class: m-kiosko
+strip_id_or_class: info_complementa
+
+strip: //div[starts-with(@id, 'sumario') and contains(., 'más información')]
+strip: //div[@id='coment' or @id='foros_not']
+
+test_url: http://elpais.com/elpais/2012/02/06/gente/1328526783_491687.html
 test_url: http://www.elpais.com/articulo/cultura/mano/retrato/materia/elpepicul/20120207elpepicul_2/Tes
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/emaratalyoum.com.txt b/inc/3rdparty/site_config/standard/emaratalyoum.com.txt
new file mode 100755 (executable)
index 0000000..3d1313e
--- /dev/null
@@ -0,0 +1,7 @@
+body: //div[@id='main-column']//div[@class='content']
+
+prune: no
+
+test_url: http://www.emaratalyoum.com/sports/arab-and-international/2013-08-29-1.601844
+test_url: http://www.emaratalyoum.com/sports/arab-and-international/2013-08-29-1.601842
+test_url: http://www.emaratalyoum.com/public-sports-1.533088?ot=ot.AjaxPageLayout
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c1a9106..2ca0216
@@ -1,10 +1,10 @@
-body: //div[@id='content']\r
-strip: //div[@class='rl'] \r
-strip: //p[@class='authdesc']\r
-strip: //p[@class='strybtm']\r
-strip: //div[@id='stryFtrLft']\r
-strip: //div[@id='f1Conversation']\r
-strip: //div[@id='cmtSpncrRuler']\r
-strip: //div[@id='stryComments']\r
+body: //div[@id='content']
+strip: //div[@class='rl'] 
+strip: //p[@class='authdesc']
+strip: //p[@class='strybtm']
+strip: //div[@id='stryFtrLft']
+strip: //div[@id='f1Conversation']
+strip: //div[@id='cmtSpncrRuler']
+strip: //div[@id='stryComments']
 strip: //div[@id='athrData']
 test_url: http://en.espnf1.com/monaco/motorsport/story/50529.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6cc6b14..52acddb
@@ -1,7 +1,7 @@
-title: //meta[@property="og:title"]/@content\r
-body: //div[@class='post_body']\r
-date: //*[@class='post_time']\r
-\r
-prune: no\r
-\r
+title: //meta[@property="og:title"]/@content
+body: //div[@class='post_body']
+date: //*[@class='post_time']
+
+prune: no
+
 test_url: http://www.engadget.com/2011/05/20/screen-grabs-the-mentalist-takes-the-ipad-to-new-heights/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 35ace46..48f301f
@@ -1,7 +1,7 @@
-title: //h2\r
-body: //div[@class="post_content"]\r
-author: //p[@class="author"]/a\r
-date: //p[@class="date"]\r
-strip: //h2\r
+title: //h2
+body: //div[@class="post_content"]
+author: //p[@class="author"]/a
+date: //p[@class="date"]
+strip: //h2
 strip: //header
 test_url: http://engineering.tumblr.com/post/21276808338/tumblr-firehose
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index aed3a5f..9736599
@@ -1,7 +1,7 @@
-title: //span[@id='DetailedTitle']\r
-body: //div[@id='ctl00_cphBody_dvArticleInfoBlock'] | //td[@class='DetailedSummary']\r
-strip_id_or_class: sidebar\r
-strip_id_or_class: Skyscrapper_Body\r
-strip: //td[@class='DetailedSummary']/table[position() != 1]\r
-prune: no\r
+title: //span[@id='DetailedTitle']
+body: //div[@id='ctl00_cphBody_dvArticleInfoBlock'] | //td[@class='DetailedSummary']
+strip_id_or_class: sidebar
+strip_id_or_class: Skyscrapper_Body
+strip: //td[@class='DetailedSummary']/table[position() != 1]
+prune: no
 test_url: http://english.aljazeera.net//news/middleeast/2011/04/20114681444376835.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e2b99bf..ddd51c4
@@ -1,9 +1,9 @@
-body: //div[@id='article']//div[contains(@class, 'inside')]\r
-\r
-strip_id_or_class: tags\r
-strip_id_or_class: actions\r
-strip_id_or_class: google-ads\r
-\r
-prune: no\r
-\r
+body: //div[@id='article']//div[contains(@class, 'inside')]
+
+strip_id_or_class: tags
+strip_id_or_class: actions
+strip_id_or_class: google-ads
+
+prune: no
+
 test_url: http://www.enikos.gr/politics/98606,To_oxi_toy_Agorastoy_stoys_Germanoys.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3e7fba0..a756c45
@@ -1,10 +1,10 @@
-author://div[@class = 'article-author']/span[@class = 'byline']\r
-title://h1[@class = 'heading']\r
-body://div[@id = 'related-article-links']\r
-strip://div[@id = 'comment-sort-order']\r
-strip://div[@id = 'my-profile']\r
-strip://div[@class = 'article-author']\r
-strip://div[@class = 'bg-f8f1d8 width-385 text-left']\r
-strip://div[@id = 'login-status']\r
+author://div[@class = 'article-author']/span[@class = 'byline']
+title://h1[@class = 'heading']
+body://div[@id = 'related-article-links']
+strip://div[@id = 'comment-sort-order']
+strip://div[@id = 'my-profile']
+strip://div[@class = 'article-author']
+strip://div[@class = 'bg-f8f1d8 width-385 text-left']
+strip://div[@id = 'login-status']
 strip://div[@class = 'puff-padding']
 test_url: http://entertainment.timesonline.co.uk/tol/arts_and_entertainment/the_tls/article7177738.ece
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ericsuh.com.txt b/inc/3rdparty/site_config/standard/ericsuh.com.txt
new file mode 100755 (executable)
index 0000000..d25140c
--- /dev/null
@@ -0,0 +1,4 @@
+date: //h6[@class='datetime']/child::text()
+author: string("Eric J. Suh")
+footnotes: yes
+test_url: http://www.ericsuh.com/blog/posts/2012/8/strange-numbers.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 19a1e9d..21691a5
@@ -1,11 +1,11 @@
-title: concat(//div[@class='doc_author'], ' - ', upper-case(//div[@class='doc_title']))\r
-\r
-body: //div[@class='doc']\r
-\r
-prune: yes\r
-\r
-tidy: yes\r
-convert_double_br_tags: yes\r
-\r
+title: concat(//div[@class='doc_author'], ' - ', upper-case(//div[@class='doc_title']))
+
+body: //div[@class='doc']
+
+prune: yes
+
+tidy: yes
+convert_double_br_tags: yes
+
 strip: //a[contains(@href, 'www.facebook.com/pages/Elet-es-Irodalom/')]
 test_url: http://www.es.hu/2010-12-08_vissza-a-partpenzt
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7e17a04..fd453a1
@@ -1,2 +1,8 @@
+title: //h1[@class='headline']/div[@class='name']
+
+strip_image_src: 'http://cdn.themis-media.com/media/global/images/library/deriv/115/115825.png'
+
+next_page_link: //a[@class='next_page']
+
 strip_comments: no
-test_url: http://www.escapistmagazine.com/articles/view/columns/extraconsideration/8717-Extra-Consideration-The-Story
\ No newline at end of file
+test_url: http://www.escapistmagazine.com/articles/view/columns/criticalintel/10302-I-Hate-Magic
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 319d352..0647629
@@ -1,12 +1,12 @@
-title: //div[@class='headline'] | //div[@class='mod-header']/h3\r
-body: //div[contains(@class, 'article')]\r
-strip: //div[contains(@class, 'mod-inline')]\r
-strip: //*/span[@class='page-actions']\r
-strip: //div[@class='page-actions']/*\r
-strip: //div[@class='headline'] | //div[@class='mod-header']/h3\r
-strip: //div[@class='mod-blog-navigation']\r
-strip: //div[@class='monthday']\r
-strip: //div[@class='time']\r
-strip: //div[@class='timeofday']\r
+title: //div[@class='headline'] | //div[@class='mod-header']/h3
+body: //div[contains(@class, 'article')]
+strip: //div[contains(@class, 'mod-inline')]
+strip: //*/span[@class='page-actions']
+strip: //div[@class='page-actions']/*
+strip: //div[@class='headline'] | //div[@class='mod-header']/h3
+strip: //div[@class='mod-blog-navigation']
+strip: //div[@class='monthday']
+strip: //div[@class='time']
+strip: //div[@class='timeofday']
 strip: //div[contains(@class, 'mod-conversations')]
 test_url: http://espn.go.com/boston/mlb/story/_/id/7092528/terry-francona-victim-latest-red-sox-smear-campaign
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7566e8c..b9cb1e5
@@ -1,10 +1,11 @@
-title: //h1\r
-author: //div[@id='byline']\r
-\r
-body: //div[@id='printBody']\r
-\r
-single_page_link: concat('http://www.esquire.com/print-this/', substring-after(//link[@rel='canonical']/@href, 'esquire.com/'))\r
-\r
-prune: no\r
-\r
-test_url: http://www.esquire.com/features/impossible/price-is-right-perfect-bid-0810
\ No newline at end of file
+title: //h1
+author: //div[@id='byline']
+
+body: //div[@id='printBody']
+
+single_page_link: concat('http://www.esquire.com/print-this/', substring-after(//link[@rel='canonical']/@href, 'esquire.com/'))
+
+prune: no
+
+test_url: http://www.esquire.com/features/impossible/price-is-right-perfect-bid-0810
+test_url: http://www.esquire.com/blogs/politics/police-getting-leftover-armoured-iraq-trucks-112513
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 88c8c56..9a92239
@@ -1,6 +1,6 @@
-title: //*[@itemprop='headline']\r
-author: //*[@itemprop='author']\r
-date: //*[@itemprop='datePublished']\r
-body: //*[@itemprop='articleBody']\r
+title: //*[@itemprop='headline']
+author: //*[@itemprop='author']
+date: //*[@itemprop='datePublished']
+body: //*[@itemprop='articleBody']
 strip: //*[contains(@class, 'instapaper_ignore')]
 test_url: http://www.essentialpublicradio.org/story/2011-11-14/volunteers-sought-federal-tax-assistance-program-pennsylvania-9421
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 58da5ef..95f8cf7
@@ -1,6 +1,6 @@
-strip_id_or_class: 'left'\r
-strip_id_or_class: 'right'\r
-strip_id_or_class: 'block-belowcontent'\r
-author: //span[@class = 'name']/a\r
-date: //div[@class= 'datum']\r
+strip_id_or_class: 'left'
+strip_id_or_class: 'right'
+strip_id_or_class: 'block-belowcontent'
+author: //span[@class = 'name']/a
+date: //div[@class= 'datum']
 test_url: http://www.etc.se/intervju/lonsamt-att-radda-jorden
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6ecdf6b..8a35166
@@ -1,8 +1,8 @@
-body: //div[ @class='content' ]  |  //div[ @class='blog-entry' ]\r
-\r
-strip: //h2/abbr  |  //div[ @class='lowleader' ]  |  //*[ @class='discussion' ]  |  //img[ @class='play-button' ]  |  //div[ @class='boxout' ] | //h2/a | //h2 | //h2/div | //p[ @class='timestamp' ] | //a[ @class='eurogamer-author' ] | //p[ @class='aPager' ] | //h1 | //div[ @id='lowleader' ] | //a[ @class='next' ]  |  //div[contains(concat(' ', normalize-space(@class), ' '), ' pullquote ')]\r
-\r
-date://p[ @class='timestamp' ]\r
-\r
-author://a[ @class='eurogamer-author' ]\r
+body: //div[ @class='content' ]  |  //div[ @class='blog-entry' ]
+
+strip: //h2/abbr  |  //div[ @class='lowleader' ]  |  //*[ @class='discussion' ]  |  //img[ @class='play-button' ]  |  //div[ @class='boxout' ] | //h2/a | //h2 | //h2/div | //p[ @class='timestamp' ] | //a[ @class='eurogamer-author' ] | //p[ @class='aPager' ] | //h1 | //div[ @id='lowleader' ] | //a[ @class='next' ]  |  //div[contains(concat(' ', normalize-space(@class), ' '), ' pullquote ')]
+
+date://p[ @class='timestamp' ]
+
+author://a[ @class='eurogamer-author' ]
 test_url: http://www.eurogamer.net/articles/digitalfoundry-vs-unreal-engine-4
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0716251..ccb4f87
@@ -1,11 +1,11 @@
-author: substring-after(//div[@class='articleauthor'],'By ')\r
-\r
-# Blog posts\r
-date: //div[@class='articledate']\r
-# News\r
-date: //div[@class='articledate_b']\r
-\r
-body: //div[@class='articletext']\r
-\r
+author: substring-after(//div[@class='articleauthor'],'By ')
+
+# Blog posts
+date: //div[@class='articledate']
+# News
+date: //div[@class='articledate_b']
+
+body: //div[@class='articletext']
+
 convert_double_br_tags: yes
 test_url: http://www.evo.co.uk/carreviews/evolongtermtests/280072/bmw_330d_sport_touring.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d0cb283..d81d325
@@ -1,9 +1,10 @@
-title: //div[@id='article']/div[contains(@class, 'content')]/h1\r
-body: //div[@id='article']/div[contains(@class, 'content')]\r
-date: //div[contains(@class, 'article-slot')]/descendant::div[contains(@id, 'articledates')]\r
-\r
-strip: //img[contains(@src, 'img/px.gif')]\r
-prune: no\r
-# remove Facebook banner and obtrusive ad\r
-strip: //div[@id='article']/div[contains(@class, 'content')]/div[contains(@class, 'art-right')]\r
-test_url: http://www.expressen.se/kultur/1.2683904/medan-natet-dras-at
\ No newline at end of file
+title: //h1[contains(@class, 'b-headline_article')]
+body: //div[contains(@class, 'b-article_print')]
+
+single_page_link: //div[contains(@class, 'b-page__footer__actions')]//a[contains(@href, 'print=true')]
+
+prune: no
+
+test_url: http://www.expressen.se/kultur/1.2683904/medan-natet-dras-at
+test_url: http://www.expressen.se/gt/polis-om-styckmordet-extremt-markligt-fall/
+test_url: http://www.expressen.se/Pages/OutboundFeedsPage.aspx?id=3642159&viewstyle=rss
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/facebook.com.txt b/inc/3rdparty/site_config/standard/facebook.com.txt
new file mode 100755 (executable)
index 0000000..6a49276
--- /dev/null
@@ -0,0 +1,5 @@
+body: //div[@id='imagestage']
+prune: no
+tidy: no
+
+test_url: https://www.facebook.com/feeds/page.php?id=338077742912613&format=rss20
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c17e0b8..4c96a1a
@@ -1,3 +1,3 @@
-bosdy: //div[@class='content']\r
+bosdy: //div[@class='content']
 
 test_url: http://facta.co.jp/blog/archives/20111026001026.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b941b74..2bfcc9b
@@ -1,18 +1,14 @@
-title: //h2[@class='related relatedTitle']\r
-author: //a[contains(@href, 'liste.php?author_id')]\r
-\r
-# can't think of a better way unfortunately, really bad markup on this site\r
-date: substring-after(//td[@style='width:85%;'], 'vom')\r
-\r
-# not sure why, but instapaper seems to suck up the teaser paragraph\r
-# not solved!\r
-body: //div[contains(@class, 'teaser')]\r
-body: //div[@id='content']\r
-\r
-# cleanup\r
-strip: //img[@src='http://www.falter.at/web/_pics/falterlogo_dblau.gif']\r
-strip: //div[@class='servicebox']\r
-strip: //h1\r
-strip: //br\r
-strip: //td[@id='adcol']
-test_url: http://www.falter.at/web/print/detail.php?id=1634
\ No newline at end of file
+title: //h1
+author: //a[contains(@href, '/kategorie/autoren')]
+date: //a[contains(@href, '/falter/ausgabe')]
+body: //article[@class='spanMain']
+
+# cleanup
+strip_id_or_class: 'respond'
+strip: //img[@src='http://www.falter.at/web/_pics/falterlogo_dblau.gif']
+strip_id_or_class: 'meta'
+strip_id_or_class: 'servicebox'
+strip_id_or_class: 'related'
+strip_id_or_class: 'twitter-share-button'
+strip: //br
+test_url: http://www.falter.at/falter/2013/03/26/der-dandy-auf-der-sinkenden-galeere/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8d0c4da..e7cab4d
@@ -1,6 +1,6 @@
-body: //*[@id = 'story text']\r
-author: //a[starts-with(@href, '/u/')]\r
-next_page_link: substring-after(//input[contains(@value, 'Next')]/@onclick, "self.location='")\r
-autodetect_next_page:yes\r
+body: //*[@id = 'story text']
+author: //a[starts-with(@href, '/u/')]
+next_page_link: substring-after(//input[contains(@value, 'Next')]/@onclick, "self.location='")
+autodetect_next_page:yes
 strip_id_or_class: 'a2a_kit'
 test_url: http://www.fanfiction.net/s/6497403/1/Spartan_Love
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 5547a76..a641723
@@ -1,16 +1,16 @@
-title: //h1\r
-author: //h5[@class='byline']//a\r
-date: //h5[@class='date']\r
-body: //figure[@class='node-poster'] | //div[contains(@class, "node-content")]\r
-strip_id_or_class: article-top-wrapper\r
-strip_id_or_class: footer-message\r
-strip_id_or_class: print-logo\r
-strip: //cite\r
-strip://*[@class='timestamp']\r
-strip://div[@id='page_right']\r
-strip://section[@id='header_region']\r
-strip://h1[@class='node-title']\r
-strip://div[@class='node-submitted']\r
-strip_id_or_class: skipnav\r
-test_url: http://www.fastcompany.com/3000226/link-between-quietness-and-productivity\r
+title: //h1
+author: //h5[@class='byline']//a
+date: //h5[@class='date']
+body: //figure[@class='node-poster'] | //div[contains(@class, "node-content")]
+strip_id_or_class: article-top-wrapper
+strip_id_or_class: footer-message
+strip_id_or_class: print-logo
+strip: //cite
+strip://*[@class='timestamp']
+strip://div[@id='page_right']
+strip://section[@id='header_region']
+strip://h1[@class='node-title']
+strip://div[@class='node-submitted']
+strip_id_or_class: skipnav
+test_url: http://www.fastcompany.com/3000226/link-between-quietness-and-productivity
 test_url: http://www.fastcompany.com/3003586/6-simple-rituals-reach-your-potential-every-day
\ No newline at end of file
index 4fe5968b5974e0f3e8cef285d53be565ec6170a0..47048a1b2dfbcc082e9fb64a3e8b3583af94b658 100644 (file)
-# Title\r
-title: //p[@class='Content HeadlineShort']\r
-\r
-# Authors\r
-# some are known and have a link, others don't\r
-author: substring-after(//span[@class='Autor'], 'Von')\r
-\r
-# Date\r
-date: //span[@class='Datum']\r
-\r
-# Body\r
-body: //div[@class='Artikel']\r
-\r
-# Removements before body text\r
-strip: //div[@class='Breadcrumbs']\r
-strip: //div[@class='QuickSearchBox']\r
-strip: //div[@class='FAZArtikelEinleitung']\r
-strip: //div[@class='FAZArtikelReiter']\r
-strip: //div[@class='clear']\r
-\r
-# General removements\r
-strip: //span[@class='Bildnachweis']\r
-\r
-# Removements after body text\r
-strip: //div[@class='ArtikelAbbinder']\r
-strip: //div[@class='ArtikelKommentieren Artikelfuss GETS;tk;boxen.top-lesermeinungen;tp;content']\r
-strip: //div[@class='FAZArtikelKommentare FAZArtikelContent']\r
-strip: //div[@class='FAZArtikelFunktionen']\r
-strip: //div[@id='FAZContentRight']\r
-test_url: http://www.faz.net/aktuell/gesellschaft/ehe-haltbarkeitsformel-verliebe-dich-oft-verlobe-dich-selten-heirate-vielleicht-11685306.html
\ No newline at end of file
+# Author: zinnober
+# Complete rewrite of the faz.net template as the standard one is broken
+# I tried to consider as many page variants as possible, which was some serious work
+
+tidy: no
+prune: no
+
+# Title
+title: //p[@class='Content HeadlineShort']
+
+# Set author
+author: substring-after(//span[@class='Autor'], 'von ')
+author: //span[@class='caps last']/span[@class='caps last']
+author: //a[@rel='author']
+
+# Set date
+date: //span[@class='Datum']
+date: //span[@class='Datum'],/span
+
+# Fetch full multipage articles
+next_page_link: //a[@title='Nächste Seite']
+
+# Content is here
+body: //div[@class='Artikel']
+
+# Tidy up before article
+strip: //div[@id='FAZHeaderNeu']
+strip: //h2[@itemprop='headline']
+strip: //span[@class='Datum']
+strip: //span[@class='Autor']
+strip_id_or_class: ArticlePagerTop
+
+# General cleanup
+strip: //div[@class='clear']
+strip: //a[@title='Zur Homepage FAZ.NET']
+strip: //iframe
+replace_string( ·  ):
+
+# Remove tracking and ads
+strip_image_src: /l.gif?
+strip: //div[contains(@style, 'background-image')]
+strip: //img[@width='1']
+strip_id_or_class: invisible
+strip_id_or_class: Anzeige
+strip_id_or_class: billboard
+
+# Remove various text boxes and social media foo
+strip_id_or_class: WeitereBeitraege
+strip_id_or_class: WBListe
+strip_id_or_class: AutorenModul
+strip_id_or_class: Community
+strip_id_or_class: SocialMediaStatus
+strip_id_or_class: RelatedLinkBox
+strip_id_or_class: MultimediaNavigation
+strip_id_or_class: IndexTitel
+
+# Fix picture captions and pictures (use better resolution and remove clutter)
+strip_id_or_class: LightBoxOverlay
+strip_id_or_class: exitLarge
+strip_id_or_class: PagerBox
+strip_id_or_class: Bildnachweis
+strip_id_or_class: Bildueberschrift
+strip_id_or_class: Bildbeschreibung
+strip_id_or_class: ArtikelBild610
+strip_id_or_class: MediaLink
+strip_id_or_class: FotoBoxInnerLeft
+strip_id_or_class: BilderRelatedLinks
+
+# Remove clutter after article
+strip_id_or_class: ArticlePagerBottom
+strip_id_or_class: backToHome
+strip_id_or_class: ArtikelAbbinder
+strip_id_or_class: lesermeinungscontainer
+strip_id_or_class: ThemenLinks
+strip_id_or_class: rechtehinweis
+strip_id_or_class: FAZArtikelMap
+strip_id_or_class: FAZArtikelKommentare
+strip_id_or_class: ArtikelKommentieren
+strip_id_or_class: FAZArtikelFunktionen
+strip_id_or_class: mailLB
+strip_id_or_class: FAZContentRight
+strip_id_or_class: stageModule
+strip_id_or_class: ContentFooter
+strip_id_or_class: ServicesFooter
+strip_id_or_class: FAZFooter
+
+# Clean up stuff present just in some articles
+strip_id_or_class: Teaser620
+strip_id_or_class: TeaserMultimedia
+strip_id_or_class: VideoBox
+
+# Remove as soon as Wallabag might be able to embed flash video
+strip_id_or_class: mmoObjectAsTeaserInArticle
+strip_id_or_class: additionalStylesAudioVideo
+strip_id_or_class: hideMMElements
+
+# Try it yourself
+test_url: http://www.faz.net/aktuell/feuilleton/zum-tod-von-margaret-thatcher-die-reizfigur-12141919.html#Drucken
+test_url: http://www.faz.net/aktuell/politik/inland/allensbach-analyse-im-namen-des-volkes-13106492.html
+test_url: http://www.faz.net/aktuell/feuilleton/kino/video-filmkritiken/video-filmkritik-when-animals-dream-zerrissene-jugend-13105772.html
+
diff --git a/inc/3rdparty/site_config/standard/fertigung.de.txt b/inc/3rdparty/site_config/standard/fertigung.de.txt
new file mode 100755 (executable)
index 0000000..90145e5
--- /dev/null
@@ -0,0 +1,23 @@
+title: //title
+
+body: //div[@id='content']
+
+strip: (//div[@id='content']/h2)[1]
+
+strip: //h2[contains(., 'mehr News')]/following::*
+strip: //h2[contains(., 'mehr News')]
+
+strip: //div[contains(@class, 'indizar')]/following::*
+strip: //div[contains(@class, 'indizar')]
+
+strip: //h1[contains(@class, 'single')]/preceding::*
+strip: //h1[contains(@class, 'single')]
+
+strip_id_or_class: plista_widget
+
+prune: no
+
+next_page_link: //a[contains(., 'Weiter')]
+
+test_url: http://www.fertigung.de/2013/04/igus-neuer-energiekettenkatalog/
+test_url: http://www.fertigung.de/2013/04/dynamisch-und-hochpraezise/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4a04e83..19ec16b
@@ -1,5 +1,5 @@
-body: id('storytext')\r
-author: //a[starts-with(@href, '/u/')]\r
-#next_page_link: substring-after(//input[contains(@value, 'Next')]/@onclick, "self.location='")\r
+body: id('storytext')
+author: //a[starts-with(@href, '/u/')]
+#next_page_link: substring-after(//input[contains(@value, 'Next')]/@onclick, "self.location='")
 strip_id_or_class: 'a2a_kit'
 test_url: http://www.fictionpress.com/s/2897964/1/All_We_Knew
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3dbfe76..081f0bb
@@ -1,12 +1,12 @@
-title: //h4\r
-author: //span[@class="author"]\r
-body: //div[@id="story"]\r
-strip_id_or_class: summary\r
-strip_id_or_class: meta\r
-strip_id_or_class: storyfoot\r
-convert_double_br_tags: yes\r
-prune: no\r
-\r
-# Note: this site still has trouble because single <br> tags are stripped, but I don't see a way to fix that with this interface.\r
+title: //h4
+author: //span[@class="author"]
+body: //div[@id="story"]
+strip_id_or_class: summary
+strip_id_or_class: meta
+strip_id_or_class: storyfoot
+convert_double_br_tags: yes
+prune: no
+
+# Note: this site still has trouble because single <br> tags are stripped, but I don't see a way to fix that with this interface.
 
 test_url: http://www.ficwad.com/story/158977
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 81c18fd..248522c
@@ -1,12 +1,12 @@
-title: //meta[@property='og:title']/@content\r
-body: //div[@id='y-article-bd']\r
-body: //div[contains(@class, 'yom-art-content')]\r
-strip: //div[contains(@class, 'related-companies')]\r
-strip: //div[@id='y-article-related']\r
-strip: //div[@id='ypf-article-related']\r
-prune: no\r
-\r
-single_page_link: //div[@class='ft']//a[contains(@href, 'page=all')]\r
-\r
-test_url: http://sg.finance.yahoo.com/news/Motorola-takes-wraps-249-rsg-3508842732.html?x=0&.v=1\r
+title: //meta[@property='og:title']/@content
+body: //div[@id='y-article-bd']
+body: //div[contains(@class, 'yom-art-content')]
+strip: //div[contains(@class, 'related-companies')]
+strip: //div[@id='y-article-related']
+strip: //div[@id='ypf-article-related']
+prune: no
+
+single_page_link: //div[@class='ft']//a[contains(@href, 'page=all')]
+
+test_url: http://sg.finance.yahoo.com/news/Motorola-takes-wraps-249-rsg-3508842732.html?x=0&.v=1
 test_url: http://finance.yahoo.com/news/super-young-retirement-savers.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 1a5cd2e..43aef75
@@ -1,10 +1,10 @@
-date: //div[@class='notes']/a\r
-body: //div[@id='content']\r
-\r
-strip_id_or_class: tags\r
-strip_id_or_class: permalink\r
-strip_id_or_class: notes\r
-strip_id_or_class: post_nav\r
-strip: //div[@id='content']//h2\r
+date: //div[@class='notes']/a
+body: //div[@id='content']
+
+strip_id_or_class: tags
+strip_id_or_class: permalink
+strip_id_or_class: notes
+strip_id_or_class: post_nav
+strip: //div[@id='content']//h2
 strip_id_or_class: right_column
 test_url: http://findtheswagger.tumblr.com/post/11589145141/moe-resners-end-of-an-era-1957-giants-final
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index dd56da2..ce972ba
@@ -1,7 +1,7 @@
-title: //div[@class='articleTitle']\r
-author: //div[@class='articleAuthor']\r
-body: //div[@class='articleContent']\r
-prune: no\r
-convert_double_br_tags: yes\r
-\r
+title: //div[@class='articleTitle']
+author: //div[@class='articleAuthor']
+body: //div[@class='articleContent']
+prune: no
+convert_double_br_tags: yes
+
 test_url: http://www.firstthings.com/article/2011/05/the-trouble-with-ayn-rand
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3d7b45a..d0a0a77
@@ -1,7 +1,7 @@
-title: substring-after(//title, 'Right:')\r
-body: //div[@class = 'post-body']\r
-author: substring-after(//*[@class='post-author'], 'by')\r
-date: concat(//*[@class='date-header'], ' ', //*[@class='post-timestamp']/a)\r
-convert_double_br_tags: yes\r
+title: substring-after(//title, 'Right:')
+body: //div[@class = 'post-body']
+author: substring-after(//*[@class='post-author'], 'by')
+date: concat(//*[@class='date-header'], ' ', //*[@class='post-timestamp']/a)
+convert_double_br_tags: yes
 
 test_url: http://www.fivethirtyeight.com/2010/07/does-rnc-have-structural-problems.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/flyingmachinestudios.com.txt b/inc/3rdparty/site_config/standard/flyingmachinestudios.com.txt
new file mode 100755 (executable)
index 0000000..2053f80
--- /dev/null
@@ -0,0 +1,2 @@
+strip_id_or_class: linenos
+test_url: http://www.flyingmachinestudios.com/programming/whoops-dci-refactoring/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 32d44c8..5db3e58
@@ -1,7 +1,7 @@
-author: //div[@class='authorDescription']/h2\r
-body: //div[@id='story']\r
-date: substring-before(substring-after(//p[@class='date'],'Erstellt am:'), '-')\r
-title: //h1[@class='detail']\r
-strip: //div[@class='fact']\r
+author: //div[@class='authorDescription']/h2
+body: //div[@id='story']
+date: substring-before(substring-after(//p[@class='date'],'Erstellt am:'), '-')
+title: //h1[@class='detail']
+strip: //div[@class='fact']
 
 test_url: http://fm4.orf.at/stories/1689156/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7faa6bf..e404ccb
@@ -1,15 +1,15 @@
-title: normalize(//h1)\r
-\r
-author: //td/p[position()=last()]/em\r
-\r
-# I swear, this is really the best way to do this\r
-date: normalize(//td[contains(@style, "color: #ffffff")])\r
-\r
-# my god, it's full of tables\r
-body: /table/tbody/tr[5]//table/tbody//table/tbody/tr/td\r
-strip: //h1\r
-\r
-# the following two lines strip the byline at the end of the article (the byline is a <p> that consists of an em dash and then some text in an <em>). I have no idea why I can't just strip //p[position()=last()], but trying to do so includes a bunch of other crap in the output.\r
-strip: //p[position()=last()]/em\r
+title: normalize(//h1)
+
+author: //td/p[position()=last()]/em
+
+# I swear, this is really the best way to do this
+date: normalize(//td[contains(@style, "color: #ffffff")])
+
+# my god, it's full of tables
+body: /table/tbody/tr[5]//table/tbody//table/tbody/tr/td
+strip: //h1
+
+# the following two lines strip the byline at the end of the article (the byline is a <p> that consists of an em dash and then some text in an <em>). I have no idea why I can't just strip //p[position()=last()], but trying to do so includes a bunch of other crap in the output.
+strip: //p[position()=last()]/em
 strip: //p[position()=last()]/child::text()
 test_url: http://www.fnal.gov/pub/today/archive_2011/today11-11-09_MuonDepartmentReadMore.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3ad5cab..6da3687
@@ -1,19 +1,19 @@
-title: //h1\r
-\r
-author: //div[@class='articleContent small']/div[@class='textBlock']//span[@class='created']\r
-\r
-date: //div[@class='articleHead']/span[@class='created']\r
-\r
-body: //div[@id='article']\r
-\r
-strip: //span[@class='markerText']\r
-strip: //div[@class='articleContent small']/div[@class='textBlock']//span[@class='created']\r
-strip: //div[@class='sidebar']\r
-strip: //div[@class='starbar']\r
-strip: //div[@class='actions clearfix']\r
-strip: //div[@id='commentForm']\r
-strip: //div[@id='commentSent']\r
-strip: //div[@id='comments']\r
-strip: //div[@class='similarityBlock']\r
+title: //h1
+
+author: //div[@class='articleContent small']/div[@class='textBlock']//span[@class='created']
+
+date: //div[@class='articleHead']/span[@class='created']
+
+body: //div[@id='article']
+
+strip: //span[@class='markerText']
+strip: //div[@class='articleContent small']/div[@class='textBlock']//span[@class='created']
+strip: //div[@class='sidebar']
+strip: //div[@class='starbar']
+strip: //div[@class='actions clearfix']
+strip: //div[@id='commentForm']
+strip: //div[@id='commentSent']
+strip: //div[@id='comments']
+strip: //div[@class='similarityBlock']
 
 test_url: http://www.focus.de/politik/ausland/ein-jahr-nach-bombenanschlag-u-bahn-attentaeter-von-minsk-hingerichtet_aid_724958.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/folklore.org.txt b/inc/3rdparty/site_config/standard/folklore.org.txt
new file mode 100755 (executable)
index 0000000..ed23a0b
--- /dev/null
@@ -0,0 +1,4 @@
+author: /html/body/table[3]/tbody/tr/td[1]/table[2]/tbody/tr[1]/td[2]
+date: /html/body/table[3]/tbody/tr/td[1]/table[2]/tbody/tr[2]/td[2]
+body: //div[@class='main']
+test_url: http://www.folklore.org/StoryView.py?story=Calculator_Construction_Set.txt
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/food.com.txt b/inc/3rdparty/site_config/standard/food.com.txt
new file mode 100755 (executable)
index 0000000..a70da76
--- /dev/null
@@ -0,0 +1,11 @@
+body: //div[@id='print-area']
+title: //h1[contains(@class, 'section-title')]
+single_page_link: //a[@id='prntrec']
+strip_image_src: food-logo-small
+strip_id_or_class: timer
+strip_id_or_class: photo-sm
+strip_id_or_class: page-header
+
+prune: no
+
+test_url: http://www.food.com/recipe/couldnt-be-easier-bbq-pork-tenderloin-crock-pot-317152
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 69867cc..89cb8b9
@@ -1,11 +1,11 @@
-body: //div[@class='entry-content']\r
-date: //meta[@name="date"]/@content\r
-author: //meta[@name="author"]/@content\r
-\r
-strip_id_or_class: ecapShell\r
-strip_id_or_class: noindent\r
-strip_id_or_class: targetedPromotion\r
-\r
-prune: no\r
-\r
+body: //div[@class='entry-content']
+date: //meta[@name="date"]/@content
+author: //meta[@name="author"]/@content
+
+strip_id_or_class: ecapShell
+strip_id_or_class: noindent
+strip_id_or_class: targetedPromotion
+
+prune: no
+
 test_url: http://www.fool.com/investing/general/2012/01/27/dfc-global-beats-up-on-analysts-yet-again.aspx
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 2381b56..9e1d04c
@@ -1,16 +1,27 @@
-title: //hgroup//h1\r
-title: //span[@class='mainarttitle']\r
-\r
-body: //div[@id='leftRail']//div[contains(@class, 'body')]\r
-\r
-author: //meta[@name="author"]/@content\r
-author: //span[@class='mainartauthor']\r
-\r
-date: substring-before(//hgroup//h6, '@')\r
-date: //span[@class='mainartdate']\r
-\r
-prune: no\r
-\r
-single_page_link: //a[contains(@href, '/print/')]\r
-\r
-test_url: http://www.forbes.com/forbes/2011/0509/technology-frog-design-jan-chipchase-ethnographer-birth-cool_print.html
\ No newline at end of file
+title: //hgroup//h1
+title: //span[@class='mainarttitle']
+
+body: //div[@id='leftRail']//div[contains(@class, 'body')]
+
+author: //meta[@name="author"]/@content
+author: //span[@class='mainartauthor']
+
+date: substring-before(//hgroup//h6, '@')
+date: //span[@class='mainartdate']
+
+prune: no
+strip: //aside
+strip_id_or_class: sticky_sharing
+strip_id_or_class: pagination
+strip_id_or_class: controlsbox
+strip_id_or_class: storyboxes
+strip_id_or_class: sponsoredlinks
+strip_id_or_class: nextpage
+strip_id_or_class: contextuallinks
+strip_id_or_class: article_actions
+strip_id_or_class: engagement_block
+
+single_page_link: //a[contains(@href, '/print/')]
+
+test_url: http://www.forbes.com/forbes/2011/0509/technology-frog-design-jan-chipchase-ethnographer-birth-cool_print.html
+test_url: http://www.forbes.com/sites/bruceupbin/2012/09/11/the-iphone-5-winners-and-losers/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/foreignaffairs.com.txt b/inc/3rdparty/site_config/standard/foreignaffairs.com.txt
new file mode 100755 (executable)
index 0000000..cf8b742
--- /dev/null
@@ -0,0 +1,34 @@
+# TIDY
+#tidy: no
+# PRUNE
+#prune: no
+
+# SINGLE PAGE
+single_page_link: //div[@class='showlinks']/a
+
+# TITLE
+title: //h1[@class="title"]
+
+# AUTHOR
+author: //div[contains(@class,"field-field-article-display-authors")]/div/div/a/text()
+
+# DATE
+date: //div[contains(@class,"field-field-article-issue")]/div/div/a/text() | //span[@class="date-display-single"]
+
+# BODY
+body: //div[contains(@class,"content-resize")]
+
+# Remove clutter
+strip: //div[@class="article-sidebar"]
+strip: //div[@class="showlinks"]
+strip: //div[contains(@class,"premium-box")]
+strip: //div[contains(@class,"premium-box")]
+strip: //table[contains(@border,"2")]
+
+# Fix picture captions
+wrap_in(small): //p/img/following-sibling::em
+wrap_in(small): //p[img]/text()
+
+# Fix sub-headlines
+wrap_in(h3): //div[contains(@class,"field-field-article-subtitle")]/div/div/text()
+test_url: http://www.foreignaffairs.com/articles/138810/pierre-n-leval/the-long-arm-of-international-law
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6ab7a09..4e84b98
@@ -1,11 +1,15 @@
-title: //div[@id='art-mast']//h1\r
-author: substring-after(//span[@id='by-line'], 'BY ')\r
-date: //span[@id='pub-date']\r
-body: //div[@id='art-mast']//h2 | //div[@id='art-mast']/h3 | //div[@id='art-body']//div[@class='translateBody']\r
-strip: //div[@id='share-box']\r
-prune: no\r
-\r
-single_page_link: //span[@id='controls']/a[contains(@href, 'print=yes')]\r
-\r
-test_url: http://www.foreignpolicy.com/articles/2011/08/01/a_murderers_manifesto_and_me\r
-test_url: test_url: http://www.foreignpolicy.com/articles/2012/02/29/five_years_in_damascus
\ No newline at end of file
+title: //div[@class='translateHead']//h1 | //div[@id='art-mast']//h1
+author: substring-after(//span[@id='by-line'], 'BY ')
+date: //span[@id='pub-date']
+body: //div[@id='art-mast']/h2 | //div[@class='translateBody'] | //div[@id='art-body']
+#Strip inside article content
+strip: //div[@id='share-box']
+strip: //div[@id='special-box']
+
+prune: no
+
+single_page_link: //span[@id='controls']/a[contains(@href, 'print=yes')]
+single_page_link: //a[text()='SINGLE PAGE']
+
+test_url: http://www.foreignpolicy.com/articles/2011/08/01/a_murderers_manifesto_and_me
+test_url: http://www.foreignpolicy.com/articles/2012/02/29/five_years_in_damascus
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3085c8f..c1bd2ba
@@ -1,9 +1,9 @@
-title: //div[@class="articleHeader"]/h1\r
-author: //p[@class="byline"]\r
-date: //p[contains(@class,"publishedDate")]/span\r
-# remove the right menu\r
-strip: //div[contains(@class,"aside")]\r
-# remove some SharePoint webpart label junk\r
-strip: //div[@id="ctl00_PlaceHolderMain_ArticleLeadField_label"]\r
+title: //div[@class="articleHeader"]/h1
+author: //p[@class="byline"]
+date: //p[contains(@class,"publishedDate")]/span
+# remove the right menu
+strip: //div[contains(@class,"aside")]
+# remove some SharePoint webpart label junk
+strip: //div[@id="ctl00_PlaceHolderMain_ArticleLeadField_label"]
 strip: //div[@id="ctl00_PlaceHolderMain_PublishingPageContentField_label"]
 test_url: http://forsvaret.no/aktuelt/publisert/nyheter/Sider/F5-fly-til-Skedsmo.aspx
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f1ee485..e19c77d
@@ -1,9 +1,9 @@
-prune: no\r
-\r
-author: //meta[@name="dc.publisher"]/@content\r
-date: //meta[@name="dc.date"]/@content\r
-strip: //p[contains(@class, 'contributor vcard')]\r
-replace_string(<ul><li><div class="photo">): <div class="photo">\r
-strip: //p[a[contains(., 'Click here to read more on this story ')]]\r
-\r
+prune: no
+
+author: //meta[@name="dc.publisher"]/@content
+date: //meta[@name="dc.date"]/@content
+strip: //p[contains(@class, 'contributor vcard')]
+replace_string(<ul><li><div class="photo">): <div class="photo">
+strip: //p[a[contains(., 'Click here to read more on this story ')]]
+
 test_url: http://www.foxnews.com/entertainment/2011/05/04/dwayne-johnson-guys-grow-pair-driving-hybrid/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f3d5425..78d3772
@@ -1,3 +1,3 @@
-body: //div[@id="projectDetailsContent"]//td\r
+body: //div[@id="projectDetailsContent"]//td
 
 test_url: http://www.freelancer.com/projects/PHP-Website-Design/debug-Forum-website-code.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8dc0dab..c83f830
@@ -1,5 +1,5 @@
-body: //div[@class = 'instapaperbody']\r
-convert_double_br_tags: no\r
-date: //div[@class='instadate']\r
+body: //div[@class = 'instapaperbody']
+convert_double_br_tags: no
+date: //div[@class='instadate']
 title: //h2[@class = 'instatitle']
 test_url: http://freytag-film.com/blog/artikel/shooting_a_feature_film_in_10_days
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/fria.nu.txt b/inc/3rdparty/site_config/standard/fria.nu.txt
new file mode 100755 (executable)
index 0000000..9d8eff9
--- /dev/null
@@ -0,0 +1,8 @@
+body: //div[contains(@class, 'layout__inner')]//div[contains(@class, 'file-image') or contains(@class, 'node__content')]
+author: //article//div[contains(@class, 'field-byline')]
+strip_id_or_class: rekommenderade
+strip_id_or_class: disqus
+strip_id_or_class: annonser
+
+test_url: http://www.fria.nu/artikel/112079
+test_url: http://www.fria.nu/taxonomy/term/1928/all/feed
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/friatidningen.se.txt b/inc/3rdparty/site_config/standard/friatidningen.se.txt
new file mode 100755 (executable)
index 0000000..1e4abc5
--- /dev/null
@@ -0,0 +1,7 @@
+body: //div[contains(@class, 'layout__inner')]//div[contains(@class, 'file-image') or contains(@class, 'node__content')]
+author: //article//div[contains(@class, 'field-byline')]
+strip_id_or_class: rekommenderade
+strip_id_or_class: disqus
+strip_id_or_class: annonser
+
+test_url: http://www.friatidningen.se/artikel/112074
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 39a9973..b067d88
@@ -1,11 +1,11 @@
-#body: (//div[@class='ftr-yt-vid'])[1]\r
-body: (//blockquote[contains(@class, 'postcontent')])[1]\r
-body: (//div[starts-with(@id, 'post_message')])[1]\r
-\r
-prune: no\r
-tidy: no\r
-\r
-#replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player"\r
-#replace_string(</iframe>): </iframe>&nbsp;</div>\r
-\r
+#body: (//div[@class='ftr-yt-vid'])[1]
+body: (//blockquote[contains(@class, 'postcontent')])[1]
+body: (//div[starts-with(@id, 'post_message')])[1]
+
+prune: no
+tidy: no
+
+#replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player"
+#replace_string(</iframe>): </iframe>&nbsp;</div>
+
 test_url: http://www.friendskorner.com/forum/f137/debate-personal-lives-leaders-west-vs-pakistan-must-read-297989/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 38d9d32..e66b960
@@ -1,5 +1,5 @@
-body: //div[contains(@class, 'ft-story-body')]\r
-\r
-author: substring-after(//div[contains(@class, 'ft-story-header')]/p[1], 'By ')\r
+body: //div[contains(@class, 'ft-story-body')]
+
+author: substring-after(//div[contains(@class, 'ft-story-header')]/p[1], 'By ')
 date: substring-before(substring-after(//div[contains(@class, 'ft-story-header')]/p[2], 'Published:'), '|')
 test_url: http://www.ft.com/cms/s/2/e1be4b5a-620c-11e0-8ee4-00144feab49a.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ftchinese.com.txt b/inc/3rdparty/site_config/standard/ftchinese.com.txt
new file mode 100755 (executable)
index 0000000..5c94d9b
--- /dev/null
@@ -0,0 +1,18 @@
+# Modified to define the single_page_link
+# This filter is tested on:
+# http://www.ftchinese.com/story/001047373
+# http://www.ftchinese.com/story/001047631
+# http://www.ftchinese.com/story/001047622/?print=y
+# http://www.ftchinese.com/story/001049052
+# http://www.ftchinese.com/story/001049088
+
+title:substring-before(//title, '-')
+author: //div[@class='byline']/a
+date: //a[@class='storytime']
+#Set date in print view
+#date: //div[@class='byline']/a/following-sibling::a
+body: //div[@id="bodytext"]
+strip://div[@class='pagination']
+single_page_link://div[@class='pagination']/a[.='全文']
+#next_page_link: //div[@class='pagination']//a[.='下一页']
+test_url: http://www.ftchinese.com/story/001049088
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a58765b..7d76af0
@@ -1,5 +1,5 @@
-body: //div[@class='boxIntroHead']/span[@class='h3'] | //div[@class='section']/div[@class='paragraph' or @class='embObjLeft']\r
-single_page_link: //a[@class='icon print']\r
+body: //div[@class='boxIntroHead']/span[@class='h3'] | //div[@class='section']/div[@class='paragraph' or @class='embObjLeft']
+single_page_link: //a[@class='icon print']
 
-test_url: http://www.ftd.de/it-medien/it-telekommunikation/:mobilfunk-vivendi-und-vodafone-trennen-sich-in-frankreich/60034691.html\r
+test_url: http://www.ftd.de/it-medien/it-telekommunikation/:mobilfunk-vivendi-und-vodafone-trennen-sich-in-frankreich/60034691.html
 test_url: http://www.ftd.de/it-medien/medien-internet/:verkauf-von-warner-music-musikbranche-auf-dem-sprung/60048185.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8e6356b..0dc3047
@@ -1,3 +1,3 @@
-body: //div[@class = 'entry']\r
+body: //div[@class = 'entry']
 
 test_url: http://www.fubiz.net/2011/05/31/world-press-photo-2011/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 50fc144..808c1f1
@@ -1,11 +1,11 @@
-date: //span[@class='date']\r
-strip: //div[@class='postsidebar']\r
-body: //div[@class='singlepost']\r
-title: //div[@class='singlepost']/h1\r
-move_into(//div[@class='singlepost']): //div[@class='info']\r
-strip: //div[@class='gallery']\r
-strip: //div[@class='biggallery']\r
-strip: //ul[@class='social']\r
-strip: //ul[@class='social_mail']\r
+date: //span[@class='date']
+strip: //div[@class='postsidebar']
+body: //div[@class='singlepost']
+title: //div[@class='singlepost']/h1
+move_into(//div[@class='singlepost']): //div[@class='info']
+strip: //div[@class='gallery']
+strip: //div[@class='biggallery']
+strip: //ul[@class='social']
+strip: //ul[@class='social_mail']
 
 test_url: http://futurezone.at/future/5502-erste-galileo-satelliten-starten-ins-all.php
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 35a8762..7c808cf
@@ -1,20 +1,20 @@
-# default view title\r
-title: //span[@class='newsTitle']\r
-# print view title\r
-title: //h3[@class='title']\r
-\r
-# default view author\r
-author: //span[@class='newsAuth']/a\r
-author: substring-after(//span[@class='newsAuth'], 'by ')\r
-\r
-# default view date\r
-date: //td[@class='newsDate']\r
-\r
-# default view body\r
-body: //td[@class='featureText']\r
-body: //td[@class='newsText']\r
-\r
-strip: //h3[@class='title']\r
-\r
+# default view title
+title: //span[@class='newsTitle']
+# print view title
+title: //h3[@class='title']
+
+# default view author
+author: //span[@class='newsAuth']/a
+author: substring-after(//span[@class='newsAuth'], 'by ')
+
+# default view date
+date: //td[@class='newsDate']
+
+# default view body
+body: //td[@class='featureText']
+body: //td[@class='newsText']
+
+strip: //h3[@class='title']
+
 single_page_link: //a[contains(@href, '?print=1')]
 test_url: http://www.gamasutra.com/view/feature/132559/staying_power_rethinking_feedback_.php
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 2cc4b37..73f8342
@@ -1,10 +1,10 @@
-title: //meta[@property="og:title"]/@content\r
-body: //div[@id='GBTVPlayer'] | //div[contains(@class, 'col490')]\r
-\r
-prune: no\r
-\r
-strip_id_or_class: noprint\r
-strip: //div[@id='gbNewsTextContent']/following-sibling::*\r
-\r
-test_url: http://www.gameblog.fr/news/26330-les-sims-3-showtime-s-annonce-en-video\r
+title: //meta[@property="og:title"]/@content
+body: //div[@id='GBTVPlayer'] | //div[contains(@class, 'col490')]
+
+prune: no
+
+strip_id_or_class: noprint
+strip: //div[@id='gbNewsTextContent']/following-sibling::*
+
+test_url: http://www.gameblog.fr/news/26330-les-sims-3-showtime-s-annonce-en-video
 test_url: http://www.gameblog.fr/news/26306-mise-a-jour-du-dashboard-de-la-xbox-360-disponible
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gamechurch.com.txt b/inc/3rdparty/site_config/standard/gamechurch.com.txt
new file mode 100755 (executable)
index 0000000..c9eea5f
--- /dev/null
@@ -0,0 +1,10 @@
+title: //h1[@class='title']
+
+date: substring-before(substring-after(//div[@class='comment-bubble']/.., 'Posted'), 'by')
+
+body: //div[@class='the-content']
+
+strip: //div[@class='article-image responsive']
+
+strip_id_or_class: 'pullquote'
+test_url: http://gamechurch.com/virtual-gun-control-the-best-amendment/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gamer.no.txt b/inc/3rdparty/site_config/standard/gamer.no.txt
new file mode 100755 (executable)
index 0000000..e76a59d
--- /dev/null
@@ -0,0 +1,11 @@
+body: //div[@class='pageContent description']
+date: //div[@class='authorsAndDateTime']/span[@title]
+single_page_link: //div[@class='pages']/a[last()-1]
+
+# fix images and captions
+wrap_in(figure): //div[contains(concat(' ', @class, ' '), ' image')]
+wrap_in(figcaption): //div[contains(concat(' ', @class, ' '), ' image')]/div[@class='text']/text()
+
+# get rid of videos
+strip_id_or_class: 'video full'
+test_url: http://www.gamer.no/artikler/142455/slik-blei-ambisiose-dragons-dogma-skapt/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gamereactor.no.txt b/inc/3rdparty/site_config/standard/gamereactor.no.txt
new file mode 100755 (executable)
index 0000000..6f7c1b9
--- /dev/null
@@ -0,0 +1,11 @@
+title: //div[@id='content']/div/h1
+
+author: //a[@itemprop='reviewer']
+
+date: //time[@itemprop='dtreviewed']/@datetime
+
+body: //div[@id='breadtext']
+
+# fix for NOT magically removing anchors with text identical to title
+dissolve: //a[text()=//div[@id='content']/div/h1/text()]
+test_url: http://www.gamereactor.no/previews/177481/The+Evil+Within/?sid=38b5bd30f56f1b7214de4ff5bed4b76f
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 1791e81..327ac55
@@ -1,3 +1,3 @@
-tidy: no\r
-\r
+tidy: no
+
 test_url: http://www.garythink.com/eft/testing.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ef68082..8eda0c3
@@ -1,4 +1,4 @@
-# These should work, but don't. They were given by Firefox XPather extension\r
-title: //article//header//a//h1\r
+# These should work, but don't. They were given by Firefox XPather extension
+title: //article//header//a//h1
 body: //article//section
 test_url: http://gasteroprod.com/blog/faut-il-continuer-a-supporter-internet-explorer-6.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7434632..2ab144f
@@ -1,8 +1,8 @@
-body: //div[@class='panel']\r
-strip: //div[@style='float:right']\r
-strip: //span[@class='titulosHomePublicidad']\r
-strip: //div[@id='TitTop5Der']\r
-strip: //img[@src='/ImagesGatoPardo/LogoGatopardo.png']\r
-\r
+body: //div[@class='panel']
+strip: //div[@style='float:right']
+strip: //span[@class='titulosHomePublicidad']
+strip: //div[@id='TitTop5Der']
+strip: //img[@src='/ImagesGatoPardo/LogoGatopardo.png']
+
 prune: yes
 test_url: http://www.gatopardo.com/ReportajesGP.php?R=95
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6531d81..9bc5613
@@ -1,6 +1,6 @@
-body: //div[@class="post-body"]\r
-\r
-# Remove 'content is restricted'\r
-strip: //div[@id='agegate_IDHERE']\r
-\r
+body: //div[@class="post-body"]
+
+# Remove 'content is restricted'
+strip: //div[@id='agegate_IDHERE']
+
 test_url: http://gawker.com/#!5782070/russian-bomb-squad-successfully-defuses-sex-toy
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 55586e1..89eb402
@@ -1,3 +1,3 @@
-author: substring-after(//span[@class='storyauthor'],'Posted by')\r
+author: substring-after(//span[@class='storyauthor'],'Posted by')
 date: //span[@class='storydate']
 test_url: http://www.geeksofdoom.com/2012/03/14/robert-rodriguez-says-machete-kills-and-sin-city-2-will-film-this-year/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f6dccf4..a664b4d
@@ -1,3 +1,3 @@
-body: //div[@id = 'article']\r
+body: //div[@id = 'article']
 strip: //div[@id = 'klasbox']
 test_url: http://www.geenstijl.nl/mt/archieven/2010/10/vrouw_lange_frans_wou_baas_b_d.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 537b4c2..e28d4b8
@@ -1,3 +1,3 @@
-body: //div[@class='post']\r
+body: //div[@class='post']
 strip: //ul[@id='bookmark_single']
 test_url: http://getnews.jp/archives/117312
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8a54bc0..61de51b
@@ -1,11 +1,11 @@
-# 2011-11-19 - carlo@... - Initial setup.\r
-\r
-strip_id_or_class: user-review-detail\r
-strip: //h1\r
-\r
-body: //div[@class="wiki-content"]  |  //div[@class="section-bd"]  |  //div[@class="news-story"]\r
-\r
-author: //span[@class="reviewer"]  |  //p[@class="byline"]/a/text()\r
-date: //span[@class="dtreviewed"]\r
+# 2011-11-19 - carlo@... - Initial setup.
+
+strip_id_or_class: user-review-detail
+strip: //h1
+
+body: //div[@class="wiki-content"]  |  //div[@class="section-bd"]  |  //div[@class="news-story"]
+
+author: //span[@class="reviewer"]  |  //p[@class="byline"]/a/text()
+date: //span[@class="dtreviewed"]
 
 test_url: http://www.giantbomb.com/the-elder-scrolls-v-skyrim/61-33394/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f60199a..e2689ea
@@ -1,20 +1,20 @@
-tidy:no\r
-title://h2[@class="title"]\r
-# author:"Ben Miller"\r
-date://div[@id="stats"]/span\r
-strip_id_or_class:stats\r
-strip_id_or_class:breadcrumbs\r
-strip_id_or_class:gn-why-content\r
-strip_id_or_class:single-social\r
-strip_id_or_class:sidebar-ads\r
-strip_id_or_class:sidebar-top\r
-strip_id_or_class:footer\r
-strip_id_or_class:post_meta\r
-# strip_id_or_class:\r
-# strip_id_or_class:\r
-# strip_id_or_class:\r
-# strip_id_or_class:\r
-# strip_id_or_class:\r
-# strip_id_or_class:\r
+tidy:no
+title://h2[@class="title"]
+# author:"Ben Miller"
+date://div[@id="stats"]/span
+strip_id_or_class:stats
+strip_id_or_class:breadcrumbs
+strip_id_or_class:gn-why-content
+strip_id_or_class:single-social
+strip_id_or_class:sidebar-ads
+strip_id_or_class:sidebar-top
+strip_id_or_class:footer
+strip_id_or_class:post_meta
+# strip_id_or_class:
+# strip_id_or_class:
+# strip_id_or_class:
+# strip_id_or_class:
+# strip_id_or_class:
+# strip_id_or_class:
 
 test_url: http://www.giga.de/benm/2011/10/17/probleme-mit-ios-5-wenn-die-daten-weg-sind/#more-58033
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 348bdf2..cc8fdfa
@@ -1,17 +1,12 @@
-date: //meta[@name='DC.date.issued']/@content\r
-date: //span[@class='post-meta the-date']\r
-\r
-title: //meta[@property='og:title']/@content\r
-\r
-author: //meta[@name='DC.creator']/@content\r
-\r
-body: //div[contains(@class, 'post-sub-head') or starts-with(@id, 'post-content-')]\r
-\r
-find_string: id="content"\r
-replace_string: id="content-ignore"\r
-\r
-strip_id_or_class: sharedaddy\r
-\r
-prune: no\r
-\r
-test_url: http://gigaom.com/2011/10/24/groupon-google-lawsuit/
\ No newline at end of file
+date: //meta[@name='dcterms.created']/@content
+title: //meta[@property='og:title']/@content
+author: //section[@class="post-meta"]//a[@rel="author"]
+
+body: //div[starts-with(@id, 'post-content-')]
+
+strip_id_or_class: sharedaddy
+
+prune: no
+
+test_url: http://gigaom.com/2011/10/24/groupon-google-lawsuit/
+test_url: http://gigaom.com/2012/12/26/snapchat-rises-why-pokes-decline-shows-facebooks-inability-to-invent/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 478b23a..d3534b2
@@ -1,3 +1,3 @@
-single_page_link: //p[@id='skip']//a[contains(@href, 'skip')]\r
+single_page_link: //p[@id='skip']//a[contains(@href, 'skip')]
 
 test_url: http://gihyo.jp/dev/serial/01/machine-learning/0010
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 53095b3..9020786
@@ -1,6 +1,6 @@
-body: //div[@class="highlight"]/pre\r
-\r
-prune: no\r
-tidy: no\r
-\r
+body: //div[@class="highlight"]/pre
+
+prune: no
+tidy: no
+
 test_url: https://gist.github.com/1258908
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 144ce04..0de0750
@@ -1,3 +1,3 @@
-single_page_link: //div[@id="content"]//h2/a\r
+single_page_link: //div[@id="content"]//h2/a
 
 test_url: http://givemesomethingtoread.com/post/6285838917/the-baddest-lawyer-in-the-history-of-jersey
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 285e76c..2eb82a6
@@ -1,7 +1,7 @@
-body: //div[@id="leadimage" or @class="postcontent"]\r
-author: //div[@class="contentauthor"]\r
-date: //div[@class="timestamp"]\r
-\r
-prune: no\r
-\r
+body: //div[@id="leadimage" or @class="postcontent"]
+author: //div[@class="contentauthor"]
+date: //div[@class="timestamp"]
+
+prune: no
+
 test_url: http://www.gizmodo.co.uk/2013/02/bbc-forcing-poor-old-sir-david-attenborough-to-go-on-twitter/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c953625..e73ec9d
@@ -1,7 +1,11 @@
-body: //div[@class="post-body" or contains(@class, 'illustration top')]\r
-author: (//cite//span[@class="plus-icon"])[1]\r
-date: //span[@class="date"]\r
-\r
-prune: no\r
-\r
-test_url: http://gizmodo.com/5880147/kuhn-rikon-improves-their-spice-grinder-with-grade-school-science
\ No newline at end of file
+#body: //div[@class="post-body" or contains(@class, 'illustration top')]
+body: //div[contains(@class, 'image-annotation-box') or contains(@class, 'post-content')]
+#author: (//cite//span[@class="plus-icon"])[1]
+author: //span[contains(@class, 'display-name')]
+date: //span[@class="date"]
+
+prune: no
+
+test_url: http://gizmodo.com/5880147/kuhn-rikon-improves-their-spice-grinder-with-grade-school-science
+test_url: http://gizmodo.com/what-van-goghs-paintings-would-look-like-if-they-came-874035680
+test_url: http://gizmodo.com/vip.xml
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gizmodo.uol.com.br.txt b/inc/3rdparty/site_config/standard/gizmodo.uol.com.br.txt
new file mode 100755 (executable)
index 0000000..d963d68
--- /dev/null
@@ -0,0 +1,6 @@
+title: //h1
+
+body: //div[@id='destaques']//div[contains(@class, 'img')] | //div[@id='maincontent']//p
+
+test_url: http://gizmodo.uol.com.br/nvidia-gtx-titan-z/
+test_url: http://gizmodo.uol.com.br/perfil-mark-zuckerberg-hackeado/
old mode 100644 (file)
new mode 100755 (executable)
index 135ed50..71fbc93
@@ -1,4 +1,18 @@
-# Look for Open Graph data - http://ogp.me\r
-title: //meta[@property="og:title"]/@content\r
-date: //meta[@property="article:published_time"]/@content\r
-# article:author is someties URL, e.g. on guardian.co.uk
\ No newline at end of file
+# Look for Open Graph data - http://ogp.me
+title: //meta[@property="og:title"]/@content
+date: //meta[@property="article:published_time"]/@content
+# article:author is sometimes a URL, e.g. on guardian.co.uk
+
+# Remove Google Publisher Tags: https://support.google.com/dfp_sb/answer/1649768?hl=en
+#strip_id_or_class: div-gpt-ad
+
+# Strip doubleclick image ads
+strip_image_src: doubleclick.net
+
+# If you get chunks of Javascript code appearing in the extracted output, try uncommenting the lines below.
+# This tries to convert script tags to hidden div elements (which Full-Text RSS removes).
+# If you notice issues with this approach, please let us know.
+#find_string: <script 
+#replace_string: <div style="display:none" 
+#find_string: </script>
+#replace_string: </div>
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 95d4bec..ee50f68
@@ -1,15 +1,15 @@
-body: //div[@id='content']\r
-\r
-strip: //p[@class='top']\r
-strip: //h2[.='Where next?']\r
-strip_id_or_class: where-next\r
-strip_id_or_class: social-bookmarks\r
-strip_id_or_class: link-to-here\r
-strip_id_or_class: options-heading\r
-strip_id_or_class: page-options-content\r
-strip_id_or_class: page-info-bottom\r
-\r
-tidy: no\r
-prune: no\r
-\r
+body: //div[@id='content']
+
+strip: //p[@class='top']
+strip: //h2[.='Where next?']
+strip_id_or_class: where-next
+strip_id_or_class: social-bookmarks
+strip_id_or_class: link-to-here
+strip_id_or_class: options-heading
+strip_id_or_class: page-options-content
+strip_id_or_class: page-info-bottom
+
+tidy: no
+prune: no
+
 test_url: http://www.globalissues.org/article/39/a-primer-on-neoliberalism
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/globoesporte.globo.com.txt b/inc/3rdparty/site_config/standard/globoesporte.globo.com.txt
new file mode 100755 (executable)
index 0000000..fd8e70f
--- /dev/null
@@ -0,0 +1,25 @@
+title: //h1[@class="entry-title"]
+
+body: //div[@class='materia-titulo']/h2 | //*[@id="materia-letra"]
+
+date: //abbr[@class="published"]
+date: //abbr[@class="updated"]
+
+author: //*[@class="author"]/strong
+
+strip: //div[contains(@class,'foto')]/strong
+strip: //div[contains(@class,'frase-materia')]/div[@class='autor']
+strip: //div[contains(@class,'saibamais')]
+strip: //*[contains(text(),'Clique aqui e veja mais')]/ancestor::p
+strip: //ul[@class="toolbar"]
+
+# quotes
+wrap_in(blockquote): //div[@id='materia-letra']//div[contains(@class,'frase-materia')]/div[@class='frase']
+
+prune: no
+
+replace_string([Clique aqui e veja mais vídeos do Fluminense]): []
+
+test_url: http://globoesporte.globo.com/atletismo/noticia/2013/08/michael-johnson-diz-que-bolt-e-melhor-da-historia-nao-ha-duvidas.html
+test_url: http://globoesporte.globo.com/futebol/futebol-internacional/futebol-espanhol/noticia/2013/08/barca-atropela-levante-e-neymar-passa-em-branco-em-estreia-oficial.html
+test_url: http://globoesporte.globo.com/futebol/times/fluminense/noticia/2013/08/poupado-no-sabado-felipe-se-diz-pronto-para-ser-titular-contra-o-goias.html
diff --git a/inc/3rdparty/site_config/standard/gloswielkopolski.pl.txt b/inc/3rdparty/site_config/standard/gloswielkopolski.pl.txt
new file mode 100755 (executable)
index 0000000..1648795
--- /dev/null
@@ -0,0 +1,8 @@
+title: //article[@id='material']/header/h1
+author: //article[@id='material']/header/div[2]/p
+date: //article[@id='material']/header/p/time[1]
+body: //section[@id='tresc']
+next_page_link: .//section[@id='tresc']/div[@class='stronicowanie']/a[@rel='next']
+strip://div[@class='podobneSonda']
+
+test_url: http://www.gloswielkopolski.pl/artykul/803547,abc-telemarketingu-praca-ktora-zwalnia-z-myslenia,id,t.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 075c4d2..e25e9a0
@@ -1,16 +1,16 @@
-title: //div[@id='article_headline']//h1\r
-date: //div[contains(@class, 'articleDate')]//h4\r
-body: //div[@id='article_headline']/h2 | //div[@id='large_article_image' or @id='article_content']\r
-\r
-strip_id_or_class: relatedLinksBox\r
-strip_id_or_class: betting-widget\r
-strip_image_src: install_flash.gif\r
-\r
-strip: //table[contains(@style, 'float: right; width: 285px;')]\r
-strip: //div[@class='caption']\r
-\r
-tidy: no\r
-prune: no\r
-\r
-test_url: http://www.goal.com/en-gb/news/3284/euro-2012/2012/05/31/3139032/video-profile-back-to-his-very-best-for-bayern-frances-flair-and-\r
+title: //div[@id='article_headline']//h1
+date: //div[contains(@class, 'articleDate')]//h4
+body: //div[@id='article_headline']/h2 | //div[@id='large_article_image' or @id='article_content']
+
+strip_id_or_class: relatedLinksBox
+strip_id_or_class: betting-widget
+strip_image_src: install_flash.gif
+
+strip: //table[contains(@style, 'float: right; width: 285px;')]
+strip: //div[@class='caption']
+
+tidy: no
+prune: no
+
+test_url: http://www.goal.com/en-gb/news/3284/euro-2012/2012/05/31/3139032/video-profile-back-to-his-very-best-for-bayern-frances-flair-and-
 test_url: http://www.goal.com/en-gb/news/3284/euro-2012/2012/05/31/3139869/lampard-injury-a-bitter-blow-for-england-and-sorry-way-to#
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6c5d1c4..6afdebe
@@ -1,25 +1,25 @@
-# Jens Kohl, jens.kohl@...\r
-# - Added publication date\r
-# - Striped pagination block\r
-# - Added single page link\r
-# - Added xpath-querys for the printer friendly version\r
-\r
-title: //h1\r
-body: //div[@class='formatted']\r
-prune: no\r
-\r
-date: substring-after(//li[2][@class="text1"], 'Datum:')\r
-strip: //ol[@class="list-chapters"]\r
-strip_comments: yes\r
-\r
-# next: commands for printer friendly pages\r
-single_page_link: //a[contains(@href, 'print.php?a=')]/@href\r
-title: //body/h3\r
-strip_image_src: staticrl/images/logo.jpg\r
-strip_image_src: http://cpx.golem.de/cpx.php?class=7\r
-strip: //body/h3\r
-strip: //body/b[1]\r
-strip: //body/b[2]\r
-strip: //body/b[3]\r
-strip: //div[1]\r
+# Jens Kohl, jens.kohl@...
+# - Added publication date
+# - Stripped pagination block
+# - Added single page link
+# - Added XPath queries for the printer friendly version
+
+title: //h1
+body: //div[@class='formatted']
+prune: no
+
+date: substring-after(//li[2][@class="text1"], 'Datum:')
+strip: //ol[@class="list-chapters"]
+strip_comments: yes
+
+# next: commands for printer friendly pages
+single_page_link: //a[contains(@href, 'print.php?a=')]/@href
+title: //body/h3
+strip_image_src: staticrl/images/logo.jpg
+strip_image_src: http://cpx.golem.de/cpx.php?class=7
+strip: //body/h3
+strip: //body/b[1]
+strip: //body/b[2]
+strip: //body/b[3]
+strip: //div[1]
 test_url: http://www.golem.de/1112/88696.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 5cf6701..94159fb
@@ -1,4 +1,4 @@
-title: //div[@class="title"]/div/h1\r
-body: //div[@class="body"]\r
-date: //li[@class="date-time"]\r
+title: //div[@class="title"]/div/h1
+body: //div[@class="body"]
+date: //li[@class="date-time"]
 test_url: http://www.good.is/post/why-amazon-is-the-next-top-tech-company/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/goodfil.ms.txt b/inc/3rdparty/site_config/standard/goodfil.ms.txt
new file mode 100755 (executable)
index 0000000..f8bbbc6
--- /dev/null
@@ -0,0 +1,2 @@
+strip_id_or_class: gutter
+test_url: http://goodfil.ms/blog/posts/2012/08/13/angularjs-and-the-goodfilms-mobile-site-part-1/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c2fe4e4..e2d2d0b
@@ -1,14 +1,14 @@
-date: //meta[@name='og:article:published_time']/@value\r
-\r
-body: //div[@class='itemIntroText' or @class='itemImageBlock' or @class='itemFullText']\r
-\r
-strip_id_or_class: itemImageGallery\r
-\r
-# remove extras at end of post content\r
-find_string: <div style="margin:5px 0 10px;">\r
-replace_string: </div></body></html><!--\r
-\r
-prune: no\r
-\r
-test_url: http://www.gossip-tv.gr/story/158902/aggelike-daliane-semera-duskoleuontai-oloi-sta-epaggelmatika-tous\r
+date: //meta[@name='og:article:published_time']/@value
+
+body: //div[@class='itemIntroText' or @class='itemImageBlock' or @class='itemFullText']
+
+strip_id_or_class: itemImageGallery
+
+# remove extras at end of post content
+find_string: <div style="margin:5px 0 10px;">
+replace_string: </div></body></html><!--
+
+prune: no
+
+test_url: http://www.gossip-tv.gr/story/158902/aggelike-daliane-semera-duskoleuontai-oloi-sta-epaggelmatika-tous
 test_url: http://www.gossip-tv.gr/lifestyle/Taste/story/230266/lahtaristo-kai-ygieino-tost-sokolatas
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/goteborgsfria.se.txt b/inc/3rdparty/site_config/standard/goteborgsfria.se.txt
new file mode 100755 (executable)
index 0000000..c90aed0
--- /dev/null
@@ -0,0 +1,7 @@
+body: //div[contains(@class, 'layout__inner')]//div[contains(@class, 'file-image') or contains(@class, 'node__content')]
+author: //article//div[contains(@class, 'field-byline')]
+strip_id_or_class: rekommenderade
+strip_id_or_class: disqus
+strip_id_or_class: annonser
+
+test_url: http://www.goteborgsfria.se/artikel/112079
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 5179fc1..3645387
@@ -1,7 +1,7 @@
-title: //div[@class='entry-header']\r
-author: //span[@class='vcard author']\r
-date: //abbr[@class='published']\r
-#move_into(//div[@class='entry-body']): //img[@id='photo_1']\r
-body: //div[@class='entry-body']\r
+title: //div[@class='entry-header']
+author: //span[@class='vcard author']
+date: //abbr[@class='published']
+#move_into(//div[@class='entry-body']): //img[@id='photo_1']
+body: //div[@class='entry-body']
 strip: //div[@class='galleryEaseThumbs']
 test_url: http://gothamist.com/2012/03/15/fancy_cocktail_lounge_the_randolph.php
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7fb0ee0..f8af732
@@ -1,21 +1,21 @@
-title: //span[@id="showTitle"]\r
-author: //span[@id="showAuthor"]\r
-date: //span[@id="showRefDate"]\r
-\r
-strip: //span[@class="black_bold"]\r
-strip: //div[@id="sectionName"]\r
-strip: //div[@id="storyHeader"]\r
-\r
-body: //div[@id="newsBodyText"]\r
-\r
-strip_image_src: "http://www.gotomanager.com/img/mgrm/space.gif"\r
-strip_image_src: "http://www.gotomanager.com/images/separator.gif"\r
-strip_image_src: "http://www.gotomanager.com/images/spaces.gif"\r
-\r
-convert_double_br_tags: yes\r
-tidy: yes\r
-\r
-strip: //div[@id="smallLeadImage"]\r
-strip: //div[@id="truehitsSurvey"]\r
+title: //span[@id="showTitle"]
+author: //span[@id="showAuthor"]
+date: //span[@id="showRefDate"]
+
+strip: //span[@class="black_bold"]
+strip: //div[@id="sectionName"]
+strip: //div[@id="storyHeader"]
+
+body: //div[@id="newsBodyText"]
+
+strip_image_src: "http://www.gotomanager.com/img/mgrm/space.gif"
+strip_image_src: "http://www.gotomanager.com/images/separator.gif"
+strip_image_src: "http://www.gotomanager.com/images/spaces.gif"
+
+convert_double_br_tags: yes
+tidy: yes
+
+strip: //div[@id="smallLeadImage"]
+strip: //div[@id="truehitsSurvey"]
 strip: //table[@id="relatedInfoTable"]
 test_url: http://www.gotomanager.com/news/details.aspx?id=86759
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gov.ky.txt b/inc/3rdparty/site_config/standard/gov.ky.txt
new file mode 100755 (executable)
index 0000000..294ece3
--- /dev/null
@@ -0,0 +1,4 @@
+strip: //body//title
+
+test_url: http://www.gov.ky/pls/portal/PORTAL.wwv_media.show?p_id=7593947&p_settingssetid=1&p_settingssiteid=0&p_siteid=2425&p_type=basetext&p_textid=7593948
+test_url: http://www.rcips.ky/pls/portal/wlacomp.wlafeed.show_cignewsfeed_agency?p_sitecode=POL&p_agency=Police
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/gp.se.txt b/inc/3rdparty/site_config/standard/gp.se.txt
new file mode 100755 (executable)
index 0000000..158ae4e
--- /dev/null
@@ -0,0 +1,11 @@
+body: //div[@id='articleContainer']
+author: //div[@id='articleContent']//div[contains(@class, 'byline')]//span[contains(@class, 'name fn')]
+strip_id_or_class: toolbar
+strip_id_or_class: ADad
+strip_id_or_class: articleSerieWrapper
+strip_id_or_class: articleFloatContainer
+strip: //div[contains(@class, 'byline')]//img
+prune: no
+
+test_url: http://www.gp.se/nyheter/bohuslan/1.2045564-styckade-mannen-hade-mordat-hustrun
+test_url: http://www.gp.se/1.16560
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 233c4a7..8ad8a14
@@ -1,9 +1,9 @@
-next_page_link: //div[@class='pagination']//span[@class='paginationNext']/a\r
-strip_id_or_class: utility\r
-strip_id_or_class: keywords\r
-strip_id_or_class: pagination\r
-strip_id_or_class: position2_content\r
-body: //div[@class='article']\r
-title: //h1[@class='content-headline']\r
+next_page_link: //div[@class='pagination']//span[@class='paginationNext']/a
+strip_id_or_class: utility
+strip_id_or_class: keywords
+strip_id_or_class: pagination
+strip_id_or_class: position2_content
+body: //div[@class='article']
+title: //h1[@class='content-headline']
 author: //span[@class='contributor']//a
 test_url: http://www.gq.com/news-politics/newsmakers/201203/terry-thompson-ohio-zoo-massacre-chris-heath-gq-february-2012
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3269e08..b8d419f
@@ -1,20 +1,20 @@
-# this is fragile with footnotes -- leave it for now\r
-\r
-#tidy: no\r
-#prune: no\r
-#move_into(//article): //aside[@id='footnotes']\r
-author: //cite/a\r
-date: //time\r
-\r
-strip: //a[text()='Grantland']\r
-strip_id_or_class: ad-wrapper\r
-strip_id_or_class: fb-connect-link\r
-strip_id_or_class: fb-status\r
-strip: //li[@class='print']\r
-strip: //cite\r
-strip: //a[contains(text(), '[+]')]\r
-strip: //a[@id='jump-nav-link']\r
-strip: //h1[text()='Share This']\r
-strip: //h1[text()='Top Stories']\r
-strip: //div[@id="update-text-size"]\r
+# this is fragile with footnotes -- leave it for now
+
+#tidy: no
+#prune: no
+#move_into(//article): //aside[@id='footnotes']
+author: //cite/a
+date: //time
+
+strip: //a[text()='Grantland']
+strip_id_or_class: ad-wrapper
+strip_id_or_class: fb-connect-link
+strip_id_or_class: fb-status
+strip: //li[@class='print']
+strip: //cite
+strip: //a[contains(text(), '[+]')]
+strip: //a[@id='jump-nav-link']
+strip: //h1[text()='Share This']
+strip: //h1[text()='Top Stories']
+strip: //div[@id="update-text-size"]
 test_url: http://www.grantland.com/story/_/id/8421241/examining-new-albums-rock-veterans-no-doubt-green-day
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a525803..31a4107
@@ -1,11 +1,11 @@
-title: //div[@class="blogpost"]/h2\r
-author: //div[@class="blogpost"]/p[@class="byline"]/a\r
-date: //div[@class="blogpost"]/p[@class="byline"]/span[@class="time_posted"]\r
-body: //div[@class="blogpost"]\r
-strip_id_or_class: flag\r
-strip_id_or_class: byline\r
-strip_id_or_class: post_footer\r
-strip_id_or_class: related_posts\r
-strip_id_or_class: post_author_bios\r
+title: //div[@class="blogpost"]/h2
+author: //div[@class="blogpost"]/p[@class="byline"]/a
+date: //div[@class="blogpost"]/p[@class="byline"]/span[@class="time_posted"]
+body: //div[@class="blogpost"]
+strip_id_or_class: flag
+strip_id_or_class: byline
+strip_id_or_class: post_footer
+strip_id_or_class: related_posts
+strip_id_or_class: post_author_bios
 strip: //h2
 test_url: http://greatergreaterwashington.org/post/12457/ask-ggw-what-will-happen-to-the-1000-series-railcars/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7e15a5c..0fe30ef
@@ -1,5 +1,5 @@
-title://h1\r
-author://span[@class="submitted"]/a\r
-date:substring-after(//span[@class="submitted"],'on ')\r
+title://h1
+author://span[@class="submitted"]/a
+date:substring-after(//span[@class="submitted"],'on ')
 body://div[@class="content"]
 test_url: http://groups.drupal.org/node/36816
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e69044b..97b620d
@@ -1,5 +1,5 @@
-body: //div[@class='wrapper_half']//ul[@class='details'] | //div[@class='wrapper_half']//p[@class='synopsis'] | //div[@class='wrapper_half']//div[@class='image'] | //div[@class='wrapper_half']//div[@class='article']\r
-strip: //div[@class='wrapper_half']//ul[@class='details']/li[position()>1]\r
-prune: no\r
-tidy: no\r
+body: //div[@class='wrapper_half']//ul[@class='details'] | //div[@class='wrapper_half']//p[@class='synopsis'] | //div[@class='wrapper_half']//div[@class='image'] | //div[@class='wrapper_half']//div[@class='article']
+strip: //div[@class='wrapper_half']//ul[@class='details']/li[position()>1]
+prune: no
+tidy: no
 test_url: http://gulfnews.com/news/gulf/uae/government/abu-dhabi-centre-offers-useful-information-1.811084
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 00255eb..f8327be
@@ -1,22 +1,22 @@
-# To administrator:\r
-# Please change the hostname to "www.guokr.com/article/*"\r
-# Not working for "www.guokr.com/post/" pages configured by carlosliu913@gmail.com\r
-\r
-# This filter is tested on:\r
-# http://www.guokr.com/article/274325/\r
-# http://www.guokr.com/article/275013/\r
-\r
-title://h1\r
-author://div[contains(@class, 'content-th-info')]/a\r
-date://div[contains(@class, 'content-th-info')]/span\r
-body://div[contains(@class, 'Content')]\r
-\r
-strip://div[contains(@class, 'bottom-i')]\r
-strip://div[contains(@class, 'copyright')]\r
-strip://div[contains(@class, 'fr')]\r
-strip://div[contains(@class, 'content-th-info')]\r
-strip://h1[contains(@id, 'articleTitle')]\r
-strip://div[contains(@class, 'side')]\r
-strip://div[contains(@class, 'top-wp')]\r
-test_url: http://www.guokr.com/article/275013/\r
+# To administrator:
+# Please change the hostname to "www.guokr.com/article/*"
+# Not working for "www.guokr.com/post/" pages configured by carlosliu913@gmail.com
+
+# This filter is tested on:
+# http://www.guokr.com/article/274325/
+# http://www.guokr.com/article/275013/
+
+title://h1
+author://div[contains(@class, 'content-th-info')]/a
+date://div[contains(@class, 'content-th-info')]/span
+body://div[contains(@class, 'Content')]
+
+strip://div[contains(@class, 'bottom-i')]
+strip://div[contains(@class, 'copyright')]
+strip://div[contains(@class, 'fr')]
+strip://div[contains(@class, 'content-th-info')]
+strip://h1[contains(@id, 'articleTitle')]
+strip://div[contains(@class, 'side')]
+strip://div[contains(@class, 'top-wp')]
+test_url: http://www.guokr.com/article/275013/
 test_url: http://www.guokr.com/article/338387/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index bc1ce68..1bb2bc7
@@ -1,5 +1,5 @@
-title: //div[@id="habermetni"]/h1[@id="haber_baslik"]\r
-body: //div[@id="habermetni"]/p\r
-strip: //img[@class='newsDetailLeft']\r
+title: //div[@id="habermetni"]/h1[@id="haber_baslik"]
+body: //div[@id="habermetni"]/p
+strip: //img[@class='newsDetailLeft']
 strip_image_src: /haber-resimleri/
 test_url: http://www.haberler.com/emniyete-atacakti-elinde-patladi-3198733-haberi/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/habrahabr.ru.txt b/inc/3rdparty/site_config/standard/habrahabr.ru.txt
new file mode 100755 (executable)
index 0000000..6753835
--- /dev/null
@@ -0,0 +1,21 @@
+title: //span[@class="post_title"]
+author: //div[@class="author"]
+date: //div[@class="published"]
+
+body: //div[@class='content html_format'] | //div[@id='comments']
+
+strip: //a[@class="link_to_comment"]
+strip: //div[@class="show_tree"]
+strip: //a[@class="to_parent"]
+
+
+replace_string(class="reply_comments"): style="padding-left: 20px"
+replace_string(class="voting   "): style="float: right"
+replace_string(src="//habrastorage.org/getpro/habr/avatars/): style="width:24px; height:24px;" class="123" src="//habrastorage.org/getpro/habr/avatars/
+replace_string(class="info  "): style="padding-top:5px;font-size:0.85em;line-height:24px;"
+
+
+prune: no
+tidy: no
+
+test_url: http://habrahabr.ru/post/229883/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hackmake.org.txt b/inc/3rdparty/site_config/standard/hackmake.org.txt
new file mode 100755 (executable)
index 0000000..9814011
--- /dev/null
@@ -0,0 +1,7 @@
+date: //article//time[@pubdate]
+body: //article/div[@id="post-wide"]
+title: //article/header/h2
+strip: //div[@id="comment"]
+strip: //footer
+author: substring-after(//footer/p[@class='byline'] , 'By')
+test_url: http://hackmake.org/2012/12/21/mindfulness-of-concentration
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7989d09..1802efe
@@ -1,5 +1,5 @@
-title:substring-before(id("maincontent")/table, 'Posted')\r
-body:id("maincontent")/p\r
-# eventually convert linebreaks better\r
+title:substring-before(id("maincontent")/table, 'Posted')
+body:id("maincontent")/p
+# eventually convert linebreaks better
 
 test_url: http://halo.bungie.org/fanfic/?story=Delahunt0312112316071.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 747f90a..33f7e72
@@ -1,7 +1,7 @@
-# Remove right column\r
-strip: //*[(@class = 'right_col')]\r
-\r
-# Remove comments etc.\r
-strip: //*[(@class = 'category')]\r
+# Remove right column
+strip: //*[(@class = 'right_col')]
+
+# Remove comments etc.
+strip: //*[(@class = 'category')]
 strip: /html/body/div[1][@class='absolute_content_high']/div[1][@class='wrapper']/div[1][@class='main_col']/div[@class='main_content']/h3
 test_url: http://hammers.theoffside.com/carling-cup/a-funny-thing-happened-on-the-way-to-4-nil.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/handelsblatt.com.txt b/inc/3rdparty/site_config/standard/handelsblatt.com.txt
new file mode 100755 (executable)
index 0000000..7d067aa
--- /dev/null
@@ -0,0 +1,31 @@
+#Single Page
+single_page_link: //li[contains(@class,"hcf-print")]/a
+
+# Title  hcf-headline
+title: //span[@class='hcf-headline']
+
+# Authors 
+author: //div[@class="hcf-author"]/a/text()
+author: substring-after(//div[@class='hcf-author'], 'von ')
+
+# Date 
+date: //div[@class='hcf-article-date']
+
+# Body
+body: //div[@class='article']
+
+# General removals
+strip: //div[contains(@class,"hcf-smartbox")]
+strip: //div[contains(@class,"hcf-stopper")]
+strip: //div[contains(@class,"hcf-img-controls")]
+strip: //span[@class='hcf-location-mark']
+strip: //span[@class='hcf-copyright']
+strip: //div[@class='hcf-copyright']
+strip: //div[@class='hcf-origin']
+
+
+
+
+# Fix picture captions
+wrap_in(small): //div[@class="hcf-caption"]
+test_url: http://www.handelsblatt.com/meinung/gastbeitraege/gastkommentar-zum-emissionshandel-kurskorrekturen-fuehren-zum-kentern/8044326.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d3ffeab..1dca632
@@ -1,4 +1,4 @@
-date: //span[@class="item-date"]\r
-body: //div[@class="item-content"]\r
+date: //span[@class="item-date"]
+body: //div[@class="item-content"]
 strip_comments: no
 test_url: http://www.hanselman.com/blog/BrainBytesBackBunsTheProgrammersPriorities.aspx
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 318885c..e4f1f6b
@@ -1,6 +1,6 @@
-title: //h1\r
-author: //a[@class='a_aut']\r
-body: //div[@class='content_dossier']\r
-strip: //div[@id='pagination']\r
+title: //h1
+author: //a[@class='a_aut']
+body: //div[@class='content_dossier']
+strip: //div[@id='pagination']
 next_page_link: //div[@class='sommaire_colonne']//span[@class='page_actuelle']/following::span[@class='autres_page']//a/@href
 test_url: http://www.hardware.fr/articles/850-1/pci-express-3-0-impact-performances.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hardware.no.txt b/inc/3rdparty/site_config/standard/hardware.no.txt
new file mode 100755 (executable)
index 0000000..cbbcf84
--- /dev/null
@@ -0,0 +1,16 @@
+title: //h1[@class='headline']
+title: //h2[@itemprop='alternativeHeadline']
+title: //h1[@itemprop='headline']
+author: //span[@itemprop='name']
+date: //time[@itemprop='datePublished']
+body: //div[@itemprop='reviewBody']
+
+wrap_in(blockquote): //div[@class='factBox']
+
+next_page_link: //a[@rel='next']
+
+strip_id_or_class: 'product-box'
+strip: //a[@rel='next']
+strip: //a[text()='Del på Facebook']
+strip: //a[text()='Del på Twitter']
+test_url: http://www.hardware.no/artikler/asus-vg248qe/132792
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index fd6145e..c2f292e
@@ -1,6 +1,7 @@
-title: //div[@id='article-title']\r
-author: //div[@id='articleAuthors']\r
-body: //div[@id='article']\r
-strip: //div[@class='module wide']\r
-next_page_link: //a[@title='Next Page']
-test_url: http://hbr.org/2012/04/the-real-leadership-lessons-of-steve-jobs/ar/
\ No newline at end of file
+title: //div[@id='article-title']
+author: //div[@id='articleAuthors']
+body: //div[@id='article']
+strip: //div[@class='module wide']
+#single_page_link: //a[@class='social-print']
+test_url: http://hbr.org/2012/04/the-real-leadership-lessons-of-steve-jobs/ar/
+test_url: http://hbr.org/2013/03/big-bang-disruption/ar/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/headrush.typepad.com.txt b/inc/3rdparty/site_config/standard/headrush.typepad.com.txt
new file mode 100755 (executable)
index 0000000..a314677
--- /dev/null
@@ -0,0 +1,14 @@
+title://div[@class='content']/h3[1]
+body://div[@class='content']
+
+# Article nav
+strip://div[@class='content']/p[1]
+
+# Comments and trackbacks
+strip://h2/following-sibling::p
+strip://h2
+
+# Posted on
+strip://b/p
+strip://div[@class='content']/p[@class='posted']
+test_url: http://headrush.typepad.com/creating_passionate_users/2005/05/the_case_for_ea.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 1da82ac..daff614
@@ -1,3 +1,3 @@
-body: //div[@id='content']/div\r
+body: //div[@id='content']/div
 date: //p[@class='author_date']/span[@class='date']
 test_url: http://heise-online.mobi/newsticker/meldung/Amazons-Appstore-in-der-Kritik-Ein-Desaster-fuer-Kunden-und-Entwickler-1273936.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 5f19d3f..c51af56
@@ -1,7 +1,7 @@
-single_page_link: //p[@class='news_option']/a\r
-\r
-date: //p[@class='news_datum']\r
-title: //h1\r
-body: //div[@class='meldung_wrapper']\r
-\r
+single_page_link: //p[@class='news_option']/a
+
+date: //p[@class='news_datum']
+title: //h1
+body: //div[@class='meldung_wrapper']
+
 test_url: http://www.heise.de/newsticker/meldung/Europa-soll-Grundrechteschutz-im-Netz-staerken-1392664.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hemmings.com.txt b/inc/3rdparty/site_config/standard/hemmings.com.txt
new file mode 100755 (executable)
index 0000000..a02b4a6
--- /dev/null
@@ -0,0 +1,9 @@
+title: //h2
+body: //div[@id='leftdetail']
+single_page_link: //a[contains(@href, 'printable=1')]
+strip: //a[contains(., 'Full Version')]
+
+prune: no
+
+test_url: http://www.hemmings.com/classifieds/dealer/ferrari/330gtc/1601235.html
+test_url: http://www.hemmings.com/rss/keyword.xml?adtype=carsforsale&make=ferrari
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/heroturko.me.txt b/inc/3rdparty/site_config/standard/heroturko.me.txt
new file mode 100755 (executable)
index 0000000..07b6adf
--- /dev/null
@@ -0,0 +1,6 @@
+title: //div[contains(@class, 'title')]//h1
+body: //div[contains(@class, 'story')]
+
+prune: no
+
+test_url: http://www.heroturko.me/5223034-ds-catia-p3-v5-6r2014-gasp0-x86x64-multilanguage-english-docs.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d866f62..4ed0b8b
@@ -1,7 +1,7 @@
-body: //div[@id='article_holder']//div[@class='image'] | //div[@id='article_body']\r
-\r
-prune: no\r
-tidy: no\r
-\r
-test_url: http://hespress.com/videos/73684.html\r
+body: //div[@id='article_holder']//div[@class='image'] | //div[@id='article_body']
+
+prune: no
+tidy: no
+
+test_url: http://hespress.com/videos/73684.html
 test_url: http://hespress.com/permalink/73678.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hiamag.com.txt b/inc/3rdparty/site_config/standard/hiamag.com.txt
new file mode 100755 (executable)
index 0000000..3c7ba5a
--- /dev/null
@@ -0,0 +1,3 @@
+body: (//div[contains(@class, 'gallery-slides')]//img)[1] | //div[contains(@class, 'node_body_inner')]
+
+test_url: http://www.hiamag.com/rss.xml
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index fd50b6a..5a808fa
@@ -1,3 +1,3 @@
-body: //div[@class='journal-entry-text']\r
+body: //div[@class='journal-entry-text']
 
 test_url: http://highscalability.com/blog/2011/3/14/6-lessons-from-dropbox-one-million-files-saved-every-15-minu.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c57c1aa..d869a86
@@ -1,4 +1,4 @@
-body: //div[@class = 'pd']\r
-strip: //div[@id = 'overzicht-albumrecensies']\r
+body: //div[@class = 'pd']
+strip: //div[@id = 'overzicht-albumrecensies']
 strip: //div[@id = 'jc']
 test_url: http://hiphopleeft.nl/index.php?option=com_content&view=article&id=2767:mark-ronson-record-collection&catid=66:m&Itemid=142
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index dc687f3..78fb60a
@@ -1,10 +1,10 @@
-body://div[@id = 'content']\r
-author://span[@class = 'authors']\r
-author://span[@class = 'ht-vtag'][1]\r
-date:substring-before(//meta[@name = 'dc.date']/@content,'T')\r
-strip://div[contains(@class, 'region-ubercontent')]\r
-strip://h1\r
-strip://div[@id = 'ht-author']\r
-strip://ul[@class = 'links inline'] \r
-strip://div[@id = 'ht-tools']\r
+body://div[@id = 'content']
+author://span[@class = 'authors']
+author://span[@class = 'ht-vtag'][1]
+date:substring-before(//meta[@name = 'dc.date']/@content,'T')
+strip://div[contains(@class, 'region-ubercontent')]
+strip://h1
+strip://div[@id = 'ht-author']
+strip://ul[@class = 'links inline'] 
+strip://div[@id = 'ht-tools']
 test_url: http://www.historytoday.com/carol-dyhouse/skin-deep-fall-fur
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index eeee159..2da13a8
@@ -1,5 +1,5 @@
-title: //*[@class='ptitle']\r
-date: //span[@class='date']\r
-body: //div[@class='body']\r
+title: //*[@class='ptitle']
+date: //span[@class='date']
+body: //div[@class='body']
 prune: no
 test_url: http://hmercer.com/2011/07/why-i-switched-to-jekyll/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/hollywoodlife.com.txt b/inc/3rdparty/site_config/standard/hollywoodlife.com.txt
new file mode 100755 (executable)
index 0000000..975ffa2
--- /dev/null
@@ -0,0 +1,22 @@
+date: //meta[@name='sailthru.date']/@content
+body: //article[contains(@class, 'entry-content')]
+
+strip_image_src: subscribe.png
+
+strip_id_or_class: wpcom-iframe-form
+strip_id_or_class: gallery-thumbs
+strip_id_or_class: twitter
+strip_id_or_class: fb-link
+strip_id_or_class: pinterest
+
+strip: //div[@class='data']
+strip: //iframe[contains(@name, 'wpcom')]
+
+find_string: <a href="http://www.youtube.com/subscription_center?add_user_id=2rJLq19N0dGrxfib80M
+replace_string: </p></div></body></html><!--
+
+find_string: <h3>More
+replace_string: </div></body></html><!--
+
+test_url: http://hollywoodlife.com/2013/10/04/miriam-carey-dead-capitol-hill-car-chase-shooting-postpartum-depression/
+test_url: http://hollywoodlife.com/feed/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d43e644..8ed26ff
@@ -1,4 +1,4 @@
-body: //div[@id='entry-body']\r
-strip_id_or_class: paginate\r
+body: //div[@id='entry-body']
+strip_id_or_class: paginate
 strip: //p[contains(., 'Additional Resources')]
 test_url: http://hometheaterreview.com/dreamvision-starlight-3-three-chip-d-ila-projector-reviewed/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e19dd52..dfd8193
@@ -1,5 +1,5 @@
-body: //table[@class='ap-smallphoto-table'] | //div[@class='body']//*[@class='entry-content']\r
-tidy: no\r
-strip_image_src: analytics.apnewsregistry\r
-\r
+body: //table[@class='ap-smallphoto-table'] | //div[@class='body']//*[@class='entry-content']
+tidy: no
+strip_image_src: analytics.apnewsregistry
+
 test_url: http://hosted.ap.org/dynamic/stories/U/US_SPENDING_SHOWDOWN?SITE=FLPET&SECTION=HOME&TEMPLATE=DEFAULT&CTIME=2011-04-06-07-46-50
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/howtogeek.com.txt b/inc/3rdparty/site_config/standard/howtogeek.com.txt
new file mode 100755 (executable)
index 0000000..baa2ed4
--- /dev/null
@@ -0,0 +1,11 @@
+body: //div[contains(@class, 'thecontent')]
+
+strip_image_src: loading.gif
+find_string:src="http://cdn.howtogeek.com/public/images/blank.gif"
+replace_string:-
+find_string:data-href=
+replace_string:src=
+
+strip_id_or_class: relatedside
+
+test_url: http://www.howtogeek.com/school/microsoft-excel-formulas-and-functions/lesson1/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 67125fb..360dc72
@@ -1,3 +1,3 @@
-prune: yes\r
+prune: yes
 tidy: yes
 test_url: http://www.hs.fi/kotimaa/Teollisuushallin%20palo%20levitt%C3%A4%C3%A4%20vaarallista%20savua%20Tuusulassa/a1305571582405
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a8412d2..4653508
@@ -1,3 +1,3 @@
-single_page_link: //iframe[@id='hootFrame']/@src\r
-\r
+single_page_link: //iframe[@id='hootFrame']/@src
+
 test_url: http://ht.ly/bOiZV
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d40513b..d4618c1
@@ -1,16 +1,21 @@
-title: //meta[@property="og:title"]/@content\r
-body: //div[img[starts-with(@id, 'img_caption')]] | //div[@class="big_photo"] | //div[contains(@class, 'entry_body_text')]\r
-date: //meta[@name="publish_date"]/@content\r
-author: //a[@rel="author"]\r
-author: //meta[@name="author"]/@content\r
-prune: no\r
-tidy: no\r
-strip: //footer\r
-strip_id_or_class: ps-slideshow\r
-strip_id_or_class: fs-slideshow\r
-strip: //p[contains(., 'Related on HuffPost:')]\r
-# end early\r
-replace_string(<div class="sbm-main): </body></html><div class="not-interested \r
-\r
-test_url: http://www.huffingtonpost.com/mitch-moxley/tracking-beijings-boom-th_b_1209828.html\r
-test_url: http://www.huffingtonpost.com/2012/09/11/president-obama-iphone-throwdown_n_1873826.html
\ No newline at end of file
+title: //meta[@property="og:title"]/@content
+body: //div[img[starts-with(@id, 'img_caption')]] | //div[@class="big_photo"] | //div[contains(@class, 'entry_body_text')]
+date: //meta[@name="publish_date"]/@content
+author: //a[@rel="author"]
+author: //meta[@name="author"]/@content
+
+prune: no
+tidy: no
+
+strip: //footer
+strip_id_or_class: ps-slideshow
+strip_id_or_class: fs-slideshow
+strip: //p[contains(., 'Related on HuffPost:')]
+strip_id_or_class: contribute-story
+strip_id_or_class: promo_holder
+
+# end early
+replace_string(<div class="sbm-main): </body></html><div class="not-interested 
+
+test_url: http://www.huffingtonpost.com/mitch-moxley/tracking-beijings-boom-th_b_1209828.html
+test_url: http://www.huffingtonpost.com/2012/09/11/president-obama-iphone-throwdown_n_1873826.html
old mode 100644 (file)
new mode 100755 (executable)
index ec7d3c0..92d3c67
@@ -1,5 +1,5 @@
-title: //h3[@class="entry-header"]\r
-date: //h2[@class="date-header"]\r
-body: //div[contains(@class, 'entry')]\r
+title: //h3[@class="entry-header"]
+date: //h2[@class="date-header"]
+body: //div[contains(@class, 'entry')]
 
 test_url: http://www.humantransit.org/2012/06/can-network-primers-reduce-grief-about-network-design.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ccf09dc..68fd220
@@ -1,7 +1,7 @@
-title: //div[@class='HaberDetayTitleHold Title']/h1\r
-body: //div[@id='YazarDetayText']\r
-author: //div[@class='HaberDetayTitleHold Title']/h1\r
-prune: no\r
-\r
-test_url: http://www.hurriyet.com.tr/ekonomi/19490260.asp\r
+title: //div[@class='HaberDetayTitleHold Title']/h1
+body: //div[@id='YazarDetayText']
+author: //div[@class='HaberDetayTitleHold Title']/h1
+prune: no
+
+test_url: http://www.hurriyet.com.tr/ekonomi/19490260.asp
 test_url: http://www.hurriyet.com.tr/yazarlar/22078439.asp
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 06fa98d..05e7b5f
@@ -1,9 +1,9 @@
-title: //div[@id='pg-content']//h1\r
-body: //div[@id='articleBody0']\r
-replace_string(</table>): </table><br /><br />\r
-\r
-single_page_link: //div[@class="up-header"]/a\r
-\r
-prune: no\r
+title: //div[@id='pg-content']//h1
+body: //div[@id='articleBody0']
+replace_string(</table>): </table><br /><br />
+
+single_page_link: //div[@class="up-header"]/a
+
+prune: no
 
 test_url: http://hvg.hu/w/20111125_sparta
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 49b46da..23e4754
@@ -1,10 +1,10 @@
-body: //div[@id='content']//div[contains(@class, 'wp-image-') or contains(@class, 'entry')][1]\r
-author: //span[@class='author']/a\r
-\r
-strip_id_or_class: disqus\r
-strip_id_or_class: paginator\r
-strip_id_or_class: photo-number\r
-\r
-prune: no\r
-\r
+body: //div[@id='content']//div[contains(@class, 'wp-image-') or contains(@class, 'entry')][1]
+author: //span[@class='author']/a
+
+strip_id_or_class: disqus
+strip_id_or_class: paginator
+strip_id_or_class: photo-number
+
+prune: no
+
 test_url: http://hypebeast.com/2012/11/stussy-2012-fall-winter-november-releases/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/icannabis.tumblr.com.txt b/inc/3rdparty/site_config/standard/icannabis.tumblr.com.txt
new file mode 100755 (executable)
index 0000000..3bda753
--- /dev/null
@@ -0,0 +1,9 @@
+tidy:no
+prune:no
+
+body://div[contains(@id,'content')]
+
+strip_id_or_class:meta
+strip_id_or_class:notes
+strip_id_or_class:pagination
+test_url: http://icannabis.tumblr.com/post/28660592471/reviewmswireless3000
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/idealog.co.nz.txt b/inc/3rdparty/site_config/standard/idealog.co.nz.txt
new file mode 100755 (executable)
index 0000000..ca88f60
--- /dev/null
@@ -0,0 +1,12 @@
+body: //div[@class='content']
+
+strip: //p[@class='dateline']
+strip: //hr
+strip_id_or_class: share
+strip_id_or_class: comments
+strip_id_or_class: tags
+
+title: substring-before(//title,' ::')
+author: substring-before(//p[@class='dateline'],',') 
+date: //p[@class='dateline']/time
+test_url: http://www.idealog.co.nz/blog/2012/12/geeks-plane-help-kiwis-take-san-francisco
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e1badef..f3b3379
@@ -1,7 +1,7 @@
-title: //a[@class='post_title']\r
-body: //div[@class='entrybox']\r
-strip_id_or_class: post_title\r
-date: //div[@class='entrybox']/b[1]\r
-strip: //div[@class='entrybox']/b[1]\r
+title: //a[@class='post_title']
+body: //div[@class='entrybox']
+strip_id_or_class: post_title
+date: //div[@class='entrybox']/b[1]
+strip: //div[@class='entrybox']/b[1]
 author: string('Maciej Cegłowski')
 test_url: http://idlewords.com/2011/08/why_arabic_is_terrific.htm
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d7ec2da..45dd5f2
@@ -1,5 +1,5 @@
-author: substring-after(substring-after(//span[@class='submitted'],'- '),'- ')\r
-date: substring-before(//span[@class='submitted'], concat('- ',substring-after(substring-after(//span[@class='submitted'],'- '),'- ')))\r
-body: //div[@class='content clear-block zoneApple']\r
+author: substring-after(substring-after(//span[@class='submitted'],'- '),'- ')
+date: substring-before(//span[@class='submitted'], concat('- ',substring-after(substring-after(//span[@class='submitted'],'- '),'- ')))
+body: //div[@class='content clear-block zoneApple']
 
 test_url: http://www.igeneration.fr/iphone/l-iphone-et-l-ipad-chouchous-des-tpe-et-pme-55112
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f74178a..6063530
@@ -1,7 +1,7 @@
-title://h1[@class='page-title']\r
-body://*[@id='content']//div[contains(@class,'node-content')]\r
-\r
-author://*[@id='content']//div[contains(@class,'node-submitted')]/a\r
-\r
+title://h1[@class='page-title']
+body://*[@id='content']//div[contains(@class,'node-content')]
+
+author://*[@id='content']//div[contains(@class,'node-submitted')]/a
+
 date:substring-after(//div[contains(@class,'node-submitted')],' on ')
 test_url: http://ignoredbydinosaurs.com/2011/09/great-lie-lorem-ipsum
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ca1e54a..9880b51
@@ -1,13 +1,13 @@
-# Get proper Title, Author and Date info\r
-title: substring-before(//title, '|')\r
-author: substring-after(//h4/a[@href='http://www.ilounge.com/index.php/ilounge/aboutus/'], 'By')\r
-date: //span[@class='instapaper_date']\r
-\r
-# For Reviews & First Looks, get the intro paragraph and put it in front of the main body.\r
-move_into(//div[@id='instapaper_para1']): //div[@id='instapaper_body']\r
-body: //div[@id='instapaper_para1']\r
-strip: //div[@class='reviewinfo']\r
-\r
-# We don't use footnotes, so why bother checking for them? \r
+# Get proper Title, Author and Date info
+title: substring-before(//title, '|')
+author: substring-after(//h4/a[@href='http://www.ilounge.com/index.php/ilounge/aboutus/'], 'By')
+date: //span[@class='instapaper_date']
+
+# For Reviews & First Looks, get the intro paragraph and put it in front of the main body.
+move_into(//div[@id='instapaper_para1']): //div[@id='instapaper_body']
+body: //div[@id='instapaper_para1']
+strip: //div[@class='reviewinfo']
+
+# We don't use footnotes, so why bother checking for them? 
 footnotes: no
 test_url: http://www.ilounge.com/index.php/reviews/entry/luxa2-alum-x-for-iphone-4-4s/?utm_source=twitterfeed&utm_medium=twitter
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index da6a60f..51a7eb9
@@ -1,5 +1,5 @@
-title: //div[@class='published visible e2-smart-title']//span\r
-author: //span[@id='e2-blog-title']\r
-date: //p[@class='super-h']\r
+title: //div[@class='published visible e2-smart-title']//span
+author: //span[@id='e2-blog-title']
+date: //p[@class='super-h']
 body: //div[@class='text published visible']
 test_url: http://ilyabirman.ru/meanwhile/2011/11/15/2/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0589aaa..5410e64
@@ -1,21 +1,21 @@
-author: substring-after(substring-before(//div[@id='byline'],'|'),'By')\r
-author: //div[@class='byline']/a\r
-date: //span[@class='pubdate']\r
-# print friendly page\r
-body: //div[@id='text']\r
-# regular page\r
-body: //div[@id= 'articlecontent']\r
-\r
-strip: //div[@id= 'articlecontent']/h1\r
-strip: //div[@id='articlecontent']/p[@class='deck']\r
-strip: //div[@id='articlecontent']/div[@class='byline']\r
-strip: //div[@id='articlespacer']\r
-strip: //div[@id='incsharebox']\r
-strip: //div[@id='articlesidebar']\r
-\r
-prune: no\r
-\r
-single_page_link: //a[contains(@href, 'Printer_Friendly.html')]\r
-strip: //a[contains(., 'Dig Deeper')]\r
-test_url: http://www.inc.com/guides/2010/11/seven-tips-for-lobbying-politicians.html\r
+author: substring-after(substring-before(//div[@id='byline'],'|'),'By')
+author: //div[@class='byline']/a
+date: //span[@class='pubdate']
+# print friendly page
+body: //div[@id='text']
+# regular page
+body: //div[@id= 'articlecontent']
+
+strip: //div[@id= 'articlecontent']/h1
+strip: //div[@id='articlecontent']/p[@class='deck']
+strip: //div[@id='articlecontent']/div[@class='byline']
+strip: //div[@id='articlespacer']
+strip: //div[@id='incsharebox']
+strip: //div[@id='articlesidebar']
+
+prune: no
+
+single_page_link: //a[contains(@href, 'Printer_Friendly.html')]
+strip: //a[contains(., 'Dig Deeper')]
+test_url: http://www.inc.com/guides/2010/11/seven-tips-for-lobbying-politicians.html
 test_url: http://www.inc.com/eric-schurenberg/startups-are-we-geting-irrationally-exuberant.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 47baf36..af74220
@@ -1,9 +1,9 @@
-title: //meta[@property='og:title']/@content\r
-body: //div[contains(@class, 'articleContent')]\r
-date: //meta[@property='article:published_time']/@content\r
-author: //div[@id='main']//div[@class='byline']//span[@class='authorName']\r
-\r
-strip_id_or_class: RelatedArtTag\r
-\r
+title: //meta[@property='og:title']/@content
+body: //div[contains(@class, 'articleContent')]
+date: //meta[@property='article:published_time']/@content
+author: //div[@id='main']//div[@class='byline']//span[@class='authorName']
+
+strip_id_or_class: RelatedArtTag
+
 tidy: no
 test_url: http://www.independent.co.uk/news/world/middle-east/syria-could-face-human-rights-probe-2274326.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e7a35e8..8112105
@@ -1,6 +1,6 @@
-body: //figure[@class='mainVideo']\r
-strip: //figcaption\r
-\r
-prune: no\r
-\r
+body: //figure[@class='mainVideo']
+strip: //figcaption
+
+prune: no
+
 test_url: http://www.indiatimes.com/bollywood/kareena-insecure-about-saif-working-with-bipasha-23386.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 312cec4..5225245
@@ -1,5 +1,5 @@
-title: //div[@class='weblogPost']/h3[1]\r
-author: ("Brent Simmons")\r
-date: //span[@class="weblogPostDisplayDate"]\r
+title: //div[@class='weblogPost']/h3[1]
+author: ("Brent Simmons")
+date: //span[@class="weblogPostDisplayDate"]
 body: //div[@class='weblogPostBody']
 test_url: http://inessential.com/2011/10/25/why_just_store_the_app_data_on_dropbo
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 64cf3c8..dee69f8
@@ -1,4 +1,4 @@
-title://h1\r
-body://div[@id='texto_link']\r
+title://h1
+body://div[@id='texto_link']
 
 test_url: http://info.abril.com.br/noticias/internet/filme-do-youtube-vai-estrear-nos-cinemas-22042011-6.shl
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3a4e402..f4a328a
@@ -1,14 +1,14 @@
-body: //div[@id="intTranscript"]\r
-body: //div[@class="box-content"]\r
-title: //div[@class="box-content"]//h1[1]\r
-author: //p[@class="info"]/strong \r
-date: substring-before(substring-after(//p[@class="info"], "on"), "Length")\r
-strip: //div[@class="box-content"]//h1[1]\r
-strip: //div[@class="box-content"]//p[@class="info"]\r
-strip_id_or_class: vendor-content-box\r
-strip_id_or_class: tags2\r
-strip_id_or_class: instructions\r
-strip_id_or_class: comments\r
-strip_id_or_class: forum-list-tree\r
+body: //div[@id="intTranscript"]
+body: //div[@class="box-content"]
+title: //div[@class="box-content"]//h1[1]
+author: //p[@class="info"]/strong 
+date: substring-before(substring-after(//p[@class="info"], "on"), "Length")
+strip: //div[@class="box-content"]//h1[1]
+strip: //div[@class="box-content"]//p[@class="info"]
+strip_id_or_class: vendor-content-box
+strip_id_or_class: tags2
+strip_id_or_class: instructions
+strip_id_or_class: comments
+strip_id_or_class: forum-list-tree
 strip: //div[@class="addthis_toolbox addthis_default_style"]
 test_url: http://www.infoq.com/interviews/oleg-zhurakousky-javaone2011-interview
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index eedec24..7798749
@@ -1,9 +1,9 @@
-title: //div[@class='tituloInt']\r
-body: //div[@class='notaPortada']\r
-strip: //img[@id='imgHorizontalInt imgDetalleImg imagenNota']\r
-date: //span[@class='publi']\r
-author: //span[@class='autor']\r
-tidy: no\r
-prune: no\r
+title: //div[@class='tituloInt']
+body: //div[@class='notaPortada']
+strip: //img[@id='imgHorizontalInt imgDetalleImg imagenNota']
+date: //span[@class='publi']
+author: //span[@class='autor']
+tidy: no
+prune: no
 
 test_url: http://www.informador.com.mx/tecnologia/2011/337606/6/iran-desarrolla-antivirus-tras-afectaciones-por-duqu.htm
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6e3c3b1..3ade754
@@ -1,7 +1,7 @@
-title: //meta[@property='og:title']/@content\r
-author: //*[@property='dc:creator']\r
-date: //*[@property='dc:date']/@content\r
-body: //div[@id='page-content']//div[contains(@class, 'article-body')]\r
-\r
+title: //meta[@property='og:title']/@content
+author: //*[@property='dc:creator']
+date: //*[@property='dc:date']/@content
+body: //div[@id='page-content']//div[contains(@class, 'article-body')]
+
 tidy: no
 test_url: http://www.information.dk/282307
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 134306c..1330a04
@@ -1,10 +1,10 @@
-title://h1[@class="post_title"]\r
-body://article[@class="post"]\r
-date://h1[@class="section_separator"]\r
-author://span[@class="post_author"]\r
-strip://nav[@class="arrow_nav"]\r
-strip://section[@id="contact"]\r
-strip_id_or_class:post_title\r
-strip_id_or_class:post_author\r
+title://h1[@class="post_title"]
+body://article[@class="post"]
+date://h1[@class="section_separator"]
+author://span[@class="post_author"]
+strip://nav[@class="arrow_nav"]
+strip://section[@id="contact"]
+strip_id_or_class:post_title
+strip_id_or_class:post_author
 strip_id_or_class:section_separator
 test_url: http://informationarchitects.net/blog/nzz-relaunch-a-quick-review/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0879e9e..60b798e
@@ -1,6 +1,6 @@
-title: //head/title\r
-body: //table[@id='table3']//div[@class='postContent']\r
-prune: no\r
-tidy: no\r
-\r
+title: //head/title
+body: //table[@id='table3']//div[@class='postContent']
+prune: no
+tidy: no
+
 test_url: http://www.informationclearinghouse.info/article28238.htm
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 84c1fdc..24bf624
@@ -1,7 +1,7 @@
-title: //div[@id='content']/h1\r
-body: //div[@id="content"]\r
-strip: //img[contains(@src, 'informit_printer.png')]\r
-single_page_link: //div[contains(@class, 'articleTools')]//a[contains(@href, '/printerfriendly.')]\r
-prune: no\r
-\r
+title: //div[@id='content']/h1
+body: //div[@id="content"]
+strip: //img[contains(@src, 'informit_printer.png')]
+single_page_link: //div[contains(@class, 'articleTools')]//a[contains(@href, '/printerfriendly.')]
+prune: no
+
 test_url: http://www.informit.com/articles/article.aspx?p=1729268
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index dd588ed..d335bc4
@@ -1,12 +1,12 @@
-body: //div[@id='main_text']\r
-title: //div[@id='main_text']/h1\r
-strip: //div[@id='main_text']/h1\r
-strip: //div[@id='main_text']/h2\r
-strip_id_or_class: tools\r
-strip_id_or_class: articleTools\r
-strip_id_or_class: pagination\r
-strip_id_or_class: byline\r
-strip_id_or_class: tweet\r
-date: //div[@class='date']\r
+body: //div[@id='main_text']
+title: //div[@id='main_text']/h1
+strip: //div[@id='main_text']/h1
+strip: //div[@id='main_text']/h2
+strip_id_or_class: tools
+strip_id_or_class: articleTools
+strip_id_or_class: pagination
+strip_id_or_class: byline
+strip_id_or_class: tweet
+date: //div[@class='date']
 strip: //div[@class='date']
 test_url: http://www.infoworld.com/d/the-industry-standard/it-jobs-the-rise-both-offshore-and-in-us-187689
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 012c873..489d5af
@@ -1,9 +1,9 @@
-# This filter is tested on:\r
-# http://www.infzm.com/content/71068\r
-# http://www.infzm.com/content/41577\r
-\r
-author://em[contains(@class, 'toAuthor')]\r
-date:substring(//em[contains(@class, 'pubTime')],1)\r
-body://section[contains(@id, 'articleContent')]\r
+# This filter is tested on:
+# http://www.infzm.com/content/71068
+# http://www.infzm.com/content/41577
+
+author://em[contains(@class, 'toAuthor')]
+date:substring(//em[contains(@class, 'pubTime')],1)
+body://section[contains(@id, 'articleContent')]
 title://h1[contains(@class ,'articleHeadline clearfix')]
 test_url: http://www.infzm.com/content/41577
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6629daf..c63f53a
@@ -1,8 +1,8 @@
-# set body\r
-body: //div[@class='post-listing']\r
-\r
-# remove clutter\r
-strip: //a/big\r
-strip: //a/em\r
+# set body
+body: //div[@class='post-listing']
+
+# remove clutter
+strip: //a/big
+strip: //a/em
 strip: //p/em
 test_url: http://inhabitat.com/2010/11/18/sliding-walls-transform-this-tokyo-house-into-an-office/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ad9e821..522caeb
@@ -1,6 +1,6 @@
-title: //div[@class='caption']\r
-author: //p[@class='username']\r
-\r
-strip: //div[@class='contents']/h3\r
+title: //div[@class='caption']
+author: //p[@class='username']
+
+strip: //div[@class='contents']/h3
 strip: //div[@class='location']
 test_url: http://instagr.am/p/G-s_aciyDJ/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d196059..afe058d
@@ -1,7 +1,7 @@
-body: //div[@id = 'post']\r
-strip: //div[@class = 'postinfo']\r
-strip: //div[@id = 'postmetanew']\r
-strip: //div[@class = 'paginator']\r
-strip: //div[@class = 'col-2']\r
+body: //div[@id = 'post']
+strip: //div[@class = 'postinfo']
+strip: //div[@id = 'postmetanew']
+strip: //div[@class = 'paginator']
+strip: //div[@class = 'col-2']
 strip: //div[@id = 'adfactor-label']
 test_url: http://www.ipadclub.nl/15808/text-writer-ipad-tekstverwerker-met-functieknoppen/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a2e4900..dedb557
@@ -1,7 +1,7 @@
-body: //div[@id = 'post']\r
-strip: //div[@class = 'postinfo']\r
-strip: //div[@id = 'postmetanew']\r
-strip: //div[@class = 'paginator']\r
-strip: //div[@class = 'col-2']\r
+body: //div[@id = 'post']
+strip: //div[@class = 'postinfo']
+strip: //div[@id = 'postmetanew']
+strip: //div[@class = 'paginator']
+strip: //div[@class = 'col-2']
 strip: //div[@id = 'adfactor-label']
 test_url: http://www.ipadplanet.nl/11723/steve-jobs-bevestigt-verdwijnen-fysieke-rotatieschakelaar-in-ios-4-2/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f8d4f6a..850a24e
@@ -1,7 +1,7 @@
-body: //div[@id = 'post']\r
-strip: //div[@class = 'postinfo']\r
-strip: //div[@id = 'postmetanew']\r
-strip: //div[@class = 'paginator']\r
-strip: //div[@class = 'col-2']\r
-strip: //div[@id = 'adfactor-label']\r
+body: //div[@id = 'post']
+strip: //div[@class = 'postinfo']
+strip: //div[@id = 'postmetanew']
+strip: //div[@class = 'paginator']
+strip: //div[@class = 'col-2']
+strip: //div[@id = 'adfactor-label']
 test_url: http://www.iphoneclub.nl/105808/t-mobile-mobiel-internet-wordt-duurder-maar-blijft-onbeperkt/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c97ff43..e8ccea0
@@ -1,9 +1,9 @@
-title: //meta[@name='og:title']/@content\r
-body: //small[@class='postmetadata'] | //div[contains(@class, 'entry-content')]\r
-\r
-strip: //span[@vanilla-identifier]\r
-\r
-prune: no\r
-tidy: no\r
-\r
+title: //meta[@name='og:title']/@content
+body: //small[@class='postmetadata'] | //div[contains(@class, 'entry-content')]
+
+strip: //span[@vanilla-identifier]
+
+prune: no
+tidy: no
+
 test_url: http://www.iphonehacks.com/2012/07/app-review-process-behind-the-scenes.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a1c16a1..215fdf8
@@ -1,6 +1,6 @@
-# Remove social buttons\r
-strip: //div[@id='temp_Content_Right']\r
-\r
-# Remove duplicate article title\r
+# Remove social buttons
+strip: //div[@id='temp_Content_Right']
+
+# Remove duplicate article title
 strip: //*[(@class='storytitle')]
 test_url: http://isource.com/2010/10/24/swearch-a-cool-iphone-web-app/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8da78cb..3ba484a
@@ -1,6 +1,6 @@
-author: //p[@class = 'writer']\r
-\r
-date: //p[@class = 'published-time']\r
-\r
+author: //p[@class = 'writer']
+
+date: //p[@class = 'published-time']
+
 body: //div[@class = 'text main']
 test_url: http://www.itavisen.no/899786/old-republic-blir-gratis
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/itmedia.co.jp.txt b/inc/3rdparty/site_config/standard/itmedia.co.jp.txt
new file mode 100755 (executable)
index 0000000..97f00ce
--- /dev/null
@@ -0,0 +1,8 @@
+body: //div[@id='cmsBody']
+
+next_page_link: //span[@id='next']/a
+
+strip_id_or_class: cmsCopyright
+strip_id_or_class: masterSocialbuttonBtm
+
+test_url: http://www.itmedia.co.jp/enterprise/articles/0912/05/news002.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 550875e..b8cb461
@@ -1,12 +1,12 @@
-title: //h1[@class="entry-title"]\r
-body: //div[@class='format_text entry-content']\r
-author: //span[@class="author vcard"]/a\r
-date: //abbr[@class="published"]\r
-\r
-strip_id_or_class:  related-posts\r
-strip_id_or_class: membershipbox\r
-strip_id_or_class: share_this_compact_bt\r
-\r
-\r
+title: //h1[@class="entry-title"]
+body: //div[@class='format_text entry-content']
+author: //span[@class="author vcard"]/a
+date: //abbr[@class="published"]
+
+strip_id_or_class:  related-posts
+strip_id_or_class: membershipbox
+strip_id_or_class: share_this_compact_bt
+
+
 footnotes: no
 test_url: http://www.itstactical.com/warcom/knives/exclusive-triple-aught-design-production-dauntless-knife-video-walkthrough/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/itwire.com.txt b/inc/3rdparty/site_config/standard/itwire.com.txt
new file mode 100755 (executable)
index 0000000..72b4106
--- /dev/null
@@ -0,0 +1,5 @@
+author: //a[@rel="author"]
+date: //li[@class="itemDateCreated"]
+strip: //div[contains(@class, 'legend-rounded')]
+
+test_url: http://www.itwire.com/it-industry-news/market/59661-ibm-looks-to-high-value-solutions-to-meet-changing-demands
old mode 100644 (file)
new mode 100755 (executable)
index d4fa604..1ee0ee5
@@ -1,5 +1,5 @@
-title: //*[@id="article-title"]\r
-author: //*[@id="article-info"]/strong\r
-date: //*[@class="article-dateline"]/strong\r
+title: //*[@id="article-title"]
+author: //*[@id="article-info"]/strong
+date: //*[@class="article-dateline"]/strong
 body: //*[@id="article-content"]
 test_url: http://www.itworld.com/open-source/140916/android-sued-microsoft-not-linux
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index af3f299..b0114d3
@@ -1,4 +1,4 @@
-body: //div[starts-with(@id, 'news-id-')]\r
-prune: no\r
-\r
+body: //div[starts-with(@id, 'news-id-')]
+prune: no
+
 test_url: http://izismile.com/2011/06/13/uncanny_factoid_fashion_or_creepy_2_pics.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f1dd3d1..343fd6f
@@ -1,6 +1,6 @@
-body: //div[@id='content']//div[@class = 'post f']\r
-strip_id_or_class: comment-big\r
-strip_id_or_class: avatar\r
-strip: //div[@class='time_s']\r
+body: //div[@id='content']//div[@class = 'post f']
+strip_id_or_class: comment-big
+strip_id_or_class: avatar
+strip: //div[@class='time_s']
 
 test_url: http://jandan.net/2011/04/03/iphone-5-sony.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6e8af93..00e4cf6
@@ -1,22 +1,22 @@
-title: //h1\r
-author: //p[contains(@class, 'author')]/a\r
-date: //p[contains(@class, 'time')]\r
-body: //div[@class='content']/div[contains(@class, 'text')]\r
-\r
-# prevent "no text" errors on multi-page articles\r
-tidy: no\r
-\r
-# we use a custom next-link detector instead of the print view because\r
-# it's pretty hard to strip out the unwanted parts in the print view\r
-autodetect_next_page: no\r
-next_page_link: //div[contains(@class, 'text')]/div/div[contains(@class, 'paging')]/a[@class='more ']\r
-\r
-strip: //h1\r
-\r
-strip_id_or_class: meta\r
-strip_id_or_class: author\r
-strip_id_or_class: paging\r
-\r
-# prevent "Report an Error" from being recognized as footnote\r
+title: //h1
+author: //p[contains(@class, 'author')]/a
+date: //p[contains(@class, 'time')]
+body: //div[@class='content']/div[contains(@class, 'text')]
+
+# prevent "no text" errors on multi-page articles
+tidy: no
+
+# we use a custom next-link detector instead of the print view because
+# it's pretty hard to strip out the unwanted parts in the print view
+autodetect_next_page: no
+next_page_link: //div[contains(@class, 'text')]/div/div[contains(@class, 'paging')]/a[@class='more ']
+
+strip: //h1
+
+strip_id_or_class: meta
+strip_id_or_class: author
+strip_id_or_class: paging
+
+# prevent "Report an Error" from being recognized as footnote
 footnotes: no
 test_url: http://jetzt.sueddeutsche.de/texte/anzeigen/544308/Alles-flicken
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 95c45ee..d45c889
@@ -1,4 +1,4 @@
-body: //div[@class='entry']\r
-prune: no\r
+body: //div[@class='entry']
+prune: no
 
 test_url: http://www.jjahnke.net/rundbr87.html#2514
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index af8d7d1..1dbe207
@@ -1,5 +1,5 @@
-body: //div[@id='formatCont_en']\r
-\r
-prune: no\r
-\r
+body: //div[@id='formatCont_en']
+
+prune: no
+
 test_url: http://www.jobbank.gc.ca/detail-eng.aspx?Source=JobPosting&OrderNum=6397922
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 75fbee5..241a361
@@ -1,21 +1,21 @@
-# Works with old posts too, such as http://www.joelonsoftware.com/articles/fog0000000332.html\r
-\r
-author: substring-after(//div[@class="author"], 'by ')\r
-date: //div[@class="date"]\r
-\r
-## Clean stuff at top ##\r
-\r
-strip: //h1[1]\r
-strip: //h2[1]\r
-strip: //div[@class="date"]\r
-strip: //div[@class="author"]\r
-\r
-## Clean stuff at bottom ##\r
-\r
-strip: //blockquote[@class="textmessage"]\r
-strip: //div[@style="width:500px"]/p[last()]\r
-strip: //div[@style="width:500px"]/p[last()-1]\r
-strip: //div[@style="width:500px"]/h4[last()]\r
-strip: //div[@style="width:500px"]/h4[last()-1]\r
+# Works with old posts too, such as http://www.joelonsoftware.com/articles/fog0000000332.html
+
+author: substring-after(//div[@class="author"], 'by ')
+date: //div[@class="date"]
+
+## Clean stuff at top ##
+
+strip: //h1[1]
+strip: //h2[1]
+strip: //div[@class="date"]
+strip: //div[@class="author"]
+
+## Clean stuff at bottom ##
+
+strip: //blockquote[@class="textmessage"]
+strip: //div[@style="width:500px"]/p[last()]
+strip: //div[@style="width:500px"]/p[last()-1]
+strip: //div[@style="width:500px"]/h4[last()]
+strip: //div[@style="width:500px"]/h4[last()-1]
 strip: //div[@style="width:500px"]/div[last()]
 test_url: http://www.joelonsoftware.com/items/2011/09/15.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 535a501..3cf6067
@@ -1,3 +1,3 @@
-author: //h1\r
+author: //h1
 date: //p[contains(@class,'date')]
 test_url: http://jouire.com/2011/01/exquisite-whispers/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7fbd467..7a8e56f
@@ -1,8 +1,8 @@
-author: //a[@class="byline-author"]\r
-title: //h1[@class="headline"]\r
-strip: //div[@id="info-card"]\r
-strip: //div[@id="breaking-news"]\r
-strip: //div[@class="rmod list-post-mod"]\r
-strip: //div[@id="footer"]\r
+author: //a[@class="byline-author"]
+title: //h1[@class="headline"]
+strip: //div[@id="info-card"]
+strip: //div[@id="breaking-news"]
+strip: //div[@class="rmod list-post-mod"]
+strip: //div[@id="footer"]
 strip: //div[@id="GH_strip"]
 test_url: http://www.joystiq.com/2012/06/20/magic-the-gathering-duels-of-the-planeswalkers-2013-review/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index be844e5..ff5a024
@@ -1,19 +1,19 @@
-body: //div[@id='article_container']\r
-author: //h4//a[@class='author']\r
-title: //h1\r
-\r
-replace_string(lang="en"): lang="de"\r
-replace_string(/>1</a>):/></a>\r
-\r
-strip_id_or_class: share_toolbox\r
-strip_id_or_class: article_header\r
-strip_id_or_class: phototext\r
-\r
-strip_image_src: icon_author.gif\r
-\r
-strip: //img[@src='']\r
-strip: //h4[@id='author']\r
-\r
-prune: no\r
-\r
+body: //div[@id='article_container']
+author: //h4//a[@class='author']
+title: //h1
+
+replace_string(lang="en"): lang="de"
+replace_string(/>1</a>):/></a>
+
+strip_id_or_class: share_toolbox
+strip_id_or_class: article_header
+strip_id_or_class: phototext
+
+strip_image_src: icon_author.gif
+
+strip: //img[@src='']
+strip: //h4[@id='author']
+
+prune: no
+
 test_url: http://www.juedische-allgemeine.de/article/view/id/13366
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e2d07f2..fdf7cdc
@@ -1,8 +1,8 @@
-convert_double_br_tags: yes\r
-\r
-title: //div[@id="storycredits"]/p/span[@class="title"]\r
-author: //div[@id="storycredits"]/p/br[1]/following-sibling::text()\r
-\r
-strip: //div[@id="storycredits"]\r
+convert_double_br_tags: yes
+
+title: //div[@id="storycredits"]/p/span[@class="title"]
+author: //div[@id="storycredits"]/p/br[1]/following-sibling::text()
+
+strip: //div[@id="storycredits"]
 
 test_url: http://www.juppy.org/santa/stories.php?ForAuthorID=35&Year=2005
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 34404e9..535693c
@@ -1,3 +1,3 @@
-body: //div[contains(@class, 'inner_content')]\r
+body: //div[contains(@class, 'inner_content')]
 
 test_url: http://kachestvo.ru/promtovar/odezhda/denim.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/kachiblog.com.txt b/inc/3rdparty/site_config/standard/kachiblog.com.txt
new file mode 100755 (executable)
index 0000000..35baf8d
--- /dev/null
@@ -0,0 +1,7 @@
+title: //h3[contains(@class, 'entry-title')]
+date: //abbr[@itemprop='datePublished']/@title
+body: //div[@itemprop='articleBody']
+tidy: no
+
+test_url: http://www.kachiblog.com/2013/05/samsung-galaxy-s4-vs-samsung-galaxy.html
+test_url: http://www.kachiblog.com/feeds/posts/default
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/kathimerini.gr.txt b/inc/3rdparty/site_config/standard/kathimerini.gr.txt
new file mode 100755 (executable)
index 0000000..2c7c518
--- /dev/null
@@ -0,0 +1,4 @@
+title: //td[contains(@class, 'articleTitlos')]
+body: //td[contains(@class, 'eelantext')]
+
+test_url: http://www.kathimerini.gr/4dcgi/_w_articles_kathremote_1_03/12/2013_530490
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e6d100e..90c64cb
@@ -1,7 +1,7 @@
-# Ads\r
-strip: //table[@align="right"][@width="120"]\r
-\r
-# Affiliate link paragraphs\r
-strip: //a[.="Adorama"]/parent::p[contains(., "goodies")]\r
+# Ads
+strip: //table[@align="right"][@width="120"]
+
+# Affiliate link paragraphs
+strip: //a[.="Adorama"]/parent::p[contains(., "goodies")]
 strip: //a[.="Adorama"]/parent::p[contains(., "This free website's biggest source of")]
 test_url: http://www.kenrockwell.com/tech/composition.htm
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7d5daa4..db4f63c
@@ -1,21 +1,21 @@
-# set body\r
-body: //div[@id='ovArtikel']\r
-\r
-# set title\r
-title: //div[@id='ovArtikel']/h1\r
-# strip main title and leave sub title\r
-strip: //div[@id='ovArtikel']/h1\r
-\r
-date: //div[@class='publicdate']\r
-\r
-#remove captions\r
-strip: //*/div[@class='bu']\r
-strip: //*/div[@class='credit']\r
-\r
-#remove adds\r
-strip: //*/div[@class='ad-head']\r
-strip: //*/div[@class='linksebay']\r
-\r
-# remove video content\r
+# set body
+body: //div[@id='ovArtikel']
+
+# set title
+title: //div[@id='ovArtikel']/h1
+# strip main title and leave sub title
+strip: //div[@id='ovArtikel']/h1
+
+date: //div[@class='publicdate']
+
+#remove captions
+strip: //*/div[@class='bu']
+strip: //*/div[@class='credit']
+
+#remove adds
+strip: //*/div[@class='ad-head']
+strip: //*/div[@class='linksebay']
+
+# remove video content
 strip: //*/div[@class='ovVideo']
 test_url: http://www.kicker.de/news/fussball/frauen/wmfr/frauen-weltmeisterschaft/2011/3/1123662/spielbericht_frankreich-frauen_deutschland-frauen.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c055659..7b3daa5
@@ -1,7 +1,7 @@
-title: //h1[@id='name']\r
-body: //*[@id='leftcol']\r
-\r
-strip_id_or_class: 'share-box'\r
-strip_id_or_class: 'project-faqs'\r
+title: //h1[@id='name']
+body: //*[@id='leftcol']
+
+strip_id_or_class: 'share-box'
+strip_id_or_class: 'project-faqs'
 strip_id_or_class: 'report-issue-wrap'
 test_url: http://www.kickstarter.com/projects/hop/elevation-dock-the-best-dock-for-iphone
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 2f6783a..b27539f
@@ -1,4 +1,4 @@
-title: //div[@class='post']/h2\r
-body: //div[@class='entry']\r
+title: //div[@class='post']/h2
+body: //div[@class='entry']
 strip: //p[contains(.,'Tags:')]
 test_url: http://www.kingarthurflour.com/blog/2011/01/28/a-big-sandwich-for-the-big-game/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f93a61e..582f251
@@ -1,6 +1,6 @@
-title: //h2\r
-author: //*[@id='main']/div/a[1]\r
-date: substring-before(substring-after(//div[@class='meta'],'&bull;'),'&bull;')\r
-body: //div[@id='main']\r
-strip: //div[@class='meta']\r
+title: //h2
+author: //*[@id='main']/div/a[1]
+date: substring-before(substring-after(//div[@class='meta'],'&bull;'),'&bull;')
+body: //div[@id='main']
+strip: //div[@class='meta']
 test_url: http://kottke.org/08/02/king-of-kong-a-fistful-of-quarters
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9e15cc3..2f604de
@@ -1,3 +1,3 @@
-body: //div[@class = "entry-full"]\r
+body: //div[@class = "entry-full"]
 
 test_url: http://www.kumailplus.com/2011/12/02/24308
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3f0d236..fe35062
@@ -1,10 +1,10 @@
-title: //div[@id='centrediv']/h1\r
-\r
-author: substring-after(//div[@id='centrediv']/h3,'By: ')\r
-\r
-date: substring-after(substring-before(//div[@id='centrediv']/h3,'By: '),'Filed: ')\r
-\r
-body: //div[@class='KonaBody']\r
-\r
+title: //div[@id='centrediv']/h1
+
+author: substring-after(//div[@id='centrediv']/h3,'By: ')
+
+date: substring-after(substring-before(//div[@id='centrediv']/h3,'By: '),'Filed: ')
+
+body: //div[@class='KonaBody']
+
 convert_double_br_tags: yes
 test_url: http://www.kumb.com/story.php?id=126084
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 879b4d6..cf4d3b8
@@ -1,9 +1,9 @@
-date: //span[@class='datum']\r
-title: //div[@class='artikel']/h2\r
-body: //div[@class='entry']\r
-strip: //p[@class='tags']\r
-author: substring-after(//div[@class='authorinfo']/em,'Dies ist ein Artikel von ')\r
-strip: //div[@class='authorinfo']\r
-strip: //div[@class='authorpic']\r
+date: //span[@class='datum']
+title: //div[@class='artikel']/h2
+body: //div[@class='entry']
+strip: //p[@class='tags']
+author: substring-after(//div[@class='authorinfo']/em,'Dies ist ein Artikel von ')
+strip: //div[@class='authorinfo']
+strip: //div[@class='authorpic']
 
 test_url: http://kwerfeldein.de/index.php/2011/10/17/doppelbelichtungen-mit-konzept/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/landetsfria.se.txt b/inc/3rdparty/site_config/standard/landetsfria.se.txt
new file mode 100755 (executable)
index 0000000..e5317a5
--- /dev/null
@@ -0,0 +1,7 @@
+body: //div[contains(@class, 'layout__inner')]//div[contains(@class, 'file-image') or contains(@class, 'node__content')]
+author: //article//div[contains(@class, 'field-byline')]
+strip_id_or_class: rekommenderade
+strip_id_or_class: disqus
+strip_id_or_class: annonser
+
+test_url: http://www.landetsfria.se/artikel/112070
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a34e39d..d25999d
@@ -1,13 +1,13 @@
-title: //h1[@class='headline']\r
-body: //div[@class='article']\r
-strip: //div[@class='article']//h3[contains(@class, 'section')]\r
-strip: //div[@class='article']//ul[contains(@class, 'article-actions')]\r
-strip: //div[@id='syndication-upper']\r
-strip: //a[@id='syndication']\r
-strip: //dl[@id='article-tags']\r
-strip: //div[@id='article-like']\r
-prune: no\r
-\r
-single_page_link: //li[@class='single-page']/a\r
-\r
+title: //h1[@class='headline']
+body: //div[@class='article']
+strip: //div[@class='article']//h3[contains(@class, 'section')]
+strip: //div[@class='article']//ul[contains(@class, 'article-actions')]
+strip: //div[@id='syndication-upper']
+strip: //a[@id='syndication']
+strip: //dl[@id='article-tags']
+strip: //div[@id='article-like']
+prune: no
+
+single_page_link: //li[@class='single-page']/a
+
 test_url: http://www.laphamsquarterly.org/essays/balanced-diets.php
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e771f81..82374c0
@@ -1,3 +1,3 @@
-tidy: no\r
+tidy: no
 
 test_url: http://www.laprensagrafica.com/opinion/editorial/229252-reflexiones-sobre-la-educacion-que-necesitamos.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 5bad8e6..746bfca
@@ -1,10 +1,10 @@
-body: //div[@id='content-content']//div[@class='content']\r
-title: //h1[@class='title']\r
-date: substring-after(//*[@class='submitted'],'Submitted on')\r
-tidy: no\r
-strip: //div[@class='terms terms-inline']\r
-strip: //div[@class='more']\r
-strip: //div[@class='share-links']\r
-strip: //table[@id='attachments']\r
-\r
+body: //div[@id='content-content']//div[@class='content']
+title: //h1[@class='title']
+date: substring-after(//*[@class='submitted'],'Submitted on')
+tidy: no
+strip: //div[@class='terms terms-inline']
+strip: //div[@class='more']
+strip: //div[@class='share-links']
+strip: //table[@id='attachments']
+
 test_url: http://www.laquadrature.net/en/finalization-of-eu-parliaments-weak-net-neutrality-resolution
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 504dbea..25e3654
@@ -1,12 +1,12 @@
-#meta data\r
-title:substring-after(title,'|')\r
-\r
-author:substring-before( substring-after(//meta[@name = 'description']/@content, normalize-space(substring-after(//title,'|'))),' respond ')\r
-date://h5[@class = 'postDate']\r
-\r
-#text\r
-body://div[@class = 'articleBody']\r
-\r
-#clean up\r
-strip://center
-test_url: http://lareviewofbooks.org/post/14066007115/literary-transactions-and-their-vicissitudes
\ No newline at end of file
+#metadata
+title: substring-before(//title,' |')
+author: //a[contains(@class,'person') and starts-with(@href, '/contributor')]
+
+#text
+body: //div[contains(@class, 'article_body')]
+
+#clean up
+strip_id_or_class: recommended_section
+
+test_url: http://lareviewofbooks.org/review/american-politics-redeembale-robert-gates-hillary-clinton-two-memoirs-washington-dc
+test_url: http://lareviewofbooks.org/interview/souvenirs-future
old mode 100644 (file)
new mode 100755 (executable)
index 0d6ac85..b2db37b
@@ -1,11 +1,11 @@
-strip: //div[@id="tugs_story_display"]\r
-strip: //div[@id="search_overlay"]\r
-strip: //div[@id="adv_search"]\r
-body: //div[@class='story']\r
-tidy: no\r
-convert_double_br_tags: yes\r
-single_page_link: //a[contains(@href, ',print.')]\r
-strip: //p[starts-with(., 'latimes.com')]\r
-strip: //h1[starts-with(., 'latimes.com')]\r
+strip: //div[@id="tugs_story_display"]
+strip: //div[@id="search_overlay"]
+strip: //div[@id="adv_search"]
+body: //div[@class='story']
+tidy: no
+convert_double_br_tags: yes
+single_page_link: //a[contains(@href, ',print.')]
+strip: //p[starts-with(., 'latimes.com')]
+strip: //h1[starts-with(., 'latimes.com')]
 strip_id_or_class: cubead
 test_url: http://www.latimes.com/news/opinion/commentary/la-oe-gartonash-wilders-20110512,0,2876761.story
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 1814988..ab2f834
@@ -1,3 +1,3 @@
-title: //h1[@class='entry-title']\r
+title: //h1[@class='entry-title']
 body: //div[@class='entry-content']
 test_url: http://laughingsquid.com/mysterious-tiny-doors-appearing-around-san-francisco/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0a4c84b..e78cf7e
@@ -1,9 +1,9 @@
-title: //div[@id="content"]/h1[1]\r
-date: substring-before(//p[@class="postdate"], ' at ')\r
-author: ("Dr. Drang")\r
-\r
-strip: //div[@id="content"]/h1[1]\r
-strip: //p[@class="postdate"]\r
-strip: //h2[@id="respond"]\r
+title: //div[@id="content"]/h1[1]
+date: substring-before(//p[@class="postdate"], ' at ')
+author: ("Dr. Drang")
+
+strip: //div[@id="content"]/h1[1]
+strip: //p[@class="postdate"]
+strip: //h2[@id="respond"]
 strip: //blockquote[@class="bbpTweet"]/p/span/a/img
 test_url: http://www.leancrew.com/all-this/2011/12/more-shell-less-egg/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f5494b9..e720e37
@@ -1,8 +1,8 @@
-title: //meta[@name='title']/@content\r
-author: //span[@class='sign']//a[@class='journaliste']\r
-author: //meta[@name='author']/@content\r
-body: //*[@id='article']/div[@class='photo'] | //*[@id='article']/h2 | //*[@id='article']/div[@class='texte']\r
-date: //time[@pubdate]/@datetime\r
-prune: no\r
-test_url: http://www.lefigaro.fr/environnement/2011/11/10/01029-20111110ARTFIG00801-la-chine-confrontee-a-un-immense-defi-ecologique.php\r
+title: //meta[@name='title']/@content
+author: //span[@class='sign']//a[@class='journaliste']
+author: //meta[@name='author']/@content
+body: //*[@id='article']/div[@class='photo'] | //*[@id='article']/h2 | //*[@id='article']/div[@class='texte']
+date: //time[@pubdate]/@datetime
+prune: no
+test_url: http://www.lefigaro.fr/environnement/2011/11/10/01029-20111110ARTFIG00801-la-chine-confrontee-a-un-immense-defi-ecologique.php
 test_url: http://www.lefigaro.fr/conjoncture/2012/11/20/20002-20121120ARTFIG00609-l-usager-devrait-payer-plus-pour-financer-les-transports.php
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index eb20527..097999b
@@ -1,13 +1,18 @@
-title: //h1\r
-\r
-# they have a single component containing both author and date\r
-#author: //p[@class='source']\r
-#date: //p[@class='source']\r
-\r
-body: //div[@class='contenu_article']\r
-#Shoot the insane "conjugaison.lemonde.fr" links :\r
-strip: //a[contains(@class, 'listLink')]\r
-\r
-prune: no\r
-\r
-test_url: http://www.lemonde.fr/economie/article/2011/07/05/moody-s-abaisse-la-note-du-portugal-de-quatre-crans_1545237_3234.html
\ No newline at end of file
+title: //h1
+
+# We can have multiple authors
+author: //a[@class='auteur']
+
+# Last edition date (if any)
+date: //time[@itemprop='dateModified']/@datetime
+# Publication date
+date: //time[@itemprop='datePublished']/@datetime
+
+
+body: //div[@id='articleBody']
+#Shoot the insane "conjugaison.lemonde.fr" links :
+#strip: //a[contains(@class, 'conjug')]
+
+prune: no
+
+test_url: http://www.lemonde.fr/economie/article/2011/07/05/moody-s-abaisse-la-note-du-portugal-de-quatre-crans_1545237_3234.html
old mode 100644 (file)
new mode 100755 (executable)
index 9b57f72..51e025a
@@ -1,9 +1,9 @@
-title: //h1/following::span[@class='fn']\r
-# Author: should stop parsing until <br> reached, but I don't know how to do this.\r
-author: //following::div[@class='PDate2']\r
-date: //following::div[@class='PDate2']/strong\r
-\r
-body: //div[@class='ArTexte']\r
-body: //div[@id='prod_txt_b']\r
-body: //div[@class='ArPhotoP']\r
+title: //h1/following::span[@class='fn']
+# Author: should stop parsing until <br> reached, but I don't know how to do this.
+author: //following::div[@class='PDate2']
+date: //following::div[@class='PDate2']/strong
+
+body: //div[@class='ArTexte']
+body: //div[@id='prod_txt_b']
+body: //div[@class='ArPhotoP']
 test_url: http://www.lesnumeriques.com/disque-dur-multimedia/popcorn-hour-300-p12231/test.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c4bee7e..49b019f
@@ -1,3 +1,3 @@
-title: //h2\r
+title: //h2
 strip_image_src: logo.gif
 test_url: http://www.letemps.ch/Facet/print/Uuid/7c9f912c-07c9-11e0-9b50-4d96c9eca37f
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/libcom.org.txt b/inc/3rdparty/site_config/standard/libcom.org.txt
new file mode 100755 (executable)
index 0000000..d1404d1
--- /dev/null
@@ -0,0 +1,7 @@
+date: //span[contains(@class, 'page-date')]
+body: //div[@id='node-page']
+strip_id_or_class: book-navigation
+prune: no
+
+test_url: http://libcom.org/library/what-was-the-ussr-aufheben-1
+test_url: http://libcom.org/library-latest/feed
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c3888aa..0e1dceb
@@ -1,3 +1,3 @@
-title: //h2[@class="entry-title"]\r
+title: //h2[@class="entry-title"]
 body: //div[@class="entry-content"]
 test_url: http://www.lifeandculture.fr/digital/facebook-and-the-epiphanator-an-end-to-endings/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 32ade14..ec97f06
@@ -1,42 +1,47 @@
-# Adds author text: Gawker sites commonly show as "Author: View Profile"\r
-author://a[@class="plus-icon modfont"]\r
-\r
-# Add date and time\r
-date: //span[@class="date"]\r
-\r
-# Remove date and time from article text\r
-strip: //span[@class="date"]\r
-\r
-# Remove login/comment text\r
-strip: //*[(@class="presence_control_external smalltype")]\r
-\r
-strip: //div[@class="nodebyline modfont"]\r
-\r
-# Remove right sidebar\r
-strip: //div[@id="rightwrapper"]\r
-\r
-# Remove print header\r
-strip: //div[@id='printhead']/h1\r
-\r
-# Remove 'content is restricted'\r
-strip: //div[@id='agegate_IDHERE']\r
-\r
-# Remove follow text\r
-strip: //*[(@class="permalink_ads")]\r
-\r
-# Remove view/comment count\r
-strip: //div[@id='wrapper']/div[2][@class='postmeta_permalink_wrapper']/div[1][@class='postmeta_permalink']/div[2][@class='pm_line']\r
-\r
-# Remove contact text\r
-strip: //div[@id='wrapper']/div[1][@class='content permalink']/p[6][@class='contactinfo']\r
-\r
-# Remove medium duplicates of the article image\r
-strip_image_src: medium.jpg\r
-\r
-# Remove "arrow" class at bottom of page\r
-strip: //p[@class="arrow"]\r
-\r
-# Remove "track" image from article body\r
-strip: //img[@alt="track"]\r
-test_url: http://lifehacker.com/5925801/how-can-i-turn-vague-goals-into-actionable-to+dos\r
-test_url: http://lifehacker.com/5941600/hack-an-old-computer-mouse-into-a-wireless-bluetooth-mouse
\ No newline at end of file
+# Adds author text: Gawker sites commonly show as "Author: View Profile"
+author://a[@class="plus-icon modfont"]
+
+# Add date and time
+date: //span[@class="date"]
+
+body: //div[contains(@class, 'marquee-asset-wrapper') or contains(@class, 'post-content')]
+
+# Remove date and time from article text
+strip: //span[@class="date"]
+
+# Remove login/comment text
+strip: //*[(@class="presence_control_external smalltype")]
+
+strip: //div[@class="nodebyline modfont"]
+
+# Remove right sidebar
+strip: //div[@id="rightwrapper"]
+
+# Remove print header
+strip: //div[@id='printhead']/h1
+
+# Remove 'content is restricted'
+strip: //div[@id='agegate_IDHERE']
+
+# Remove follow text
+strip: //*[(@class="permalink_ads")]
+
+strip_id_or_class: inset_groups
+
+# Remove view/comment count
+strip: //div[@id='wrapper']/div[2][@class='postmeta_permalink_wrapper']/div[1][@class='postmeta_permalink']/div[2][@class='pm_line']
+
+# Remove contact text
+strip: //div[@id='wrapper']/div[1][@class='content permalink']/p[6][@class='contactinfo']
+
+# Remove medium duplicates of the article image
+strip_image_src: medium.jpg
+
+# Remove "arrow" class at bottom of page
+strip: //p[@class="arrow"]
+
+# Remove "track" image from article body
+strip: //img[@alt="track"]
+test_url: http://lifehacker.com/5925801/how-can-i-turn-vague-goals-into-actionable-to+dos
+test_url: http://lifehacker.com/5941600/hack-an-old-computer-mouse-into-a-wireless-bluetooth-mouse
+test_url: http://lifehacker.com/what-happens-to-the-brain-when-you-meditate-and-how-it-1202533314
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/lifestyle.inquirer.net.txt b/inc/3rdparty/site_config/standard/lifestyle.inquirer.net.txt
new file mode 100755 (executable)
index 0000000..25d544a
--- /dev/null
@@ -0,0 +1,7 @@
+title: //h1[@class='singlePageTitle']
+
+strip: //p[contains(text(), 'Follow Us')]
+strip: //p/strong[contains(text(), 'Recent Stories:')]
+strip: //div[@id="sharefeature"]
+
+test_url: http://lifestyle.inquirer.net/100223/dusting-your-ceiling-fan
diff --git a/inc/3rdparty/site_config/standard/lifeweek.com.cn.txt b/inc/3rdparty/site_config/standard/lifeweek.com.cn.txt
new file mode 100755 (executable)
index 0000000..e09f669
--- /dev/null
@@ -0,0 +1,23 @@
+# This filter is tested on:
+# http://www.lifeweek.com.cn/2012/1211/39439.shtml
+# http://www.lifeweek.com.cn/2013/0308/40213.shtml
+
+title:substring-before(//h1, '(')
+title://h1
+date://ul[@class='authorbox']/li
+author: substring-after(//ul[@class='authorbox']/li/following-sibling::li, '作者:')
+
+next_page_link: //div[@class='pageturn_list']/a[@class='pagedown']
+body: //div[@class='original ']
+
+strip://h1
+strip://ul[@class='authorbox']
+strip://span[@class='app_p']
+strip://div[@style='text-align:right;']
+strip://div[@class='pageturn_list']
+strip://div[@class='lifespeaks']
+strip://div[@class='vright fr']
+strip://div[@class='copyrt mg20']
+strip://div[@class='keyabout mg20']
+strip://ul[@class='readabout mg20']
+test_url: http://www.lifeweek.com.cn/2013/0308/40213.shtml
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/livescience.com.txt b/inc/3rdparty/site_config/standard/livescience.com.txt
new file mode 100755 (executable)
index 0000000..5275d34
--- /dev/null
@@ -0,0 +1,20 @@
+title: //div[@class="album_title"]//h1
+author: substring-before(//div[@class='by_line'], ',')
+date: substring-after(substring-before(//div[@class="album_time"], ' Time'), 'Date: ')
+body: //div[@class="about_text"]
+
+strip: //div[@class='large_popper']
+strip: //span[contains(@id, 'mag_glass')]
+strip: //span[contains(@class, 'img_overlay')]
+strip: //td//span
+strip: //div[@class="center_adsense"]
+strip: //div[@class="article_info"]//div[@class='asset_section']
+strip: //div[@class="article_additional"]
+strip: //div[contains(@style, 'overflow:hidden')]
+strip: //div[@class="aa_text"]
+strip: //div[@id='nointelliTXT']
+
+prune: no
+autodetect_on_failure: no
+
+test_url: http://www.livescience.com/34569-why-flowers-close-at-night-nyctinasty.html
old mode 100644 (file)
new mode 100755 (executable)
index 48d5e1a..1310ec0
@@ -1,3 +1,3 @@
-single_page_link: //div[@class="post"]/div[@class="title"]/a\r
+single_page_link: //div[@class="post"]/div[@class="title"]/a
 
 test_url: http://longform.org/2011/05/06/disconcerting-new-answers-in-models-suicide/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 08ad90c..730af94
@@ -1,9 +1,9 @@
-body: //div[@class='container_16']//div[@class='grid_11']\r
-strip: //h2[@class='mast']\r
-strip: //div[@class='container_16']//div[@class='grid_11']/h1\r
-strip: //div[@class='container_16']//div[@class='grid_11']/p[1]\r
-strip: //div[@class='container_16']//div[@class='grid_11']/div\r
-author: //a[starts-with(@title, 'Posts by')]\r
-date: substring-before(substring-after(//time, 'Posted on '), ' at')\r
-test_url: http://www.loopinsight.com/2012/09/13/forget-iphone-5-naysayers-this-thing-is-big/\r
+body: //div[@class='container_16']//div[@class='grid_11']
+strip: //h2[@class='mast']
+strip: //div[@class='container_16']//div[@class='grid_11']/h1
+strip: //div[@class='container_16']//div[@class='grid_11']/p[1]
+strip: //div[@class='container_16']//div[@class='grid_11']/div
+author: //a[starts-with(@title, 'Posts by')]
+date: substring-before(substring-after(//time, 'Posted on '), ' at')
+test_url: http://www.loopinsight.com/2012/09/13/forget-iphone-5-naysayers-this-thing-is-big/
 test_url: http://www.loopinsight.com/2011/05/20/playbook-returns-high-misses-sales-targets-by-90/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a823e64..d7eb0fa
@@ -1,3 +1,3 @@
-prune: no\r
+prune: no
 convert_double_br_tags: yes
 test_url: http://www.lostgarden.com/2012/04/loops-and-arcs.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/lovefm.com.txt b/inc/3rdparty/site_config/standard/lovefm.com.txt
new file mode 100755 (executable)
index 0000000..20d26c5
--- /dev/null
@@ -0,0 +1,6 @@
+title: //*[@id='title']
+date: //*[@id='date']
+body: //*[@id='desc']
+tidy: no
+
+test_url: http://www.lovefm.com/local_news.php?item=2176
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/lovetv.com.bz.txt b/inc/3rdparty/site_config/standard/lovetv.com.bz.txt
new file mode 100755 (executable)
index 0000000..a71fccd
--- /dev/null
@@ -0,0 +1,9 @@
+title: //div[contains(@class, 'post')]//h1
+body: //div[contains(@class, 'post')]
+strip: //hr
+strip_id_or_class: post-meta
+
+prune: no
+
+test_url: http://www.lovetv.com.bz/2013/06/28/recently-discovered-ancient-maya-wooden-canoe-paddle-to-be-handed-over-to-archaeology/
+test_url: http://www.lovetv.com.bz/feed/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ce5053d..f1aacb7
@@ -1,8 +1,12 @@
-title: substring-before(//title, ' · LRB')\r
-\r
-body: //div[@class="article-body indent"]\r
-\r
-date: substring-after(//p[@class="meta-info"]/a, '· ')\r
-\r
-prune: no
-test_url: http://www.lrb.co.uk/v33/n18/james-meek/its-already-happened
\ No newline at end of file
+title: //div[contains(@class, "article-body")]/hgroup/h1
+body: //div[contains(@class, "article-body")]
+
+date: substring-after(//p[@class="meta-info"]/a, '· ')
+
+author: //div[contains(@class, "article-body")]/hgroup/h2
+
+strip_id_or_class: print-hide
+strip_id_or_class: books
+
+test_url: http://www.lrb.co.uk/v33/n18/james-meek/its-already-happened
+test_url: http://www.lrb.co.uk/v36/n13/benjamin-kunkel/paupers-and-richlings
old mode 100644 (file)
new mode 100755 (executable)
index 92ccf3b..b445f5e
@@ -1,6 +1,6 @@
-title: //h2\r
-\r
-body: // div[@id='content']\r
-\r
+title: //h2
+
+body: // div[@id='content']
+
 strip: //div[@class='sidebar_wrapper']
 test_url: http://www.luminous-landscape.com/tutorials/optimizing_exposure.shtml
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/luxuo.com.txt b/inc/3rdparty/site_config/standard/luxuo.com.txt
new file mode 100755 (executable)
index 0000000..a3d5cb1
--- /dev/null
@@ -0,0 +1,4 @@
+body: //div[@class='post-content']
+prune: no
+
+test_url: http://www.luxuo.com/watches/feed
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a8af543..d1ff0b4
@@ -1,8 +1,8 @@
-title: //div[@class="story-body"]/div[@class="story-inner"]/h1\r
-body: //div[@class="story-body"]\r
-date: //p[@class='date']/strong\r
-author: substring-after(//div[@class="story-inner"]/div[@class="byline"]//span[@class='name'], 'By')\r
-\r
-strip: //div[@class="story-inner"]/div[@class="byline"]\r
+title: //div[@class="story-body"]/div[@class="story-inner"]/h1
+body: //div[@class="story-body"]
+date: //p[@class='date']/strong
+author: substring-after(//div[@class="story-inner"]/div[@class="byline"]//span[@class='name'], 'By')
+
+strip: //div[@class="story-inner"]/div[@class="byline"]
 
 test_url: http://m.bbc.co.uk/news/science-environment-19144464
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/m.douban.com.txt b/inc/3rdparty/site_config/standard/m.douban.com.txt
new file mode 100755 (executable)
index 0000000..ce9a316
--- /dev/null
@@ -0,0 +1,13 @@
+# This filter is tested on: 
+# http://m.douban.com/note/240776310/?session=6ac86d1e 
+# http://m.douban.com/note/208270705/?session=e00ec732_3433229
+
+title: //h2 
+author: //a[@class='founder'] 
+date: substring-after(//span[@class='info'],' | ') 
+body: //div[contains(@class,'entry item')] 
+
+strip://span[contains(@class,'info')] 
+
+convert_double_br_tags: yes
+test_url: http://m.douban.com/note/240776310/?session=6ac86d1e 
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/m.vanityfair.com.txt b/inc/3rdparty/site_config/standard/m.vanityfair.com.txt
new file mode 100755 (executable)
index 0000000..e47ce2c
--- /dev/null
@@ -0,0 +1,11 @@
+# Article Metadata
+title: //h1
+author: //span[@class="name"]/a
+date: //time
+
+# Content Pruning
+strip: //h5
+strip: //time
+strip: //div[@class="byline"]
+strip: //h2[@class="headline "]
+test_url: http://m.vanityfair.com/politics/2012/10/michael-lewis-profile-barack-obama
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/m00natic.github.io.txt b/inc/3rdparty/site_config/standard/m00natic.github.io.txt
new file mode 100644 (file)
index 0000000..911fcbd
--- /dev/null
@@ -0,0 +1,7 @@
+# Generated by FiveFilters.org's web-based selection tool
+# Place this file inside your site_config/custom/ folder
+# Source: http://siteconfig.fivefilters.org/grab.php?url=https%3A%2F%2Fm00natic.github.io%2Femacs%2Femacs-wiki.html
+
+body: //div[@id='content']
+strip_id_or_class: table-of-contents
+test_url: https://m00natic.github.io/emacs/emacs-wiki.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 892b47f..9999758
@@ -1,5 +1,5 @@
-author: substring-after(//div[@class='author'],'Par ')\r
-date: //div[@class='date']\r
-body: //div[@class='content']\r
+author: substring-after(//div[@class='author'],'Par ')
+date: //div[@class='date']
+body: //div[@class='content']
 
 test_url: http://www.mac4ever.com/news/64182/icloud_les_prix_en_euros_et_en_chf/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index fd1ede7..e57bd64
@@ -1,2 +1,2 @@
-title: substring-before(//title,' « Macdrifter')
+title: substring-before(//title,' « Macdrifter')
 test_url: http://www.macdrifter.com/2012/03/instacast-on-my-mac/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 109eae4..522efb4
@@ -1,9 +1,9 @@
-# Remove news feed\r
-strip: //div[@id='news_feed_front']\r
-\r
-# Remove pull quote\r
-strip: //div[@class='field field-type-text field-field-pull-quote']\r
-\r
-# Remove login\r
+# Remove news feed
+strip: //div[@id='news_feed_front']
+
+# Remove pull quote
+strip: //div[@class='field field-type-text field-field-pull-quote']
+
+# Remove login
 strip: //div[@class='right_bar_login']
 test_url: http://macformat.techradar.com/blog/solid-state-storage-bringing-parity-back-mac-29-10-10&article=89189666
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e6bbe28..739eff4
@@ -1,5 +1,5 @@
-author: substring-before(substring-after(//div[@class='dateNews'],'par '),' le')\r
-date: substring-after(//div[@class='dateNews'],' le ')\r
-body: //div[@class='singleNews zoneApple']\r
+author: substring-before(substring-after(//div[@class='dateNews'],'par '),' le')
+date: substring-after(//div[@class='dateNews'],' le ')
+body: //div[@class='singleNews zoneApple']
 
 test_url: http://www.macgeneration.com/news/voir/211162/dropbox-encore-un-mac-et-deux-comptes-dropbox
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 47ebfd7..da7df69
@@ -1,21 +1,21 @@
-# Remove sliders\r
-strip: //*[(@class="slides_container")]\r
-strip: //div[(@id="slides_two")]\r
-\r
-# Remove tag cloud\r
-strip: //span[(@class="secao")]\r
-\r
-# Fix date article\r
-# TODO\r
-\r
-# Remove other stuff\r
-strip: //div[(@id="idc-container")]\r
-strip: //div[(@id="idc-noscript")]\r
-strip: //div[(@class="linkwithin_div")]\r
-strip: //div[(@class="navPosts")]\r
-strip: //div[(@id="lateral")]\r
-strip: //div[(@id="autor")]\r
-strip: //div[(@id="rodape")]\r
-strip: //div[(@id="post")]/h1\r
+# Remove sliders
+strip: //*[(@class="slides_container")]
+strip: //div[(@id="slides_two")]
+
+# Remove tag cloud
+strip: //span[(@class="secao")]
+
+# Fix date article
+# TODO
+
+# Remove other stuff
+strip: //div[(@id="idc-container")]
+strip: //div[(@id="idc-noscript")]
+strip: //div[(@class="linkwithin_div")]
+strip: //div[(@class="navPosts")]
+strip: //div[(@id="lateral")]
+strip: //div[(@id="autor")]
+strip: //div[(@id="rodape")]
+strip: //div[(@id="post")]/h1
 strip: //div[(@id="post")]/div[(@id="boxInformacoes")]
 test_url: http://macmagazine.com.br/2011/08/01/skype-para-ipad-esta-finalmente-chegando-a-app-store/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 76f999d..83cfb4a
@@ -1,10 +1,12 @@
-author: substring-after(//div[@class='byline'], " by ")\r
-date: substring-before(//div[@class='byline'], " by ")\r
-\r
-# set body\r
-body: //div[@class='content']\r
-\r
-# set title\r
-title: //h3\r
+author: substring-after(//div[@class='byline'], " by ")
+date: substring-before(//div[@class='byline'], " by ")
+
+# set body
+body: //div[@class='content']
+strip_id_or_class: commentsContainer
+strip_id_or_class: linkback
+
+# set title
+title: //h3
 #strip: //div[@class='content']/h3
-test_url: http://www.macrumors.com/2010/11/10/apple-debuts-new-apple-tv-and-itunes-movie-content-in-japan/
\ No newline at end of file
+test_url: http://www.macrumors.com/2010/11/10/apple-debuts-new-apple-tv-and-itunes-movie-content-in-japan/
old mode 100644 (file)
new mode 100755 (executable)
index 6e651ca..639fdd1
@@ -1,8 +1,8 @@
-strip: //*[(@id = "featured")]\r
-\r
-author:substring-after( //div[@class='posttitle']/h2[@class='author'],'by ')\r
-\r
-date: concat(//div[@class='month'],' ',//div[@class='day'])\r
-\r
-#macstories doesn't provide a year, but month/day is better than nothing\r
+strip: //*[(@id = "featured")]
+
+author:substring-after( //div[@class='posttitle']/h2[@class='author'],'by ')
+
+date: concat(//div[@class='month'],' ',//div[@class='day'])
+
+#macstories doesn't provide a year, but month/day is better than nothing
 test_url: http://www.macstories.net/news/instapaper-4-0-available-completely-redesigned-ipad-ui-new-features-search-subscription/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e8d6052..9be865a
@@ -1,4 +1,4 @@
-author://div[@class="article_username_container_full"]\r
-date://div[@class="article_username_container"]\r
+author://div[@class="article_username_container_full"]
+date://div[@class="article_username_container"]
 body://div[@class="article cms_clear restore postcontainer"]
 test_url: http://www.mactalk.com.au/content/chat-basil-shkara-developer-taptax-2452/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c3fc0e4..5c03518
@@ -1,3 +1,3 @@
-title: substring-after(substring-after(//title, '>'), '>')\r
+title: substring-after(substring-after(//title, '>'), '>')
 body: //div[@class='NewsArticleContent']
 test_url: http://www.mactechnews.de/news/index/Apple-Pressekonferenz-zum-iPhone-4-147316.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9617587..e7d9720
@@ -1,24 +1,24 @@
-title: //article//h1\r
-date: //meta[@name="date"]/@content\r
-author: //div[@class="author-name" or @class="article-byline"]/a[1]\r
-\r
-body: //section[@class="page"]\r
-\r
-# remove 'From the Lab' and 'Recent posts' text\r
-strip: //div[@class='blogLabel']\r
-\r
-# remove byline and meta info\r
-strip: //div[@class="article-meta"]\r
-strip: //div[@class="author-info"]\r
-\r
-#strip tags and categories\r
-strip: //div[@class="department"]\r
-\r
-#strip product cap links\r
-strip: //div[@class="cap-main"]\r
-strip: //div[@id="compare-lede"]\r
-\r
-prune: no\r
-\r
-# copes less well with Review pages, seems fine for News\r
+title: //article//h1
+date: //meta[@name="date"]/@content
+author: //div[@class="author-name" or @class="article-byline"]/a[1]
+
+body: //section[@class="page"]
+
+# remove 'From the Lab' and 'Recent posts' text
+strip: //div[@class='blogLabel']
+
+# remove byline and meta info
+strip: //div[@class="article-meta"]
+strip: //div[@class="author-info"]
+
+#strip tags and categories
+strip: //div[@class="department"]
+
+#strip product cap links
+strip: //div[@class="cap-main"]
+strip: //div[@id="compare-lede"]
+
+prune: no
+
+# copes less well with Review pages, seems fine for News
 test_url: http://www.macworld.com/article/163184/2011/10/the_ipod_as_an_iconic_cultural_force.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e701207..414a2f5
@@ -1,3 +1,3 @@
-body: //div[@class='NewsArticle']\r
+body: //div[@class='NewsArticle']
 
 test_url: http://mainichi.jp/select/weathernews/20110311/news/20110520k0000e040062000c.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a2d25d5..2136de3
@@ -1,28 +1,28 @@
-title: substring-before(//title, '|')\r
-body: //*[@id='content-left']\r
-\r
-# Why is this not working here?\r
-# body: //*[@id='content-left']/div[@class='content-container'][2]/div[@class='content-body']/div[@class='inner-container']/div[@class='detail']\r
-\r
-\r
-#Header\r
-strip_id_or_class: 'subHead'\r
-strip_id_or_class: 'fl_right'\r
-strip_id_or_class: 'infolink'\r
-strip_id_or_class: 'content-head'\r
-strip_id_or_class: 'tab'\r
-strip_id_or_class: 'tab-active'\r
-strip: //*[contains(@class,'trenner')]\r
-\r
-# Headline\r
-strip: //h1/*\r
-strip_id_or_class: 'font16'\r
-\r
-#Images\r
-strip_id_or_class: 'leftimage'\r
-strip_id_or_class: 'rightimage'\r
-\r
-#Comments\r
-strip: //table\r
+title: substring-before(//title, '|')
+body: //*[@id='content-left']
+
+# Why is this not working here?
+# body: //*[@id='content-left']/div[@class='content-container'][2]/div[@class='content-body']/div[@class='inner-container']/div[@class='detail']
+
+
+#Header
+strip_id_or_class: 'subHead'
+strip_id_or_class: 'fl_right'
+strip_id_or_class: 'infolink'
+strip_id_or_class: 'content-head'
+strip_id_or_class: 'tab'
+strip_id_or_class: 'tab-active'
+strip: //*[contains(@class,'trenner')]
+
+# Headline
+strip: //h1/*
+strip_id_or_class: 'font16'
+
+#Images
+strip_id_or_class: 'leftimage'
+strip_id_or_class: 'rightimage'
+
+#Comments
+strip: //table
 strip: //p/following-sibling::*[0]
 test_url: http://www.mainpost.de/ueberregional/meinung/Dioxin-Skandal-bringt-Agrarministerin-in-Bedraengnis;art9517,5920211
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6809afe..078e8d0
@@ -1,3 +1,9 @@
-tidy: no
+title: //h1[@class='entry-title']
 
-test_url: http://www.makeuseof.com/dir/kindle-it-web-pages-kindle-friendly/
\ No newline at end of file
+body: //article//header//img | //article//section[@class='post']
+
+strip: //article//section[@class='post']/aside
+strip: //article//section[@class='post']/footer
+
+test_url: http://www.makeuseof.com/tag/cool-websites-and-tools-advanced-photo-editor-keep-your-kids-stuff-online-identify-60-languages/
+test_url: http://www.makeuseof.com/tag/what-do-you-think-of-our-new-look-makeuseof-poll/
diff --git a/inc/3rdparty/site_config/standard/manager.co.th.txt b/inc/3rdparty/site_config/standard/manager.co.th.txt
new file mode 100755 (executable)
index 0000000..cd6c5c0
--- /dev/null
@@ -0,0 +1,26 @@
+title: //td[@class="headline"]
+author: //font[@color="#003366"]
+date: //td[@class="date"]
+
+strip: //td[@class="headline"]
+strip: //font[@color="#003366"]
+strip: //td[@class="date"]
+
+strip: //img[@src="images/2009/logo_en.gif"]
+
+body: //tbody[@class="body"]
+convert_double_br_tags:yes
+
+strip: //img[@src="/images/TabOver.gif"]
+strip: //td[@width="160"]
+strip: //img[@src="/images/TabUnder.gif"]
+
+strip: //td[@class="small"]
+strip: //td[@height="47"]
+
+strip: //td[@valign="middle"]
+strip: //td[@background="/images/menu_bottombg.gif"]
+strip: //img[@src="/images/sc_footer_l.gif"]
+strip: //img[@src="/images/sc_footer_m.gif"]
+strip: //img[@src="/images/sc_footer_r.gif"]
+test_url: http://www.manager.co.th/Entertainment/ViewNews.aspx?NewsID=9550000101979
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ef2e03d..4bb24a6
@@ -1,8 +1,8 @@
-tidy: no\r
-prune: no\r
-date: //article//time[@pubdate]\r
-title: //article/header/h2\r
-body: //article\r
-strip: //header\r
-test_url: http://www.marco.org/2012/09/08/businessweek-gruber\r
+tidy: no
+prune: no
+date: //article//time[@pubdate]
+title: //article/header/h2
+body: //article
+strip: //header
+test_url: http://www.marco.org/2012/09/08/businessweek-gruber
 test_url: http://www.marco.org/2012/04/24/might-upgrade-someday
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8e0e349..4ff4a9c
@@ -1,8 +1,8 @@
-date: //div[@id="main"]/p[@class="date"]\r
-author: string("Martin Fowler")\r
-body: //div[@id="main"]\r
-strip_id_or_class: date\r
-strip_id_or_class: tags\r
-strip_id_or_class: tagLabel\r
+date: //div[@id="main"]/p[@class="date"]
+author: string("Martin Fowler")
+body: //div[@id="main"]
+strip_id_or_class: date
+strip_id_or_class: tags
+strip_id_or_class: tagLabel
 strip: //div[@id="main"]/h1[1]
 test_url: http://martinfowler.com/bliki/DatabaseThaw.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 2c5a14a..b6efb6c
@@ -1,4 +1,11 @@
-title: //header[@class='entry-title']/h1
-body: //div[@class='description']
+title: //h1[@class='title']
+author: substring-after(//span[@class='author_name'], 'By ')
+date: //time
+
+body: //article
 strip: //div[@class='ytm-gallery-box']
-test_url: http://mashable.com/2011/12/05/india-wants-google-and-facebook-to-censor-user-content/
\ No newline at end of file
+strip: //div[contains(@class, 'adsense')]
+strip: //aside[contains(@class, 'social')]
+strip_id_or_class: article-topics
+
+test_url: http://mashable.com/2013/05/24/myspace-architects-rebuilding-a-brand/
diff --git a/inc/3rdparty/site_config/standard/matt.might.net.txt b/inc/3rdparty/site_config/standard/matt.might.net.txt
new file mode 100755 (executable)
index 0000000..30d585c
--- /dev/null
@@ -0,0 +1,5 @@
+title: //h1
+author: string("Matt Might")
+strip: //h1/following-sibling::div
+
+test_url: http://matt.might.net/articles/oo-cesk/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
old mode 100644 (file)
new mode 100755 (executable)
index 94f27b7..4c333aa
@@ -1,2 +1,4 @@
-strip: //div[contains(@class, 'article-tools')]\r
+strip_id_or_class: article-tools
+strip_id_or_class: pagenav
+prune: no
 test_url: http://www.medialens.org/index.php/alerts/alert-archive/2012/713-the-illusion-of-democracy.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/medium.com.txt b/inc/3rdparty/site_config/standard/medium.com.txt
new file mode 100755 (executable)
index 0000000..acf7cc9
--- /dev/null
@@ -0,0 +1,7 @@
+body: //div[contains(@class, 'post-content-inner')]
+strip_id_or_class: follow-ups
+strip_id_or_class: footer
+
+prune: no
+
+test_url: https://medium.com/p/6844c0d7893b
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/megamp3.eu.txt b/inc/3rdparty/site_config/standard/megamp3.eu.txt
new file mode 100755 (executable)
index 0000000..1b6a127
--- /dev/null
@@ -0,0 +1,8 @@
+title: //h3[@class='episode_title']
+body: //ul[contains(@class, 'episode_imgdesc')]/li/descendant::*
+prune: no
+strip://*[contains(@class, 'plugin')]
+strip://*[contains(@class, 'episode_keywords')]
+
+test_url: http://www.megamp3.eu/?p=episode&name=2013-04-19_la_filiere_progressive_431.mp3
+test_url: http://www.megamp3.eu/feed.xml
old mode 100644 (file)
new mode 100755 (executable)
index e7e1e26..a1a46f6
@@ -1,16 +1,16 @@
-# need to find a way to eliminate <span> content for "related content" without eliminating important content\r
-\r
-convert_double_br_tags: [yes]\r
-#body: //div[@id='leftside']\r
-title: //h1\r
-title: //h2\r
-Author: substring-after(//h4, 'By ')\r
-Author: substring-after(//h4, 'By: ')\r
-#Strip: //span\r
-strip_id_or_class: morefromcat\r
-strip_id_or_class: mostpopular\r
-strip_id_or_class: articlepagination\r
-strip_id_or_class: toolbar\r
-body: //div[@id='zmodcontent']\r
-single_page_link: //li[@class='onepage'] //a[contains (@href, 'printer.php')]\r
+# need to find a way to eliminate <span> content for "related content" without eliminating important content
+
+convert_double_br_tags: [yes]
+#body: //div[@id='leftside']
+title: //h1
+title: //h2
+Author: substring-after(//h4, 'By ')
+Author: substring-after(//h4, 'By: ')
+#Strip: //span
+strip_id_or_class: morefromcat
+strip_id_or_class: mostpopular
+strip_id_or_class: articlepagination
+strip_id_or_class: toolbar
+body: //div[@id='zmodcontent']
+single_page_link: //li[@class='onepage'] //a[contains (@href, 'printer.php')]
 test_url: http://www.menshealth.com/mhlists/pursuit_of_happiness/index.php
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/metafilter.com.txt b/inc/3rdparty/site_config/standard/metafilter.com.txt
new file mode 100755 (executable)
index 0000000..a2f3ada
--- /dev/null
@@ -0,0 +1,8 @@
+body: //div[contains(@class, 'copy') or contains(@class, 'comments')]
+strip_id_or_class: related
+strip: //a[. = 'Subscribe']
+strip: //h1/span[@class = 'smallcopy']
+strip: //a[@class = 'skip']
+strip: //div[@id = 'logo']
+strip: //div[contains(@class, 'comments') and contains(., 'You are not currently logged in')]
+test_url: http://www.metafilter.com/128101/Probably-more-secure-than-the-Drafts-folder-on-a-shared-Gmail-account
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mforum.cari.com.my.txt b/inc/3rdparty/site_config/standard/mforum.cari.com.my.txt
new file mode 100755 (executable)
index 0000000..c295d73
--- /dev/null
@@ -0,0 +1,6 @@
+body: (//td[starts-with(@id, 'postmessage_')])[1]
+
+prune: no
+
+test_url: http://mforum.cari.com.my/forum.php?mod=viewthread&tid=788033
+test_url: http://mforum.cari.com.my/forum.php?mod=rss&fid=265&auth=0
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index af8a7d3..abaa6a8
@@ -1,5 +1,5 @@
-title: //div[@class="blogtitle"]\r
-strip: //div[@class="blogtitle"]\r
-\r
+title: //div[@class="blogtitle"]
+strip: //div[@class="blogtitle"]
+
 author: substring-after(//span[@class="blogheader"], 'Author: ')
 test_url: http://www.mikeash.com/pyblog/friday-qa-2012-01-13-the-mac-toolbox.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3d488e1..fb4636c
@@ -1,9 +1,9 @@
-title: //div[@class='post_content']/h2\r
-date: //div[@class='dateline']\r
-body: //div[@class='entry']\r
-\r
-strip: //div[@class='closer']\r
-strip: //div[@class='navigation']\r
-strip: //div[@class='aux_pane']\r
+title: //div[@class='post_content']/h2
+date: //div[@class='dateline']
+body: //div[@class='entry']
+
+strip: //div[@class='closer']
+strip: //div[@class='navigation']
+strip: //div[@class='aux_pane']
 strip: //div[@class='aux_aux_pane']
 test_url: http://www.mikeindustries.com/blog/archive/2011/10/never-be-another
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7e43d63..773a627
@@ -1,10 +1,10 @@
-title: //*[@class="article"]/h1\r
-date: //*[@class="article"]/div[@class="date"]\r
-\r
-# strip the title and date from the article text\r
-strip: //*[@class="article"]/h1\r
-strip: //*[@class="article"]/div[@class="date"]\r
-\r
-# strip annoying <br> between metadata and article\r
+title: //*[@class="article"]/h1
+date: //*[@class="article"]/div[@class="date"]
+
+# strip the title and date from the article text
+strip: //*[@class="article"]/h1
+strip: //*[@class="article"]/div[@class="date"]
+
+# strip annoying <br> between metadata and article
 strip: //*[@class="article"]/div[@class="date"]/following-sibling::br
 test_url: http://minnesota.publicradio.org/display/web/2012/06/19/health/senators-want-health-care-ruling-on-tv/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 51a0630..dc926a6
@@ -1,5 +1,5 @@
-title: //*[@id="content-header"]/h1\r
-author: //*[contains(@class, 'byline')]/a/text()\r
-date: substring-after(//*[contains(@class, 'byline')]/text()[2], '|')\r
+title: //*[@id="content-header"]/h1
+author: //*[contains(@class, 'byline')]/a/text()
+date: substring-after(//*[contains(@class, 'byline')]/text()[2], '|')
 body: //*[contains(@class, 'node-body')]
 test_url: http://www.minnpost.com/eric-black-ink/2012/06/overturning-obamacare-would-be-game-changer-supreme-court
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4215a05..2033cf3
@@ -1,3 +1,3 @@
-# Remove extra links\r
+# Remove extra links
 strip: //*[@class='appended_html']
 test_url: http://www.mirrorfootball.co.uk/news/West-Ham-crisis-Carlton-Cole-slams-diabolical-performance-and-rips-into-Avram-Grant-lack-of-tactical-nous-following-Liverpool-mauling-article636151.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ae542aa..73c485e
@@ -1,5 +1,5 @@
-strip_id_or_class: 'book-ad'\r
-strip_id_or_class: 'bigger pullquote'\r
-strip_id_or_class: 'subscribe'\r
+strip_id_or_class: 'book-ad'
+strip_id_or_class: 'bigger pullquote'
+strip_id_or_class: 'subscribe'
 strip_id_or_class: 'blog-link'
 test_url: http://mises.org/daily/4804
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 30e8aff..765fab3
@@ -1,14 +1,14 @@
-title: //h1[@class='article-headline']\r
-date: //span[@class='timeStamp']\r
-author: substring-before(//p[@class='article-byline'], '/')\r
-body: //div[@id='article']\r
-#strip: //div[@class='inner']\r
-strip: //div[@id='article_head']\r
-strip: //p[@class='tagLine']\r
-strip: //div[@id='article_related_links']\r
-strip: //div[@id='article_related_mlb']\r
-strip: //span[@class='more']\r
-strip: //div[@class='article_component']\r
-strip: //span[@class='screen_reader']\r
-strip: //ul[@class='columnists_blurb']\r
+title: //h1[@class='article-headline']
+date: //span[@class='timeStamp']
+author: substring-before(//p[@class='article-byline'], '/')
+body: //div[@id='article']
+#strip: //div[@class='inner']
+strip: //div[@id='article_head']
+strip: //p[@class='tagLine']
+strip: //div[@id='article_related_links']
+strip: //div[@id='article_related_mlb']
+strip: //span[@class='more']
+strip: //div[@class='article_component']
+strip: //span[@class='screen_reader']
+strip: //ul[@class='columnists_blurb']
 test_url: http://mlb.mlb.com/news/article.jsp?ymd=20120403&content_id=27880830
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c4e3389..8480e30
@@ -1,14 +1,14 @@
-title: //h1[@id = 'stream_title']\r
-author: //p[@class = 'byline']/a\r
-date: //span[@class = 'datetime']\r
-\r
-body: //div[@id = 'stream_container']\r
-strip: //p[@class = 'byline']\r
-strip_id_or_class: stream_summary\r
-strip_id_or_class: social-spoken\r
-strip_id_or_class: datetime\r
-strip_id_or_class: author-mini-profile\r
-strip_id_or_class: social-tools\r
-strip_id_or_class: entry-tags\r
+title: //h1[@id = 'stream_title']
+author: //p[@class = 'byline']/a
+date: //span[@class = 'datetime']
+
+body: //div[@id = 'stream_container']
+strip: //p[@class = 'byline']
+strip_id_or_class: stream_summary
+strip_id_or_class: social-spoken
+strip_id_or_class: datetime
+strip_id_or_class: author-mini-profile
+strip_id_or_class: social-tools
+strip_id_or_class: entry-tags
 strip_id_or_class: fb-like-box
 test_url: http://mlb.sbnation.com/2011/10/17/2495845/2011-world-series-st-louis-cardinals-texas-rangers-home-field-advantage
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 41e1513..5d706f8
@@ -1,6 +1,6 @@
-title: //*[@class="header_title"]/h1\r
-date: //*[@class="field-date"]\r
-author: //*[@class="field-author"]\r
-body: //div[contains(@class, 'content')]\r
+title: //*[@class="header_title"]/h1
+date: //*[@class="field-date"]
+author: //*[@class="field-author"]
+body: //div[contains(@class, 'content')]
 
 test_url: http://www.mlssoccer.com/news/article/2012/06/19/lack-depth-front-forces-arena-alter-las-formation
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 918fae3..50d8a24
@@ -1,5 +1,5 @@
-title: //h1\r
-body: //div[@id = 'article_content']/div[contains(@class,'article')]\r
-author: //sub[@class = 'article_promoted_text']/a[starts-with(@href, 'member')]\r
+title: //h1
+body: //div[@id = 'article_content']/div[contains(@class,'article')]
+author: //sub[@class = 'article_promoted_text']/a[starts-with(@href, 'member')]
 date: //div[@class = 'article_username_container']
 test_url: http://www.mmo-champion.com/content/2688-Other-Press-Tour-Interviews-A-Night-in-Mists-of-Pandaria-Blue-Posts-MoP-Screenshot
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ddfe6fa..d3576df
@@ -1,11 +1,11 @@
-tidy: no\r
-author: //div[@id="above-content"]//img/@alt | //div[@class="comment-auth"]/span[1]/a/text()\r
-date: //div[@class="comment-auth"]/div | //div[@class="comment-auth"]/span[2]\r
-body: //div[@class="node"]\r
-\r
-strip_id_or_class: vertical-social-bar\r
-strip_id_or_class: blogs_paginator\r
-strip_id_or_class: horizontal-social-links\r
-strip_id_or_class: servicelinksdiv\r
+tidy: no
+author: //div[@id="above-content"]//img/@alt | //div[@class="comment-auth"]/span[1]/a/text()
+date: //div[@class="comment-auth"]/div | //div[@class="comment-auth"]/span[2]
+body: //div[@class="node"]
+
+strip_id_or_class: vertical-social-bar
+strip_id_or_class: blogs_paginator
+strip_id_or_class: horizontal-social-links
+strip_id_or_class: servicelinksdiv
 
 test_url: http://www.mnn.com/green-tech/research-innovations/blogs/5-breakthroughs-that-will-make-solar-power-cheaper-than-coal
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ba15895..8a3f939
@@ -1,14 +1,14 @@
-title: //title\r
-\r
-author: //div[@class="author"]\r
-\r
-strip_id_or_class: 'header'\r
-strip_id_or_class: 'cikk_ajanlo'\r
-strip_id_or_class: 'buttons'\r
-strip_id_or_class: 'related'\r
-strip_id_or_class: 'adbox ad_cikk_kozepre'\r
-strip_id_or_class: 'cikk-cimkek'\r
-strip_id_or_class: 'cikk_ertekeles'\r
-\r
+title: //title
+
+author: //div[@class="author"]
+
+strip_id_or_class: 'header'
+strip_id_or_class: 'cikk_ajanlo'
+strip_id_or_class: 'buttons'
+strip_id_or_class: 'related'
+strip_id_or_class: 'adbox ad_cikk_kozepre'
+strip_id_or_class: 'cikk-cimkek'
+strip_id_or_class: 'cikk_ertekeles'
+
 strip_comments: yes
 test_url: http://mno.hu/grund/a-gumibottal-hadonaszo-rendort-joval-konnyebb-utalni-1055351
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/mobile.nytimes.com.txt b/inc/3rdparty/site_config/standard/mobile.nytimes.com.txt
new file mode 100755 (executable)
index 0000000..c60252e
--- /dev/null
@@ -0,0 +1,4 @@
+title: //h1[contains(@class, 'headline')]
+body: //article[contains(@class, 'full-art')]
+strip_id_or_class: image-credit
+test_url: http://mobile.nytimes.com/2014/06/19/opinion/gail-collins-romney-and-the-2016-contenders-huddle.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d5d8103..6ffcd18
@@ -1,5 +1,5 @@
-title: //h2[@class="article_title"]\r
-strip: //a[@class="houseAdLink"]\r
-strip: //h1\r
+title: //h2[@class="article_title"]
+strip: //a[@class="houseAdLink"]
+strip: //h1
 strip: //div[@class="more_articles"]
 test_url: http://mobile.slate.com/rss.jsp?rssid=411&item=http%3a%2f%2fwww.slate.com%2fdefault.aspx%3fdisplaymode%3d201%26id%3d2293749%26device%3drss
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a1cc531..82da4ae
@@ -1,11 +1,11 @@
-body: //div[@class='post uncustomized-post-template']\r
-\r
-# remove duplicate of post title, which is a link\r
-strip: //h3[@class='post-title']\r
-\r
-# remove permalink and timestamp, which isn't useful as it's a time with no date\r
-strip: //span[@class='post-timestamp']\r
-\r
-# remove labels (tags)\r
+body: //div[@class='post uncustomized-post-template']
+
+# remove duplicate of post title, which is a link
+strip: //h3[@class='post-title']
+
+# remove permalink and timestamp, which isn't useful as it's a time with no date
+strip: //span[@class='post-timestamp']
+
+# remove labels (tags)
 strip: //span[@class='post-labels']
 test_url: http://mobileopportunity.blogspot.com/2010/12/rims-q3-financials-tale-of-two.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4c93d0c..306ef8d
@@ -1,8 +1,8 @@
-title: //meta[@property="og:title"]/@content\r
-author: //meta[@name="author"]/@content\r
-date: //span[@class='date1']\r
-body: //div[@id='newsimage'] | //div[@id='bodytext']\r
-tidy: no\r
-prune: no\r
-\r
+title: //meta[@property="og:title"]/@content
+author: //meta[@name="author"]/@content
+date: //span[@class='date1']
+body: //div[@id='newsimage'] | //div[@id='bodytext']
+tidy: no
+prune: no
+
 test_url: http://www.modernghana.com/news/323765/1/039ghost039-teachers-removed-salaries-allowances-p.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a0d1628..d5e03d2
@@ -1,24 +1,24 @@
-title: //meta[@property="og:title"]/@content\r
-title: //h1[@class='storyheadline']\r
-author: //meta[@name="AUTHOR"]/@content\r
-date: //span[@class='cnnDateStamp']\r
-date: //meta[@name="DATE"]/@content\r
-body: //div[@id='storytext' or @class='storytext']\r
-\r
-strip_id_or_class: ie_column\r
-strip_id_or_class: sharewidgets\r
-strip_image_src: bug.gif\r
-\r
-strip: //div[@class="hed_side"]\r
-strip: //span[@class="byline"]\r
-strip: //a[@class="soc-twtname"]\r
-strip: //span[@class="cnnDateStamp"]\r
-strip: //div[@class="storytimestamp"]\r
-strip: //div[@class="cnnCol_side"]\r
-\r
-prune: no\r
-tidy: no\r
-\r
-test_url: http://money.cnn.com/2011/03/15/news/companies/steve_jobs_thought_process.fortune/index.htm?section=money_topstories&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fmoney_topstories+%28Top+Stories%29\r
-test_url: http://money.cnn.com/2012/01/27/markets/markets_newyork/index.htm\r
+title: //meta[@property="og:title"]/@content
+title: //h1[@class='storyheadline']
+author: //meta[@name="AUTHOR"]/@content
+date: //span[@class='cnnDateStamp']
+date: //meta[@name="DATE"]/@content
+body: //div[@id='storytext' or @class='storytext']
+
+strip_id_or_class: ie_column
+strip_id_or_class: sharewidgets
+strip_image_src: bug.gif
+
+strip: //div[@class="hed_side"]
+strip: //span[@class="byline"]
+strip: //a[@class="soc-twtname"]
+strip: //span[@class="cnnDateStamp"]
+strip: //div[@class="storytimestamp"]
+strip: //div[@class="cnnCol_side"]
+
+prune: no
+tidy: no
+
+test_url: http://money.cnn.com/2011/03/15/news/companies/steve_jobs_thought_process.fortune/index.htm?section=money_topstories&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fmoney_topstories+%28Top+Stories%29
+test_url: http://money.cnn.com/2012/01/27/markets/markets_newyork/index.htm
 test_url: http://money.cnn.com/2012/05/13/technology/yahoo-ceo-out-rumor/index.htm
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 061a8d5..55026ee
@@ -1,13 +1,13 @@
-strip_image_src: menu\r
-strip_image_src: templates\r
-strip: //div/a\r
-strip: //div/b\r
-strip: //div/strong\r
-strip: //td[@width='30%']\r
-strip: //br[1]\r
-strip: //br[2]\r
-strip: //br[3]\r
-strip: //br[4]\r
-strip: //a[@href='http://www.moonsault.de/newzboard/index.php?act=home']\r
+strip_image_src: menu
+strip_image_src: templates
+strip: //div/a
+strip: //div/b
+strip: //div/strong
+strip: //td[@width='30%']
+strip: //br[1]
+strip: //br[2]
+strip: //br[3]
+strip: //br[4]
+strip: //a[@href='http://www.moonsault.de/newzboard/index.php?act=home']
 strip_id_or_class: cse-branding-right
 test_url: http://www.moonsault.de/newzboard/index.php?news=22321&act=previous
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a7e59c3..780cca4
@@ -1,7 +1,7 @@
-title: //h1[@class='print-title']\r
-body: //div[@class='print-submitted' or @class='print-created' or @class='print-content']\r
-prune: no\r
-\r
-single_page_link: //li[@class='print']/a\r
-\r
+title: //h1[@class='print-title']
+body: //div[@class='print-submitted' or @class='print-created' or @class='print-content']
+prune: no
+
+single_page_link: //li[@class='print']/a
+
 test_url: http://moreintelligentlife.com/content/places/paul-markillie/they-trash-cars-dont-they
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6faf1c9..c6312c0
@@ -1,5 +1,5 @@
-author: //span[@class="author"]/a\r
-date: //span[@class="date"]\r
-body: //div[@class="story-content"]\r
-strip: //aside\r
+author: //span[@class="author"]/a
+date: //span[@class="date"]
+body: //div[@class="story-content"]
+strip: //aside
 test_url: http://motherboard.vice.com/blog/you-can-carry-a-copy-of-the-pirate-bay-in-your-pocket
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a9d9195..a34adff
@@ -1,7 +1,7 @@
-title: //h2[contains(@class,'post_headline')]\r
-body: //div[@class='entry']\r
-convert_double_br_tags: yes\r
-strip_image_src: _selected.gif\r
-strip_id_or_class: addthis_\r
+title: //h2[contains(@class,'post_headline')]
+body: //div[@class='entry']
+convert_double_br_tags: yes
+strip_image_src: _selected.gif
+strip_id_or_class: addthis_
 strip: //a[contains(@href,'feedburner.com')]
 test_url: http://mothering.com/all-things-mothering/inspiration/motherhood-brings-me-down
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d58c7d2..851feb7
@@ -1,15 +1,15 @@
-title: //h1\r
-body: //div[@id = 'content-area']\r
-next_page_link: //div[@class='node-pager']/a[contains(@class, 'next')]\r
-tidy: no\r
-author: //p[contains(@class, 'byline')]/a\r
-\r
-strip_id_or_class: node-header\r
-strip_id_or_class: hdr-tools\r
-strip_id_or_class: node-body-break\r
-strip_id_or_class: pullquote\r
-strip_id_or_class: node-pager\r
-strip_id_or_class: author-bio\r
-strip_id_or_class: node-footer\r
+title: //h1
+body: //div[@id = 'content-area']
+next_page_link: //div[@class='node-pager']/a[contains(@class, 'next')]
+tidy: no
+author: //p[contains(@class, 'byline')]/a
+
+strip_id_or_class: node-header
+strip_id_or_class: hdr-tools
+strip_id_or_class: node-body-break
+strip_id_or_class: pullquote
+strip_id_or_class: node-pager
+strip_id_or_class: author-bio
+strip_id_or_class: node-footer
 
 test_url: http://motherjones.com/politics/2012/02/mac-mcclelland-free-online-shipping-warehouses-labor
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/movie.douban.com.txt b/inc/3rdparty/site_config/standard/movie.douban.com.txt
new file mode 100755 (executable)
index 0000000..eae211e
--- /dev/null
@@ -0,0 +1,12 @@
+# This filter is tested on:
+# http://movie.douban.com/review/1062013/
+
+title: //span[contains(@property, 'v:summary')]
+author: //span[contains(@property, 'v:reviewer')]
+date://span[contains(@property, 'v:dtreviewed')]
+body://div[contains(@class, 'main-bd')]
+
+strip://img[contains(@class,'rating')]|//img[contains(@class,'review-stat')]
+convert_double_br_tags: yes
+test_url: http://movie.douban.com/review/1062013/
+test_url: http://movie.douban.com/review/1021870/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f4f2045..7a28427
@@ -1,3 +1,3 @@
-body: //div[class="mainBody"]\r
+body: //div[class="mainBody"]
 footnotes: no
 test_url: http://msdn.microsoft.com/en-us/library/hh542796(VS.103).aspx
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ad89cda..f008d2d
@@ -1,21 +1,21 @@
-title: //title\r
-author: //div[@id='byline']\r
-\r
-date: //div[contains(@class,'timestamp')]/abbr/text()\r
-\r
-body: //div[@id='intellitTXT']\r
-\r
-strip: //div[@id='byline']\r
-strip: //div[contains(@class,'timestamp')]\r
-strip: //div[contains(@class, 'ad-label')]\r
-strip: //div[contains(@class, 'ad-break')]\r
-strip: //span[contains(@class, 'x-video')]\r
-strip: //span[contains(@class, 'inline')]\r
-strip: //div[contains(@class, 'video')]\r
-strip: //div[contains(@class, 'discuss')]\r
-strip: //div[@id='most-popular']\r
-strip: //div[contains(@class,'drawer')]\r
-strip: //*[contains(@class, 'hide')]\r
-\r
+title: //title
+author: //div[@id='byline']
+
+date: //div[contains(@class,'timestamp')]/abbr/text()
+
+body: //div[@id='intellitTXT']
+
+strip: //div[@id='byline']
+strip: //div[contains(@class,'timestamp')]
+strip: //div[contains(@class, 'ad-label')]
+strip: //div[contains(@class, 'ad-break')]
+strip: //span[contains(@class, 'x-video')]
+strip: //span[contains(@class, 'inline')]
+strip: //div[contains(@class, 'video')]
+strip: //div[contains(@class, 'discuss')]
+strip: //div[@id='most-popular']
+strip: //div[contains(@class,'drawer')]
+strip: //*[contains(@class, 'hide')]
+
 footnotes: no
 test_url: http://www.msnbc.msn.com/id/44748412/ns/business-world_business/#.TolUv-vfDbE
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/myfoxatlanta.com.txt b/inc/3rdparty/site_config/standard/myfoxatlanta.com.txt
new file mode 100755 (executable)
index 0000000..8a7590a
--- /dev/null
@@ -0,0 +1,5 @@
+body: //div[@id='WNStoryBody']
+author: //div[@id='WNStoryByline']
+prune: no
+
+test_url: http://www.myfoxatlanta.com/category/233685/local-news?clienttype=rss
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 1a35b4f..9ad8ce0
@@ -1,4 +1,4 @@
-body: //div[@class="col1"]//div[@class="photo"] | //div[@class="detail"]/p[@class="fontStyle21"] | //div[@class="story last"]\r
-tidy: no\r
-\r
+body: //div[@class="col1"]//div[@class="photo"] | //div[@class="detail"]/p[@class="fontStyle21"] | //div[@class="story last"]
+tidy: no
+
 test_url: http://www.myfoxboston.com/dpp/news/local/transit-police-say-woman-spat-on-mbta-bus-driver-2010611
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8b99d22..956be1e
@@ -1,12 +1,12 @@
-title: //h2[contains(@class, 'name')]\r
-body: //div[@class='printFullPageContentContainer']//div[contains(@class, 'recipe')]\r
-\r
-strip_id_or_class: photoBy\r
-strip_id_or_class: link\r
-\r
-single_page_link: //li[@class='print']/a[contains(@href, '/print/')]\r
-\r
-prune: no\r
-tidy: no\r
-\r
+title: //h2[contains(@class, 'name')]
+body: //div[@class='printFullPageContentContainer']//div[contains(@class, 'recipe')]
+
+strip_id_or_class: photoBy
+strip_id_or_class: link
+
+single_page_link: //li[@class='print']/a[contains(@href, '/print/')]
+
+prune: no
+tidy: no
+
 test_url: http://www.myrecipes.com/recipe/hummingbird-cake-10000000387218/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d95530f..7df1112
@@ -1,8 +1,8 @@
-title: //div[@class='address']/span\r
-author: substring-before(//span[@class='credits'],',')\r
-date: //div[@class='promodatepress']/span\r
-body: //div[@class='default_style_wrap']\r
-strip: //div[@class='text_adjust']\r
-strip: //div[@class='skiplink']\r
+title: //div[@class='address']/span
+author: substring-before(//span[@class='credits'],',')
+date: //div[@class='promodatepress']/span
+body: //div[@class='default_style_wrap']
+strip: //div[@class='text_adjust']
+strip: //div[@class='skiplink']
 strip: //h2
 test_url: http://www.nasa.gov/mission_pages/kepler/news/kepler-21b.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0b722d3..2645d40
@@ -1,10 +1,10 @@
-date://span[contains(@class,'date')]\r
-\r
-body://div[contains(@class,'contWarp')]\r
-\r
-strip://div[contains(@class,'keyWord')]\r
-strip://div[contains(@class,'submitComt')]\r
-strip://div[contains(@class,'cmts')]\r
-strip://div[contains(@class,'notice')]\r
+date://span[contains(@class,'date')]
+
+body://div[contains(@class,'contWarp')]
+
+strip://div[contains(@class,'keyWord')]
+strip://div[contains(@class,'submitComt')]
+strip://div[contains(@class,'cmts')]
+strip://div[contains(@class,'notice')]
 strip://div[contains(@class,'part pt-second')]
 test_url: http://www.nbweekly.com/news/china/201203/29316.aspx
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 45136a2..e7cc431
@@ -1,17 +1,17 @@
-#host configuration should be http://www.neh.gov/news/humanities/\r
-\r
-\r
-#meta data \r
-title:substring-after(substring-after(//title,':'),':')\r
-author:substring-after(//h2[@class = 'subHead'],'By')\r
-date:substring-before(substring-after(//title,':'),':')\r
-\r
-#img and caption handling\r
-wrap_in(small)://div[@id = 'mainContent']/table/descendant::p/descendant::text()\r
-wrap_in(fieldset)://div[@id = 'mainContent']/table\r
-\r
-# clean up\r
-strip: //table[@class = 'marginpaddingTop']\r
-strip: //h2[@class = 'subHead']\r
+#host configuration should be http://www.neh.gov/news/humanities/
+
+
+#meta data 
+title:substring-after(substring-after(//title,':'),':')
+author:substring-after(//h2[@class = 'subHead'],'By')
+date:substring-before(substring-after(//title,':'),':')
+
+#img and caption handling
+wrap_in(small)://div[@id = 'mainContent']/table/descendant::p/descendant::text()
+wrap_in(fieldset)://div[@id = 'mainContent']/table
+
+# clean up
+strip: //table[@class = 'marginpaddingTop']
+strip: //h2[@class = 'subHead']
 
 test_url: http://www.neh.gov/news/humanities/2011-11/IslamicScholar.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 564d549..2089fc3
@@ -1,3 +1,3 @@
-title: //*[@class="header_title"]/h1\r
+title: //*[@class="header_title"]/h1
 body: //div[contains(@class, 'content')]
 test_url: http://neomoney.co/personal/expatriate-and-migrant-loans/expatriate-loans/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4e6d66d..b7fedbf
@@ -1,7 +1,7 @@
-title: //div[@class='content-title']\r
-#date: substring-after(//div[@class='dernek-text-under'],'Posted on')\r
-body: //div[@class='content-item']\r
-next_page_link: //li[@class='next']/a\r
-convert_double_br_tags: yes\r
+title: //div[@class='content-title']
+#date: substring-after(//div[@class='dernek-text-under'],'Posted on')
+body: //div[@class='content-item']
+next_page_link: //li[@class='next']/a
+convert_double_br_tags: yes
 
 test_url: http://www.net-security.org/article.php?id=1732
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8688544..dcea047
@@ -1,16 +1,16 @@
-title: //h1\r
-author: //div[@class="submitted"]/span\r
-\r
-# seems like this should work, but nothing is returned. Issue with xpath parser?\r
-date: //div[@class="submitted"]/time\r
-\r
-body: //div[@id="main-content"]\r
-\r
-strip_comments: no\r
-\r
-strip: //h1\r
-strip: //div[@class="submitted"]\r
-strip: //dd[@class="profile-avatar"]\r
-strip: //div[@class="author-profile"]/dl/dt[1]\r
+title: //h1
+author: //div[@class="submitted"]/span
+
+# seems like this should work, but nothing is returned. Issue with xpath parser?
+date: //div[@class="submitted"]/time
+
+body: //div[@id="main-content"]
+
+strip_comments: no
+
+strip: //h1
+strip: //div[@class="submitted"]
+strip: //dd[@class="profile-avatar"]
+strip: //div[@class="author-profile"]/dl/dt[1]
 strip: //div[@id="right-col"]
 test_url: http://www.netmagazine.com/opinions/nielsen-wrong-mobile
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 87dc3cd..7fa43fd
@@ -1,6 +1,6 @@
-title: //h1[@class='entry-title']\r
-author: //a[@ref='author']\r
-date: //span[@class='entry-date']\r
-body: //div[@class='entry-content']\r
+title: //h1[@class='entry-title']
+author: //a[@ref='author']
+date: //span[@class='entry-date']
+body: //div[@class='entry-content']
 
 test_url: http://netzpolitik.org/2011/buch-generation-facebook/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/newleftproject.org.txt b/inc/3rdparty/site_config/standard/newleftproject.org.txt
new file mode 100755 (executable)
index 0000000..d9af99d
--- /dev/null
@@ -0,0 +1,3 @@
+title: //div[contains(@class, 'article_header')]//h3
+
+test_url: http://www.newleftproject.org/index.php/site/article_comments/do_we_need_a_facebook_of_the_left
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ab76684..f17ecdc
@@ -1,9 +1,9 @@
-title: //div[@id="maincontent"]/h1\r
-body: //div[@id="maincontent"]\r
-date: //div[@id="maincontent"]/p[2]\r
-author: //ul[@id="contributors"]/li/p/b\r
-\r
-strip: //p[@*]\r
-strip: //h1\r
+title: //div[@id="maincontent"]/h1
+body: //div[@id="maincontent"]
+date: //div[@id="maincontent"]/p[2]
+author: //ul[@id="contributors"]/li/p/b
+
+strip: //p[@*]
+strip: //h1
 strip: //div[@id="maincontent"]/div
 test_url: http://newmatilda.com/2011/07/22/turnbull-makes-sense-climate
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/newrepublic.com.txt b/inc/3rdparty/site_config/standard/newrepublic.com.txt
new file mode 100755 (executable)
index 0000000..039f038
--- /dev/null
@@ -0,0 +1,8 @@
+author: //span[@class="authors"]
+date: //span[@class="date"]
+body: //div[@class="primary"]
+
+strip: //div[@id="controls"]
+strip: //div[@id="read-next"]
+
+test_url: http://www.newrepublic.com/article/112731/moocs-will-online-education-ruin-university-experience
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 1f1e5d3..2b35270
@@ -1,8 +1,8 @@
-title: //div[@id="main-content"]//h2\r
-\r
-author: //div[@id="main-content"]//span[@class="authors"]\r
-\r
-date: //div[@id="main-content"]//span[@class="timestamp"]\r
-\r
+title: //div[@id="main-content"]//h2
+
+author: //div[@id="main-content"]//span[@class="authors"]
+
+date: //div[@id="main-content"]//span[@class="timestamp"]
+
 body: //div[@id="main-content"]//div[@class="content"]
 test_url: http://www.news-gazette.com/news/business/economy/2011-08-08/ibm-drops-out-blue-waters-project.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b7ab224..78af70f
@@ -1,12 +1,12 @@
-#This should apply to *.cnet.com. Not just news.cnet.com.\r
-title: //h1\r
-author: //img[@class="mugshot"]/@alt\r
-strip: //h1\r
-strip_id_or_class: breadcrumb\r
-strip: //p[@id="introP"]\r
-strip: //div[@class="postByline"]\r
-strip: //div[@class="editorBio"]\r
-strip: //div[@class="inline-slideshow"]\r
-strip: //div[@class="related"]\r
+#This should apply to *.cnet.com. Not just news.cnet.com.
+title: //h1
+author: //img[@class="mugshot"]/@alt
+strip: //h1
+strip_id_or_class: breadcrumb
+strip: //p[@id="introP"]
+strip: //div[@class="postByline"]
+strip: //div[@class="editorBio"]
+strip: //div[@class="inline-slideshow"]
+strip: //div[@class="related"]
 body: //div[@class="postBody txtWrap"]
 test_url: http://news.cnet.com/8301-27076_3-57405303-248/apple-ipad-charging-fine-keep-it-plugged-in/?tag=mncol;posts
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3ed1dc8..629bc91
@@ -1,8 +1,8 @@
-title://div[@class="content_detail"]/h1\r
-\r
-author://div[@class="author"]/strong\r
-\r
-date:substring-before(substring-after(//div[@class="content_detail"]/span[@class="date"], ','), ' WIB')\r
-\r
+title://div[@class="content_detail"]/h1
+
+author://div[@class="author"]/strong
+
+date:substring-before(substring-after(//div[@class="content_detail"]/span[@class="date"], ','), ' WIB')
+
 body://div[@class="text_detail"]
 test_url: http://news.detik.com/read/2012/05/22/225531/1922307/10/menkeu-cek-soal-lolosnya-315-kg-sabu-dari-bea-cukai
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6fc8613..5754d47
@@ -1,9 +1,9 @@
-body: //div[@id='main']\r
-strip: //div[@id='sbs']\r
-strip: //div[@id='fsizeSwitch']\r
-strip: //div[@id='googleAd']\r
-strip: //div[@id='detailFoot']\r
-strip_image_src: counter?key\r
-convert_double_br_tags: yes\r
+body: //div[@id='main']
+strip: //div[@id='sbs']
+strip: //div[@id='fsizeSwitch']
+strip: //div[@id='googleAd']
+strip: //div[@id='detailFoot']
+strip_image_src: counter?key
+convert_double_br_tags: yes
 
 test_url: http://news.kanaloco.jp/localnews/article/1105200018/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ded680f..1df4731
@@ -1,11 +1,11 @@
-title: //h2[@class="lyt-hdg-02-04"]\r
-\r
-author: //div[@class="lyt-namearea"]/a\r
-\r
-date: //div[@class="lyt-namearea"]/text()\r
-\r
-body: //div[@class="articleContent"]\r
-\r
-strip: //div[@id="tab-aside"]\r
+title: //h2[@class="lyt-hdg-02-04"]
+
+author: //div[@class="lyt-namearea"]/a
+
+date: //div[@class="lyt-namearea"]/text()
+
+body: //div[@class="articleContent"]
+
+strip: //div[@id="tab-aside"]
 
 test_url: http://news.mynavi.jp/articles/2011/12/07/nico/index.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b60deea..3b1d3cc
@@ -1,11 +1,11 @@
-single_page_link: //div[@id='content']//p[@class='readMore']/a\r
-\r
-title: //div[@class='hidden offscreen']/h2\r
-body: //div[@id="storyText"]\r
-move_into(//div[@id='storyText']): //div[@class='fact']\r
-strip: //small[@class='credit']\r
-strip: //small[@class='caption']\r
-date: substring-after(//div[@id='storyMeta']//p[@class='date'],'Publiziert am')\r
-strip: //p[@class='toplink']\r
+single_page_link: //div[@id='content']//p[@class='readMore']/a
+
+title: //div[@class='hidden offscreen']/h2
+body: //div[@id="storyText"]
+move_into(//div[@id='storyText']): //div[@class='fact']
+strip: //small[@class='credit']
+strip: //small[@class='caption']
+date: substring-after(//div[@id='storyMeta']//p[@class='date'],'Publiziert am')
+strip: //p[@class='toplink']
 
 test_url: http://news.orf.at/stories/2084731/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 743245f..1d54733
@@ -1,9 +1,9 @@
-body: //article\r
-title: //h1\r
-author: //span[@class='b-article-source-dropdown']\r
-strip: //span[@class='b-article-photo-incut__source']\r
-strip: //a[@class='b-read-more b-read-more_bottom']\r
-\r
-\r
+body: //article
+title: //h1
+author: //span[@class='b-article-source-dropdown']
+strip: //span[@class='b-article-photo-incut__source']
+strip: //a[@class='b-read-more b-read-more_bottom']
+
+
 tidy:no
 test_url: http://news.rambler.ru/12972208/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c80c332..ba4db82
@@ -1,4 +1,4 @@
-body: //div[@class='main']/div[@class='item']\r
-strip: //div[@class='right']\r
-\r
+body: //div[@class='main']/div[@class='item']
+strip: //div[@class='right']
+
 test_url: http://news.techmeme.com/110516/fh-rip
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 5ee0404..fc1739c
@@ -1,12 +1,12 @@
-title: //meta[@property='og:title']/@content\r
-title: //h1[@class='headline']\r
-author: //cite[contains(@class, 'byline') and contains(@class, 'vcard')]//span[@class='fn']\r
-date: //cite[contains(@class, 'byline') and contains(@class, 'vcard')]//abbr/@title\r
-body: //div[@id='mediaarticlelead']//a[@class='media'] | //div[contains(@class,'yom-art-content')]\r
-#strip: //cite/abbr\r
-strip_id_or_class: action\r
-strip_id_or_class: prefetch\r
-tidy: no\r
-prune: no\r
+title: //meta[@property='og:title']/@content
+title: //h1[@class='headline']
+author: //cite[contains(@class, 'byline') and contains(@class, 'vcard')]//span[@class='fn']
+date: //cite[contains(@class, 'byline') and contains(@class, 'vcard')]//abbr/@title
+body: //div[@id='mediaarticlelead']//a[@class='media'] | //div[contains(@class,'yom-art-content')]
+#strip: //cite/abbr
+strip_id_or_class: action
+strip_id_or_class: prefetch
+tidy: no
+prune: no
 
 test_url: http://news.yahoo.com/cold-la-nina-winter-forecast-west-coast-183535067.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0b01f8a..f7441d1
@@ -1,3 +1,3 @@
-strip_comments: no\r
+strip_comments: no
 strip: //a[. = 'reply']
 test_url: http://news.ycombinator.com/item?id=1516461
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/news.zing.vn.txt b/inc/3rdparty/site_config/standard/news.zing.vn.txt
new file mode 100755 (executable)
index 0000000..af81e90
--- /dev/null
@@ -0,0 +1,3 @@
+body://div[@class="newsdetail_wrapper"]
+strip://div[@class="more_news"]
+test_url: http://news.zing.vn/xa-hoi/s-phat-nang-xe-may-di-duong-tren-cao-ha-noi/a280838.html#home_noibat1
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/news247.gr.txt b/inc/3rdparty/site_config/standard/news247.gr.txt
new file mode 100755 (executable)
index 0000000..87637be
--- /dev/null
@@ -0,0 +1,6 @@
+title: //h1[@class='title']
+
+body: //img[@id='relPicsMainPic'] | //div[contains(@class, 'storyContent')]
+
+test_url: http://news247.gr/eidiseis/katatheseis_fwtia_htan_apofasismenoi_akomh_kai_na_afairesoyn_zwes_an_thewrousan_oti_to_thuma_htan_antipalos_toys.2433351.html
+test_url: http://news247.gr/?widget=rssfeed&view=feed&contentId=38291
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0500890..5eb0ea4
@@ -1,9 +1,9 @@
-date: //meta[@name='og:article:published_time']/@value\r
-\r
-body: //div[@class='itemIntroText' or @class='itemImageBlock' or @class='itemFullText']\r
-\r
-strip_id_or_class: itemImageGallery\r
-\r
-prune: no\r
-\r
+date: //meta[@name='og:article:published_time']/@value
+
+body: //div[@class='itemIntroText' or @class='itemImageBlock' or @class='itemFullText']
+
+strip_id_or_class: itemImageGallery
+
+prune: no
+
 test_url: http://www.newsbomb.gr/gossip/story/257234/i-proin-moy-protimoyse-na-serfarei-apo-to-na-kanoyme-sex
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index eb7d335..1a99031
@@ -1,12 +1,12 @@
-title: //h1\r
-body: (//div[@class='articleImg']//img)[1] | //p[contains(@class, 'commentTextArticle') or contains(@class, 'articlePublished')] | //div[@id='articleLeftContent']\r
-author: //div[@class='byline']//a[contains(@href, '/user/')]\r
-\r
-strip_id_or_class: facts\r
-strip_id_or_class: articleBlogsHolder\r
-strip_id_or_class: byline\r
-\r
-prune: no\r
-tidy: no\r
-\r
+title: //h1
+body: (//div[@class='articleImg']//img)[1] | //p[contains(@class, 'commentTextArticle') or contains(@class, 'articlePublished')] | //div[@id='articleLeftContent']
+author: //div[@class='byline']//a[contains(@href, '/user/')]
+
+strip_id_or_class: facts
+strip_id_or_class: articleBlogsHolder
+strip_id_or_class: byline
+
+prune: no
+tidy: no
+
 test_url: http://www.newsmill.se/artikel/2012/05/06/medielogiken-v-ger-tyngre-n-reportrarnas-sikter
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 860ad66..247bbeb
@@ -1,10 +1,10 @@
-body: //div[@class='right']//div[@class='articles']\r
-author: //div[@id='artinfo']//a[contains(@href, '/author/')]\r
-strip: //div[@id='artinfo']\r
-strip: //table[//a[contains(@href, 'twitter.com')]]\r
-strip_id_or_class: twitter\r
-\r
-prune: no\r
-tidy: no\r
-\r
+body: //div[@class='right']//div[@class='articles']
+author: //div[@id='artinfo']//a[contains(@href, '/author/')]
+strip: //div[@id='artinfo']
+strip: //table[//a[contains(@href, 'twitter.com')]]
+strip_id_or_class: twitter
+
+prune: no
+tidy: no
+
 test_url: http://www.newsunspun.org/eotn/bbc-headline-change-iran-goes-from-not-building-to-undecided-on-nuclear-bomb
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/newsweek.com.txt b/inc/3rdparty/site_config/standard/newsweek.com.txt
new file mode 100755 (executable)
index 0000000..565648b
--- /dev/null
@@ -0,0 +1,6 @@
+body: //div[@class = 'article-body']
+title: //h1[@class = 'article-title']
+strip: //aside
+
+test_url: http://www.newsweek.com/day-steve-mcqueen-met-his-new-nazi-neighbor-keith-moon-229741
+test_url: http://www.newsweek.com/2014/06/13/how-greylock-partners-finds-next-facebook-253329.html
diff --git a/inc/3rdparty/site_config/standard/newswise.com.txt b/inc/3rdparty/site_config/standard/newswise.com.txt
new file mode 100755 (executable)
index 0000000..10120ea
--- /dev/null
@@ -0,0 +1,17 @@
+prune: no
+tidy: no
+
+title: //h1/a[2]
+body: //div[@id="main"]
+author: //span[@id="articlesource"]
+date: //span[contains(@class, 'releasedate')]
+
+strip: //div[@class="inst-logo"]
+strip: //h1[1]
+
+strip_id_or_class: addthis
+strip_id_or_class: released
+strip_id_or_class: skiptranslate
+strip_id_or_class: flash
+
+test_url: http://www.newswise.com/articles/first-heat-wave-of-season-puts-elderly-at-risk
old mode 100644 (file)
new mode 100755 (executable)
index 5624aa8..950324a
@@ -1,10 +1,11 @@
-title: //h1[@id='articlehed'] | //h2[@id="articleintro"]\r
-body: //div[@id='articletext']\r
-\r
-strip: //ul[@id="bc"] | //div[@id="yrail"] | //div[@class="entry-keywords"] | //div[@class="entry-categories"] | //div[@class="socialUtils"] | //div[@id="footer"]\r
-\r
-date: //h4[@id='articleauthor']/span[@class='dd dds']\r
-date: //div[@id="pagebody"]/div[@class='hentry entry']/div[@class='published']\r
-\r
-single_page_link: //div[@class='paginationViewSinglePage']/a\r
-test_url: http://www.newyorker.com/online/blogs/culture/2012/06/mug-shot-web-sites.html
\ No newline at end of file
+title: //h1[@id='articlehed'] | //h2[@id="articleintro"]
+body: //div[@id='articletext']
+
+strip: //ul[@id="bc"] | //div[@id="yrail"] | //div[@class="entry-keywords"] | //div[@class="entry-categories"] | //div[@class="socialUtils"] | //div[@id="footer"] | //div[@class="cartoon"]
+
+date: //h4[@id='articleauthor']/span[@class='dd dds']
+date: //div[@id="pagebody"]/div[@class='hentry entry']/div[@class='published']
+
+single_page_link: //div[@class='paginationViewSinglePage']/a
+test_url: http://www.newyorker.com/online/blogs/culture/2012/06/mug-shot-web-sites.html
+test_url: http://www.newyorker.com/reporting/2013/04/22/130422fa_fact_bilger?currentPage=all&mobify=0
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 806a3df..b8d235d
@@ -1,16 +1,16 @@
-# 2011-08-22 [carlo@...] initial version\r
-# 2011-08-22 [carlo@...] removed comments & social links\r
-\r
-tidy: no\r
-\r
-single_page_link: //a[@class="single active"]\r
-\r
-body: //div[@id="main"]//div[@class="content-region"]/article\r
-author: //span[@class="author-name"]\r
-date: //time/text()\r
-\r
-strip_id_or_class: //aside[@id="related"]\r
-strip: //footer\r
-\r
+# 2011-08-22 [carlo@...] initial version
+# 2011-08-22 [carlo@...] removed comments & social links
+
+tidy: no
+
+single_page_link: //a[@class="single active"]
+
+body: //div[@id="main"]//div[@class="content-region"]/article
+author: //span[@class="author-name"]
+date: //time/text()
+
+strip_id_or_class: //aside[@id="related"]
+strip: //footer
+
 title: //h1
 test_url: http://www.next-gen.biz/reviews/deus-ex-human-revolution-review
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 70f9247..956b288
@@ -1,11 +1,11 @@
-# doesn't look like selecting an attribute value works?\r
-# author: //meta[@id="authorName"]@value\r
-\r
-author: substring-after(//li[@id="article-hdr-meta-author"]/text(), "By ")\r
-date: //abbr[@id="article-time"]\r
-title: //div[@id="article-hdr"]/h1\r
-body: //div[@class="articleText"]\r
-\r
-# strip miscellaneous teasers & etc\r
+# doesn't look like selecting an attribute value works?
+# author: //meta[@id="authorName"]@value
+
+author: substring-after(//li[@id="article-hdr-meta-author"]/text(), "By ")
+date: //abbr[@id="article-time"]
+title: //div[@id="article-hdr"]/h1
+body: //div[@class="articleText"]
+
+# strip miscellaneous teasers & etc
 strip: //div[@class="removeformobile"]
 test_url: http://www.nfl.com/news/story/09000d5d82388707/article/close-shave-chiefs-haley-perseveres-through-rough-start?module=HP11_content_stream
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6083486..44a82a9
@@ -1,7 +1,7 @@
-next_page_link: //div[@class='nextpage_continue']/a\r
-strip: //div[@class='nextpage_continue']\r
-strip_id_or_class: nextpage\r
-title: //div[@class='article_title']//h1\r
-body: //div[@class='article_title']/..\r
+next_page_link: //div[@class='nextpage_continue']/a
+strip: //div[@class='nextpage_continue']
+strip_id_or_class: nextpage
+title: //div[@class='article_title']//h1
+body: //div[@class='article_title']/..
 body: //div[@class='content']
 test_url: http://ngm.nationalgeographic.com/2012/02/tsunami/folger-text
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
old mode 100644 (file)
new mode 100755 (executable)
index 409a897..f0e28af
@@ -1,13 +1,13 @@
-body: //div[@id="main"]\r
-title: //div[@id="main"]/h3\r
-\r
-# Remove &lsquo;Review&rsquo; and &lsquo;Wii&rsquo;.\r
-strip: //div[@class="badge"]\r
-\r
-# Remove duplicate title and country flag.\r
-strip: //h3\r
-\r
-# Commented out below are attempts to extract the author and date, which did not work.\r
-# author: //p[@class="extra "]/a\r
+body: //div[@id="main"]
+title: //div[@id="main"]/h3
+
+# Remove &lsquo;Review&rsquo; and &lsquo;Wii&rsquo;.
+strip: //div[@class="badge"]
+
+# Remove duplicate title and country flag.
+strip: //h3
+
+# Commented out below are attempts to extract the author and date, which did not work.
+# author: //p[@class="extra "]/a
 # date: //p[@class="extra "]/span[@class="when"]
 test_url: http://www.nintendoworldreport.com/review/28400
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ae2d7e4..b15f061
@@ -1,5 +1,5 @@
-author: //span[@class='meta']/span[@class='username']\r
-body: //div[@class='article-content']\r
-\r
+author: //span[@class='meta']/span[@class='username']
+body: //div[@class='article-content']
+
 strip_id_or_class: 'article-actions'
 test_url: http://nojesguiden.se/blogg/maja-bredberg/maja-laser-tidningen-en-helt-vanlig-lordag-i
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 04a0a34..88429a7
@@ -1,11 +1,11 @@
-title: //h1\r
-body: //div[@id='pn-maincontent']\r
-strip_id_or_class: z-menu\r
-strip_id_or_class: news_category\r
-strip_id_or_class: news_title\r
-strip_id_or_class: news_modify\r
-strip_id_or_class: news_morearticlesincat\r
-strip_id_or_class: ezc_comments\r
-strip_comments: yes\r
-\r
+title: //h1
+body: //div[@id='pn-maincontent']
+strip_id_or_class: z-menu
+strip_id_or_class: news_category
+strip_id_or_class: news_title
+strip_id_or_class: news_modify
+strip_id_or_class: news_morearticlesincat
+strip_id_or_class: ezc_comments
+strip_comments: yes
+
 test_url: http://www.northumberlandview.ca/index.php?module=news&func=display&sid=5972
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/nosalty.hu.txt b/inc/3rdparty/site_config/standard/nosalty.hu.txt
new file mode 100755 (executable)
index 0000000..7e20cad
--- /dev/null
@@ -0,0 +1,6 @@
+title: //div[@id='tab-recept']//h1
+body: //div[@id='tab-recept']//div[contains(@class, 'column-container')]
+strip_id_or_class: ajanlo-box
+prune: no
+
+test_url: http://www.nosalty.hu/recept/szupergyors-fank
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 205b1af..1b817c0
@@ -1,6 +1,6 @@
-title: /html/body/div[3]/div/div/h1\r
-\r
-body: //*[@id="article-body"]\r
-\r
+title: /html/body/div[3]/div/div/h1
+
+body: //*[@id="article-body"]
+
 
 test_url: http://nplusonemag.com/the-outskirts-of-progress
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index afab0eb..acd73e4
@@ -1,32 +1,34 @@
-title: //div[contains(@class, 'storytitle')]//h1\r
-author: //p[@class="byline"]/span\r
-body: //div[@id='storyspan02']//*[@class='duration' or @class='download' or contains(@class, 'photo')] | //div[@id='storytext'] | //div[@class='transcript']\r
-date: //meta[@name="date"]/@content\r
-\r
-strip: //div[@class='enlarge_measure']\r
-strip: //div[@class='enlarge_html']\r
-strip: //a[@class='enlargeicon']\r
-strip: //div[contains(@class, 'bookedition')]\r
-strip: //div[@class='textsize']\r
-strip: //ul[@class='genres']\r
-strip: //span[@class='bull']\r
-strip_id_or_class: secondary\r
-strip_id_or_class: con1col\r
-strip: //h3[@class='conheader']\r
-\r
-replace_string(<a name="more">&nbsp;</a>): <!-- no more -->\r
-replace_string(<div class="transcript">): <div class="transcript"><h2>Transcript</h2>\r
-\r
-prune: no\r
-strip://div[@class="ecommercepop"]\r
-strip://span[@class="bull"]\r
-strip://span[@class="purchaseLink"]\r
-strip://div[@class="enlarge_html"]\r
-strip://div[@class="enlarge_measure"]\r
-strip://div[@class="container con1col small"]\r
-strip://a[contains(@class, "enlargebtn")]\r
-strip://div[contains(@class, "bucketwrap internallink")]\r
-\r
-test_url: http://www.npr.org/blogs/thetwo-way/2011/07/12/137799301/sports-loses-its-escapist-gleam-in-a-summer-of-court-dates\r
-test_url: http://www.npr.org/2012/07/04/156190948/feeling-under-siege-catholic-leadership-shifts-right\r
-test_url: http://www.npr.org/2012/12/13/166480907/the-years-best-sci-fi-crosses-galaxies-and-genres
\ No newline at end of file
+title: //div[contains(@class, 'storytitle')]//h1
+author: //p[@class="byline"]/span
+body: //div[@id='primaryaudio']//*[@class='duration' or @class='download' or contains(@class, 'photo')] | //div[@id='storytext' or @id='supplementarycontent' or contains(@class, 'transcript')]
+date: //meta[@name="date"]/@content
+
+strip_id_or_class: enlarge_measure
+strip_id_or_class: enlarge_html
+strip: //a[contains(@class, 'enlargeicon')]
+strip: //div[contains(@class, 'bookedition')]
+strip: //div[@class='textsize']
+strip: //ul[@class='genres']
+strip: //span[@class='bull']
+strip_id_or_class: secondary
+strip_id_or_class: con1col
+strip: //h3[@class='conheader']
+
+replace_string(<a name="more">&nbsp;</a>): <!-- no more -->
+replace_string(<div class="transcript">): <div class="transcript"><h2>Transcript</h2>
+replace_string(<div class="transcript storytext">): <div class="transcript storytext"><h2>Transcript</h2>
+
+prune: no
+strip://div[@class="ecommercepop"]
+strip://span[@class="bull"]
+strip://span[@class="purchaseLink"]
+strip://div[@class="enlarge_html"]
+strip://div[@class="enlarge_measure"]
+strip://div[@class="container con1col small"]
+strip://a[contains(@class, "enlargebtn")]
+strip://div[contains(@class, "bucketwrap internallink")]
+
+test_url: http://www.npr.org/blogs/thetwo-way/2011/07/12/137799301/sports-loses-its-escapist-gleam-in-a-summer-of-court-dates
+test_url: http://www.npr.org/2012/07/04/156190948/feeling-under-siege-catholic-leadership-shifts-right
+test_url: http://www.npr.org/2012/12/13/166480907/the-years-best-sci-fi-crosses-galaxies-and-genres
+test_url: http://www.npr.org/templates/story/story.php?storyId=229103221
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8ecb896..d95ec68
@@ -1,13 +1,13 @@
-strip_id_or_class: sIFR-alternate\r
-title: //div[@id='page-title-wrapper']/div[@id='page-title']/h2\r
-single_page_link: //a[contains(@href, 'pagination=false') and not(contains(@href, 'printpage=true'))]\r
-\r
-body: //div[@id = 'article-body']\r
-strip_id_or_class:article-tools\r
-strip_id_or_class:js_target\r
-strip_id_or_class:marker\r
-author://div[@id = 'page-title']/h3\r
-date://div[@id = 'page-title']/h5/a[starts-with(@href,'/issues/')]\r
-\r
-\r
+strip_id_or_class: sIFR-alternate
+title: //div[@id='page-title-wrapper']/div[@id='page-title']/h2
+single_page_link: //a[contains(@href, 'pagination=false') and not(contains(@href, 'printpage=true'))]
+
+body: //div[@id = 'article-body']
+strip_id_or_class:article-tools
+strip_id_or_class:js_target
+strip_id_or_class:marker
+author://div[@id = 'page-title']/h3
+date://div[@id = 'page-title']/h5/a[starts-with(@href,'/issues/')]
+
+
 test_url: http://www.nybooks.com/articles/archives/2012/feb/23/were-more-unequal-you-think/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f664c93..7a1d62d
@@ -1,8 +1,8 @@
-title: //h2[contains(@class, 'primary')]\r
-body: //div[@id='story']\r
-author: //*[@class='by']/a\r
-date: substring-after(//*[@class='date'], 'Published')\r
-\r
-next_page_link: //div[@class='page-navigation']//li[@class='next']/a\r
-\r
+title: //h2[contains(@class, 'primary')]
+body: //div[@id='story']
+author: //*[@class='by']/a
+date: substring-after(//*[@class='date'], 'Published')
+
+next_page_link: //div[@class='page-navigation']//li[@class='next']/a
+
 test_url: http://nymag.com/news/features/wall-street-2012-2/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8c9e37f..f4bedb6
@@ -1,8 +1,8 @@
-title: //div[@class="article default-article"]/h1\r
-author: //p[@class="author"]/a[2]\r
-\r
-# Article introduction:\r
-#move_into(//div[@class="article-bread"]): //p[@class="lead"]\r
-\r
+title: //div[@class="article default-article"]/h1
+author: //p[@class="author"]/a[2]
+
+# Article introduction:
+#move_into(//div[@class="article-bread"]): //p[@class="lead"]
+
 body: //div[@class="article-bread"]
 test_url: http://www.nyteknik.se/nyheter/energi_miljo/energi/article3391426.ece
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8d9a794..23c9ad1
@@ -1,36 +1,49 @@
-title://h1[@class="articleHeadline"]\r
-body://div[@id="article"]\r
-strip_id_or_class:articleTools\r
-strip_id_or_class:readerscomment\r
-#strip://div[contains(@class, "articleInline runaroundLeft")]\r
-strip: //div[contains(@class, "doubleRule")]\r
-# strip image credit - appears as a bold heading\r
-strip: //div[contains(@class, "articleInline")]//h6\r
-strip_id_or_class:enlargeThis\r
-strip_id_or_class:pageLinks\r
-strip_id_or_class:memberTools\r
-strip_id_or_class:articleExtras\r
-strip_id_or_class:singleAd\r
-strip_id_or_class:byline\r
-strip_id_or_class:dateline\r
-strip_id_or_class:articleheadline\r
-strip_id_or_class:articleBottomExtra\r
-strip://a[contains(@href, 'nytimes.com/adx/')]\r
-strip: //nyt_byline\r
-strip: //span[contains(@class, 'slideshow') or contains(@class, 'video')]\r
-strip: //p[@class='caption']//a[contains(., 'More Photos')]\r
-\r
-prune: no\r
-tidy: no\r
-\r
-date: substring-after(//*[contains(@class, 'dateline')], 'Published:')\r
-\r
-single_page_link: //link[contains(@href, 'pagewanted=all')]\r
-#single_page_link: //a[contains(@href, 'pagewanted=all') and not(contains(@href, 'login'))]\r
-\r
-strip://ul[@id = 'toolsList']\r
-strip://h6[@class = 'kicker']\r
-author:substring-after(//h6[@class='byline'],'By ')\r
-\r
-test_url: http://www.nytimes.com/2011/07/24/books/review/an-academic-authors-unintentional-masterpiece.html\r
-test_url: http://www.nytimes.com/2012/06/10/arts/television/the-newsroom-aaron-sorkins-return-to-tv.html
\ No newline at end of file
+title://h1[@class="articleHeadline"]
+body://div[@id="article"]
+body://*[@itemprop="articleBody"]
+strip_id_or_class:articleTools
+strip_id_or_class:readerscomment
+#strip://div[contains(@class, "articleInline runaroundLeft")]
+strip: //div[contains(@class, "doubleRule")]
+# strip image credit - appears as a bold heading
+strip: //div[contains(@class, "articleInline")]//h6
+strip_id_or_class:enlargeThis
+strip_id_or_class:pageLinks
+strip_id_or_class:memberTools
+strip_id_or_class:articleExtras
+strip_id_or_class:singleAd
+strip_id_or_class:byline
+strip_id_or_class:dateline
+strip_id_or_class:articleheadline
+strip_id_or_class:articleBottomExtra
+strip_id_or_class:shareTools
+strip://a[contains(@href, 'nytimes.com/adx/')]
+strip: //nyt_byline
+strip: //span[contains(@class, 'slideshow') or contains(@class, 'video')]
+strip: //p[@class='caption']//a[contains(., 'More Photos')]
+
+prune: no
+tidy: no
+
+find_string: <script 
+replace_string: <div style="display:none" 
+find_string: </script>
+replace_string: </div>
+
+date: substring-after(//*[contains(@class, 'dateline')], 'Published:')
+
+single_page_link: //link[contains(@href, 'pagewanted=all')]
+single_page_link: //link[@rel='alternate' and contains(@href, 'mobile.nytimes.com')]/@href
+single_page_link: concat(substring-before(//div[@id='pageLinks']//a[contains(@href, 'pagewanted=')]/@href, 'pagewanted='), 'pagewanted=all') 
+#single_page_link: //a[contains(@href, 'pagewanted=all') and not(contains(@href, 'login'))]
+
+strip://ul[@id = 'toolsList']
+strip://h6[@class = 'kicker']
+author:substring-after(//h6[@class='byline'],'By ')
+
+test_url: http://www.nytimes.com/2011/07/24/books/review/an-academic-authors-unintentional-masterpiece.html
+test_url: http://www.nytimes.com/2012/06/10/arts/television/the-newsroom-aaron-sorkins-return-to-tv.html
+test_url: http://www.nytimes.com/2013/03/25/world/middleeast/israeli-military-responds-after-patrols-come-under-fire-from-syria.html
+test_url: http://www.nytimes.com/2013/08/15/nyregion/when-the-new-york-city-subway-ran-without-rails.html
+test_url: http://www.nytimes.com/2004/02/29/weekinreview/correspondence-class-consciousness-china-s-wealthy-live-creed-hobbes-darwin-meet.html
+test_url: http://www.nytimes.com/2014/06/19/opinion/gail-collins-romney-and-the-2016-contenders-huddle.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 81faaba..749f4f2
@@ -1,12 +1,12 @@
-body: //*[@class='article-full']\r
-title: //h3\r
-strip: //header[@class='group']\r
-#body: //p[@class='lead']\r
-#move_into(//p[@class='lead']): //*[@class='article-full']/figure\r
-#move_into(//p[@class='lead']): //div[@id='articleBodyText']\r
-strip: //div[@id='social-media-floater']\r
-strip: //div[@class='advertisement']\r
-strip: //div[@class='infobox']\r
-strip: //div[@id='articleComments']\r
-\r
+body: //*[@class='article-full']
+title: //h3
+strip: //header[@class='group']
+#body: //p[@class='lead']
+#move_into(//p[@class='lead']): //*[@class='article-full']/figure
+#move_into(//p[@class='lead']): //div[@id='articleBodyText']
+strip: //div[@id='social-media-floater']
+strip: //div[@class='advertisement']
+strip: //div[@class='infobox']
+strip: //div[@id='articleComments']
+
 test_url: http://www.nzz.ch/wissen/wissenschaft/sonnenschutz-fuer-die-erde-1.17282213
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e409ca2..0b10753
@@ -1,7 +1,7 @@
-body: //article[contains(@class, 'instapaper_body')]\r
-\r
-prune: no\r
-\r
-single_page_link: //a[@id='print-button']\r
-\r
+body: //article[contains(@class, 'instapaper_body')]
+
+prune: no
+
+single_page_link: //a[@id='print-button']
+
 test_url: http://www.observer.com/2008/would-you-take-tumblr-man
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a2fb5f2..bf10787
@@ -1,7 +1,7 @@
-body: //div[(@id = "content")]\r
-strip: //div[(@class = "links-bar")]\r
-strip: //div[(@class = "povrzani")]\r
-strip: //div[(@class = "povrzani-dolu")]\r
-strip: //div[(@class = "tags")]\r
+body: //div[(@id = "content")]
+strip: //div[(@class = "links-bar")]
+strip: //div[(@class = "povrzani")]
+strip: //div[(@class = "povrzani-dolu")]
+strip: //div[(@class = "tags")]
 strip: //h1[(@id = "page-title")]
 test_url: http://off.net.mk/zhivot-i-zabava/gadzheti/dzhabe-raboti-dzhabe-ne-dishi
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
old mode 100644 (file)
new mode 100755 (executable)
index 1b39b62..4b3a720
@@ -1,9 +1,9 @@
-title: //div[@id='squeeze']/h1\r
-strip: //div[@id='squeeze']/h1\r
-author: //div[@class='submitted']/a\r
-strip: //div[@class='submitted']/a\r
-convert_double_br_tags: yes\r
-\r
-\r
+title: //div[@id='squeeze']/h1
+strip: //div[@id='squeeze']/h1
+author: //div[@class='submitted']/a
+strip: //div[@class='submitted']/a
+convert_double_br_tags: yes
+
+
 
 test_url: http://omiliya.org/content/predchuvstvie.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index be7a17e..a95c2b0
@@ -1,5 +1,5 @@
-body: //div[(@class = "statija")]\r
-strip: //div[(@class = "relatedBlock")]\r
-strip: //div[(@class = "swftools")]\r
+body: //div[(@class = "statija")]
+strip: //div[(@class = "relatedBlock")]
+strip: //div[(@class = "swftools")]
 strip: //table[(@class = "links")]
 test_url: http://on.net.mk/video/na-trkala/lamborghini-aventador-avionot-shto-ne-leta
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index edb5285..448bb7e
@@ -1,23 +1,25 @@
-title: //meta[@property="og:title"]/@content\r
-body: //div[@id='article_story_body']\r
-\r
-author: //h3[@class='byline']/a\r
-# for slid show content\r
-body: //ul[@id='imageSlide']//li[@class='firstSlide']//img | (//div[@class='txt_body']//p)[1]\r
-date: //li[@class='dateStamp']/small\r
-\r
-strip_id_or_class: insetFullBracket\r
-strip_id_or_class: insettipBox\r
-#strip_id_or_class: legacyInset\r
-strip_id_or_class: recipeACShopAndBuyText\r
-\r
-strip: //div[contains(@class, 'insetContent')]//cite\r
-strip: //*[contains(@style, 'visibility: hidden;')]\r
-strip: //div[contains(@class, 'insetContent') and not(contains(@class, 'image'))]\r
-\r
-prune: no\r
-tidy: no\r
-\r
-test_url: http://online.wsj.com/article/SB10001424052970203363504577185322849515102.html\r
-# slide show\r
-test_url: http://online.wsj.com/article/SB10001424052970204791104577110550376458164.html
\ No newline at end of file
+title: //meta[@property="og:title"]/@content
+body: //div[@id='article_story_body']
+
+author: //h3[@class='byline']/a
+# for slide show content
+body: //ul[@id='imageSlide']//li[@class='firstSlide']//img | (//div[@class='txt_body']//p)[1]
+date: //li[@class='dateStamp']/small
+
+strip_id_or_class: insetFullBracket
+strip_id_or_class: insettipBox
+#strip_id_or_class: legacyInset
+strip_id_or_class: recipeACShopAndBuyText
+
+strip: //div[contains(@class, 'insetContent')]//cite
+strip: //*[contains(@style, 'visibility: hidden;')]
+strip: //div[contains(@class, 'insetContent') and not(contains(@class, 'image'))]
+strip: //div[contains(@class, 'carousel')]
+
+prune: no
+tidy: no
+
+test_url: http://online.wsj.com/news/articles/SB10001424052702304626304579509100018004342
+test_url: http://online.wsj.com/article/SB10001424052970203363504577185322849515102.html
+# slide show
+test_url: http://online.wsj.com/article/SB10001424052970204791104577110550376458164.html
diff --git a/inc/3rdparty/site_config/standard/ontologicalgeek.com.txt b/inc/3rdparty/site_config/standard/ontologicalgeek.com.txt
new file mode 100755 (executable)
index 0000000..a9bf71e
--- /dev/null
@@ -0,0 +1,8 @@
+title: //h1[@class='entry-title']
+
+author: //a[@rel='author']
+
+date: substring-before(//aside[@class='entry-meta'], '|')
+
+body: //div[@class='entry-content']
+test_url: http://ontologicalgeek.com/change-or-live-final-fantasy-x-as-catholic-dystopia/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 510eb25..6913eb0
@@ -1,4 +1,4 @@
-body: //div[@id = 'content-inner']\r
-strip: //div[@id = 'content-bottom']\r
+body: //div[@id = 'content-inner']
+strip: //div[@id = 'content-bottom']
 strip_id_or_class: print_sharebutton
 test_url: http://openthemagazine.com/article/nation/sania-vs-saina
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b766354..a5dcdb5
@@ -1,4 +1,4 @@
-body: //div[@class="chapter"]\r
-prune: no\r
-tidy: no\r
+body: //div[@class="chapter"]
+prune: no
+tidy: no
 test_url: http://openwebx.org/docs/springext.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ff16ca7..fb4f218
@@ -1,11 +1,11 @@
-single_page_link: //div[@id='content']//p[@class='readMore']/a\r
-\r
-title: //div[@class='hidden offscreen']/h2\r
-body: //div[@id="storyText"]\r
-move_into(//div[@id='storyText']): //div[@class='fact']\r
-strip: //small[@class='credit']\r
-strip: //small[@class='caption']\r
-date: substring-after(//div[@id='storyMeta']//p[@class='date'],'Publiziert am')\r
-strip: //p[@class='toplink']\r
+single_page_link: //div[@id='content']//p[@class='readMore']/a
+
+title: //div[@class='hidden offscreen']/h2
+body: //div[@id="storyText"]
+move_into(//div[@id='storyText']): //div[@class='fact']
+strip: //small[@class='credit']
+strip: //small[@class='caption']
+date: substring-after(//div[@id='storyMeta']//p[@class='date'],'Publiziert am')
+strip: //p[@class='toplink']
 
 test_url: http://orf.at/stories/2084731/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0dedac3..50717f2
@@ -1,18 +1,18 @@
-title: /html/body/div[5]/div[2]/h1\r
-body: /html/body/div[5]/div[2]/div[6]/div/div\r
-body: //*[@id="cikk"]\r
-strip: /html/body/div[5]/div[2]/h1\r
-strip: /html/body/div[5]/div[2]/div[4]\r
-strip: //*[@id="multidoboz"]\r
-strip: /html/body/div[5]/div[2]/div[6]/div[2]\r
-strip: //*[@id="comments"]\r
-strip: //*[@id="rating-doboz"]\r
-strip: /html/body/div[5]/div[2]/div[10]\r
-strip: /html/body/div[5]/div[2]/a\r
-strip: /html/body/div[5]/div[2]/span\r
-strip: /html/body/div[5]/div[2]/span[2]\r
-strip: /html/body/div[5]/div[2]/span[3]\r
-strip: /html/body/div[5]/div[2]/span[4]\r
-strip: /html/body/div[5]/div[2]/span[5]\r
+title: /html/body/div[5]/div[2]/h1
+body: /html/body/div[5]/div[2]/div[6]/div/div
+body: //*[@id="cikk"]
+strip: /html/body/div[5]/div[2]/h1
+strip: /html/body/div[5]/div[2]/div[4]
+strip: //*[@id="multidoboz"]
+strip: /html/body/div[5]/div[2]/div[6]/div[2]
+strip: //*[@id="comments"]
+strip: //*[@id="rating-doboz"]
+strip: /html/body/div[5]/div[2]/div[10]
+strip: /html/body/div[5]/div[2]/a
+strip: /html/body/div[5]/div[2]/span
+strip: /html/body/div[5]/div[2]/span[2]
+strip: /html/body/div[5]/div[2]/span[3]
+strip: /html/body/div[5]/div[2]/span[4]
+strip: /html/body/div[5]/div[2]/span[5]
 strip: //*[@id="kommentszam"]
 test_url: http://www.origo.hu/itthon/20110119-lemondott-a-kulturaert-felelos-helyettes-allamtitkar.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/oschina.net.txt b/inc/3rdparty/site_config/standard/oschina.net.txt
new file mode 100755 (executable)
index 0000000..5645153
--- /dev/null
@@ -0,0 +1,3 @@
+title: //h1
+strip_id_or_class: syntaxhighlighter
+test_url: http://www.oschina.net/translate/event-based-programming-what-async-has-over-sync?print
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f03c955..7e2985e
@@ -1,11 +1,11 @@
-#body: (//div[@class='ftr-yt-vid'])[1]\r
-body: (//blockquote[contains(@class, 'postcontent')])[1]\r
-body: (//div[starts-with(@id, 'post_message')])[1]\r
-\r
-prune: no\r
-tidy: no\r
-\r
-#replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player"\r
-#replace_string(</iframe>): </iframe>&nbsp;</div>\r
-\r
+#body: (//div[@class='ftr-yt-vid'])[1]
+body: (//blockquote[contains(@class, 'postcontent')])[1]
+body: (//div[starts-with(@id, 'post_message')])[1]
+
+prune: no
+tidy: no
+
+#replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player"
+#replace_string(</iframe>): </iframe>&nbsp;</div>
+
 test_url: http://pakistantvdekho.com/showthread.php?647741-Sitam-Gar-by-HUM-TV-Episode-07&p=659080#post659080
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pakmedia.tv.txt b/inc/3rdparty/site_config/standard/pakmedia.tv.txt
new file mode 100755 (executable)
index 0000000..5d6e4c8
--- /dev/null
@@ -0,0 +1,17 @@
+title: //h1[@class='entry-title']
+body: //article//div[@class='entry']
+strip_id_or_class: addthis
+strip_id_or_class: gdsrcacheloader
+strip_id_or_class: entry-meta
+strip_id_or_class: entry-tags
+strip_id_or_class: authorbox
+strip: //div[@class='entry']/p[1]
+strip: //img[@width='600' and @height='70']
+# related posts
+strip: //h3[contains(., 'Related posts')]
+strip: //div[contains(@style, 'border: 0pt none ; margin: 0pt; padding: 0pt;')]
+
+prune: no
+tidy: no
+
+test_url: http://pakmedia.tv/tv-one/feed
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d0d2a5d..35121e1
@@ -1,5 +1,5 @@
-title://h2\r
-author://div[@class="posted"]/a\r
-date://div[@class="date"]\r
+title://h2
+author://div[@class="posted"]/a
+date://div[@class="date"]
 body://div[@class="entry"]
 test_url: http://pandagon.net/index.php/site/its-okay-to-admit-that-mass-hysteria-is-real
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7d1c218..a5d427a
@@ -1,5 +1,5 @@
-tidy: no\r
-body: //article\r
-date: //time/@datetime\r
+tidy: no
+body: //article
+date: //time/@datetime
 strip_id_or_class: sharedaddy
 test_url: http://pandodaily.com/2012/01/19/ibooks-author-is-not-going-to-hurt-publishers-it-might-even-help-them/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0361f06..e0e2595
@@ -1,3 +1,3 @@
-body: //div[@class='entry']\r
+body: //div[@class='entry']
 date: //h3[@class='postDate']
 test_url: http://www.panic.com/blog/2011/07/panic-is-ready-for-lion/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/papodehomem.com.br.txt b/inc/3rdparty/site_config/standard/papodehomem.com.br.txt
new file mode 100755 (executable)
index 0000000..2c522da
--- /dev/null
@@ -0,0 +1,6 @@
+title: //h2[@class="page_title"]
+body: //div[@class="entry arquivo"]
+author: //span[@class="author"]
+footnotes: yes
+prune: yes
+test_url: http://papodehomem.com.br/um-relato-confessional-sobre-a-maioridade-penal/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a3bd4b0..cd9bd55
@@ -1,6 +1,6 @@
-title: //h2[@class="post-title"]\r
-author: substring-after(//div[@class="description"],'Words by ')\r
-date: //li[@class="date"]\r
-strip: //h2[@class="post-title"]\r
+title: //h2[@class="post-title"]
+author: substring-after(//div[@class="description"],'Words by ')
+date: //li[@class="date"]
+strip: //h2[@class="post-title"]
 body: //div[@class="copy"]
 test_url: http://parislemon.com/post/13462682469/the-15-inch-air
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 478a669..caaa2e9
@@ -1,3 +1,3 @@
-title: //h1\r
+title: //h1
 body: //div[@id='news-article']
 test_url: http://www.parliament.uk/business/committees/committees-a-z/commons-select/backbench-business-committee/news/guidance-for-e-petitioners/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 89d13b2..03b67b7
@@ -1,6 +1,6 @@
-title://div[@class="paste_box_line1"]/h1\r
-author://div[@class="paste_box_line2"]/a\r
-body://div[@class="text"]\r
-date:substring-before(substring-after(//div[@class="paste_box_line2"],'|'),'|')\r
+title://div[@class="paste_box_line1"]/h1
+author://div[@class="paste_box_line2"]/a
+body://div[@class="text"]
+date:substring-before(substring-after(//div[@class="paste_box_line2"],'|'),'|')
 dissolve://li
 test_url: http://pastebin.com/LAykd1es
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 40a049e..c535158
@@ -1,5 +1,5 @@
-title: //h1\r
-body: //div[@id='ff-pastepad-content']\r
-prune: no\r
+title: //h1
+body: //div[@id='ff-pastepad-content']
+prune: no
 # todo: add test file
 test_url: http://pastepad.fivefilters.org/test.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 1a4cd25..2504222
@@ -1,8 +1,8 @@
-title://*[contains(@class,'post-title')]\r
-body://div[contains(@class,'post-body')]\r
-body://div[contains(@class,'entry-content')]\r
-strip_comments:no\r
-prune:no\r
-convert_double_br_tags:yes\r
+title://*[contains(@class,'post-title')]
+body://div[contains(@class,'post-body')]
+body://div[contains(@class,'entry-content')]
+strip_comments:no
+prune:no
+convert_double_br_tags:yes
 tidy:yes
 test_url: http://www.pathawks.com/2011/06/crazyawesomecoloradotrip.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
old mode 100644 (file)
new mode 100755 (executable)
index cebea4d..96bdd95
@@ -1,10 +1,10 @@
-prune:yes\r
-\r
-date://*[contains(@class,'date')]\r
-\r
-body://div[contains(@id,'content')]\r
-\r
-next_page_link://a[contains(.,'Next >')]\r
-\r
+prune:yes
+
+date://*[contains(@class,'date')]
+
+body://div[contains(@id,'content')]
+
+next_page_link://a[contains(.,'Next >')]
+
 strip_id_or_class:sponsors
 test_url: http://www.pcmag.com/article2/0,2817,2401676,00.asp
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 30ccbb5..7193f87
@@ -1,19 +1,19 @@
-title: //div[@class='articleHead']//h1\r
-author: //div[@class="author-name"]/a[1]\r
-body: //div[@class="main"]\r
-\r
-# remove 'From the Lab' and 'Recent posts' text\r
-strip: //div[@class='blogLabel']\r
-\r
-# remove byline and meta info\r
-strip: //h1\r
-strip: //div[@class="article-meta"]\r
-strip: //div[@class="author-info"]\r
-\r
-#strip tags and categories\r
-strip: //div[@class="department"]\r
-\r
-#strip product cap links\r
-strip: //div[@class="cap-main"]\r
-strip: //div[@id="compare-lede"]\r
+title: //div[@class='articleHead']//h1
+author: //div[@class="author-name"]/a[1]
+body: //div[@class="main"]
+
+# remove 'From the Lab' and 'Recent posts' text
+strip: //div[@class='blogLabel']
+
+# remove byline and meta info
+strip: //h1
+strip: //div[@class="article-meta"]
+strip: //div[@class="author-info"]
+
+#strip tags and categories
+strip: //div[@class="department"]
+
+#strip product cap links
+strip: //div[@class="cap-main"]
+strip: //div[@id="compare-lede"]
 test_url: http://www.pcworld.com/article/262034/are-printer-companies-gouging-us-on-laser-toner-pricing.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f97615f..a0d5099
@@ -1,23 +1,23 @@
-# 2012-01-14 carlo@... - fixed title, body; added author, date\r
-\r
-title: //div[@class="title"]/h2/a\r
-# body: //div[@class="post"]\r
-# author: //p[@class="iconEmail"]/a\r
-# date: //p[@class="iconDate"]\r
-\r
-# 1/24/2013 yosoyju - fixed author, date, and body, added support for PA Report\r
-\r
-# Penny Arcade\r
-\r
-author: //li[@class="iconEmail"]/a\r
-date: //li[@class="iconDate"]\r
-body: //div[@class="body"]\r
-\r
-# PA Report\r
-\r
-author: //div[@class="meta"]/p/a\r
-date: substring-after(//div[@class="meta"]/p, '/ ')\r
-title: substring-after(//title, '- ')\r
-\r
-test_url: http://penny-arcade.com/2012/01/13/i-put-some-news-in-your-news\r
+# 2012-01-14 carlo@... - fixed title, body; added author, date
+
+title: //div[@class="title"]/h2/a
+# body: //div[@class="post"]
+# author: //p[@class="iconEmail"]/a
+# date: //p[@class="iconDate"]
+
+# 1/24/2013 yosoyju - fixed author, date, and body, added support for PA Report
+
+# Penny Arcade
+
+author: //li[@class="iconEmail"]/a
+date: //li[@class="iconDate"]
+body: //div[@class="body"]
+
+# PA Report
+
+author: //div[@class="meta"]/p/a
+date: substring-after(//div[@class="meta"]/p, '/ ')
+title: substring-after(//title, '- ')
+
+test_url: http://penny-arcade.com/2012/01/13/i-put-some-news-in-your-news
 test_url: http://penny-arcade.com/report/editorial-article/the-dystopian-future-of-casual-games-personalized-targeted-pricing-and-mech
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a369fd6..5ba5f77
@@ -1,6 +1,6 @@
-prune: no\r
-tidy: no\r
-body: //div[@class='article-content']\r
-dissolve: //nobr/a\r
+prune: no
+tidy: no
+body: //div[@class='article-content']
+dissolve: //nobr/a
 dissolve: //nobr
 test_url: http://www.philadelphiaeagles.com/news/article-1/Jacksons-Light-Shined-On-Sunday-Night/51a862de-42b4-40f1-a5a8-ba0fb8a435b7
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 41318f6..accbd60
@@ -1,10 +1,10 @@
-title: //h1[@class='entry-title']\r
-author: //p[@class='byline']/span\r
-body: //@id='body-content'\r
-date: //div[@class='article_timestamp']/span\r
-\r
-strip: //@class=b-group\r
-strip: //*[contains(@style, 'none')]\r
-strip: //a[contains(@href, 'comments')]\r
+title: //h1[@class='entry-title']
+author: //p[@class='byline']/span
+body: //*[@id='body-content']
+date: //div[@class='article_timestamp']/span
+
+strip: //*[@class='b-group']
+strip: //*[contains(@style, 'none')]
+strip: //a[contains(@href, 'comments')]
 strip: //*[contains(@class, 'comment')]
 test_url: http://www.philly.com/philly/sports/eagles/20120127_Ohio_State_s_Posey_didn_t_waste_time_lost_to_suspension.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4e2ccb0..7f7e383
@@ -1,6 +1,6 @@
-author: substring-before(//div[@class='post_meta'],' on')\r
-date: substring-after(substring-before(//div[@class='post_meta'],'with'),' on')\r
-title: //h1[class='post_title']\r
-body: //div[@class='article']\r
+author: substring-before(//div[@class='post_meta'],' on')
+date: substring-after(substring-before(//div[@class='post_meta'],'with'),' on')
+title: //h1[@class='post_title']
+body: //div[@class='article']
 
 test_url: http://photo.tutsplus.com/articles/news/a-brilliant-beginners-guide-to-architectural-photography/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7c57a84..cc643f0
@@ -1,6 +1,6 @@
-body: //div[@id='content']\r
-strip_id_or_class: manualnavbar\r
-\r
-prune: no\r
+body: //div[@id='content']
+strip_id_or_class: manualnavbar
+
+prune: no
 
 test_url: http://www.php.net/manual/en/migration5.incompatible.php
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a816399..624055b
@@ -1,7 +1,7 @@
-title: //div[@class='abstitle']//h1\r
-author: //div[@class='authorList']\r
-body: //div[@id='fulltext_body']\r
-\r
-prune: no\r
+title: //div[@class='abstitle']//h1
+author: //div[@class='authorList']
+body: //div[@id='fulltext_body']
+
+prune: no
 
 test_url: http://www.physicstoday.org/resource/1/phtoad/v64/i10/p48_s1?bypassSSO=1
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/pinterest.com.txt b/inc/3rdparty/site_config/standard/pinterest.com.txt
new file mode 100755 (executable)
index 0000000..01b6df4
--- /dev/null
@@ -0,0 +1,5 @@
+title: //title
+body: //div[contains(@class, 'imageContainer')]
+
+test_url: http://pinterest.com/pin/380906080954441188/
+test_url: http://pinterest.com/michaelsorm/architecture/rss
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3decc53..eee96a9
@@ -1,16 +1,16 @@
-title:concat(//h1,' - ',//h2,' - ',//h3)\r
-author://address\r
-date://span[@class='pub-date']\r
-body://div[@id='main']\r
-single_page_link://link[@rel='canonical']\r
-strip://div[@class='info']\r
-strip_id_or_class:'object-grid related-content'\r
-strip_id_or_class:'object-prevnext'\r
-strip_id_or_class:'object-header'\r
-strip_id_or_class:'source'\r
-strip_id_or_class:'label'\r
-strip_id_or_class:'title'\r
-dissolve://ul\r
-strip://li[@class='next']\r
+title:concat(//h1,' - ',//h2,' - ',//h3)
+author://address
+date://span[@class='pub-date']
+body://div[@id='main']
+single_page_link://link[@rel='canonical']
+strip://div[@class='info']
+strip_id_or_class:'object-grid related-content'
+strip_id_or_class:'object-prevnext'
+strip_id_or_class:'object-header'
+strip_id_or_class:'source'
+strip_id_or_class:'label'
+strip_id_or_class:'title'
+dissolve://ul
+strip://li[@class='next']
 strip://li[@class='prev']
 test_url: http://pitchfork.com/features/why-we-fight/8796-on-the-far-slope-of-the-uncanny-valley/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9277707..c302526
@@ -1,8 +1,8 @@
-title: //h2[@class='post-title']\r
-author: substring-before(substring-after(//h3[@class='post-byline'],'By:'),'/')\r
-date: substring-before(substring-after(//p[@class='post-details'],'Posted on '),'in')\r
-strip: //h2[@class='post-title']\r
-strip: //p[@class='post-details']\r
-strip: //h3[@class='post-byline']\r
+title: //h2[@class='post-title']
+author: substring-before(substring-after(//h3[@class='post-byline'],'By:'),'/')
+date: substring-before(substring-after(//p[@class='post-details'],'Posted on '),'in')
+strip: //h2[@class='post-title']
+strip: //p[@class='post-details']
+strip: //h3[@class='post-byline']
 body: //div[@id='content']
 test_url: http://pittnews.com/newsstory/mens-basketball-pitt-recruit-robinson-to-bring-leadership/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 824cb06..f294852
@@ -1,15 +1,15 @@
-title: substring-before(//title,'pirates.com')\r
-date: //span[@class='timeStamp']\r
-author: substring-before(substring-after(//div[@class='byLine'],'By'),'/')\r
-body: //div[@id='article']\r
-#strip: //div[@class='inner']\r
-strip: //div[@id='article_head']\r
-strip: //p[@class='tagLine']\r
-strip: //div[@id='article_related_links']\r
-strip: //div[@id='article_related_mlb']\r
-strip: //div[@id='article_related_club']\r
-strip: //span[@class='more']\r
-strip: //div[@class='article_component']\r
-strip: //span[@class='screen_reader']\r
+title: substring-before(//title,'pirates.com')
+date: //span[@class='timeStamp']
+author: substring-before(substring-after(//div[@class='byLine'],'By'),'/')
+body: //div[@id='article']
+#strip: //div[@class='inner']
+strip: //div[@id='article_head']
+strip: //p[@class='tagLine']
+strip: //div[@id='article_related_links']
+strip: //div[@id='article_related_mlb']
+strip: //div[@id='article_related_club']
+strip: //span[@class='more']
+strip: //div[@class='article_component']
+strip: //span[@class='screen_reader']
 strip: //ul[@class='columnists_blurb']
 test_url: http://pittsburgh.pirates.mlb.com/news/article.jsp?ymd=20120330&content_id=27759040&vkey=news_pit&c_id=pit
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b3e6616..cc7891f
@@ -1,7 +1,7 @@
-title: substring-before(//title,'- Pittsburgh Tribune')\r
-author: substring-before(substring-after(//div[@class='byline'],'By '),',')\r
-date: substring-after(substring-after(//div[@class='byline'],','),',')\r
-body: //div[@id='storyBody']\r
-strip: //div[@class='morestories']\r
+title: substring-before(//title,'- Pittsburgh Tribune')
+author: substring-before(substring-after(//div[@class='byline'],'By '),',')
+date: substring-after(substring-after(//div[@class='byline'],','),',')
+body: //div[@id='storyBody']
+strip: //div[@class='morestories']
 dissolve: //p[@class='subheader']
 test_url: http://www.pittsburghlive.com/x/pittsburghtrib/sports/columnists/s_785654.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index dd715d8..4d02f6b
@@ -1,8 +1,8 @@
-title: //title\r
-author: substring-after(//div[@class='by-line'],'BY')\r
-\r
-body: //div[@id='article-body']\r
-\r
-strip: //div[@class='by-line']\r
+title: //title
+author: substring-after(//div[@class='by-line'],'BY')
+
+body: //div[@id='article-body']
+
+strip: //div[@class='by-line']
 strip: //div[@id='article-body']/h1
 test_url: http://www.pittsburghmagazine.com/Pittsburgh-Magazine/May-2012/Verde-Lights-the-Night/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6113b96..c372284
@@ -1,4 +1,4 @@
-title: //span[@class='StoryHeadline']\r
-strip: //div[@class='fivevert']\r
+title: //span[@class='StoryHeadline']
+strip: //div[@class='fivevert']
 body: //div[@id='Content']
 test_url: http://www.pittsburghpanthers.com/sports/m-baskbl/recaps/031412aaa.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3936310..571874a
@@ -1,8 +1,8 @@
-title: //h1[@class='articletitle']\r
-author: substring-after(//span[@class='author'],'by')\r
-date: //span[@class='created']\r
-body: //div[@class='article']\r
-strip: //div[@class='headline']\r
-strip: //p[@class='articleinfo']\r
+title: //h1[@class='articletitle']
+author: substring-after(//span[@class='author'],'by')
+date: //span[@class='created']
+body: //div[@class='article']
+strip: //div[@class='headline']
+strip: //p[@class='articleinfo']
 #dissolve: //p[@class='subheader']
 test_url: http://www.pittscriptblog.com/2012-articles/march/2012-football-opponents-set-and-the-attendance-dilemma.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/planetvita.de.txt b/inc/3rdparty/site_config/standard/planetvita.de.txt
new file mode 100755 (executable)
index 0000000..bfc3342
--- /dev/null
@@ -0,0 +1,5 @@
+title: //div[@id='frnRahmen']/div/div[@id='content']/div[2]/h2
+author: //div[@id='content']/div[1]/div/a
+body: //div[@id='content']/div[2]/span
+strip: //div[@id='commenthead']
+test_url: http://www.planetvita.de/news/10389-psn-store-update-vom-03-april-neue-inhalte-fuer-psvita.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 07b347a..9283494
@@ -1,6 +1,6 @@
-author: //article//*[@class="author"]\r
-date: //article//*[@class="publication-date"]\r
-body: //article\r
-strip: //article/header\r
+author: //article//*[@class="author"]
+date: //article//*[@class="publication-date"]
+body: //article
+strip: //article/header
 strip: //article/section
 test_url: http://www.playboy.com/playground/view/playboy-interview-jon-hamm
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 50a5dbf..4a7ea12
@@ -1,17 +1,17 @@
-body: //div[@id='contentPane']//div[@class='vg']\r
-body: //div[@id='contentPane']\r
-\r
-# Grab the author by finding the first profile pic, then backing up a node and getting the title of <a> tag which will be the author hopefully. Sorry can't test this due to parser errors, thanks google :(\r
-\r
-author: //div[@id='contentPane']//img[contains(@alt, 'profile photo')][1]/../@title\r
-\r
-\r
-strip: //*[@title="People who +1'd this"]/../..\r
-strip: //*[contains(@class, 'a-b-f-i-Hg-Uf')]\r
-strip: //*[@role='menu']\r
-strip: //img[contains(@alt, 'profile photo')]\r
-strip: //*[@class='a-f-i-Ad']\r
-\r
-tidy: no\r
-\r
+body: //div[@id='contentPane']//div[@class='vg']
+body: //div[@id='contentPane']
+
+# Grab the author by finding the first profile pic, then backing up a node and getting the title of <a> tag which will be the author hopefully. Sorry can't test this due to parser errors, thanks google :(
+
+author: //div[@id='contentPane']//img[contains(@alt, 'profile photo')][1]/../@title
+
+
+strip: //*[@title="People who +1'd this"]/../..
+strip: //*[contains(@class, 'a-b-f-i-Hg-Uf')]
+strip: //*[@role='menu']
+strip: //img[contains(@alt, 'profile photo')]
+strip: //*[@class='a-f-i-Ad']
+
+tidy: no
+
 test_url: http://plus.google.com/u/0/117840649766034848455/posts/FddaP6jeCqp
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index bb9be0a..ec151b4
@@ -1,4 +1,4 @@
-title: //h2[@class='jcw-pagetitle'\r
-date: //p[@class='postinfo']\r
+title: //h2[@class='jcw-pagetitle']
+date: //p[@class='postinfo']
 body: //div[@class='contenttext']
 test_url: http://plzkthxbai.com/blog/2011/06/28/1password-and-internet-security/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 880311d..65ddba5
@@ -1,4 +1,4 @@
-body: //div[@id="content"]/div[1]\r
-\r
+body: //div[@id="content"]/div[1]
+
 title: //h1[@class="entry-title"]
 test_url: http://pogue.blogs.nytimes.com/2011/05/12/the-future-of-skype/
\ No newline at end of file
index c5302d1bac527785345b3ec4e1f58fccff63d32c..d8f5e5758d973433b6fc992f4c35d28488400ce4 100755 (executable)
@@ -1,17 +1,13 @@
-title://div[contains(@class, "article")]/h1\r
-body://div[contains(@class,"story-text")]\r
-\r
-# Why doesn't this work? next_page_link://ul[contains(@class,"pagination")]/li/a[@rel="next"]\r
-\r
-next_page_link://ul[contains(@class,"pagination")]/li[contains(@class, "current")]/following-sibling::node()/a\r
-next_page_link://div[contains(@class,"pagination")]/ol/li[contains(@class, "current")]/following-sibling::node()/a\r
-date://meta[@name="publish_date"]/@content\r
-\r
-strip://div[contains(@class, "breadcrumbs")]\r
-strip://a[contains(@class, "hidden")]\r
-strip://div[contains(@class, "story-embed")]\r
+title://div[contains(@class, "article")]/h1
+body://div[contains(@class,"story-text")]
+
+# Why doesn't this work? next_page_link://ul[contains(@class,"pagination")]/li/a[@rel="next"]
+
+next_page_link://ul[contains(@class,"pagination")]/li[contains(@class, "current")]/following-sibling::node()/a
+date://meta[@name="publish_date"]/@content
+
+strip://div[contains(@class, "breadcrumbs")]
+strip://a[contains(@class, "hidden")]
+strip://div[contains(@class, "story-embed")]
 strip://div[contains(@class, "story-text")]//p/a[contains(text(), "Also on POLITICO:")]/..
-strip://div[contains(@class, "story-interrupt")]\r
-strip://footer[contains(@class, "author-bio")]\r
-\r
 test_url: http://www.politico.com/news/stories/0712/78105.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index fd247b5..65a8fc5
@@ -1,4 +1,4 @@
-body: //div[@id="content"]\r
-\r
+body: //div[@id="content"]
+
 strip: //div[@class="pfcontentmid"]/div[position()>4]|//div[@class="pfad"]
 test_url: http://www.politifact.com/truth-o-meter/statements/2011/may/30/barbara-boxer/barbara-boxer-says-medicare-overhead-far-lower-pri/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8deecbc..b13f8f8
@@ -1,13 +1,13 @@
-# 21/10-2011:\r
-# Added Author+Date\r
-# Remove fakta-boks if found\r
-# Deleted 'Læs også...' filter \r
-#  - Change in markup caused it to strip too much.\r
-\r
-author://span[@class='autor-name']\r
-date:substring-after(//div[@class='art-created'], ' ')\r
-title: //h1[contains(@class, 'stor-type')]\r
-body: //div[@id='art-body']\r
-strip: //div[@class='art-fakta article-box']\r
+# 21/10-2011:
+# Added Author+Date
+# Remove fakta-boks if found
+# Deleted 'Læs også...' filter 
+#  - Change in markup caused it to strip too much.
+
+author://span[@class='autor-name']
+date:substring-after(//div[@class='art-created'], ' ')
+title: //h1[contains(@class, 'stor-type')]
+body: //div[@id='art-body']
+strip: //div[@class='art-fakta article-box']
 
 test_url: http://politiken.dk/kultur/boger/skonlitteratur_boger/ECE1426386/makabre-tegneserie-zombier-aeder-alt-levende/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/polygon.com.txt b/inc/3rdparty/site_config/standard/polygon.com.txt
new file mode 100755 (executable)
index 0000000..8fe9b1b
--- /dev/null
@@ -0,0 +1,34 @@
+body: //div[@id='article-content']
+body: //article[@id='entry-top']/div[@class='float_wrapper']
+author: //header/p[@class='byline']/em/a
+date: //header/p[@class='byline']/span[@class='timestamp']
+
+strip: //div[@id='article-content']//header
+strip: //label
+
+#photos on left column (delete all)
+strip: //div[@class='big_photo']
+
+#photos on left column (remove extras used for scroll effect)
+#strip: //div[@class='big_photo']/div[./img]
+#strip: //div[@class='big_photo']/img[position()>1]
+
+strip_id_or_class: vox-lazy-load
+strip_id_or_class: social_buttons
+strip_id_or_class: feature_toc
+
+prune: no
+
+find_string: <noscript>
+replace_string: <div>
+find_string: </noscript>
+replace_string: </div>
+
+#find_string: <script
+#replace_string: <div style="display:none"
+#find_string: </script>
+#replace_string: </div>
+
+strip: //div[@class='float_wrapper']/header
+test_url: http://www.polygon.com/2013/4/5/4189028/donkey-kong-country-returns-3d-new-content
+test_url: http://www.polygon.com/features/2013/8/22/4602568/30-years-xbox-360-playstation-3-wii
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 85b7656..2582e6f
@@ -1,8 +1,8 @@
-next_page_link: //div[@id='longPagination']/a[@class='next']\r
-\r
-title: //div[@id='contentHeader']//h1\r
-\r
-body: //div[@id='articleBody']\r
-# this is so sad\r
+next_page_link: //div[@id='longPagination']/a[@class='next']
+
+title: //div[@id='contentHeader']//h1
+
+body: //div[@id='articleBody']
+# this is so sad
 body: //div[@id='intelliTXT']
 test_url: http://www.popularmechanics.com/technology/aviation/crashes/what-really-happened-aboard-air-france-447-6611877
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/portertech.ca.txt b/inc/3rdparty/site_config/standard/portertech.ca.txt
new file mode 100755 (executable)
index 0000000..2897cb5
--- /dev/null
@@ -0,0 +1,3 @@
+author: //*[(@class = "author")]
+date: //*[(@class = "date")]
+test_url: http://portertech.ca/2012/12/10/iac-morning-market/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 21cd833..f8eeb0a
@@ -1,19 +1,19 @@
-title: //div[@id="newsDetailTitle"]\r
-author: //span[@id="showAuthor"]\r
-date: //span[@id="showRefDate"]\r
-\r
-strip: //div[@id="breadcrumbs"]\r
-strip: //span[@id="PageTitle"]\r
-strip: //div[@id="newsDetailAuthorPublish"]\r
-\r
-strip: //div[@class="leadPix"]\r
-\r
-strip: //span[@id="ctl00_PageTitle"]\r
-strip: //div[@id="newsDetailTitle"]\r
-convert_double_br_tags:yes\r
-\r
-strip: //div[@id="newsDetailCredential"]\r
-strip: //div[@id="sidebar2"]\r
-strip: //div[@id="footer"]\r
+title: //div[@id="newsDetailTitle"]
+author: //span[@id="showAuthor"]
+date: //span[@id="showRefDate"]
+
+strip: //div[@id="breadcrumbs"]
+strip: //span[@id="PageTitle"]
+strip: //div[@id="newsDetailAuthorPublish"]
+
+strip: //div[@class="leadPix"]
+
+strip: //span[@id="ctl00_PageTitle"]
+strip: //div[@id="newsDetailTitle"]
+convert_double_br_tags:yes
+
+strip: //div[@id="newsDetailCredential"]
+strip: //div[@id="sidebar2"]
+strip: //div[@id="footer"]
 
 test_url: http://www.positioningmag.com/magazine/details.aspx?id=41083
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 1ea945a..baa9d69
@@ -1,26 +1,26 @@
-title: //div[@class='story_headline']\r
-author: substring-before(substring-after(//div[@class='story_byline'],'By'),'/')\r
-date: //div[@class='story_lastupdate'] \r
-body: //div[@id='story']\r
-strip: //div[@class='story_byline']\r
-strip: //div[@class='story_lastupdate']\r
-strip: //div[@class='story_headline']\r
-strip: //div[@id='abuse']\r
-strip: //h2\r
-strip: //div[@class='pagenumbers_wrap']\r
-strip: //ul[@class='pagenumbers']\r
-strip: //div[starts-with(., 'To report inappropriate comments')]\r
-\r
-strip_id_or_class: story_share\r
-strip_id_or_class: OUTBRAIN\r
-strip_id_or_class: story_box_right\r
-strip: //div[a[@href='http://www.post-gazette.com/pg/12062/1213990-42.stm']]\r
-strip: //ul[@id='pikame']/li[position()>1]\r
-\r
-prune: no\r
-tidy: no\r
-\r
-single_page_link: //a[contains(@href, '?p=0')]\r
-\r
-test_url: http://www.post-gazette.com/stories/sports/penguins/pens-crosby-expects-to-return-thursday-226648/\r
+title: //div[@class='story_headline']
+author: substring-before(substring-after(//div[@class='story_byline'],'By'),'/')
+date: //div[@class='story_lastupdate'] 
+body: //div[@id='story']
+strip: //div[@class='story_byline']
+strip: //div[@class='story_lastupdate']
+strip: //div[@class='story_headline']
+strip: //div[@id='abuse']
+strip: //h2
+strip: //div[@class='pagenumbers_wrap']
+strip: //ul[@class='pagenumbers']
+strip: //div[starts-with(., 'To report inappropriate comments')]
+
+strip_id_or_class: story_share
+strip_id_or_class: OUTBRAIN
+strip_id_or_class: story_box_right
+strip: //div[a[@href='http://www.post-gazette.com/pg/12062/1213990-42.stm']]
+strip: //ul[@id='pikame']/li[position()>1]
+
+prune: no
+tidy: no
+
+single_page_link: //a[contains(@href, '?p=0')]
+
+test_url: http://www.post-gazette.com/stories/sports/penguins/pens-crosby-expects-to-return-thursday-226648/
 test_url: http://www.post-gazette.com/stories/sports/pirates/pirates-fork-over-changes-for-fans-at-pnc-park-629789
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 86cb5d0..0f01149
@@ -1,15 +1,15 @@
-title: //div[@id='divAdnetKeyword']/h1\r
-body: //div[@id='_middle_content_bottom']\r
-\r
-wrap_in(fieldset)://div[@id='_middle_content_bottom_child2']/img\r
-\r
-strip: //div[@id='_middle_content_bottom_child1']\r
-strip: //div[@id='_middle_content_bottom_child4']\r
-strip: //div[@class='cls']\r
-strip: //div[@class='iphoneBox']\r
-strip: //ul[@class='ilgiliHaber']\r
-strip: //div[@class='yorumlar']\r
-strip: //div[@class='kategoriler']\r
-strip: //div[@class='textSize']\r
+title: //div[@id='divAdnetKeyword']/h1
+body: //div[@id='_middle_content_bottom']
+
+wrap_in(fieldset)://div[@id='_middle_content_bottom_child2']/img
+
+strip: //div[@id='_middle_content_bottom_child1']
+strip: //div[@id='_middle_content_bottom_child4']
+strip: //div[@class='cls']
+strip: //div[@class='iphoneBox']
+strip: //ul[@class='ilgiliHaber']
+strip: //div[@class='yorumlar']
+strip: //div[@class='kategoriler']
+strip: //div[@class='textSize']
 strip: //span[@class='tarih']
 test_url: http://www.posta.com.tr/yasam/teknoloji/HaberDetay/Fedailer_Istanbul_da.htm?ArticleID=101044
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7f7a503..3952ea9
@@ -1,8 +1,8 @@
-title: //h1\r
-date: /html/head/meta[@name="date"]/@content\r
-body: //div[@id="featuredlinksbox"]\r
-strip: //div[@class="relatedbox"]\r
-strip: //h1\r
-strip: //br\r
+title: //h1
+date: /html/head/meta[@name="date"]/@content
+body: //div[@id="featuredlinksbox"]
+strip: //div[@class="relatedbox"]
+strip: //h1
+strip: //br
 strip_image_src: "/images"
 test_url: http://www.prb.org/Journalists/Webcasts/2011/military-families.aspx
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 906c27a..9a49557
@@ -1,9 +1,9 @@
-title: //h1\r
-body: //div[@id='left']\r
-strip: //h1\r
-convert_double_br_tags: yes\r
-strip_id_or_class: entry-footer\r
-strip: //h1[. = 'Previously']/following::*\r
-author: string('James Hague')\r
+title: //h1
+body: //div[@id='left']
+strip: //h1
+convert_double_br_tags: yes
+strip_id_or_class: entry-footer
+strip: //h1[. = 'Previously']/following::*
+author: string('James Hague')
 date: //div[@class = 'entry-footer']/text()
 test_url: http://prog21.dadgum.com/105.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index cef811d..82ebf6b
@@ -1,4 +1,4 @@
-body: //div[@class='body']\r
-title: //h2[@class='title']\r
+body: //div[@class='body']
+title: //h2[@class='title']
 date: //span[@class='posted-on']
 test_url: http://prolost.com/blog/2011/10/13/real-men-comp-with-film.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 11e63bd..d141ac9
@@ -1,11 +1,11 @@
-title: //h1[@class="article-title"]\r
-author: //meta[@name="author"]/@content\r
-body: //div[@class="article-full"]\r
-strip_id_or_class: sidebar_inject\r
-strip_id_or_class: callout\r
-strip_id_or_class: content-inset\r
-strip_id_or_class: byline-block\r
-strip_id_or_class: photo-caption\r
-strip_id_or_class: foot-tools\r
+title: //h1[@class="article-title"]
+author: //meta[@name="author"]/@content
+body: //div[@class="article-full"]
+strip_id_or_class: sidebar_inject
+strip_id_or_class: callout
+strip_id_or_class: content-inset
+strip_id_or_class: byline-block
+strip_id_or_class: photo-caption
+strip_id_or_class: foot-tools
 
 test_url: http://www.propublica.org/article/pardon-applicants-benefit-from-friends-in-high-places
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index dedd33d..ba9ce8b
@@ -1,4 +1,4 @@
-author: //p[@class='name']\r
-date: substring-before(//p[@class='date'], ' | ')\r
+author: //p[@class='name']
+date: substring-before(//p[@class='date'], ' | ')
 body: //div[@class='news_single_item']
 test_url: http://www.prosa.dk/aktuelt/nyhed/artikel/internetaktivisten-uden-maske/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 19059c4..739d1b9
@@ -1,26 +1,26 @@
-#basics\r
-author: (//div[contains(@class,'author')])[1]\r
-date: substring-before(//a[@class='issue'], '&mdash;')\r
-#body://div[@class = 'entry']\r
-# use this until move_into support is ready\r
-body: //div[@class = 'entry' or @class='standfirst' or @class='lead_image']\r
-\r
-#moves header image and tagline into body\r
-move_into(//div[@class='entry']/div)://div[@class = 'lead_image']\r
-move_into(//div[@class='entry']/div)://div[@class = 'standfirst']\r
-\r
-\r
-# moves author info to end of text\r
-move_into(//p[strong[string(.) = 'Follow Prospect on Twitter']])://div[@id='sidebar_content']/p/em\r
-\r
-prune: no\r
-\r
-# strips social links\r
-strip_id_or_class:login-status\r
-strip_id_or_class:shareinpost\r
-strip_id_or_class:content_subscribe\r
-strip_id_or_class:postinfo\r
-strip_id_or_class:postutils\r
-strip_id_or_class:comments\r
-strip://strong[string(.) = 'Follow Prospect on Twitter']\r
+#basics
+author: (//div[contains(@class,'author')])[1]
+date: substring-before(//a[@class='issue'], '&mdash;')
+#body://div[@class = 'entry']
+# use this until move_into support is ready
+body: //div[@class = 'entry' or @class='standfirst' or @class='lead_image']
+
+#moves header image and tagline into body
+move_into(//div[@class='entry']/div)://div[@class = 'lead_image']
+move_into(//div[@class='entry']/div)://div[@class = 'standfirst']
+
+
+# moves author info to end of text
+move_into(//p[strong[string(.) = 'Follow Prospect on Twitter']])://div[@id='sidebar_content']/p/em
+
+prune: no
+
+# strips social links
+strip_id_or_class:login-status
+strip_id_or_class:shareinpost
+strip_id_or_class:content_subscribe
+strip_id_or_class:postinfo
+strip_id_or_class:postutils
+strip_id_or_class:comments
+strip://strong[string(.) = 'Follow Prospect on Twitter']
 test_url: http://www.prospectmagazine.co.uk/2011/07/postmodernism-is-dead-va-exhibition-age-of-authenticism/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/protothema.gr.txt b/inc/3rdparty/site_config/standard/protothema.gr.txt
new file mode 100755 (executable)
index 0000000..fae261b
--- /dev/null
@@ -0,0 +1,6 @@
+body: //a[contains(@rel, 'mainphotos')] | //div[contains(@class, 'article-content')]
+
+prune: no
+
+test_url: http://www.protothema.gr//politics/article/326464/diamadopoulou-floridis-kaminis-kai-boutaris-se-ekdilosi-ton-europaion-fileleutheron/
+test_url: http://www.protothema.gr/rss/news/politics/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3da3cea..1bb63c2
@@ -1,9 +1,9 @@
-title: //div[@class="page-title"]/h1\r
-author: //a[@title="View Bio"]\r
-date: substring-before(substring-after(//span[@class="submitted"], 'Published on '), ' by')\r
-strip://div[@class="page-title"]/h1\r
-strip://div[@class="article-abstract"]\r
-strip://div[@class="article-meta"]\r
-strip://div[@id="rightColumn"]\r
+title: //div[@class="page-title"]/h1
+author: //a[@title="View Bio"]
+date: substring-before(substring-after(//span[@class="submitted"], 'Published on '), ' by')
+strip://div[@class="page-title"]/h1
+strip://div[@class="article-abstract"]
+strip://div[@class="article-meta"]
+strip://div[@id="rightColumn"]
 strip://div[@id="inline-content-bottom-left"]
 test_url: http://www.psychologytoday.com/blog/how-happiness/201205/my-quibble-facebook
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index fa09947..8f32d7a
@@ -1,4 +1,4 @@
-author: //meta[@name="Author"]\r
-date: //meta[@name="Date"]\r
+author: //meta[@name="Author"]/@content
+date: //meta[@name="Date"]/@content
 strip: //h5
 test_url: http://www.publications.parliament.uk/pa/ld201011/ldhansrd/text/111109-0003.htm
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/publico.pt.txt b/inc/3rdparty/site_config/standard/publico.pt.txt
new file mode 100755 (executable)
index 0000000..bb6a05e
--- /dev/null
@@ -0,0 +1,12 @@
+title: //h1[@class="entry-title"]
+author: //span[@class="author"]
+body: //article[@itemtype="http://schema.org/Article"]
+date: //time[@itemprop="dateCreated"]
+
+strip: //header[@class="entry-header single-header"]
+strip: //aside[@class="entry-assets"]
+strip: //div[@class="entry-options entry-options-above group"]
+strip: //div[@class="entry-options entry-options-below group"]
+
+convert_double_br_tags: yes
+test_url: http://www.publico.pt/politica/noticia/passos-diz-que-se-limitacao-de-mandatos-fosse-para-todos-os-concelhos-estaria-claro-na-lei-1577691
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 126f9e2..0f1392a
@@ -1,4 +1,4 @@
-title: //div[@class='title']\r
-body: //div[@class='body']\r
+title: //div[@class='title']
+body: //div[@class='body']
 next_page_link: //div[@class='source']/text()[contains(., 'page')]/following-sibling::a
 test_url: http://purpleplanetmedia.com/eye/inte/ngaiman.php
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/qctimes.com.txt b/inc/3rdparty/site_config/standard/qctimes.com.txt
new file mode 100755 (executable)
index 0000000..3c3edfe
--- /dev/null
@@ -0,0 +1,5 @@
+# this site seems to work OK in the web view, but only occasionally in the instapaper app itself.
+
+body: //div[@class='entry-content']
+author: //span[@class='byline']
+test_url: http://qctimes.com/news/local/woman-faces-perjury-charges-in-meth-case/article_83f4c470-956a-11e2-a921-001a4bcf887a.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a366c1b..c17fb31
@@ -1,14 +1,14 @@
-title: //div[contains(@class, "hentry")]/h3\r
-\r
-author: //div[contains(@class, "hentry")]/h2[contains(@class, "author_bio")]\r
-\r
-date: substring-before(substring-after(normalize-space(//p[contains(@class, "postmetadata")]/small), "was posted on "), " and is filed under")\r
-\r
-body: //div[contains(@class, "entry")]\r
-\r
-strip_id_or_class: addtoany_share_save_container\r
-strip_id_or_class: postmetadata\r
-strip_id_or_class: author_bio\r
-strip_id_or_class: author_bio_2\r
+title: //div[contains(@class, "hentry")]/h3
+
+author: //div[contains(@class, "hentry")]/h2[contains(@class, "author_bio")]
+
+date: substring-before(substring-after(normalize-space(//p[contains(@class, "postmetadata")]/small), "was posted on "), " and is filed under")
+
+body: //div[contains(@class, "entry")]
+
+strip_id_or_class: addtoany_share_save_container
+strip_id_or_class: postmetadata
+strip_id_or_class: author_bio
+strip_id_or_class: author_bio_2
 strip: //div[contains(@class, "hentry")]/h3
 test_url: http://www.quantumdiaries.org/2011/10/25/piling-up/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 655f8b8..fc7ab37
@@ -1,3 +1,3 @@
-body: //div[@class='copy']\r
+body: //div[@class='copy']
 title: //h1[@class='hed']
 test_url: http://www.queerty.com/rawhide-radicals-meet-five-heroes-from-the-leather-community-20120302/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index fae4e6a..fb09a8f
@@ -1,6 +1,6 @@
-title: //h1\r
-\r
-body: //div[@class="cuerpoArticulo"]\r
-\r
+title: //h1
+
+body: //div[@class="cuerpoArticulo"]
+
 
 test_url: http://www.quepasa.cl/magazine/articulo/print.html?id=5299
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3d34f2f..732d12d
@@ -1,17 +1,17 @@
-tidy: no\r
-prune: no\r
-body: //div[contains(@class, 'main_col')]\r
-title: //h1\r
-\r
-strip_id_or_class: hidden\r
-strip_id_or_class: item_action_bar\r
-strip_id_or_class: answer_voters\r
-strip_id_or_class: question_topics\r
-strip_id_or_class: answer_header_text\r
-strip_id_or_class: editor_link\r
-strip_id_or_class: view_tag\r
-strip_id_or_class: include_details\r
-strip_id_or_class: sig_edit\r
-strip_id_or_class: profile_photo_img\r
+tidy: no
+prune: no
+body: //div[contains(@class, 'main_col')]
+title: //h1
+
+strip_id_or_class: hidden
+strip_id_or_class: item_action_bar
+strip_id_or_class: answer_voters
+strip_id_or_class: question_topics
+strip_id_or_class: answer_header_text
+strip_id_or_class: editor_link
+strip_id_or_class: view_tag
+strip_id_or_class: include_details
+strip_id_or_class: sig_edit
+strip_id_or_class: profile_photo_img
 
 test_url: http://www.quora.com/What-everyday-habit-do-you-wish-you-had-developed-earlier-in-life
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/racjonalista.pl.txt b/inc/3rdparty/site_config/standard/racjonalista.pl.txt
new file mode 100755 (executable)
index 0000000..19c719d
--- /dev/null
@@ -0,0 +1,5 @@
+author: /html/body/center/b
+date: /html/body/table/tr[2]/td/i
+single_page_link: //*[@id='oTxt']/table[3]/tr[2]/td/a[1]
+
+test_url: http://www.racjonalista.pl/kk.php/s,7214/q,Geneza.szubrawstwa
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 99ab4bb..fa66b81
@@ -1,3 +1,3 @@
-date://span[@class='date']\r
+date://span[@class='date']
 body://div[@class='entry-body']
 test_url: http://radar.oreilly.com/2012/01/genome-cloud-digital-humanities-hadoop-world-strata.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e2617dc..2496dda
@@ -1,3 +1,3 @@
-body: //div[@class='body']\r
+body: //div[@class='body']
 title: //div[@class='newsstory']/h2
 test_url: http://www.radionz.co.nz/news/stories/2010/07/18/12481029a86d
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f0c91c5..6970a74
@@ -1,11 +1,11 @@
-title: //div[@id='center-col']/h4\r
-author: substring-before(//title,'In')\r
-date: substring-after(//div[@class='commenttext']/span,'#')\r
-body: //div[@id='center-col']\r
-strip: //div[@id='center-col']/h4\r
-strip: //div[@class='graytext']\r
-\r
-# Anthony Perez-Sanz 2012.3.14\r
-# Removed long gif from the end\r
-strip: //img[@src='http://www.randsinrepose.com/spreader.gif']\r
+title: //div[@id='center-col']/h4
+author: substring-before(//title,'In')
+date: substring-after(//div[@class='commenttext']/span,'#')
+body: //div[@id='center-col']
+strip: //div[@id='center-col']/h4
+strip: //div[@class='graytext']
+
+# Anthony Perez-Sanz 2012.3.14
+# Removed long gif from the end
+strip: //img[@src='http://www.randsinrepose.com/spreader.gif']
 test_url: http://www.randsinrepose.com/archives/2012/03/13/hacking_is_important.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8033729..2d5aba7
@@ -1,3 +1,3 @@
-single_page_link: //link[@rel='canonical']/@href\r
+single_page_link: //link[@rel='canonical']/@href
 
 test_url: http://www.readability.com/read?url=http://feeds.gawker.com/~r/lifehacker/full/~3/jaxAjSay_Rw/add-a-rain-gutter-to-a-picnic-table-for-a-built+in-drink-cooler
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ff799aa..e2aabda
@@ -1,8 +1,8 @@
-title: //h1[@class="titlelink"]\r
-date: //span[@class="timestamp"]/@data-published\r
-body: //div[@class="asset-content"]\r
-strip_id_or_class: related-entries\r
-strip_id_or_class: like-and-retweet\r
-\r
-author: //div[@id="submeta"]/a[1]\r
+title: //h1[@class="titlelink"]
+date: //span[@class="timestamp"]/@data-published
+body: //div[@class="asset-content"]
+strip_id_or_class: related-entries
+strip_id_or_class: like-and-retweet
+
+author: //div[@id="submeta"]/a[1]
 test_url: http://www.readwriteweb.com/archives/why_facebook_terrifies_google.php
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index fe5ab67..1a33610
@@ -1,3 +1,3 @@
-body: //div[@id='_ctl12__ctl0_Article']\r
-prune: no\r
+body: //div[@id='_ctl12__ctl0_Article']
+prune: no
 autodetect_on_failure: no
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8c8f0e0..a01aaef
@@ -1,10 +1,10 @@
-body: //div[@class='recipedetailsleft' or @id='recipePrepAndServe' or @id='recipeingredients']\r
-\r
-strip_id_or_class: location\r
-strip_id_or_class: savings\r
-strip_id_or_class: recipeDetailDescButton\r
-\r
-prune: no\r
-tidy: no\r
-\r
+body: //div[@class='recipedetailsleft' or @id='recipePrepAndServe' or @id='recipeingredients']
+
+strip_id_or_class: location
+strip_id_or_class: savings
+strip_id_or_class: recipeDetailDescButton
+
+prune: no
+tidy: no
+
 test_url: http://www.recipe.com/avocado-basil-pasta/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3ae959b..0403ee8
@@ -1,5 +1,5 @@
-body: //div[@class='short-text' or starts-with(@id, 'news-id-')]\r
-prune: no\r
-tidy: no\r
-\r
+body: //div[@class='short-text' or starts-with(@id, 'news-id-')]
+prune: no
+tidy: no
+
 test_url: http://red-hot-girls.com/2011/06/10/the_red_hot_natalia_maria_53_pics.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 58ca9ec..8871f56
@@ -1,16 +1,20 @@
-# This setup grabs the text from a Reddit self post. It ignores all comments etc.\r
-\r
-title: //p[@class="title"]/a/text()\r
-\r
-author: //p[@class="tagline"]/a\r
-\r
-# this doesn't work for some reason...?\r
-date: //p[@class="tagline"]//@datetime\r
-\r
-body: //div[@class="expando"]//div[@class="usertext-body"]\r
-\r
-strip_id_or_class: tagline\r
-strip_id_or_class: unvotable-message\r
-strip_id_or_class: buttons\r
-\r
-test_url: http://www.reddit.com/r/truegaming/comments/wfe7r/i_wrote_about_the_problems_i_honestly_feel_that/
\ No newline at end of file
+# This setup grabs the text from a Reddit self post. It ignores all comments etc.
+
+title: //p[@class="title"]/a/text()
+
+author: //p[@class="tagline"]/a
+
+# this doesn't work for some reason...?
+date: //p[@class="tagline"]//@datetime
+
+body: //div[@class="expando"]//div[@class="usertext-body"]
+
+strip_id_or_class: tagline
+strip_id_or_class: unvotable-message
+strip_id_or_class: buttons
+
+# follow the posted link (unless it's a self post - relative URL, no http://)
+single_page_link: //p[@class="title"]/a[contains(@href, 'http://')]
+
+test_url: http://www.reddit.com/r/truegaming/comments/wfe7r/i_wrote_about_the_problems_i_honestly_feel_that/
+test_url: http://www.reddit.com/r/worldnews/comments/1as37r/twelve_north_korean_soldiers_attempting_to_defect/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 12a9618..66cc170
@@ -1,13 +1,13 @@
-title: //div[@class='posthead']//h2\r
-body: //div[contains(@class, 'postcontent') or @class='posthead']\r
-author: //div[@class='posthead']//a[@rel='author']\r
-\r
-strip: //div[@class='posthead']//h2\r
-replace_string(>Advertisements</div>): ></div>\r
-replace_string(<p>You can follow us on): <p style="display:none;">\r
-strip_id_or_class: likeThisPost\r
-\r
-prune: no\r
-tidy: no\r
-\r
+title: //div[@class='posthead']//h2
+body: //div[contains(@class, 'postcontent') or @class='posthead']
+author: //div[@class='posthead']//a[@rel='author']
+
+strip: //div[@class='posthead']//h2
+replace_string(>Advertisements</div>): ></div>
+replace_string(<p>You can follow us on): <p style="display:none;">
+strip_id_or_class: likeThisPost
+
+prune: no
+tidy: no
+
 test_url: http://www.redmondpie.com/how-to-play-music-directly-from-home-screen-folders-on-iphone/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4f195a0..8541a0d
@@ -1,20 +1,20 @@
-# Think there might be something up with your parser that it strips out 'print' from the title :)\r
-\r
-title: //meta[@name='title']/@content\r
-author: //meta[@name='author']/@content\r
-date: //meta[@name='date']/@content\r
-\r
-body: //div[@class='articleText']\r
-\r
-strip: //div[contains(@class, 'day')]\r
-strip: //div[contains(@class, 'month')]\r
-strip: //div[contains(@class, 'year')]\r
-strip: //div[contains(@class, 'time')]\r
-strip: //h1[@class='gl_headline']\r
-strip: //div[@class='byline']\r
-strip: //div[@id='left_ear']\r
-strip: //div[@id='right_ear']\r
-strip: //div[contains(@class, 'PopularPosts')]\r
-strip ://div[@class='discuss_page_break']\r
+# Think there might be something up with your parser that it strips out 'print' from the title :)
+
+title: //meta[@name='title']/@content
+author: //meta[@name='author']/@content
+date: //meta[@name='date']/@content
+
+body: //div[@class='articleText']
+
+strip: //div[contains(@class, 'day')]
+strip: //div[contains(@class, 'month')]
+strip: //div[contains(@class, 'year')]
+strip: //div[contains(@class, 'time')]
+strip: //h1[@class='gl_headline']
+strip: //div[@class='byline']
+strip: //div[@id='left_ear']
+strip: //div[@id='right_ear']
+strip: //div[contains(@class, 'PopularPosts')]
+strip ://div[@class='discuss_page_break']
 strip ://div[contains(@class, 'p-content_TagList')]
 test_url: http://redtape.msnbc.msn.com/_news/2011/09/28/8020661-sprint-raises-fee-but-wont-free-users-from-two-year-contracts?preview=true
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4a9fab6..98a2bbf
@@ -1,5 +1,5 @@
-body://div[@class='storycontent']\r
-date://div[@class='date']\r
-strip://li[@class='sharing_label']\r
+body://div[@class='storycontent']
+date://div[@class='date']
+strip://li[@class='sharing_label']
 strip://a[@class='FlattrButton']
 test_url: http://reflets.info/orange-nokia-siemens-deep-packet-inspection/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0b3dee1..a5361fd
@@ -1,3 +1,3 @@
-title: //*[@class='entry-title']\r
+title: //*[@class='entry-title']
 body: //div[@class='entry-content']
 test_url: http://www.renenekuda.cz/recept-na-produktivitu/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/resume.se.txt b/inc/3rdparty/site_config/standard/resume.se.txt
new file mode 100755 (executable)
index 0000000..17122a9
--- /dev/null
@@ -0,0 +1,9 @@
+date: //meta[@name='bi3dPubDate']/@content
+body: //div[contains(@class, 'articleBody')]
+
+prune: no
+
+test_url: http://www.resume.se/nyheter/media/2013/09/18/kvallspress-och-tv-slass-om-playtittarna-men-youtube-ohotat-storst/
+test_url: http://www.resume.se/nyheter/media/2013/09/18/cecilia-blankens-lamnar-mama-for-konkurrent/
+test_url: http://www.resume.se/nyheter/reklam/2013/09/18/ravelli-trodde-jag-var-med-i-blasningen/
+test_url: http://www.resume.se/rss-nyheter
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 1264ee3..a0a2394
@@ -1,6 +1,6 @@
-single_page_link://a[contains(@href, 'print')]\r
-\r
-# Grab metadata from the "printer-friendly" page, after specifying single_page_link\r
-title://h2\r
+single_page_link://a[contains(@href, 'print')]
+
+# Grab metadata from the "printer-friendly" page, after specifying single_page_link
+title://h2
 date://cite
 test_url: http://www.retrieverweekly.com/?cmd=displaystory&story_id=7548&format=html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c5c94a4..7411e62
@@ -1,10 +1,10 @@
-title: //h1[@class='headline3']\r
-author: substring-after(//p[@class="byline"], 'By ')\r
-date: //meta[@name="REVISION_DATE"]/@content\r
-body: //div[@id='articleImage' or @id='frame_fd1fade'] | //span[@id='articleText'] | //div[@class='pageNavigation']\r
-strip: //li[@class='next']\r
-strip: //span[@class='articleLocation']\r
-prune: no\r
-tidy: no\r
-\r
+title: //h1[@class='headline3']
+author: substring-after(//p[@class="byline"], 'By ')
+date: //meta[@name="REVISION_DATE"]/@content
+body: //div[@id='articleImage' or @id='frame_fd1fade'] | //span[@id='articleText'] | //div[@class='pageNavigation']
+strip: //li[@class='next']
+strip: //span[@class='articleLocation']
+prune: no
+tidy: no
+
 test_url: http://www.reuters.com/article/2011/04/08/us-ivorycoast-killings-idUSTRE73732A20110408
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index dbe4293..30e627d
@@ -1,10 +1,10 @@
-title: //div[@class="article_header"]/h3\r
-author: //div[@class="autor"]/p/*\r
-date: substring-after(substring-after(//div[@class="flt-left"],"> "), "> ")\r
-\r
-move_into(//div[@class="new_article"]): //div[@class="img_article"]/img\r
-\r
-body: //div[@class="article_content"]\r
-convert_double_br_tags: yes\r
+title: //div[@class="article_header"]/h3
+author: //div[@class="autor"]/p/*
+date: substring-after(substring-after(//div[@class="flt-left"],"> "), "> ")
+
+move_into(//div[@class="new_article"]): //div[@class="img_article"]/img
+
+body: //div[@class="article_content"]
+convert_double_br_tags: yes
 
 test_url: http://revistapiaui.estadao.com.br/edicao-68/questoes-latino-americanas/filhos-da-guerra-suja
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/rezeptwelt.de.txt b/inc/3rdparty/site_config/standard/rezeptwelt.de.txt
new file mode 100644 (file)
index 0000000..2093573
--- /dev/null
@@ -0,0 +1,5 @@
+body: //div[@class='step-content'] | //div[@class='global-active ingredients-box']
+title: //div[@class='step-1-container']
+
+tidy: no
+test_url: http://www.rezeptwelt.de/backen-herzhaft-rezepte/w%C3%BCrstchen-schlangen/530372
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 904a11d..b0ee92d
@@ -1,5 +1,5 @@
-body: //div[@id="post"]\r
-strip: //div[@id="author-description"]\r
-date: //span[@class="entry-date"]\r
+body: //div[@id="post"]
+strip: //div[@id="author-description"]
+date: //span[@class="entry-date"]
 author: //span[@class="author vcard"]
 test_url: http://richardmuscat.wordpress.com/2011/06/20/the-price-of-free/?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+TheBrooksReview+%28The+Brooks+Review%29
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 82cfaf2..ed72915
@@ -1,5 +1,5 @@
-body: //div[@class='post-body entry-content']\r
-strip: //div[@id='lws_0']\r
-prune: no\r
-\r
+body: //div[@class='post-body entry-content']
+strip: //div[@id='lws_0']
+prune: no
+
 test_url: http://ritemail.blogspot.com/2011/06/hayden-panettiere-candids-in-los.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/ritholtz.com.txt b/inc/3rdparty/site_config/standard/ritholtz.com.txt
new file mode 100755 (executable)
index 0000000..d598479
--- /dev/null
@@ -0,0 +1,5 @@
+title: //div[@class='post']/h2
+author: substring-before(substring-after(//div[@class='alignright']/small, 'By '),'-')
+date: substring-after(//div[@class='alignright']/small, '-')
+strip: //div[@class='alignleft']
+test_url: http://www.ritholtz.com/blog/2012/09/situational-awareness/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/robertsspaceindustries.com.txt b/inc/3rdparty/site_config/standard/robertsspaceindustries.com.txt
new file mode 100755 (executable)
index 0000000..b0b90fb
--- /dev/null
@@ -0,0 +1,4 @@
+strip_id_or_class: 'sharedaddy'
+strip_id_or_class: 'respond'
+strip_id_or_class: 'meta'
+test_url: http://www.robertsspaceindustries.com/news-update-ai-pilots/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/robots.thoughtbot.com.txt b/inc/3rdparty/site_config/standard/robots.thoughtbot.com.txt
new file mode 100755 (executable)
index 0000000..da5b7bd
--- /dev/null
@@ -0,0 +1,5 @@
+body: //section[@class='post text']
+title: //h1[@class='title']
+date: //p[@class='post-date']
+strip: //section[@class='meta-info']
+test_url: http://robots.thoughtbot.com/post/32455387133/four-phase-test
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3035527..f8c9541
@@ -1,8 +1,8 @@
-title: //h2\r
-\r
-strip: //div[ contains(@class, 'respond') ]  |  //h2  |  //h1\r
-\r
-date: substring-after(//p[@class='info'], ' on ')\r
-\r
+title: //h2
+
+strip: //div[ contains(@class, 'respond') ]  |  //h2  |  //h1
+
+date: substring-after(//p[@class='info'], ' on ')
+
 author: //p[@class='info']//a
 test_url: http://www.rockpapershotgun.com/2010/07/29/rps-half-verdict-starcraft-2/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index abe7035..eef8b11
@@ -1,7 +1,7 @@
-author: //article/header/span[@class='author']\r
-title://article/header/h1\r
-body: //article\r
-strip: //article/header\r
-strip: //article/p[@class='metadata']\r
+author: //article/header/span[@class='author']
+title://article/header/h1
+body: //article
+strip: //article/header
+strip: //article/p[@class='metadata']
 footnotes: yes
 test_url: http://rodrigo.sharpcube.com/2010/06/20/using-and-sharing-a-vpn-connection-on-your-mac/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 2679233..da21510
@@ -1,8 +1,8 @@
-title: substring-before(//title,':')\r
-author: substring-after(substring-before(//div[@class='text']/b,'/'),'BY')\r
-\r
-body: //div[@class='text']\r
-\r
-strip: //a[contains(@href,'printart')]\r
+title: substring-before(//title,':')
+author: substring-after(substring-before(//div[@class='text']/b,'/'),'BY')
+
+body: //div[@class='text']
+
+strip: //a[contains(@href,'printart')]
 strip_id_or_class: enlarge_photo
 test_url: http://rogerebert.com/apps/pbcs.dll/article?AID=/20120411/REVIEWS/120419998/1005/GLOSSARY
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d618c23..2365c42
@@ -1,6 +1,6 @@
-body: //div[contains(@class, 'inhoud')]\r
-date: //span[@class ='published']\r
-author: //span[@class ='author']\r
-strip: //div[@class = 'grid_2']\r
-strip: //div[@class = 'block-citation-text']\r
+body: //div[contains(@class, 'inhoud')]
+date: //span[@class ='published']
+author: //span[@class ='author']
+strip: //div[@class = 'grid_2']
+strip: //div[@class = 'block-citation-text']
 test_url: http://www.rolfinjapan.nl/2011/06/duizend-kraanvogels/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b5b29fe..ef32769
@@ -1,11 +1,11 @@
-body: //div[@class='movie_content_area']\r
-strip_id_or_class: tomatometer_bar_help\r
-strip_id_or_class: critic-links\r
-strip_id_or_class: top-critics-numbers\r
-strip_id_or_class: fan_side\r
-strip_id_or_class: fblike\r
-strip_id_or_class: rating_widget\r
-strip_id_or_class: friend_reviews\r
-prune: no\r
+body: //div[@class='movie_content_area']
+strip_id_or_class: tomatometer_bar_help
+strip_id_or_class: critic-links
+strip_id_or_class: top-critics-numbers
+strip_id_or_class: fan_side
+strip_id_or_class: fblike
+strip_id_or_class: rating_widget
+strip_id_or_class: friend_reviews
+prune: no
 
 test_url: http://www.rottentomatoes.com/m/thor/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f2f0039..a012a67
@@ -1,5 +1,5 @@
-body: //div[@class='content']\r
-strip: //p[@class='postmeta']/following::*\r
-strip: //p[@class='postmeta']\r
+body: //div[@class='content']
+strip: //p[@class='postmeta']/following::*
+strip: //p[@class='postmeta']
 strip: //p[@align='left']
 test_url: http://www.roughtype.com/archives/2012/01/power_to_the_da.php
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e7f29bb..9ddbf0f
@@ -1,4 +1,4 @@
-body:  //div[@id='news-text']\r
-prune: no\r
-test_url: http://www.rpgsite.net/news/1964-tetsuya-nomura-says-hell-soon-show-the-future-of-final-fantasy\r
+body:  //div[@id='news-text']
+prune: no
+test_url: http://www.rpgsite.net/news/1964-tetsuya-nomura-says-hell-soon-show-the-future-of-final-fantasy
 test_url: http://www.rpgsite.net/news/1965-new-atelier-totori-plus-screens-and-artwork
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d9df768..d9d9a43
@@ -1,9 +1,9 @@
-author: //div[contains(@class, 'author_text')]/h4/text()\r
-date: //li[@class='date']\r
-\r
-# stripping excessive tags\r
-strip: //div[contains(@class, 'entry_meta')]\r
-strip: //div[contains(@class, 'single_meta')]\r
-strip: //br[contains(@class, 'clear')]\r
+author: //div[contains(@class, 'author_text')]/h4/text()
+date: //li[@class='date']
+
+# stripping excessive tags
+strip: //div[contains(@class, 'entry_meta')]
+strip: //div[contains(@class, 'single_meta')]
+strip: //br[contains(@class, 'clear')]
 strip: //h3[contains(., 'Komentarz')]
 test_url: http://rubysfera.pl/2011/09/10-porad-o-rvm/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7a21c4a..e54b0f0
@@ -1,6 +1,6 @@
-title: //h1[@class='entry-title']\r
-author: ///span[@class='author vcard']\r
-date: //abbr[@class='published']\r
-body: //div[@class='entry-content']\r
+title: //h1[@class='entry-title']
+author: ///span[@class='author vcard']
+date: //abbr[@class='published']
+body: //div[@class='entry-content']
 
 test_url: http://ruhlman.com/2009/05/cookbooks-that-teach/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c036dcf..43e130a
@@ -1,3 +1,3 @@
-author: //a[@class='author']\r
+author: //a[@class='author']
 tidy: no
 test_url: http://ruttloff.org/2012/06/13/intervention
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 04f8afd..2b47f74
@@ -1,11 +1,11 @@
-title: //meta[@property='og:title']/@content\r
-author: (//span[@class="byline"]/a)[1]\r
-date: //span[contains(@class, "toLocalTime")]\r
-body: (//div[contains(@class, "articleInner")]//img[contains(@src, 'media.salon.com') and contains(@src, '460x')])[1] | //div[contains(@class, "articleContent") or contains(@class, "writerMeta")]\r
-\r
-prune: no\r
-\r
-# deal with singleton links\r
-single_page_link: (//h1/a[contains(@href, '/singleton')])[1]\r
-\r
+title: //meta[@property='og:title']/@content
+author: (//span[@class="byline"]/a)[1]
+date: //span[contains(@class, "toLocalTime")]
+body: (//div[contains(@class, "articleInner")]//img[contains(@src, 'media.salon.com') and contains(@src, '460x')])[1] | //div[contains(@class, "articleContent") or contains(@class, "writerMeta")]
+
+prune: no
+
+# deal with singleton links
+single_page_link: (//h1/a[contains(@href, '/singleton')])[1]
+
 test_url: http://www.salon.com/2011/10/25/occupying_the_rust_belt/singleton/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3106748..464f99f
@@ -1,6 +1,6 @@
-body: //p[@class='teaser1 darkgrey myriad']\r
-move_into(//p[@class='teaser1 darkgrey myriad']): //div[@class='artikel clear']\r
-strip: //div[@class='hidden']\r
-strip: //div[@id='article_related_source']\r
-\r
+body: //p[@class='teaser1 darkgrey myriad']
+move_into(//p[@class='teaser1 darkgrey myriad']): //div[@class='artikel clear']
+strip: //div[@class='hidden']
+strip: //div[@id='article_related_source']
+
 test_url: http://www.salzburg.com/nachrichten/oesterreich/politik/sn/artikel/deutliche-nachbesserungen-bei-lehrerdienstrecht-19469/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sanpedrosun.com.txt b/inc/3rdparty/site_config/standard/sanpedrosun.com.txt
new file mode 100755 (executable)
index 0000000..3f19cce
--- /dev/null
@@ -0,0 +1,10 @@
+title: //div[contains(@class, 'post')]//h1
+date: //div[contains(@class, 'post')]//h6
+body: //div[contains(@class, 'entry')]
+strip_id_or_class: post_stats
+strip_id_or_class: related-posts
+strip_id_or_class: after_story
+prune: no
+
+test_url: http://www.sanpedrosun.com/community-and-society/2013/06/05/little-angelspre-school-talent-show/
+test_url: http://www.sanpedrosun.com/feed/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 354f591..5a5605d
@@ -1,25 +1,25 @@
-title://h1\r
-\r
-# my section divs seem to interfere with the Instapaper parser, so I ditch 'em\r
-dissolve://div[contains(@class, 'section')]\r
-\r
-#these don't seem to be necessary, but just in case\r
-strip_id_or_class:'masthead'\r
-strip_id_or_class:'footer'\r
-\r
-#again, Instapaper seems to understand where my content is, but just in case\r
-body://div[@id='content']\r
-\r
-# in general, I want the Instapaper view to look like my print CSS, so I remove things specified for the screen or non-printing\r
-strip_id_or_class:'screen-only'\r
-strip_id_or_class:'no-print'\r
-\r
-#other misc removals and simplifications\r
-strip_id_or_class:'popup'\r
-strip_id_or_class:'ZoomSpin'\r
-\r
-#I have a lot of content in sidebars and "meta" asides that can work inline just fine, but has to be distinguished somehow with some minimal formatting, so I put them in blockquotes\r
-wrap_in(blockquote)://div[contains(@class, 'sidebar')]\r
-wrap_in(blockquote)://div[contains(@class, 'meta')]\r
+title://h1
+
+# my section divs seem to interfere with the Instapaper parser, so I ditch 'em
+dissolve://div[contains(@class, 'section')]
+
+#these don't seem to be necessary, but just in case
+strip_id_or_class:'masthead'
+strip_id_or_class:'footer'
+
+#again, Instapaper seems to understand where my content is, but just in case
+body://div[@id='content']
+
+# in general, I want the Instapaper view to look like my print CSS, so I remove things specified for the screen or non-printing
+strip_id_or_class:'screen-only'
+strip_id_or_class:'no-print'
+
+#other misc removals and simplifications
+strip_id_or_class:'popup'
+strip_id_or_class:'ZoomSpin'
+
+#I have a lot of content in sidebars and "meta" asides that can work inline just fine, but has to be distinguished somehow with some minimal formatting, so I put them in blockquotes
+wrap_in(blockquote)://div[contains(@class, 'sidebar')]
+wrap_in(blockquote)://div[contains(@class, 'meta')]
 wrap_in(blockquote)://p[contains(@class, 'meta')]
 test_url: http://saveyourself.ca/tutorials/low-back-pain.php
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sayidaty.net.txt b/inc/3rdparty/site_config/standard/sayidaty.net.txt
new file mode 100755 (executable)
index 0000000..2d9f188
--- /dev/null
@@ -0,0 +1,4 @@
+date: //meta[@property='article:published_time']/@content
+body: (//div[contains(@class, 'article-slider')]//img)[1] | //div[contains(@class, 'bottom-article-con')]
+
+test_url: http://www.sayidaty.net/taxonomy/term/10/all/feed
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c213843..41b3675
@@ -1,28 +1,28 @@
-title: //h1[@id='stream_title']\r
-\r
-# Author and date don't work\r
-author: //div[@class='byline']\r
-date: //div[@class='date-stamp']\r
-\r
-body: //div[@class='node-article']\r
-\r
-strip_id_or_class: fb-like-box\r
-strip_id_or_class: stream-fb-like\r
-strip_id_or_class: social-meta\r
-strip_id_or_class: social-spoken\r
-strip_id_or_class: twitter-share-button\r
-strip_id_or_class: twitter-follow-button\r
-strip_id_or_class: spinner_node_list\r
-strip_id_or_class: node-sort-link\r
-strip_id_or_class: stream_title\r
-strip_id_or_class: stream_summary\r
-strip_id_or_class: update-count-container\r
-strip_id_or_class: major-updates\r
-strip_id_or_class: newsletter-slide\r
-strip_id_or_class: author-mini-profile\r
-strip_id_or_class: byline\r
-strip_id_or_class: header\r
-strip_id_or_class: footer\r
-\r
+title: //h1[@id='stream_title']
+
+# Author and date don't work
+author: //div[@class='byline']
+date: //div[@class='date-stamp']
+
+body: //div[@class='node-article']
+
+strip_id_or_class: fb-like-box
+strip_id_or_class: stream-fb-like
+strip_id_or_class: social-meta
+strip_id_or_class: social-spoken
+strip_id_or_class: twitter-share-button
+strip_id_or_class: twitter-follow-button
+strip_id_or_class: spinner_node_list
+strip_id_or_class: node-sort-link
+strip_id_or_class: stream_title
+strip_id_or_class: stream_summary
+strip_id_or_class: update-count-container
+strip_id_or_class: major-updates
+strip_id_or_class: newsletter-slide
+strip_id_or_class: author-mini-profile
+strip_id_or_class: byline
+strip_id_or_class: header
+strip_id_or_class: footer
+
 # Works, but "no text" errors on: http://www.sbnation.com/nba/2012/3/9/2856780/nba-scores-dwight-howard-bulls-magic-mavs-suns
 test_url: http://www.sbnation.com/nba/2012/3/13/2867226/dwight-howard-trade-rumors-2012-faq-orlando-magic
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 67181b6..0074a86
@@ -1,25 +1,25 @@
-author: //p[@class='mastname']\r
-\r
-body: //div[@class='indivbody']\r
-date: //div[@class='indivbody']/h2[1]\r
-\r
-# Remove blog title. Specify first occurrence in case h1 is used in article\r
-strip: //div[@class='indivbody']/h1[1]\r
-\r
-# Remove blog description (the first p element)\r
-strip: //div[@class='indivbody']/p[1]\r
-\r
-# Remove navigation (second p element)\r
-strip: //div[@class='indivbody']/p[2]\r
-\r
-# Remove duplicate of article title. Specify first occurrence in case h3 is used in article\r
-strip: //div[@class='indivbody']/h3[1]\r
-\r
-# Remove publishing date, it's extracted by rule above\r
-strip: //div[@class='indivbody']/h2[1]\r
-\r
-# Remove duplicate of date at end, and newsletter signup\r
-strip: //p[@class='posted']\r
-\r
-# Leave date at top\r
+author: //p[@class='mastname']
+
+body: //div[@class='indivbody']
+date: //div[@class='indivbody']/h2[1]
+
+# Remove blog title. Specify first occurrence in case h1 is used in article
+strip: //div[@class='indivbody']/h1[1]
+
+# Remove blog description (the first p element)
+strip: //div[@class='indivbody']/p[1]
+
+# Remove navigation (second p element)
+strip: //div[@class='indivbody']/p[2]
+
+# Remove duplicate of article title. Specify first occurrence in case h3 is used in article
+strip: //div[@class='indivbody']/h3[1]
+
+# Remove publishing date, it's extracted by rule above
+strip: //div[@class='indivbody']/h2[1]
+
+# Remove duplicate of date at end, and newsletter signup
+strip: //p[@class='posted']
+
+# Leave date at top
 test_url: http://www.schneier.com/blog/archives/2010/12/security_in_202.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 89ebfe0..c4b2183
@@ -1,11 +1,11 @@
-body: //div[@class="storybox"]\r
-title: //div[@class="storybox"]//h1\r
-strip: //p[@class='metaline']\r
-date: substring-after(//*[@class='time'],'Erstellt am')\r
-strip: //div[@class='fact']\r
-strip: //p[@class='backlink']\r
-strip: //div[@class='mailto']\r
-strip: //div[@id='forumDisclaimer']\r
-strip: //div[@class='forum']\r
+body: //div[@class="storybox"]
+title: //div[@class="storybox"]//h1
+strip: //p[@class='metaline']
+date: substring-after(//*[@class='time'],'Erstellt am')
+strip: //div[@class='fact']
+strip: //p[@class='backlink']
+strip: //div[@class='mailto']
+strip: //div[@id='forumDisclaimer']
+strip: //div[@class='forum']
 
 test_url: http://science.orf.at/stories/1700900/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 08c1684..b0dec3d
@@ -1,12 +1,12 @@
-single_page_link: //div[@class='c2c1']/div[@class='toptheme further line']//ul//li/a\r
-\r
-author: //div[@class='details clear']//a[@class='hi']\r
-body: //div[@class='title']\r
-strip: //p[@class='entrypagination']\r
-strip: //p[@class='details_top']\r
-date: //p[@class='details_top']\r
-title: //div[@class='title']/h1\r
-strip: //p[@class='details']\r
-strip: //p[@class='details_bottom']\r
+single_page_link: //div[@class='c2c1']/div[@class='toptheme further line']//ul//li/a
+
+author: //div[@class='details clear']//a[@class='hi']
+body: //div[@class='title']
+strip: //p[@class='entrypagination']
+strip: //p[@class='details_top']
+date: //p[@class='details_top']
+title: //div[@class='title']/h1
+strip: //p[@class='details']
+strip: //p[@class='details_bottom']
 
 test_url: http://www.scienceblogs.de/astrodicticum-simplex/2011/10/weltuntergang-reloaded-das-jungste-gericht-findet-am-21-oktober-statt.php
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 75a5282..2a06f73
@@ -1,11 +1,11 @@
-body: //div[@class='post']\r
-title: //h1[@id='singlePageTitle']\r
-date: substring-before(//small,'&bull; Rubrik')\r
-\r
-strip: //div[@class='post-ratings']\r
-strip: //div[@class='post-ratings-loading']\r
-strip: //a[@title='Empfehlen Sie den Text weiter!']\r
-strip: //a[@title='Drucken']\r
-strip: //div[@class='share']\r
+body: //div[@class='post']
+title: //h1[@id='singlePageTitle']
+date: substring-before(//small,'&bull; Rubrik')
+
+strip: //div[@class='post-ratings']
+strip: //div[@class='post-ratings-loading']
+strip: //a[@title='Empfehlen Sie den Text weiter!']
+strip: //a[@title='Drucken']
+strip: //div[@class='share']
 
 test_url: http://www.scienceticker.info/2011/11/24/forscher-finden-gedachtnismolekul/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d510407..1b3f31c
@@ -1,25 +1,25 @@
-#\r
-# After site revisions at SciAm, this configuration does\r
-# not work, especially for multi-page articles. For\r
-# every article there is now a "Print" link which\r
-# is far more reliable. So this configuration should be\r
-# removed or  disabled.\r
-# 2/3/13\r
-#\r
-\r
-# meta data\r
-title://h1[@class = 'articleTitle']\r
-author:substring-after(//span[@class = 'byline'],'By ')\r
-date:substring-before(//span[@class = 'datestamp'],'|')\r
-\r
-#body content\r
-body://div[@id = 'articleContent']\r
-#next_page_link://li[@id = 'flairPagination']/a[last()]\r
-\r
-single_page_link: //a[contains(@href, 'print=true')]\r
-\r
-#cleanup\r
-strip://div[@class = 'fsgBooks']\r
-\r
-test_url: http://www.scientificamerican.com/article.cfm?id=do-brain-scans-comatose-patients-reveal-conscious-state\r
+#
+# After site revisions at SciAm, this configuration does
+# not work, especially for multi-page articles. For
+# every article there is now a "Print" link which
+# is far more reliable. So this configuration should be
+# removed or disabled.
+# 2/3/13
+#
+
+# meta data
+title://h1[@class = 'articleTitle']
+author:substring-after(//span[@class = 'byline'],'By ')
+date:substring-before(//span[@class = 'datestamp'],'|')
+
+#body content
+body://div[@id = 'articleContent']
+#next_page_link://li[@id = 'flairPagination']/a[last()]
+
+single_page_link: //a[contains(@href, 'print=true')]
+
+#cleanup
+strip://div[@class = 'fsgBooks']
+
+test_url: http://www.scientificamerican.com/article.cfm?id=do-brain-scans-comatose-patients-reveal-conscious-state
 test_url: http://www.scientificamerican.com/article.cfm?id=solar-wind-transforms-venus-into-shape-of-comet
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/scilogs.de.txt b/inc/3rdparty/site_config/standard/scilogs.de.txt
new file mode 100755 (executable)
index 0000000..b24d784
--- /dev/null
@@ -0,0 +1,15 @@
+title: //h1
+author: //div[@class='date']/a
+date: substring-after(//div[@class='date'], ',')
+body: //div[@class='entrybody']
+
+strip_id_or_class: socialshareprivacy
+strip: //div[@class='entrybody']/br[1]
+
+# Strip related articles
+# 'p'-Tag strips 'Ähnliche Artikel: ' (<br> tags become <p>)
+strip: //div[@class='entrybody']/p[last()]
+strip: //div[@class='entrybody']/ul[last()]
+
+convert_double_br_tags: yes
+test_url: http://www.scilogs.de/wblogs/blog/formbar/fusion/2012-10-08/rundgang-durch-deutschlands-gr-tes-fusionsexperiment
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f29e37f..8881bb4
@@ -1,8 +1,8 @@
-title: //title\r
-author: //p[@id='author-name-role']/a\r
-date: substring-after(//p[@class='time'],'Posted')\r
-body: //div[@id='main']\r
-strip: //div[@id='author-info']\r
-strip: //div[@id='author-links']\r
+title: //title
+author: //p[@id='author-name-role']/a
+date: substring-after(//p[@class='time'],'Posted')
+body: //div[@id='main']
+strip: //div[@id='author-info']
+strip: //div[@id='author-links']
 strip: //h1
 test_url: http://www.scotusblog.com/2012/04/shaken-baby-case-an-update/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 84be27f..ca7ec19
@@ -1,3 +1,3 @@
-title: //h2\r
+title: //h2
 body: //div[@class='body']
 test_url: http://scraplab.net/2010/10/26/please-keep-your-belongings-with-you-at-all-times/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d8b969b..5fb0ee7
@@ -1,8 +1,8 @@
-strip: //a[starts-with(@href, '#')]\r
-strip: //*[@class='storyByline']\r
-body: //*[@class='storyPageText']/..\r
-author: string('Dave Winer')\r
-date: substring-before(substring-after(//*[@class='storyByline'], 'on'), 'at')\r
-title: //h1\r
+strip: //a[starts-with(@href, '#')]
+strip: //*[@class='storyByline']
+body: //*[@class='storyPageText']/..
+author: string('Dave Winer')
+date: substring-before(substring-after(//*[@class='storyByline'], 'on'), 'at')
+title: //h1
 footnotes: no
 test_url: http://scripting.com/stories/2011/07/08/yeahImStillYawning.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9927675..55f2417
@@ -1,5 +1,5 @@
-body: //*[@class="entry-content"]\r
-title: //h1[@class="entry-title"]\r
-date: //*[@class="entry-date"]\r
+body: //*[@class="entry-content"]
+title: //h1[@class="entry-title"]
+date: //*[@class="entry-date"]
 author: //*[@class="author vcard"]
 test_url: http://sct.temple.edu/blogs/news-events/2011/05/congratulations-sct-class-of-2011/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/searchenginejournal.com.txt b/inc/3rdparty/site_config/standard/searchenginejournal.com.txt
new file mode 100755 (executable)
index 0000000..dc98af3
--- /dev/null
@@ -0,0 +1,5 @@
+strip: //ul[contains(@id, "social")]
+strip: //div[contains(@class, "ts-fab-wrapper")]
+strip: //div[contains(@id, 'gpt-ad')]
+
+test_url: http://www.searchenginejournal.com/web-design-vs-seo-it-doesnt-make-much-sense/62294/
old mode 100644 (file)
new mode 100755 (executable)
index f176d7c..fb6a107
@@ -1,20 +1,20 @@
-body: //div[@class="storyBox"]\r
-title: //div[@class="storyBox"]/h1\r
-author: //a[@rel="author"]\r
-date: substring-before(//span[@class="dateline"], 'by')\r
-\r
-#Removes related content but cleans up article text\r
-strip: //h1\r
-strip: //p[@class="homeStory tdmSideInfo"]\r
-strip: //div[@id="bylineShare"]\r
-strip: //script\r
-strip: //hr\r
-\r
-strip_id_or_class: homeStory\r
-strip_id_or_class: authorpic\r
-strip_id_or_class: insideComments\r
-strip_id_or_class: authorbio\r
-strip_id_or_class: gpt-ad-sel-cube\r
-strip_id_or_class: smxTextAd\r
+body: //div[@class="storyBox"]
+title: //div[@class="storyBox"]/h1
+author: //a[@rel="author"]
+date: substring-before(//span[@class="dateline"], 'by')
+
+#Removes related content but cleans up article text
+strip: //h1
+strip: //p[@class="homeStory tdmSideInfo"]
+strip: //div[@id="bylineShare"]
+strip: //script
+strip: //hr
+
+strip_id_or_class: homeStory
+strip_id_or_class: authorpic
+strip_id_or_class: insideComments
+strip_id_or_class: authorbio
+strip_id_or_class: gpt-ad-sel-cube
+strip_id_or_class: smxTextAd
 
 test_url: http://searchengineland.com/googles-jaw-dropping-sponsored-post-campaign-for-chrome-106348
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3e800a1..b6d9c92
@@ -1,4 +1,4 @@
-title: substring-before(//title, '«')\r
-body: //div[@class = 'entry']\r
+title: substring-before(//title, '«')
+body: //div[@class = 'entry']
 strip_id_or_class: 'postmetabox'
 test_url: http://sebbo.net/2010/12/akkus/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/select.yeeyan.org.txt b/inc/3rdparty/site_config/standard/select.yeeyan.org.txt
new file mode 100755 (executable)
index 0000000..6e98b14
--- /dev/null
@@ -0,0 +1,18 @@
+# This filter is tested on:
+# http://select.yeeyan.org/view/18312/332365
+# http://select.yeeyan.org/view/365295/333788
+# http://select.yeeyan.org/view/174464/332336
+
+tidy:no
+prune:no
+title://h1
+author: //div[@class='sa_author']/span/a
+date: substring-after(//div[@class='sa_author']/span/following-sibling::span, ':')
+body: //div[@class='sa_left closetag']
+wrap_in(b)://div[@class='sa_abstract']
+
+strip://ul[@class='sa_next clearfix']
+strip: //div[@class='sa_author']
+strip: //div[@class='sa_title_box']
+
+test_url: http://select.yeeyan.org/view/258033/333481
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d7b4788..5e63347
@@ -1,15 +1,15 @@
-body: //div[@id='content']\r
-\r
-# clean up recipe pages\r
-strip: //h2[@class='fn'] | //h2[@class='double-lined'] | //h3 | //div[@id='threeColumn2'] | //div[@id='threeColumn3']\r
-\r
-#recipe pages\r
-strip_id_or_class: "recipe-feedback"\r
-strip_id_or_class: "comments"\r
-strip_id_or_class: "procedure-number"\r
-strip_id_or_class: "more-with-author"\r
-\r
-#slice\r
-strip_id_or_class: "inner"\r
+body: //div[@id='content']
+
+# clean up recipe pages
+strip: //h2[@class='fn'] | //h2[@class='double-lined'] | //h3 | //div[@id='threeColumn2'] | //div[@id='threeColumn3']
+
+#recipe pages
+strip_id_or_class: "recipe-feedback"
+strip_id_or_class: "comments"
+strip_id_or_class: "procedure-number"
+strip_id_or_class: "more-with-author"
+
+#slice
+strip_id_or_class: "inner"
 
 test_url: http://www.seriouseats.com/recipes/2010/09/peking-duck-mandarin-pancakes-plum-sauce-recipe.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9f443d5..4c10e9c
@@ -1,7 +1,7 @@
-title: //h1[@class='post-title']\r
-author: //div[@class='post-byline']/a\r
-date: substring-before(//div[@class='post-byline'], ', by')\r
-\r
-body: //div[@class='post-body']\r
+title: //h1[@class='post-title']
+author: //div[@class='post-byline']/a
+date: substring-before(//div[@class='post-byline'], ', by')
+
+body: //div[@class='post-body']
 dissolve: //noscript
 test_url: http://sf.curbed.com/archives/2011/10/17/lower_haight_loft_would_really_really_really_like_a_buyer.php
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index fca656d..1e7c85a
@@ -1,7 +1,7 @@
-title: //h1[@class="post-title"]\r
-author: //div[@class="post-byline"]/a\r
-date: substring-before(//div[@class='post-byline'], ', by')\r
-\r
-body: //div[@class='post-body']\r
+title: //h1[@class="post-title"]
+author: //div[@class="post-byline"]/a
+date: substring-before(//div[@class='post-byline'], ', by')
+
+body: //div[@class='post-body']
 strip_id_or_class: post-kicker
 test_url: http://sf.eater.com/archives/2012/05/22/nate_pollack_talks_about_the_american_grilled_cheese_kitchen_moving_into_the_mission.php
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 5f73fbc..5469112
@@ -1,12 +1,12 @@
-title: /html/head/title\r
-\r
-body: //div[@id = 'articlecontent']/div[contains(@class, 'bodytext')]\r
-author: //div[@class = 'articleheadings']/p[contains(@class,'author')]/span[@class = 'fn']\r
-date: //div[@class = 'articleheadings']/span[@class = 'updated']\r
-strip: //div[div[contains(@class, 'imgbox')]]\r
-\r
-body: //div[@class = 'blogitem']\r
-author: //p[@class="credit"]/span[@class="author"]/a[position() = 1]\r
-date: //span[@class = 'pubdate']\r
+title: /html/head/title
+
+body: //div[@id = 'articlecontent']/div[contains(@class, 'bodytext')]
+author: //div[@class = 'articleheadings']/p[contains(@class,'author')]/span[@class = 'fn']
+date: //div[@class = 'articleheadings']/span[@class = 'updated']
+strip: //div[div[contains(@class, 'imgbox')]]
+
+body: //div[@class = 'blogitem']
+author: //p[@class="credit"]/span[@class="author"]/a[position() = 1]
+date: //span[@class = 'pubdate']
 
 test_url: http://www.sfgate.com/columnists/garchik/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a11fe4c..73c3017
@@ -1,3 +1,3 @@
-body: //div[contains(@class, 'content_body')]\r
+body: //div[contains(@class, 'content_body')]
 strip_id_or_class: det_rel
 test_url: http://www.sfweekly.com/2012-03-14/news/cia-lsd-wayne-ritchie-george-h-white-mk-ultra/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b175720..9a0d60a
@@ -1,3 +1,3 @@
-date: //span[@class='date']\r
+date: //span[@class='date']
 body: //div[@class='post_content']
 test_url: http://www.shabayek.com/blog/2011/10/16/%D8%AF%D8%B1%D9%88%D8%B3-%D9%85%D9%86-%D9%82%D8%B5%D8%A9-%D8%AA%D8%A3%D8%B3%D9%8A%D8%B3-%D8%AA%D9%88%D9%8A%D8%AA%D8%B1-%E2%80%93%D8%AC3/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b536fc3..bd8438f
@@ -1,11 +1,11 @@
-title://*[@class='primary']/h1\r
-date: //*[@class='articledate']\r
-author: substring-before(substring-after(//*[@class='block first']/p,'2012 '),'.')\r
-body: //div[@class='primary']\r
-footnotes: yes\r
-strip: //*[@class='primary']/h1\r
-strip: //*[@class='articledate']\r
-strip: //*[@class='detailsarticle']\r
-strip: //*[@class='endnav']\r
-strip: //*[@class='endmeta']\r
+title://*[@class='primary']/h1
+date: //*[@class='articledate']
+author: substring-before(substring-after(//*[@class='block first']/p,'2012 '),'.')
+body: //div[@class='primary']
+footnotes: yes
+strip: //*[@class='primary']/h1
+strip: //*[@class='articledate']
+strip: //*[@class='detailsarticle']
+strip: //*[@class='endnav']
+strip: //*[@class='endmeta']
 test_url: http://shawnblanc.net/2011/11/kindle-touch-review/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 68059ae..43fd871
@@ -1,6 +1,6 @@
-body: //div[ @class='entry-content' ]\r
-\r
-strip: //div[ contains(@class, 'sharing') ]\r
-\r
+body: //div[ @class='entry-content' ]
+
+strip: //div[ contains(@class, 'sharing') ]
+
 date: //div[ @class='entry-meta' ]/a
 test_url: http://shifteleven.com/articles/2008/05/10/issue-tracking-git-ticgit
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a82ce69..b10e12d
@@ -1,11 +1,11 @@
-#body: (//div[@class='ftr-yt-vid'])[1]\r
-body: (//blockquote[contains(@class, 'postcontent')])[1]\r
-body: (//div[starts-with(@id, 'post_message')])[1]\r
-\r
-prune: no\r
-tidy: no\r
-\r
-#replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player"\r
-#replace_string(</iframe>): </iframe>&nbsp;</div>\r
-\r
+#body: (//div[@class='ftr-yt-vid'])[1]
+body: (//blockquote[contains(@class, 'postcontent')])[1]
+body: (//div[starts-with(@id, 'post_message')])[1]
+
+prune: no
+tidy: no
+
+#replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player"
+#replace_string(</iframe>): </iframe>&nbsp;</div>
+
 test_url: http://www.siasat.pk/forum/showthread.php?107668-Policy-Matters-17th-March-2012-Dr-Shahid-Masood-Gen-Hameed-gul-amp-Fawad-Chudhary-Pak-US-Relationship&p=787733
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/signalscv.com.txt b/inc/3rdparty/site_config/standard/signalscv.com.txt
new file mode 100755 (executable)
index 0000000..2d3c388
--- /dev/null
@@ -0,0 +1,10 @@
+author: //span[contains(@class, 'byline_1')]
+date: //span[@class='posted_date']
+body: //*[contains(@class, 'bigimage_container') or contains(@class, 'overlay_text') or contains(@id, 'articlebody')]
+
+strip_id_or_class: leftWrapper
+
+prune: no
+
+test_url: http://www.signalscv.com/section/46/article/102948/
+test_url: http://www.signalscv.com/syndication/feeds/rss/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e3ad6e4..6999969
@@ -1,5 +1,5 @@
-body: //div[contains(@class, "entry")]\r
-\r
-date: //div[contains(@class, "entryFooter")]/a\r
+body: //div[contains(@class, "entry")]
+
+date: //div[contains(@class, "entryFooter")]/a
 
 test_url: http://simonwillison.net/2009/Oct/22/redis/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a1b6b67..46e2d5f
@@ -1,5 +1,5 @@
-body: //div[@class='post-body']\r
-strip: //div[@id='lws_0']\r
-prune: no\r
+body: //div[@class='post-body']
+strip: //div[@id='lws_0']
+prune: no
 
 test_url: http://singaporeanstocksinvestor.blogspot.com/2011/04/aims-amp-capital-industrial-reit.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 822bbeb..0d05c40
@@ -1,6 +1,6 @@
-title: //div[@class='headline']//h2\r
-body: //div[contains(@class, 'storycontent')]\r
-\r
-prune: no\r
-\r
+title: //div[@class='headline']//h2
+body: //div[contains(@class, 'storycontent')]
+
+prune: no
+
 test_url: http://sintagoulis.gr/sokolatenia/sokolatenia-mpompa-me-amaretti-
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sivers.org.txt b/inc/3rdparty/site_config/standard/sivers.org.txt
new file mode 100755 (executable)
index 0000000..a88f30d
--- /dev/null
@@ -0,0 +1,6 @@
+title: //article[@class='post']/header[@class='wrapper']//h1/a
+author: //header[@id='masthead']//h1/a
+date: //article[@class='post']/header[@class='wrapper']//p[@class='postdate']
+body: //div[@id='body-content']
+
+test_url: http://sivers.org/delegate/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/skanesfria.se.txt b/inc/3rdparty/site_config/standard/skanesfria.se.txt
new file mode 100755 (executable)
index 0000000..a0ddac7
--- /dev/null
@@ -0,0 +1,7 @@
+body: //div[contains(@class, 'layout__inner')]//div[contains(@class, 'file-image') or contains(@class, 'node__content')]
+author: //article//div[contains(@class, 'field-byline')]
+strip_id_or_class: rekommenderade
+strip_id_or_class: disqus
+strip_id_or_class: annonser
+
+test_url: http://www.skanesfria.se/artikel/112045
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 78d38ec..4d17176
@@ -1,15 +1,15 @@
-title: substring-before(//title,'| /Film')\r
-date: substring-before(substring-after(//p[@class='post-info'],'Posted on '),'by')\r
-strip: //div[@class='pm-left']\r
-strip: //div[@class='pm-right']\r
-strip: //h2/span\r
-next_page_link: //h2/strong/a\r
-strip: //h2/strong/a\r
-strip: //p[contains(text(),'we have to split this post over')]\r
-strip: //p[@class='post-info']\r
-strip: //h1/a\r
-strip: //img[contains(@src,'siteimages/authors')]\r
-strip: //div[@id='header']\r
-strip: //div[@class='topad-right']\r
-strip: //strong[contains(text(),'Cool Posts From Around the Web:')]\r
+title: substring-before(//title,'| /Film')
+date: substring-before(substring-after(//p[@class='post-info'],'Posted on '),'by')
+strip: //div[@class='pm-left']
+strip: //div[@class='pm-right']
+strip: //h2/span
+next_page_link: //h2/strong/a
+strip: //h2/strong/a
+strip: //p[contains(text(),'we have to split this post over')]
+strip: //p[@class='post-info']
+strip: //h1/a
+strip: //img[contains(@src,'siteimages/authors')]
+strip: //div[@id='header']
+strip: //div[@class='topad-right']
+strip: //strong[contains(text(),'Cool Posts From Around the Web:')]
 test_url: http://www.slashfilm.com/superhero-bits-206/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e92f6a0..d5798e0
@@ -1,19 +1,19 @@
-title: //h1[@class="sl-art-head-dek"]\r
-body: //article//div[@class='sl-art-body']/div[contains(@class, 'body')]\r
-strip: //div[@class="department_kicker"]\r
-strip: //div[@id="insider_ad_wrapper" or @id="insider_ad_inner"]\r
-strip: //div[@id="bottom_sponsored_links"]\r
-strip: //div[@class="sl-art-ad-midflex"]\r
-#strip: //dl\r
-#strip: //p[em/a[contains(@href, 'facebook.com')]]\r
-prune: no\r
-\r
-author: //div[@id='author_bio']//a[contains(@href, '/author/')]\r
-author: //a[contains(@href, '/authors.')]\r
-\r
-date: substring-before(substring-after(//span[@class='sl-art-byline'], 'Posted '), ', at ')\r
-\r
-single_page_link: //a[@class='sl-art-sinpage']\r
-\r
-test_url: http://www.slate.com/id/2274583/pagenum/all/\r
+title: //h1[@class="sl-art-head-dek"]
+body: //article//div[@class='sl-art-body']/div[contains(@class, 'body')]
+strip: //div[@class="department_kicker"]
+strip: //div[@id="insider_ad_wrapper" or @id="insider_ad_inner"]
+strip: //div[@id="bottom_sponsored_links"]
+strip: //div[@class="sl-art-ad-midflex"]
+#strip: //dl
+#strip: //p[em/a[contains(@href, 'facebook.com')]]
+prune: no
+
+author: //div[@id='author_bio']//a[contains(@href, '/author/')]
+author: //a[contains(@href, '/authors.')]
+
+date: substring-before(substring-after(//span[@class='sl-art-byline'], 'Posted '), ', at ')
+
+single_page_link: //a[@class='sl-art-sinpage']
+
+test_url: http://www.slate.com/id/2274583/pagenum/all/
 test_url: http://www.slate.com/id/2293116/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 1a902b9..e62a396
@@ -1,15 +1,15 @@
-body: //div[@id='content']\r
-\r
-# clean up recipe pages\r
-strip: //h2[@class='fn'] | //h2[@class='double-lined'] | //h3 | //div[@id='threeColumn2'] | //div[@id='threeColumn3']\r
-\r
-#recipe pages\r
-strip_id_or_class: "recipe-feedback"\r
-strip_id_or_class: "comments"\r
-strip_id_or_class: "procedure-number"\r
-strip_id_or_class: "more-with-author"\r
-\r
-#slice\r
-strip_id_or_class: "inner"\r
+body: //div[@id='content']
+
+# clean up recipe pages
+strip: //h2[@class='fn'] | //h2[@class='double-lined'] | //h3 | //div[@id='threeColumn2'] | //div[@id='threeColumn3']
+
+#recipe pages
+strip_id_or_class: "recipe-feedback"
+strip_id_or_class: "comments"
+strip_id_or_class: "procedure-number"
+strip_id_or_class: "more-with-author"
+
+#slice
+strip_id_or_class: "inner"
 
 test_url: http://slice.seriouseats.com/archives/2010/10/the-pizza-lab-how-to-make-great-new-york-style-pizza.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index daa5e31..f952694
@@ -1,4 +1,4 @@
-strip_id_or_class: postCategory\r
-title: //h3[@class='postTitle']\r
+strip_id_or_class: postCategory
+title: //h3[@class='postTitle']
 body: //div[@class='postBody']
 test_url: http://slog.thestranger.com/slog/archives/2010/10/12/sl-letter-of-the-day-leave-it-alone
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ec6c55c..85ca46d
@@ -1,5 +1,5 @@
-title: //td[@class='hweissblau2']\r
-body: //p[@class='copy'] | //div[@class='Section1']\r
-prune: no\r
+title: //td[@class='hweissblau2']
+body: //p[@class='copy'] | //div[@class='Section1']
+prune: no
 
 test_url: http://www.smartinvestor.de/news/smartinvestor/detail.hbs?itemid=item949496655&recnr=14593
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c3d01ff..d41612c
@@ -1,3 +1,3 @@
-title: //meta[@property='og:title']/@content\r
+title: //meta[@property='og:title']/@content
 date: //p[@class='autor_line']/b/text()
 test_url: http://www.sme.sk/c/6268206/lipsic-vidi-malcharkove-uplatky.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 10a3f71..3e8fee9
@@ -1,20 +1,20 @@
-# meta data\r
-title://h1[@id = 'articleTitle']\r
-author:substring-after(//ul[@id = 'byLine']/li[1],'By ')\r
-date:substring-before(substring-after(//ul[@id = 'byLine']/li[last()],','),',')\r
-body://div[@id = 'article-body']\r
-\r
-# full content\r
-single_page_link://td/li[@class = 'article-singlepage']/a\r
-\r
-# caption clean up\r
-wrap_in(i)://span[@class='articleImageCaptionwide']\r
-move_into (//span[@class='articleImageCaptionwide'])://div[@id = 'articleImage']/p\r
-\r
-\r
-# clean up\r
-strip://p[@id = 'articlePaginationWrapper']\r
-strip://ul[contains(@class, 'cat-breadcrumb')]\r
-strip://div [@class= 'viewMorePhotos']\r
+# meta data
+title://h1[@id = 'articleTitle']
+author:substring-after(//ul[@id = 'byLine']/li[1],'By ')
+date:substring-before(substring-after(//ul[@id = 'byLine']/li[last()],','),',')
+body://div[@id = 'article-body']
+
+# full content
+single_page_link://td/li[@class = 'article-singlepage']/a
+
+# caption clean up
+wrap_in(i)://span[@class='articleImageCaptionwide']
+move_into (//span[@class='articleImageCaptionwide'])://div[@id = 'articleImage']/p
+
+
+# clean up
+strip://p[@id = 'articlePaginationWrapper']
+strip://ul[contains(@class, 'cat-breadcrumb')]
+strip://div [@class= 'viewMorePhotos']
 
 test_url: http://www.smithsonianmag.com/history-archaeology/The-Goddess-Goes-Home.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e22af7a..c68c132
@@ -1,5 +1,5 @@
-title: //h2[@class='custom-entry-title']\r
-author: substring-after(//span[@class='author vcard'],'by ')\r
-date: substring-after(//span[@class='publ'],'Published on ')\r
-body: //div[@class='postentry-content']\r
+title: //h2[@class='custom-entry-title']
+author: substring-after(//span[@class='author vcard'],'by ')
+date: substring-after(//span[@class='publ'],'Published on ')
+body: //div[@class='postentry-content']
 test_url: http://smokingapples.com/software/popclip-for-mac/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/somethingawful.com.txt b/inc/3rdparty/site_config/standard/somethingawful.com.txt
new file mode 100755 (executable)
index 0000000..4854794
--- /dev/null
@@ -0,0 +1,17 @@
+title: //h1
+body: //div[@id = 'content-area']
+author: //p[contains(@class, 'byline')]/a
+autodetect_next_page: yes
+tidy: no
+
+strip_id_or_class: articleid
+strip_id_or_class: logo
+strip_id_or_class: pagebar
+strip_id_or_class: featurenavlinks
+strip_id_or_class: featured_frontpage
+strip_id_or_class: sidebar
+strip_id_or_class: footer
+strip_id_or_class: byline
+strip_id_or_class: logo
+strip_id_or_class: nav_network
+test_url: http://www.somethingawful.com/d/dungeons-and-dragons/wtf-monster-manual.php
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/songshuhui.net.txt b/inc/3rdparty/site_config/standard/songshuhui.net.txt
new file mode 100755 (executable)
index 0000000..a923359
--- /dev/null
@@ -0,0 +1,7 @@
+# This filter is tested on:
+# http://songshuhui.net/archives/65522
+# http://songshuhui.net/archives/75760
+title://h2/span/a
+date:substring-before(substring-after(//div[@class='atrctitle']/div, '发表于'),' |')
+body://div[@class='entry']
+test_url: http://songshuhui.net/archives/74819
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 668fc44..b52169d
@@ -1,4 +1,4 @@
-#grab the actual content div\r
-body: //div[@class='rt-article']\r
-\r
+#grab the actual content div
+body: //div[@class='rt-article']
+
 test_url: http://www.sourcebooks.com/next/sourcebooks-next-our-blog/1601-another-piece-of-the-e-puzzle-or-when-good-ebook-promotions-go-bad.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a05c839..d0605ed
@@ -1,7 +1,7 @@
-author: /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body']/a[@class='author-link']/child::text()\r
-\r
-body: /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body']\r
-\r
-# Not very helpfull, the title and author are container by the same element that contains the body\r
+author: /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body']/a[@class='author-link']/child::text()
+
+body: /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body']
+
+# Not very helpful: the title and author are contained by the same element that contains the body
 strip: /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body']/h2 | /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body']/a[@class='author-link']
 test_url: http://www.spectator.co.uk/arts-and-culture/night-and-day/7449683/spotify-sunday-my-personal-soundtrack.thtml
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4b0704a..aea3627
@@ -1,3 +1,3 @@
-body://div[@class="articleBody"]\r
+body://div[@class="articleBody"]
 author://p[@class="articleBodyTtl"]
 test_url: http://spectrum.ieee.org/semiconductors/processors/behind-intels-new-randomnumber-generator/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 390c075..413e015
@@ -1,75 +1,75 @@
-# A. Niepel, narya.de@...\r
-# - added single_page_link\r
-# - added author for default and single page view\r
-# - added date for single page view\r
-# fforst@...\r
-# - Fixed it\r
-# bode2104@...\r
-# - Fixed single_page_link\r
-# - Included intro text in single page view\r
-# - Added body in default view\r
-\r
-# set body\r
-tidy: no\r
-# body in single page view\r
-body: //div[@id="spArticleContent"]\r
-# body in default view\r
-body: //div[@id="spArticleSection"]\r
-# body in "Fotostrecke"\r
-body: //div[@id="spBigaContent"]\r
-\r
-# set date in single page view\r
-date: //div[@id="spArticleContent"]/h3\r
-# strip date\r
-strip: //div[@id="spArticleContent"]/h3\r
-# set date in "Fotostrecke"\r
-date: //div[@id="spBigaDatum"]\r
-\r
-#set title in single page view\r
-title: //div[@id='spArticleContent']/h2\r
-# strip title\r
-strip: //div[@id='spArticleContent']/h1\r
-strip: //div[@id='spArticleContent']/h2\r
-#set title in "Fotostrecke"\r
-title: //div[@class='spBigaHeadline']\r
-\r
-# set author\r
-author: //p[@class="spAuthor"]/a\r
-author: substring-after(//p[@class="spAuthor"], 'Von ')\r
-# strip author\r
-strip: //p[@class='spAuthor']\r
-\r
-# remove captions\r
-strip: //*/span[@class='spPicLayerText']\r
-strip: //*/div[@class='spPanoPlayerPaneControl']\r
-strip: //*/div[@class='spCredit']\r
-strip: //*/div[@class='spCredit']/following-sibling::p\r
-\r
-# remove ads\r
-strip: //div[@class='spMInline']\r
-\r
-# remove photogalleries and extras\r
-strip: //div[@class='spPhotoGallery']\r
-strip: //div[@class='spPhotoGallery']/following-sibling::br\r
-strip: //div[@class='spAssetAlignleft']\r
-strip: //div[contains(@class,'spAsset')]\r
-strip: //br[@clear='all']\r
-\r
-# remove community functions\r
-strip: //div[@id='spSocialBookmark']\r
-strip: //div[contains(@class, 'spCommunityBox')]\r
-strip: //div[contains(@class, 'spArticleNewsfeedBox')]\r
-strip: //div[@class='spArticleCredit']\r
-\r
-# remove clutter in "Fotostrecke"\r
-strip: //div[@id='spBreadcrumb']\r
-strip: //div[@id='spBigaLatestEntries']\r
-strip: //div[contains(@class, 'spBigaNavi')]\r
-strip: //div[@class='spDottedLine']\r
-\r
-# Use link to print article for single page view\r
-single_page_link: //a[contains(@href, '-druck')]\r
-\r
-# use next link in "Fotostrecke"\r
-next_page_link: //a[@class='spBigaControlForw']\r
+# A. Niepel, narya.de@...
+# - added single_page_link
+# - added author for default and single page view
+# - added date for single page view
+# fforst@...
+# - Fixed it
+# bode2104@...
+# - Fixed single_page_link
+# - Included intro text in single page view
+# - Added body in default view
+
+# set body
+tidy: no
+# body in single page view
+body: //div[@id="spArticleContent"]
+# body in default view
+body: //div[@id="spArticleSection"]
+# body in "Fotostrecke"
+body: //div[@id="spBigaContent"]
+
+# set date in single page view
+date: //div[@id="spArticleContent"]/h3
+# strip date
+strip: //div[@id="spArticleContent"]/h3
+# set date in "Fotostrecke"
+date: //div[@id="spBigaDatum"]
+
+#set title in single page view
+title: //div[@id='spArticleContent']/h2
+# strip title
+strip: //div[@id='spArticleContent']/h1
+strip: //div[@id='spArticleContent']/h2
+#set title in "Fotostrecke"
+title: //div[@class='spBigaHeadline']
+
+# set author
+author: //p[@class="spAuthor"]/a
+author: substring-after(//p[@class="spAuthor"], 'Von ')
+# strip author
+strip: //p[@class='spAuthor']
+
+# remove captions
+strip: //*/span[@class='spPicLayerText']
+strip: //*/div[@class='spPanoPlayerPaneControl']
+strip: //*/div[@class='spCredit']
+strip: //*/div[@class='spCredit']/following-sibling::p
+
+# remove ads
+strip: //div[@class='spMInline']
+
+# remove photogalleries and extras
+strip: //div[@class='spPhotoGallery']
+strip: //div[@class='spPhotoGallery']/following-sibling::br
+strip: //div[@class='spAssetAlignleft']
+strip: //div[contains(@class,'spAsset')]
+strip: //br[@clear='all']
+
+# remove community functions
+strip: //div[@id='spSocialBookmark']
+strip: //div[contains(@class, 'spCommunityBox')]
+strip: //div[contains(@class, 'spArticleNewsfeedBox')]
+strip: //div[@class='spArticleCredit']
+
+# remove clutter in "Fotostrecke"
+strip: //div[@id='spBreadcrumb']
+strip: //div[@id='spBigaLatestEntries']
+strip: //div[contains(@class, 'spBigaNavi')]
+strip: //div[@class='spDottedLine']
+
+# Use link to print article for single page view
+single_page_link: //a[contains(@href, '-druck')]
+
+# use next link in "Fotostrecke"
+next_page_link: //a[@class='spBigaControlForw']
 test_url: http://www.spiegel.de/politik/deutschland/0,1518,787602,00.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/spiked-online.com.txt b/inc/3rdparty/site_config/standard/spiked-online.com.txt
new file mode 100755 (executable)
index 0000000..7ec39c2
--- /dev/null
@@ -0,0 +1,7 @@
+title: //div[@id='articleTitleWrapper' or @id='mainFeature']//h1
+author: //*[@id='authorNameJob']//a
+date: //div[@id='articleMeta']/p
+body: //div[@id='mainFeature']//img | //div[contains(@class, 'fullText')]
+
+test_url: http://www.spiked-online.com/newsite/article/standing_up_to_the_white-coated_gods_of_fortune/13785
+test_url: http://www.spiked-online.com/newsite/article/sex_box_and_the_crisis_of_intimacy/14168
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 66f6192..88eb454
@@ -1,5 +1,5 @@
-tidy: no\r
-body: //section[contains(@class, 'main')]\r
-strip: //footer\r
+tidy: no
+body: //section[contains(@class, 'main')]
+strip: //footer
 strip: //a[@class='paginated']
 test_url: http://www.spin.com/articles/bathlands-deep-heart-americas-new-drug-nightmare
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d567165..3e05a22
@@ -1,5 +1,5 @@
-author:string('Dan Frommer/SplatF')\r
-date://div[@class='postdate']\r
-body://div[@class='entry']\r
+author:string('Dan Frommer/SplatF')
+date://div[@class='postdate']
+body://div[@class='entry']
 title://div[@class='post']/h1
 test_url: http://www.splatf.com/2012/02/month-six/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d1d392e..4bbc7aa
@@ -1,4 +1,4 @@
-author: //div[@class='byline']/a\r
-date: //div[@id='date']\r
+author: //div[@class='byline']/a
+date: //div[@id='date']
 body: //div[@class='entry']
 test_url: http://splitsider.com/2011/10/saturday-nights-children-rob-riggle-2004-2005/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b404b82..18552d1
@@ -1,8 +1,8 @@
-title://div[@class="content_detail"]/h1\r
-\r
-author://div[@class="author"]/strong\r
-\r
-date:substring-before(substring-after(//div[@class="content_detail"]/*[@class="date"], ','), ' WIB')\r
-\r
+title://div[@class="content_detail"]/h1
+
+author://div[@class="author"]/strong
+
+date:substring-before(substring-after(//div[@class="content_detail"]/*[@class="date"], ','), ' WIB')
+
 body://div[@class='text_detail']
 test_url: http://sport.detik.com/sepakbola/read/2012/05/23/065011/1922350/71/agen-silva-ingin-bertahan-di-milan?b99220270
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a794ded..f0be85c
@@ -1,11 +1,11 @@
-single_page_link: //div[@id='content']//p[@class='readMore']/a\r
-\r
-title: //div[@class='hidden offscreen']/h2\r
-body: //div[@id="storyText"]\r
-move_into(//div[@id='storyText']): //div[@class='fact']\r
-strip: //small[@class='credit']\r
-strip: //small[@class='caption']\r
-date: substring-after(//div[@id='storyMeta']//p[@class='date'],'Publiziert am')\r
-strip: //p[@class='toplink']\r
+single_page_link: //div[@id='content']//p[@class='readMore']/a
+
+title: //div[@class='hidden offscreen']/h2
+body: //div[@id="storyText"]
+move_into(//div[@id='storyText']): //div[@class='fact']
+strip: //small[@class='credit']
+strip: //small[@class='caption']
+date: substring-after(//div[@id='storyMeta']//p[@class='date'],'Publiziert am')
+strip: //p[@class='toplink']
 
 test_url: http://sport.orf.at/stories/2084851/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sport365.fr.txt b/inc/3rdparty/site_config/standard/sport365.fr.txt
new file mode 100755 (executable)
index 0000000..8688f40
--- /dev/null
@@ -0,0 +1,8 @@
+body: //h2[contains(@class, 'body_head')] | //div[@id='img_article' or contains(@class, 'body_content')]
+body: //div[contains(@class, 'cpanel')]//div[contains(@class, 'thumbnails')]
+prune: no
+strip: //div[starts-with(@class, 'actu_')]
+strip: //div[contains(@class, 'data')]
+
+test_url: http://www.sport365.fr/basketball/nba/new-york-accord-avec-toronto-pour-bargnani-1038773.shtml
+test_url: http://www.sport365.fr/rss.xml
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e0f8223..8c21ef2
@@ -1,12 +1,12 @@
-title: //div[@class='headline'] | //div[@class='mod-header']/h3\r
-body: //div[contains(@class, 'article')]\r
-strip: //div[contains(@class, 'mod-inline')]\r
-strip: //*/span[@class='page-actions']/a\r
-strip: //*/span[@class='page-actions']/a\r
-strip: //div[@class='page-actions']/*\r
-strip: //div[@class='headline'] | //div[@class='mod-header']/h3\r
-strip: //div[@class='mod-blog-navigation']\r
-strip: //div[@class='monthday']\r
-strip: //div[@class='time']\r
-strip: //div[@class='timeofday']\r
+title: //div[@class='headline'] | //div[@class='mod-header']/h3
+body: //div[contains(@class, 'article')]
+strip: //div[contains(@class, 'mod-inline')]
+strip: //*/span[@class='page-actions']/a
+strip: //*/span[@class='page-actions']/a
+strip: //div[@class='page-actions']/*
+strip: //div[@class='headline'] | //div[@class='mod-header']/h3
+strip: //div[@class='mod-blog-navigation']
+strip: //div[@class='monthday']
+strip: //div[@class='time']
+strip: //div[@class='timeofday']
 test_url: http://sports.espn.go.com/espn/page2/story?page=simmonsnfl2010/lebron_james_return_clevelend&sportCat=nba
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 96a3bb7..b0f57e2
@@ -1,9 +1,9 @@
-title: //div[@id='article']/div[@class='hd']/h1\r
-body: //p[@id='byline'] | //div[@id='article']//div[@class='body_copy 0']\r
-strip: //div[@class='foot']\r
-strip: //div[@id='sidebar']//div[@class='ft']\r
-strip: //p[@id='byline']//em\r
-tidy: no\r
-prune: no\r
+title: //div[@id='article']/div[@class='hd']/h1
+body: //p[@id='byline'] | //div[@id='article']//div[@class='body_copy 0']
+strip: //div[@class='foot']
+strip: //div[@id='sidebar']//div[@class='ft']
+strip: //p[@id='byline']//em
+tidy: no
+prune: no
 
 test_url: http://sports.yahoo.com/nba/news?slug=ap-nbafinals
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6500e75..1e58b52
@@ -1,22 +1,22 @@
-title://div[@id='ardContent']/h1\r
-\r
-author://p[@id='ardAutor']\r
-author://span[@id='ardQuelle']\r
-author:string('sportschau.de')\r
-\r
-date:substring-after(//span[@id='ardStand'], 'Stand: ')\r
-\r
-body://div[@id='ardContent']\r
-\r
-strip://div[@id='ardContent']/h1\r
-strip://p[@id='ardAutor']\r
-strip: //div[@class='embeddedPlayer_clipinfo']\r
-strip: //div[@class='ardMehrZumThemaRechts']\r
-strip: //*[contains(@class, 'inv')]\r
-\r
-strip: //p[@id='ardAbbinder']\r
-strip: //div[@class='socialBookmarks']\r
-strip: //div[@id='ardContentEnd']\r
-strip: //div[@id='ardDisclaimer']\r
+title://div[@id='ardContent']/h1
+
+author://p[@id='ardAutor']
+author://span[@id='ardQuelle']
+author:string('sportschau.de')
+
+date:substring-after(//span[@id='ardStand'], 'Stand: ')
+
+body://div[@id='ardContent']
+
+strip://div[@id='ardContent']/h1
+strip://p[@id='ardAutor']
+strip: //div[@class='embeddedPlayer_clipinfo']
+strip: //div[@class='ardMehrZumThemaRechts']
+strip: //*[contains(@class, 'inv')]
+
+strip: //p[@id='ardAbbinder']
+strip: //div[@class='socialBookmarks']
+strip: //div[@id='ardContentEnd']
+strip: //div[@id='ardDisclaimer']
 strip: //div[@id='ardRechteSpalte']
 test_url: http://www.sportschau.de/sp/fussball/news201203/17/analyse_leverkusen_gladbach.jsp
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index afc5879..b3da813
@@ -1,26 +1,26 @@
-# main sportsillustrated.com articles\r
-#\r
-body: //div[@id="cnnStoryContent"]\r
-title: //div[@id="cnnStoryHeadline"]//h1\r
-author: //div[@id="cnnSubBanner"]//strong\r
-date: substring-after(//div[@id="cnnTimeStamp"], "Updated: ")\r
-date: substring-after(//div[@id="cnnTimeStamp"], "Posted: ")\r
-\r
-# kill ugly font buttons\r
-strip: //div[@id="cnnSCFontButtons"]\r
-\r
-# kill misc filler videos & etc\r
-strip: //div[@class="cnnDivideContent"]\r
-strip: //*[@class="cnnTMbox"]\r
-\r
-# si vault articles\r
-# -------------\r
-body: //div[@class="siv_artPara"]\r
-title: //div[@class="siv_artHeader"]//h1\r
-author: //div[@class="byline"]\r
-date: //div[@class="date"]\r
-\r
-next_page_link: //div[@id='cnnStoryContinue']/a\r
-strip_id_or_class: cnnstorypagination\r
-\r
+# main sportsillustrated.com articles
+#
+body: //div[@id="cnnStoryContent"]
+title: //div[@id="cnnStoryHeadline"]//h1
+author: //div[@id="cnnSubBanner"]//strong
+date: substring-after(//div[@id="cnnTimeStamp"], "Updated: ")
+date: substring-after(//div[@id="cnnTimeStamp"], "Posted: ")
+
+# kill ugly font buttons
+strip: //div[@id="cnnSCFontButtons"]
+
+# kill misc filler videos & etc
+strip: //div[@class="cnnDivideContent"]
+strip: //*[@class="cnnTMbox"]
+
+# si vault articles
+# -------------
+body: //div[@class="siv_artPara"]
+title: //div[@class="siv_artHeader"]//h1
+author: //div[@class="byline"]
+date: //div[@class="date"]
+
+next_page_link: //div[@id='cnnStoryContinue']/a
+strip_id_or_class: cnnstorypagination
+
 test_url: http://sportsillustrated.cnn.com/2012/writers/peter_king/02/27/combine/index.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 16636bc..5b68381
@@ -1,5 +1,5 @@
-title: //h2\r
-author: string('Michael Spreng')\r
-date: //div[@class='date']\r
+title: //h2
+author: string('Michael Spreng')
+date: //div[@class='date']
 body: //div[@class='entry']
 test_url: http://www.sprengsatz.de/?p=3691
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4872519..15763c3
@@ -1,7 +1,7 @@
-body: //div[@id='ff-body']\r
-\r
-replace_string(<h1 align=center>): <div id="ff-body"><h1 align=center>\r
-\r
-prune: no\r
-\r
+body: //div[@id='ff-body']
+
+replace_string(<h1 align=center>): <div id="ff-body"><h1 align=center>
+
+prune: no
+
 test_url: http://www.sqlite.org/fileformat2.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 388209a..8eae13e
@@ -1,4 +1,4 @@
-body: //div[@class='content']\r
-date: substring-before( //div[@class='unit dateAndNotes'], 'with')\r
+body: //div[@class='content']
+date: substring-before( //div[@class='unit dateAndNotes'], 'with')
 title: //h3
 test_url: http://squashed.tumblr.com/post/17613522228/lets-stop-blaming-the-victims-of-predatory-lending
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e5317ba..bb95e93
@@ -1,14 +1,14 @@
-body: //div[@class='post-text' or @class='user-action-time' or @class='user-details' or @class='vote'] | //div[@id='answers-header']//h2\r
-\r
-replace_string(<div class="user-details"><br></div>): <!-- nothing -->\r
-replace_string(<div class="vote">): <div class="vote"><h3>Vote count: \r
-\r
-strip_id_or_class: vote-up\r
-strip_id_or_class: vote-down\r
-strip_id_or_class: star-off\r
-strip_id_or_class: favoritecount\r
-strip_id_or_class: -share\r
-strip_id_or_class: badgecount\r
-\r
+body: //div[@class='post-text' or @class='user-action-time' or @class='user-details' or @class='vote'] | //div[@id='answers-header']//h2
+
+replace_string(<div class="user-details"><br></div>): <!-- nothing -->
+replace_string(<div class="vote">): <div class="vote"><h3>Vote count: 
+
+strip_id_or_class: vote-up
+strip_id_or_class: vote-down
+strip_id_or_class: star-off
+strip_id_or_class: favoritecount
+strip_id_or_class: -share
+strip_id_or_class: badgecount
+
 
 test_url: http://stackoverflow.com/questions/4484289/id-like-to-understand-the-jquery-plugin-syntax
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index bde1421..a0f1587
@@ -1,14 +1,14 @@
-title: //div[@class='articleLeft']/h3\r
-\r
-author: substring-after(//span[@class='articleAuthor']/a,'By ')\r
-\r
-date: substring-before(//span[@class='articleDateTime'],'in ')\r
-\r
-body: //div[@class='articleLeft']\r
-strip: //div[@class='articleMoreNews']\r
-strip: //div[@class='articleLeft']/h3\r
-strip: //div[@class='articleLeft']/p[@class='articleInfo clearfix']\r
-\r
-# Remove duplicate title from text\r
-strip: //div[@id='site']/div[5][@class='holder']/div[1][@class='hBlock']/div[1][@class='sglCol article']/h3\r
+title: //div[@class='articleLeft']/h3
+
+author: substring-after(//span[@class='articleAuthor']/a,'By ')
+
+date: substring-before(//span[@class='articleDateTime'],'in ')
+
+body: //div[@class='articleLeft']
+strip: //div[@class='articleMoreNews']
+strip: //div[@class='articleLeft']/h3
+strip: //div[@class='articleLeft']/p[@class='articleInfo clearfix']
+
+# Remove duplicate title from text
+strip: //div[@id='site']/div[5][@class='holder']/div[1][@class='hBlock']/div[1][@class='sglCol article']/h3
 test_url: http://www.stalbansreview.co.uk/news/9581446.New_roundabout_in_King_Harry_Lane/r/?ref=rss
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 22a3348..71a2bda
@@ -1,16 +1,16 @@
-autodetect_next_page: no\r
-footnotes: no\r
-dissolve: //div[@class="column-2"]//div[@class="widget"]\r
-dissolve: //div[@class="column-2"]//div\r
-\r
-author: //div[@class="innerbyline"]/a\r
-strip: //div[@class="innerbyline"]/a\r
-\r
-strip: //p[@class="dateline"]\r
-date: //p[@class="dateline"]\r
-\r
-title: //h1[@class="title"]\r
-author: //div[@class="innerbyline"]/a\r
-date: //p[@class="dateline"]\r
+autodetect_next_page: no
+footnotes: no
+dissolve: //div[@class="column-2"]//div[@class="widget"]
+dissolve: //div[@class="column-2"]//div
+
+author: //div[@class="innerbyline"]/a
+strip: //div[@class="innerbyline"]/a
+
+strip: //p[@class="dateline"]
+date: //p[@class="dateline"]
+
+title: //h1[@class="title"]
+author: //div[@class="innerbyline"]/a
+date: //p[@class="dateline"]
 body: //div[@class="column-2"]
 test_url: http://www.standard.co.uk/lifestyle/esmagazine/grace-and-flavour-pizarro-7938350.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0579455..254e2c2
@@ -1,11 +1,11 @@
-title: //h1[@id='storyTitle']\r
-author: substring-after(//span[@class='hsa_postCredit'], 'By ') \r
-date://span[@class='hsa_dateStamp']\r
-body: //div[@class='storytext']\r
-strip_id_or_class: insideStoryAd \r
-strip_id_or_class: printDesc\r
-strip_id_or_class: sb_2010_story_tools\r
-strip_id_or_class: FBConnectButton_Text\r
-strip_id_or_class: breadcrumbs\r
-prune: no\r
+title: //h1[@id='storyTitle']
+author: substring-after(//span[@class='hsa_postCredit'], 'By ') 
+date://span[@class='hsa_dateStamp']
+body: //div[@class='storytext']
+strip_id_or_class: insideStoryAd 
+strip_id_or_class: printDesc
+strip_id_or_class: sb_2010_story_tools
+strip_id_or_class: FBConnectButton_Text
+strip_id_or_class: breadcrumbs
+prune: no
 test_url: http://www.staradvertiser.com/news/20111112_World_leaders_step_onto_isle_stage.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 1169984..efd1ec2
@@ -1,8 +1,8 @@
-title: /html/head/meta[@name='title']/@content\r
-author: //span[contains(concat(' ',normalize-space(@class),' '),' article_author ')]/a\r
-date: //span[contains(concat(' ',normalize-space(@class),' '),' article_date ')]\r
-\r
-body: //div[@class='entry-content']\r
-\r
+title: /html/head/meta[@name='title']/@content
+author: //span[contains(concat(' ',normalize-space(@class),' '),' article_author ')]/a
+date: //span[contains(concat(' ',normalize-space(@class),' '),' article_date ')]
+
+body: //div[@class='entry-content']
+
 single_page_link: //p[@class='pagination']/a
 test_url: http://www.stephenfry.com/2011/10/06/steve-jobs/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d66fee9..75379a9
@@ -1,5 +1,5 @@
-title: article/h1\r
-author: //p[@class='byline']\r
-date:  //p[@class='date']\r
+title: article/h1
+author: //p[@class='byline']
+date:  //p[@class='date']
 body: //div[@class='body']
 test_url: https://www.stlbeacon.org/#!/content/23404/mogop_caucus_031712
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 073043d..2f4f8cb
@@ -1,5 +1,5 @@
-strip_id_or_class: 'left'\r
-strip_id_or_class: 'right'\r
-strip_id_or_class: 'block-belowcontent'\r
+strip_id_or_class: 'left'
+strip_id_or_class: 'right'
+strip_id_or_class: 'block-belowcontent'
 
 test_url: http://stockholm.etc.se/reportage/bakom-stangda-dorrar-pa-fas-3-massa
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/stockholmsfria.nu.txt b/inc/3rdparty/site_config/standard/stockholmsfria.nu.txt
new file mode 100755 (executable)
index 0000000..cc8c28b
--- /dev/null
@@ -0,0 +1,7 @@
+body: //div[contains(@class, 'layout__inner')]//div[contains(@class, 'file-image') or contains(@class, 'node__content')]
+author: //article//div[contains(@class, 'field-byline')]
+strip_id_or_class: rekommenderade
+strip_id_or_class: disqus
+strip_id_or_class: annonser
+
+test_url: http://www.stockholmsfria.nu/artikel/112068
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/straightdope.com.txt b/inc/3rdparty/site_config/standard/straightdope.com.txt
new file mode 100755 (executable)
index 0000000..f01d7ad
--- /dev/null
@@ -0,0 +1,6 @@
+body: //div[@id='article' or @id='current_illustration']
+title: //div[@id='article']//h1
+date: //div[@id='article']//div[@class='date']
+prune: no
+
+test_url: http://www.straightdope.com/columns/read/947/whatever-happened-to-adoption-of-the-metric-system-in-the-u-s
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0b62a3d..6cf03ca
@@ -1,7 +1,7 @@
-title: //h2[@class="post-title"]\r
-date: //span[@class="post-date"]\r
-body: //div[@class="post-entry"]\r
-\r
-#This is also good for *.streetsblog.org, for example:\r
+title: //h2[@class="post-title"]
+date: //span[@class="post-date"]
+body: //div[@class="post-entry"]
+
+#This is also good for *.streetsblog.org, for example:
 #http://dc.streetsblog.org/2011/10/21/friday-job-market/
 test_url: http://streetsblog.net/2011/10/20/look-out-below-one-in-nine-bridges-structurally-deficient-reports-t4a/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 12fd093..3756092
@@ -1,22 +1,22 @@
-title://div[@id='left_col']/h1\r
-author:substring-after(//span[contains(@class,'storycredit')],'BY ')\r
-author://span[contains(@class,'storycredit')]\r
-date:substring-after(//div[contains(@class,'toolbox_date')],'Last updated ')\r
-date://div[contains(@class,'toolbox_date')]\r
-body://div[@id='left_col']\r
-\r
-strip_id_or_class: toolbox\r
-strip_id_or_class: story_features\r
-strip_id_or_class: sharebox_new\r
-strip_id_or_class: related_box\r
-strip_id_or_class: sponsored_links\r
-strip_id_or_class: hidden_ad\r
-strip_id_or_class: story_content_top\r
-strip_id_or_class: total_number\r
-strip_id_or_class: sort_order\r
-strip_id_or_class: subscribe_order\r
-\r
-strip://div[contains(@class,'ad_story')]\r
-\r
-test_url: http://www.stuff.co.nz/national/politics/3930344/PM-issues-challenge\r
+title://div[@id='left_col']/h1
+author:substring-after(//span[contains(@class,'storycredit')],'BY ')
+author://span[contains(@class,'storycredit')]
+date:substring-after(//div[contains(@class,'toolbox_date')],'Last updated ')
+date://div[contains(@class,'toolbox_date')]
+body://div[@id='left_col']
+
+strip_id_or_class: toolbox
+strip_id_or_class: story_features
+strip_id_or_class: sharebox_new
+strip_id_or_class: related_box
+strip_id_or_class: sponsored_links
+strip_id_or_class: hidden_ad
+strip_id_or_class: story_content_top
+strip_id_or_class: total_number
+strip_id_or_class: sort_order
+strip_id_or_class: subscribe_order
+
+strip://div[contains(@class,'ad_story')]
+
+test_url: http://www.stuff.co.nz/national/politics/3930344/PM-issues-challenge
 test_url: http://www.stuff.co.nz/entertainment/7045944/International-praise-for-Ladyhawke
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8568216..9adc3c5
@@ -1,3 +1,3 @@
-single_page_link: //iframe[@id='stumbleFrame']/@src\r
-\r
-test_url: www.stumbleupon.com/su/35V0wB/zouchmagazine.com/poetry-violet/
\ No newline at end of file
+single_page_link: //iframe[@id='tb-stumble-frame']/@src
+
+test_url: http://www.stumbleupon.com/su/35V0wB/zouchmagazine.com/poetry-violet/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 454e37b..9ba6eb7
@@ -1,17 +1,17 @@
-title: //*[@id='posts']/div[1]/h2\r
-author: //*[@id='posts']/div[1]/div[2]/span[2]/a\r
-date: //*[@class='date']\r
-body: //div[@class='body-lead']\r
-\r
-# take out the bit saying 'body'\r
-strip: //div[@class='body-lead']/div[@class='info-label']\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
+title: //*[@id='posts']/div[1]/h2
+author: //*[@id='posts']/div[1]/div[2]/span[2]/a
+date: //*[@class='date']
+body: //div[@class='body-lead']
+
+# take out the bit saying 'body'
+strip: //div[@class='body-lead']/div[@class='info-label']
+
+
+
+
+
+
+
+
+
 test_url: http://www.subtraction.com/2011/02/01/unnecessary-explanations
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4aa9410..74b8d45
@@ -1,18 +1,18 @@
-# 2012-12-04: complete rewrite after Süddeutsche.de relaunch - carlo@...\r
-\r
-single_page_link: //a[ contains( @href, "/2.220/" ) ]\r
-\r
-body: //article[@id="sitecontent"]/section[@class="body"]\r
-author: //address[@class="author"]\r
-date: //div[@class="header"]//h1//span[@class="updated"]\r
-wrap_in(small): //div[@class="footer"]\r
-wrap_in(i): //figcaption/h3\r
-dissolve: //figcaption//h3\r
-dissolve: //figure/div[@class="body"]\r
-dissolve: //figure/a\r
-\r
-strip: //figure[ not( contains(@class, "zoomimage" ) ) ]\r
-strip: //div[@data-onlineonly="true"]\r
-strip: //address[@class="author"]\r
-\r
+# 2012-12-04: complete rewrite after Süddeutsche.de relaunch - carlo@...
+
+single_page_link: //a[ contains( @href, "/2.220/" ) ]
+
+body: //article[@id="sitecontent"]/section[@class="body"]
+author: //address[@class="author"]
+date: //div[@class="header"]//h1//span[@class="updated"]
+wrap_in(small): //div[@class="footer"]
+wrap_in(i): //figcaption/h3
+dissolve: //figcaption//h3
+dissolve: //figure/div[@class="body"]
+dissolve: //figure/a
+
+strip: //figure[ not( contains(@class, "zoomimage" ) ) ]
+strip: //div[@data-onlineonly="true"]
+strip: //address[@class="author"]
+
 test_url: http://www.sueddeutsche.de/muenchen/mietshaus-am-gaertnerplatz-alles-muss-raus-1.1556693
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 13390e4..6d4594c
@@ -1,14 +1,14 @@
-title: //div[@class='story-details']/h1\r
-date: //span[@class='date-time']\r
-Author: substring-after(//p[@class='by-line'], 'By ')\r
-\r
-strip: //div[@class='videoThumbnails']\r
-strip: //div[@class='ad-square2-container']\r
-strip: //div[@class='homeDeliveryContainer5']\r
-\r
-strip: //div[@class='image-description']\r
-strip: //div[@id='internal-side-bar']\r
-\r
-strip: //span[@class='hide']\r
+title: //div[@class='story-details']/h1
+date: //span[@class='date-time']
+Author: substring-after(//p[@class='by-line'], 'By ')
+
+strip: //div[@class='videoThumbnails']
+strip: //div[@class='ad-square2-container']
+strip: //div[@class='homeDeliveryContainer5']
+
+strip: //div[@class='image-description']
+strip: //div[@id='internal-side-bar']
+
+strip: //span[@class='hide']
 strip: //div[@class='date']
 test_url: http://www.suntimes.com/technology/ihnatko/8816567-452/review-kindle-fire-is-no-ipad-killer-but-it-is-a-killer-device.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 02b5b8c..bc0a1ca
@@ -1,4 +1,14 @@
-# Ads\r
-strip_id_or_class: articlead\r
+body: //div[@id='article-content']
+author: //div[@id='article']//div[@class='byline']/p
 
-test_url: http://www.svd.se/nyheter/inrikes/manga-huggormsbitna-golfare_5004031.svd
\ No newline at end of file
+# Ads
+strip_id_or_class: articlead
+
+# Sharing
+strip_id_or_class: share
+
+prune: no
+
+test_url: http://www.svd.se/nyheter/inrikes/oppositionen-stoppar-skattesankning_8531228.svd
+test_url: http://www.svd.se/nyheter/inrikes/manga-huggormsbitna-golfare_5004031.svd
+test_url: http://www.svd.se/?service=rss&type=senastenytt
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/svt.se.txt b/inc/3rdparty/site_config/standard/svt.se.txt
new file mode 100755 (executable)
index 0000000..ba35f7d
--- /dev/null
@@ -0,0 +1,16 @@
+title: //article[@role='main']//h1
+body: //article[@role='main']
+strip: //aside
+replace_string(<noscript>): <div>
+replace_string(</noscript>): </div>
+strip_id_or_class: svtHide-No-Js
+strip_id_or_class: aside
+strip_id_or_class: Aside
+strip_id_or_class: hidden
+strip_id_or_class: Share
+tidy: no
+prune: no
+
+test_url: http://www.svt.se/ug/framtidsdrommar-om-jobb-blev-lackande-gifthal
+test_url: http://www.svt.se/nyheter/het-debatt-mellan-borg-och-andersson
+test_url: http://www.svt.se/nyheter/regionalt/svtsormland/sj-tag-evakuerades-efter-rokdrama
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index da6772a..24ba142
@@ -1,11 +1,18 @@
-title: //h1\r
-\r
-author: //a[contains(@href, '/sok/?')]/text()\r
-\r
-date: substring-after(//span[@class='date'], 'Publicerad ')\r
-\r
-body: //div[@class='two_column_left']\r
-strip_id_or_class: story\r
-strip: //div[@class='leadText saplo:lead']/h5\r
-
-test_url: http://www.sydsvenskan.se/kultur-och-nojen/-jag-vill-garna--stanna--
\ No newline at end of file
+title: //h1
+
+author: //a[contains(@href, '/sok/?')]/text()
+
+date: //meta[@name='bi3dPubDate']/@content
+
+body: (//div[contains(@class, 'slider_wrapper')])[1] | //div[@id='article_image' or @class='two_column_left']
+strip_id_or_class: story
+strip_id_or_class: article_body_ad
+strip: //div[@class='leadText saplo:lead']/h5
+
+replace_string(<br />): <br /><br />
+
+prune: no
+
+test_url: http://www.sydsvenskan.se/malmo/allt-jag-ager-ligger-pa-botten/
+test_url: http://www.sydsvenskan.se/kultur-och-nojen/-jag-vill-garna--stanna--
+test_url: http://www.sydsvenskan.se/rss.xml
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3109c0e..5bcfb9e
@@ -1,12 +1,12 @@
-title: //div[contains(@class, "post")]/h2\r
-\r
-author: //div[contains(@class, "post")]/p[position()=last()]/text()[1]\r
-\r
-date: //div[contains(@class, "post")]/p[1]\r
-\r
-body: //div[contains(@class, "post")]\r
-\r
-strip: //div[contains(@class, "post")]/h2[1]\r
-strip: //div[contains(@class, "post")]/p[1]\r
+title: //div[contains(@class, "post")]/h2
+
+author: //div[contains(@class, "post")]/p[position()=last()]/text()[1]
+
+date: //div[contains(@class, "post")]/p[1]
+
+body: //div[contains(@class, "post")]
+
+strip: //div[contains(@class, "post")]/h2[1]
+strip: //div[contains(@class, "post")]/p[1]
 strip: //div[contains(@class, "post")]/p[position()=last()]
 test_url: http://www.symmetrymagazine.org/breaking/?p=12784
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c3e3497..e058032
@@ -1,15 +1,15 @@
-title: //h1\r
-body://div[@class='drucken']\r
-author: substring-after(//span[@class='autor'], 'Von ')\r
-author: //span[@class='autor']\r
-\r
-single_page_link://a[contains(@href, '/drucken/')]\r
-convert_double_br_tags:yes\r
-\r
-dissolve://div[@class='vorspann']\r
-\r
-strip://h1\r
-strip_id_or_class: klassifizierung\r
-strip_id_or_class: source\r
+title: //h1
+body://div[@class='drucken']
+author: substring-after(//span[@class='autor'], 'Von ')
+author: //span[@class='autor']
+
+single_page_link://a[contains(@href, '/drucken/')]
+convert_double_br_tags:yes
+
+dissolve://div[@class='vorspann']
+
+strip://h1
+strip_id_or_class: klassifizierung
+strip_id_or_class: source
 strip_id_or_class: autor
 test_url: http://sz-magazin.sueddeutsche.de/texte/anzeigen/37567
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/sz.de.txt b/inc/3rdparty/site_config/standard/sz.de.txt
new file mode 100755 (executable)
index 0000000..f67637d
--- /dev/null
@@ -0,0 +1,18 @@
+# 2012-12-04: complete rewrite after Süddeutsche.de relaunch - carlo@...
+
+single_page_link: //a[ contains( @href, "/2.220/" ) ]
+
+body: //article[@id="sitecontent"]/section[@class="body"]
+author: //address[@class="author"]
+date: //div[@class="header"]//h1//span[@class="updated"]
+wrap_in(small): //div[@class="footer"]
+wrap_in(i): //figcaption/h3
+dissolve: //figcaption//h3
+dissolve: //figure/div[@class="body"]
+dissolve: //figure/a
+
+strip: //figure[ not( contains(@class, "zoomimage" ) ) ]
+strip: //div[@data-onlineonly="true"]
+strip: //address[@class="author"]
+
+test_url: http://sz.de/1.1556693
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8ce8a90..be76cd0
@@ -1,23 +1,23 @@
-title://h1[1]\r
-\r
-author: substring-after(//em, 'Von ')\r
-author:string('tagesschau.de')\r
-\r
-date:substring-after(//div[@class='standDatum'], 'Stand: ')\r
-\r
-body://div[contains(@class, 'article')] | //div[contains(@class, 'centerCol')]\r
-\r
-strip://h1[1]\r
-strip: //div[contains(@class, 'directLinks')]\r
-strip: //div[contains(@class, 'zitatBox')]\r
-strip: //div[contains(@class, 'teaserBox metaBlock')]\r
-strip: //*[contains(@class, 'inv')]\r
-strip: //span[@class='imgSubline']\r
-strip: //*[contains(@class, 'topline')][1]\r
-strip: //div[@id='rightCol'][1]\r
-strip: //div[@id="footer"][1]\r
-strip: //div[@class="fPlayer"] \r
-strip: //div[@id='seitenanfang']\r
-strip: //div[@class='standDatum']\r
+title://h1[1]
+
+author: substring-after(//em, 'Von ')
+author:string('tagesschau.de')
+
+date:substring-after(//div[@class='standDatum'], 'Stand: ')
+
+body://div[contains(@class, 'article')] | //div[contains(@class, 'centerCol')]
+
+strip://h1[1]
+strip: //div[contains(@class, 'directLinks')]
+strip: //div[contains(@class, 'zitatBox')]
+strip: //div[contains(@class, 'teaserBox metaBlock')]
+strip: //*[contains(@class, 'inv')]
+strip: //span[@class='imgSubline']
+strip: //*[contains(@class, 'topline')][1]
+strip: //div[@id='rightCol'][1]
+strip: //div[@id="footer"][1]
+strip: //div[@class="fPlayer"] 
+strip: //div[@id='seitenanfang']
+strip: //div[@class='standDatum']
 strip: //em
 test_url: http://www.tagesschau.de/ausland/wahlkampffrankreich102.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index bfe841c..47a6ffa
@@ -1,5 +1,5 @@
-title: //span[@class="entry-title"]\r
-author: //*[contains(@class, 'item')]/p/a/text()\r
-date: substring-after(//*[contains(@class, 'item')]/p/text()[3], 'Posted:')\r
+title: //span[@class="entry-title"]
+author: //*[contains(@class, 'item')]/p/a/text()
+date: substring-after(//*[contains(@class, 'item')]/p/text()[3], 'Posted:')
 body: //div[@class="entry-content"]
 test_url: http://www.tampabay.com/news/salvador-dali-leaders-want-st-petersburg-city-council-to-put-brakes-on/1236349
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 13de70e..e1e7942
@@ -1,4 +1,4 @@
-title: //h3[@class="storytitle"]\r
-body: //div[@class="post"]\r
+title: //h3[@class="storytitle"]
+body: //div[@class="post"]
 strip: //div[@class="blurbBox"]
 test_url: http://taptaptap.com/blog/apples-precedents-vs-apples-guidelines/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 7777336..f3234f3
@@ -1,15 +1,11 @@
-title: //span[@id='ctl00_ctl00_MainContent_MainContent_RecipeImage1_lblRecipeTitle']\r
-body: //div[@id='RDNEW']//*[@class='Recipe-imgCon' or @class='Recipe-Intro' or @class='recipeDetails']\r
-strip_id_or_class: rec-ExRightPanel\r
-strip_id_or_class: divCarousel\r
-strip_id_or_class: preptimeOuter\r
-strip_id_or_class: cooktimeOuter\r
-strip_id_or_class: durationOuter\r
-strip_id_or_class: divImageFooter\r
-strip_id_or_class: microFormatFnIngred\r
-strip: //span[@class='Recipe-Intro']//*[@class='link' or @class='rating']\r
-\r
-prune: no\r
-tidy: no\r
-
-test_url: http://www.tasteofhome.com/recipes/Grinch-Punch
\ No newline at end of file
+title: //div[@id='ctl00_MainContent_ctl00_Div1']//h2
+body: //div[@id='ctl00_MainContent_ctl00_Div1']
+
+single_page_link: //div[contains(@class, 'recipeHeader')]//a[contains(@href, '/print')]
+
+strip_image_src: tohPrintL.png
+
+prune: no
+
+test_url: http://www.tasteofhome.com/recipes/Grinch-Punch
+test_url: http://www.tasteofhome.com/recipes/lactose-free-chocolate-chip-cookies
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6e84527..cf85366
@@ -1,8 +1,8 @@
-date: //div[@class='secthead']\r
-body: //div[@class='sectbody']\r
-title: concat(//div[@class='sectbody']/h4,': ',//div[@class='sectbody']/h1)\r
-author: //span[@class='author']\r
-strip: //p[@class='caption']\r
-strip_id_or_class: rack\r
+date: //div[@class='secthead']
+body: //div[@class='sectbody']
+title: concat(//div[@class='sectbody']/h4,': ',//div[@class='sectbody']/h1)
+author: //span[@class='author']
+strip: //p[@class='caption']
+strip_id_or_class: rack
 
 test_url: http://www.taz.de/Protestbewegung-Occupy/!80188/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index fbe94fa..558dc9c
@@ -1,5 +1,5 @@
-body: //div[@id='centercontent']\r
-strip: //div[@id='rightcontent']\r
-date: substring-before( //div[@id='cats'], '·')\r
+body: //div[@id='centercontent']
+strip: //div[@id='rightcontent']
+date: substring-before( //div[@id='cats'], '·')
 title: //h1
 test_url: http://www.tbray.org/ongoing/When/201x/2012/03/04/Mobile-Money
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tcmanila.tk.txt b/inc/3rdparty/site_config/standard/tcmanila.tk.txt
new file mode 100755 (executable)
index 0000000..f6032ec
--- /dev/null
@@ -0,0 +1,7 @@
+title: //h2
+body: //div[@class="post_content"]
+author: //span[@class="fn"]
+date: //time[@class="updated"]
+strip_comments: //yes
+footnotes: //yes
+test_url: http://tcmanila.tk/post/29189064358/my-2012-roadmap-is-almost-complete-look-at-the
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 765224e..4873b50
@@ -1,4 +1,4 @@
-title: //div[@id='main-content']/h1\r
-body: //div[@id='main-content']\r
+title: //div[@id='main-content']/h1
+body: //div[@id='main-content']
 strip: //div[@id='main-content']/h1
 test_url: http://www.tcng.org/index.php/blog/view/teaching-basic-health-cutting-down-costs
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b6d17da..da19862
@@ -1,4 +1,4 @@
-title: //h1[@class='storyheadline']\r
-body: //div[@class='storytext']\r
+title: //h1[@class='storyheadline']
+body: //div[@class='storytext']
 strip: //strong
 test_url: http://tech.fortune.cnn.com/2011/03/17/why-startups-dont-go-public-anymore/?section=money_topstories&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fmoney_topstories+%28Top+Stories%29
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tech.gilt.com.txt b/inc/3rdparty/site_config/standard/tech.gilt.com.txt
new file mode 100755 (executable)
index 0000000..ab56460
--- /dev/null
@@ -0,0 +1,5 @@
+title: //div[@class="title"]/h1
+title: //div[@class="caption"]/h1
+author: substring-after(//div[@class="metadata"]/div[@class="date"]/a[2], 'by ')
+date: //div[@class="metadata"]/div[@class="date"]/a
+test_url: http://tech.gilt.com/post/46359463184/26-3-13-todays-noon-outage-and-what-were-doing-to
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f7228eb..75126f9
@@ -1,11 +1,11 @@
-title://h1[contains(@id,'artibodyTitle')]\r
-\r
-date://span[contains(@id,'pub_date')]\r
-\r
-body://div[contains(@id,'artibody')]\r
-\r
-strip://div[contains(@class,'otherContent')]\r
-\r
-next_page_link://p[@class='page']/a[contains(.,'下一页')]\r
+title://h1[contains(@id,'artibodyTitle')]
+
+date://span[contains(@id,'pub_date')]
+
+body://div[contains(@id,'artibody')]
+
+strip://div[contains(@class,'otherContent')]
+
+next_page_link://p[@class='page']/a[contains(.,'下一页')]
 
 test_url: http://tech.sina.com.cn/mobile/n/2012-03-22/07476863046.shtml
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f436acb..1509c46
@@ -1,18 +1,18 @@
-body: //div[contains(@class, 'media-container') or contains(@class, 'body-copy')]\r
-\r
-author: //a[@class="name"]\r
-\r
-date: //div[@class="post-time"]\r
-\r
-title: //h1[@class="headline"]\r
-strip_id_or_class: module-crunchbase\r
-\r
-# The following is for the mobile site\r
-body: //div[@id="singlentry"]\r
-author: substring-after(//span[@class="single-post-meta-top"],'rsaquo; ')\r
-date: substring-before(//div[@class="single-post-meta-top"],' @')\r
-title: //a[@class="sh2"]\r
-\r
-prune: no\r
-\r
+body: //div[contains(@class, 'media-container') or contains(@class, 'body-copy')]
+
+author: //a[@class="name"]
+
+date: //div[@class="post-time"]
+
+title: //h1[@class="headline"]
+strip_id_or_class: module-crunchbase
+
+# The following is for the mobile site
+body: //div[@id="singlentry"]
+author: substring-after(//span[@class="single-post-meta-top"],'rsaquo; ')
+date: substring-before(//div[@class="single-post-meta-top"],' @')
+title: //a[@class="sh2"]
+
+prune: no
+
 test_url: http://techcrunch.com/2011/10/18/apples-insanely-great-q1-2012/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 727f370..7db2f95
@@ -1,12 +1,12 @@
-body: //div[@class='story']\r
-title: //div[@class='story']/h1\r
-strip: //div[@class='story']/h1\r
-\r
-author: //div[@class='details']/p[contains(., 'by ')]/a\r
-date: //p[@class='storydate']\r
-\r
-strip: //p[a[contains(., 'Leave a Comment')]]\r
-strip_id_or_class: share\r
-strip_id_or_class: maincolumn_head\r
+body: //div[@class='story']
+title: //div[@class='story']/h1
+strip: //div[@class='story']/h1
+
+author: //div[@class='details']/p[contains(., 'by ')]/a
+date: //p[@class='storydate']
+
+strip: //p[a[contains(., 'Leave a Comment')]]
+strip_id_or_class: share
+strip_id_or_class: maincolumn_head
 strip_id_or_class: maincolmod
 test_url: http://www.techdirt.com/articles/20120112/17455117394/sega-gets-it-right-about-sopa-its-time-hard-reset-copyright-law-congress.shtml
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/techhive.com.txt b/inc/3rdparty/site_config/standard/techhive.com.txt
new file mode 100755 (executable)
index 0000000..29720b0
--- /dev/null
@@ -0,0 +1,18 @@
+title: //div[@class='articleHead']//h1
+author: //div[@class="author-name"]/a[1]
+body: //div[@class="main"]
+
+# remove 'From the Lab' and 'Recent posts' text
+strip: //div[@class='blogLabel']
+
+# remove byline and meta info
+strip: //div[@class="article-meta"]
+strip: //div[@class="author-info"]
+
+#strip tags and categories
+strip: //div[@class="department"]
+
+#strip product cap links
+strip: //div[@class="cap-main"]
+strip: //div[@id="compare-lede"]
+test_url: http://www.techhive.com/article/2010549/up-close-with-blackberry-10.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8644e00..0b4bfbd
@@ -1,3 +1,3 @@
-single_page_link_in_feed: //b/a\r
-\r
+single_page_link_in_feed: //b/a
+
 test_url_feed: http://www.techmeme.com/feed.xml
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index cc26ee4..d871b60
@@ -1,8 +1,8 @@
-title: //h2\r
-author: //meta[@name="author"]/@content\r
-date: //h3\r
-body: //div[@class="postBody"]\r
-strip: //h1\r
-strip: //h2\r
-strip: //h3\r
+title: //h2
+author: //meta[@name="author"]/@content
+date: //h3
+body: //div[@class="postBody"]
+strip: //h1
+strip: //h2
+strip: //h3
 test_url: http://technicallyjordan.tumblr.com/post/22914659822/facebook-to-launch-app-store-knock-off
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/technologizer.com.txt b/inc/3rdparty/site_config/standard/technologizer.com.txt
new file mode 100755 (executable)
index 0000000..179bf5a
--- /dev/null
@@ -0,0 +1,5 @@
+next_page_link: //a[contains(., 'NEXT PAGE')]
+# following::node() selects text nodes too whereas following::* selects only elements.
+strip: //span[@class='pageo']/following::node()
+strip: //span[@class='pageo']
+test_url: http://technologizer.com/2010/03/08/the-secret-origin-of-windows/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 41f21d4..d405eb1
@@ -1,16 +1,16 @@
-title: //header[@class='article-meta']/h1\r
-title: substring-before(//title, '|')\r
-\r
-body: //section[contains(@class, 'body')]\r
-\r
-# Author & Date for News and Featured Stories\r
-author: //ul[@class='byline']/li/a\r
-author: substring-before(substring-after(//ul[@class='byline']/li, 'By '), ' on')\r
-date: substring-after(//ul[@class='byline']/li, 'on ')\r
-\r
-# Author & Date for "Views"\r
-author: //div[@class='view-byline']/div[@class='meta']/h2[1]\r
-date: //div[@class='view-byline']/div[@class='meta']/h2[2]\r
-\r
-next_page_link: //section[@class='pagination']/a[contains(@class, 'continue')]\r
+title: //header[@class='article-meta']/h1
+title: substring-before(//title, '|')
+
+body: //section[contains(@class, 'body')]
+
+# Author & Date for News and Featured Stories
+author: //ul[@class='byline']/li/a
+author: substring-before(substring-after(//ul[@class='byline']/li, 'By '), ' on')
+date: substring-after(//ul[@class='byline']/li, 'on ')
+
+# Author & Date for "Views"
+author: //div[@class='view-byline']/div[@class='meta']/h2[1]
+date: //div[@class='view-byline']/div[@class='meta']/h2[2]
+
+next_page_link: //section[@class='pagination']/a[contains(@class, 'continue')]
 test_url: http://www.technologyreview.com/news/427567/facebooks-telescope-on-human-behavior/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 89ed834..8e1aa96
@@ -1,7 +1,7 @@
-body: //div[@class="post"]\r
-\r
-strip: //div[@class="post-meta"]\r
-strip: //div[@id="socialicons"]\r
-strip: //div[@id="authorbox"]\r
+body: //div[@class="post"]
+
+strip: //div[@class="post-meta"]
+strip: //div[@id="socialicons"]
+strip: //div[@id="authorbox"]
 
 test_url: http://techpinions.com/why-google-and-microsoft-hate-siri/3572
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ed92a97..0a0ca61
@@ -1,12 +1,12 @@
-# Title without news/reviews etc. appended\r
-title: //div[@id='subColumn1Pad']/div[1][@class='article']/div[1][@class='articleHead']/h1\r
-\r
-# Remove home link\r
-strip: //div[@id='page_logo']/a\r
-\r
-# Remove utilities\r
-strip: //*[(@id = "utilities")]\r
-\r
-# Remove comments link\r
+# Title without news/reviews etc. appended
+title: //div[@id='subColumn1Pad']/div[1][@class='article']/div[1][@class='articleHead']/h1
+
+# Remove home link
+strip: //div[@id='page_logo']/a
+
+# Remove utilities
+strip: //*[(@id = "utilities")]
+
+# Remove comments link
 strip: //div[@id='subColumn1Pad']/div[1][@class='article']/div[1][@class='articleHead']/p[@class='tiny']
 test_url: http://www.techradar.com/news/television/sky-to-rebrand-living-as-sky-living-903105
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ff3cd06..91b5baf
@@ -1,9 +1,9 @@
-body: //div[@id='artikelKolom']\r
-strip: //div[@class='broodMediaBox']/div[@class='docbox' or @class='artBannerWrapper']\r
-strip: //div[@id='artikeltoolbar']\r
-strip: //div[@class='reactiebalk artspacer' or @class='bannercenter clearfix artspacer']\r
-strip: //div[@id='artikelKolomRechts' or @id='TMGTweetWidget']\r
-tidy: no\r
-prune: no\r
+body: //div[@id='artikelKolom']
+strip: //div[@class='broodMediaBox']/div[@class='docbox' or @class='artBannerWrapper']
+strip: //div[@id='artikeltoolbar']
+strip: //div[@class='reactiebalk artspacer' or @class='bannercenter clearfix artspacer']
+strip: //div[@id='artikelKolomRechts' or @id='TMGTweetWidget']
+tidy: no
+prune: no
 
 test_url: http://www.telegraaf.nl/binnenland/10275097/__Identiteit_man_in_sloot_onbekend__.html?cid=rss
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e1faf23..8dcdb42
@@ -1,10 +1,10 @@
-body: //div[@class='byline' or @id='storyEmbSlide' or @id='mainBodyArea']\r
-strip: //p[@class='comments']\r
-strip: //div[@id='storyEmbSlide']//div[contains(@class, "hide")]\r
-strip: //div[@id='tmg-related-links' or @id='outbrain-related-links' or @id='onespot-related-links']\r
-strip: //p[@class='bbpTweet']/span[@class='timestamp']\r
-strip: //p[@class='bbpTweet']/span[@class='metadata']//img\r
-tidy: no\r
-prune: no\r
+body: //div[@class='byline' or @id='storyEmbSlide' or @id='mainBodyArea']
+strip: //p[@class='comments']
+strip: //div[@id='storyEmbSlide']//div[contains(@class, "hide")]
+strip: //div[@id='tmg-related-links' or @id='outbrain-related-links' or @id='onespot-related-links']
+strip: //p[@class='bbpTweet']/span[@class='timestamp']
+strip: //p[@class='bbpTweet']/span[@class='metadata']//img
+tidy: no
+prune: no
 
 test_url: http://www.telegraph.co.uk/news/worldnews/europe/ireland/8663451/Is-Ireland-divorcing-from-the-Catholic-Church.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thanhnien.com.vn.txt b/inc/3rdparty/site_config/standard/thanhnien.com.vn.txt
new file mode 100755 (executable)
index 0000000..596ecc9
--- /dev/null
@@ -0,0 +1,4 @@
+body://div[@id="print-news"]
+strip://a
+strip://span[@class="date-line"]
+test_url: http://www.thanhnien.com.vn/pages/20121006/hon-90-trieu-usd-nang-cap-do-thi-can-tho.aspx
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/the-magazine.org.txt b/inc/3rdparty/site_config/standard/the-magazine.org.txt
new file mode 100755 (executable)
index 0000000..0886465
--- /dev/null
@@ -0,0 +1,3 @@
+tidy: no
+
+test_url: http://the-magazine.org/1/alone-together-again
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/theage.com.au.txt b/inc/3rdparty/site_config/standard/theage.com.au.txt
new file mode 100755 (executable)
index 0000000..ea27c31
--- /dev/null
@@ -0,0 +1,5 @@
+author: //h3[@class='authorName']
+date: //time
+body: //div[@class='articleBody']
+strip_id_or_class: adspot
+test_url: http://www.theage.com.au/victoria/top-cops-warns-outlaw-bikies-we-have-a-gang-too-20130331-2h1l8.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/theamericanscholar.org.txt b/inc/3rdparty/site_config/standard/theamericanscholar.org.txt
new file mode 100755 (executable)
index 0000000..38b9667
--- /dev/null
@@ -0,0 +1,13 @@
+# Article Metadata
+title: //meta[@property="og:title"]/@content
+author: substring-after(//h3, 'By ')
+date: //h4/a[2]
+
+# Content Pruning
+strip: //h4
+strip: //a[@id="print_button"]
+strip: //p[@class="excerpt"]
+strip: //h3
+strip: //div[@class="caption"]
+strip: //center/a/img
+test_url: http://theamericanscholar.org/too-big-to-fail-and-too-risky-to-exist/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3bd555f..caa5ae0
@@ -1,3 +1,3 @@
-# Remove home link\r
+# Remove home link
 strip: //div[@id='blog-title']/a
 test_url: http://theappleblog.com/2010/10/21/the-new-macbook-air-is-underwhelming/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 267fd39..aa41b15
@@ -1,18 +1,20 @@
-title: //div[@id='article']/h1\r
-title: //h1\r
-\r
-body: //div[@class='articleText']\r
-body: //div[@class='articleContent']\r
-body: //div[@id='article']\r
-date: //*[contains(@class, 'date')]\r
-author: //div[@id='profile']//*[@class='authors']//a[1]\r
-author: //*[@class='author']/span\r
-prune: no\r
-\r
-strip: //div[@class='moreOnBoxWithImages']\r
-\r
-single_page_link: //a[@class='print']\r
-\r
-test_url: http://www.theatlantic.com/technology/archive/2011/04/want-to-see-how-crazy-a-bot-run-market-can-be/237773/\r
-test_url: http://www.theatlantic.com/magazine/archive/2007/11/the-autumn-of-the-multitaskers/6342/\r
+title: //div[contains(@class, 'articleHead')]//h1
+
+body: //div[@class='articleText']
+body: //div[@class='articleContent']
+body: //div[@id='article']
+date: //*[contains(@class, 'date')]
+author: //div[@id='profile']//*[@class='authors']//a[1]
+author: //*[@class='author']/span
+prune: no
+
+strip: //div[@class='moreOnBoxWithImages']
+strip: //p[contains(., 'This article available online at:')]
+strip: //p[contains(., 'This article available online at:')]/following::*
+strip: //div[@class='earthbox']
+
+single_page_link: //article//a[contains(@class, 'print')]
+
+test_url: http://www.theatlantic.com/technology/archive/2011/04/want-to-see-how-crazy-a-bot-run-market-can-be/237773/
+test_url: http://www.theatlantic.com/magazine/archive/2007/11/the-autumn-of-the-multitaskers/6342/
 test_url: http://www.theatlantic.com/entertainment/archive/2012/04/30-rock-live-a-funny-reminder-of-why-sitcoms-arent-shot-live-anymore/256447/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/theatlanticcities.com.txt b/inc/3rdparty/site_config/standard/theatlanticcities.com.txt
new file mode 100755 (executable)
index 0000000..880f207
--- /dev/null
@@ -0,0 +1,17 @@
+# To administrator:
+# Please replace the hostname with "*.theatlanticcities.com"
+
+# This filter is tested on:
+# http://m.theatlanticcities.com/arts-and-lifestyle/2012/04/invisible-borders-define-american-culture/1839/
+# http://www.theatlanticcities.com/housing/2012/11/chinas-holdouts/3981/
+# http://www.theatlanticcities.com/arts-and-lifestyle/2012/12/christmas-time-here/4133/
+
+title://h1
+author: //ul[@class='meta']/li/a
+date: //ul[@class='meta']/li/following-sibling::li
+body://article[@class='post']
+
+strip://h1
+strip://ul[@class='meta']
+strip://div[@class='newsletter-slug']
+test_url: http://www.theatlanticcities.com/arts-and-lifestyle/2012/12/christmas-time-here/4133/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 64df90c..b74442d
@@ -1,7 +1,7 @@
-title: //meta[@name='og:title']/@content\r
-date: //meta[@name='created']/@content\r
-body: //div[@class="StoryBody" or @class="storyTeaser"]\r
-\r
-replace_string(<p></p>): <br /><br />\r
-\r
+title: //meta[@name='og:title']/@content
+date: //meta[@name='created']/@content
+body: //div[@class="StoryBody" or @class="storyTeaser"]
+
+replace_string(<p></p>): <br /><br />
+
 test_url: http://www.thebostonchannel.com/slideshow/news/28210648/detail.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c3c2050..807e7da
@@ -1,10 +1,10 @@
-title: //h2[contains(@class, 'page-title')]\r
-body: //div[@id='content']/div[contains(@id, 'node-')]/div[@class='content']\r
-\r
-prune: no\r
-\r
-strip: //div[contains(@class, 'node-book')]//a[@class='button']\r
-\r
-single_page_link: //a[@class='tool-print']\r
+title: //h2[contains(@class, 'page-title')]
+body: //div[@id='content']/div[contains(@id, 'node-')]/div[@class='content']
+
+prune: no
+
+strip: //div[contains(@class, 'node-book')]//a[@class='button']
+
+single_page_link: //a[@class='tool-print']
 
 test_url: http://thebrowser.com/interviews/yotam-ottolenghi-on-his-favourite-cookery-books
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9ef4ed8..13fa35a
@@ -1,10 +1,10 @@
-title: substring-before(//title, ' &ndash; ') \r
-author:string('Shawn')\r
-date: //*/time/@pubdate\r
-\r
-\r
-strip: //header\r
-strip: //div[@id='prev_next']\r
-strip: //div[@id='masthead']\r
-\r
+title: substring-before(//title, ' &ndash; ') 
+author:string('Shawn')
+date: //*/time/@pubdate
+
+
+strip: //header
+strip: //div[@id='prev_next']
+strip: //div[@id='masthead']
+
 test_url: http://thecarton.net/2012/12/20/imdb
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 24ebbba..e255e6a
@@ -1,24 +1,24 @@
-#keep all body text\r
-prune: no\r
-\r
-#title, body, metadata\r
-title: //div[@class='story_header']/h1\r
-body: //div[@id='content']\r
-author: substring-after(//span[@class='byline'], "by ")\r
-author: substring-after(//span[@class='byline'], "By ")\r
-author: //span[@class='byline']\r
-date: //span[@class='date']\r
-\r
-#formatting\r
-convert_double_br_tags: yes\r
-dissolve: //div[@class='slides_full']/ul/li\r
-\r
-# cleanup\r
-strip: //a[@id='story_note']\r
-strip: //br\r
-strip: //div[@class='intro']\r
-strip: //div[@class='share-block']\r
-strip: //div[@class='sidebar-social']\r
-strip: //div[@class='top-stories']\r
-strip: //div[@class='prevnext']\r
+#keep all body text
+prune: no
+
+#title, body, metadata
+title: //div[@class='story_header']/h1
+body: //div[@id='content']
+author: substring-after(//span[@class='byline'], "by ")
+author: substring-after(//span[@class='byline'], "By ")
+author: //span[@class='byline']
+date: //span[@class='date']
+
+#formatting
+convert_double_br_tags: yes
+dissolve: //div[@class='slides_full']/ul/li
+
+# cleanup
+strip: //a[@id='story_note']
+strip: //br
+strip: //div[@class='intro']
+strip: //div[@class='share-block']
+strip: //div[@class='sidebar-social']
+strip: //div[@class='top-stories']
+strip: //div[@class='prevnext']
 test_url: http://www.thedaily.com/page/2012/01/09/010912-news-college-costs-1-5/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4781c65..f5e938a
@@ -1,7 +1,7 @@
-title: //h1\r
-body: //article/div[contains(@class, 'article-body')]\r
-#strip: //header/hgroup/h1\r
-strip: //footer[@class='storyFooter']\r
-single_page_link: //li[@class='print']/a\r
-prune: no\r
+title: //h1
+body: //article/div[contains(@class, 'article-body')]
+#strip: //header/hgroup/h1
+strip: //footer[@class='storyFooter']
+single_page_link: //li[@class='print']/a
+prune: no
 test_url: http://www.thedailybeast.com/articles/2010/04/06/how-mastercard-predicts-divorce.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0f15558..a83a6cf
@@ -1,14 +1,14 @@
-# Remove duplicated title\r
-strip: //div[@id='content']/div[1][@class='full_intro']/h2\r
-\r
-# Remove links, ads etc.\r
-strip: //*[(@class= "aside")]\r
-\r
-# Remove the  date and add it to the date published field in Instapaper\r
-strip: //div[@class="date"]\r
-date: //div[@class="date"]\r
-\r
-# There is no byline on The Daily Mash.\r
-\r
-convert_double_br_tags: yes\r
+# Remove duplicated title
+strip: //div[@id='content']/div[1][@class='full_intro']/h2
+
+# Remove links, ads etc.
+strip: //*[(@class= "aside")]
+
+# Remove the  date and add it to the date published field in Instapaper
+strip: //div[@class="date"]
+date: //div[@class="date"]
+
+# There is no byline on The Daily Mash.
+
+convert_double_br_tags: yes
 test_url: http://www.thedailymash.co.uk/index.php?option=com_content&task=view&id=4994&Itemid=81&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+thedailymash+%28The+Daily+Mash.+It%27s+news+to+us.%29
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thedisneyblog.com.txt b/inc/3rdparty/site_config/standard/thedisneyblog.com.txt
new file mode 100755 (executable)
index 0000000..57b3254
--- /dev/null
@@ -0,0 +1,7 @@
+title: //h1[contains(@class, 'entry-title')]
+author: //span[contains(@class, 'author vcard')]
+date: //span[@class = 'entry-date']
+body: //div[@class='entry-content']
+strip_id_or_class: bottomcontainerBox
+strip_id_or_class: lightsocial_container
+test_url: http://thedisneyblog.com/2012/11/17/videopolis-one-woman-disney-musical-beauty-and-the-beast/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/theeuropean-magazine.com.txt b/inc/3rdparty/site_config/standard/theeuropean-magazine.com.txt
new file mode 100755 (executable)
index 0000000..a19bae1
--- /dev/null
@@ -0,0 +1,17 @@
+# Tested on:
+# http://theeuropean-magazine.com/352-dyson-george/353-evolution-and-innovation
+# http://theeuropean-magazine.com/522-casertano-stefano/919-morsi-and-the-future-of-egypt
+
+title://h2[@class='article-title']
+author:substring-before(substring-after(//p[@class='article-meta'], 'by'), '&mdash;')
+date:substring-after(//p[@class='article-meta'], '&mdash;')
+body://div[@class='article']
+
+wrap_in(strong)://p[@class='article-teaser']
+move_into(//div[@class='article-head'])://li/img
+
+strip://h2[@class='article-title']
+strip://p[@class='article-meta']
+strip://div[@class='copyright']
+strip://div[@class='opinions-of-readers']
+test_url: http://theeuropean-magazine.com/522-casertano-stefano/919-morsi-and-the-future-of-egypt
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thegamedesignforum.com.txt b/inc/3rdparty/site_config/standard/thegamedesignforum.com.txt
new file mode 100755 (executable)
index 0000000..849ede7
--- /dev/null
@@ -0,0 +1,14 @@
+## ERROR: Removes all images. Please fix, have no idea why (bad HTML?)
+
+title: //h1[@class='featuretitle']
+body: //div[@id='nobordercontentarea']
+
+# remove Twitter badge
+strip: //img[@alt='Follow tgdfweb on Twitter']
+
+# fix for headers not showing for some reason
+wrap_in(h2): //h2[@class='sectionheader']
+dissolve: //h2[@class='sectionheader']
+
+tidy: yes
+test_url: http://thegamedesignforum.com/features/acceleration_flow_1.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index fae0fb2..da1c84f
@@ -1,41 +1,41 @@
-title: //h1[@id="headline"]\r
-author: //div[contains(@class, "editorial-byline-author")]/a\r
-date: substring-after(//div[contains(@class, "editorial-byline-meta")], " | ")\r
-\r
-# The article body contains a mix or article and non-article elements, so lot of manual tweaks are needed\r
-body: //div[@id="template"]\r
-strip_id_or_class: editorial-byline-pic\r
-strip_id_or_class: editorial-byline\r
-strip_id_or_class: headline\r
-\r
-# Include the leadin paragraph in the body text, but remove quotes because they're out of context\r
-dissolve: //div[contains(@id, "leadin")]\r
-strip_id_or_class: pullquote\r
-\r
-# Image captions removed because they're confusing in body text\r
-strip_id_or_class: image-caption-content\r
-\r
-# Remove header and footer\r
-strip_id_or_class: header\r
-strip_id_or_class: footer\r
-\r
-# Remove the hidden logo that seems to be used to cause Facebook to show the logo instead of a random article image\r
-strip: /html/body/span[contains(@style, "display: none")]\r
-\r
-# Remove search box\r
-strip_id_or_class: searchContainer\r
-strip: //div[contains(@class, "searchInstruction")]\r
-strip: //div[contains(@class, "searchResults")]/h4\r
-\r
-# Remove the 'Letters to the Editor' section\r
-strip_id_or_class: letter-text\r
-strip_id_or_class: letter-from\r
-strip_id_or_class: letter-date\r
-\r
-# Remove Like/Tweet links \r
-strip_id_or_class: social-tab\r
-\r
-# Remove 'divider' which causes an inexplicable slash to appear in the article body\r
-strip_id_or_class: divider\r
+title: //h1[@id="headline"]
+author: //div[contains(@class, "editorial-byline-author")]/a
+date: substring-after(//div[contains(@class, "editorial-byline-meta")], " | ")
+
+# The article body contains a mix or article and non-article elements, so lot of manual tweaks are needed
+body: //div[@id="template"]
+strip_id_or_class: editorial-byline-pic
+strip_id_or_class: editorial-byline
+strip_id_or_class: headline
+
+# Include the leadin paragraph in the body text, but remove quotes because they're out of context
+dissolve: //div[contains(@id, "leadin")]
+strip_id_or_class: pullquote
+
+# Image captions removed because they're confusing in body text
+strip_id_or_class: image-caption-content
+
+# Remove header and footer
+strip_id_or_class: header
+strip_id_or_class: footer
+
+# Remove the hidden logo that seems to be used to cause Facebook to show the logo instead of a random article image
+strip: /html/body/span[contains(@style, "display: none")]
+
+# Remove search box
+strip_id_or_class: searchContainer
+strip: //div[contains(@class, "searchInstruction")]
+strip: //div[contains(@class, "searchResults")]/h4
+
+# Remove the 'Letters to the Editor' section
+strip_id_or_class: letter-text
+strip_id_or_class: letter-from
+strip_id_or_class: letter-date
+
+# Remove Like/Tweet links 
+strip_id_or_class: social-tab
+
+# Remove 'divider' which causes an inexplicable slash to appear in the article body
+strip_id_or_class: divider
 
 test_url: http://www.theglobalmail.org/feature/tiramisu-time-in-pyongyang/88/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 90634a0..750f847
@@ -1,5 +1,5 @@
-single_page_link: //div[contains(@class, 'pagination')]//a[contains(@title, 'ingle page')]\r
-tidy: no\r
-prune: no\r
+single_page_link: //div[contains(@class, 'pagination')]//a[contains(@title, 'ingle page')]
+tidy: no
+prune: no
 
 test_url: http://www.theglobeandmail.com/report-on-business/rob-magazine/how-a-novice-miner-survived-a-summer-in-the-klondike/article2345350/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thegreatdiscontent.com.txt b/inc/3rdparty/site_config/standard/thegreatdiscontent.com.txt
new file mode 100755 (executable)
index 0000000..12442b4
--- /dev/null
@@ -0,0 +1,6 @@
+title: //h1[@id='headline']
+author: substring-after(//section[@class="credits"]/ul/li[1],"Interview by ")
+date: //time[@pubdate]
+body: //article[@class='interview']
+strip: //article[@class='interview']/footer
+test_url: http://thegreatdiscontent.com/jeffrey-zeldman
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/theguardian.com.txt b/inc/3rdparty/site_config/standard/theguardian.com.txt
new file mode 100755 (executable)
index 0000000..c803e4e
--- /dev/null
@@ -0,0 +1,13 @@
+title: //div[@id='main-article-info']//h1
+body: //div[@id='article-wrapper']
+date: //li[@class='publication']//time[@pubdate] | //li[@class='publication']//data[@pubdate]
+strip: //div[contains(@class, 'email-subscription')]
+strip: //div[contains(@class, 'kindleWidget')]
+#strip: //a[not(text())]
+strip_id_or_class: pocket-btn
+author: //li[@class='byline']
+prune: no
+tidy: no
+test_url: http://www.theguardian.com/world/2013/oct/04/nsa-gchq-attack-tor-network-encryption
+test_url: http://www.theguardian.com/world/2013/oct/03/edward-snowden-files-john-lanchester
+test_url: http://www.theguardian.com/commentisfree/2014/jun/15/britishness-search-identity-my-part-in-camerons-odyssey
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3544f24..2cd865b
@@ -1,13 +1,13 @@
-title: //h1[@class="Headline"]\r
-date: substring-after(//div[@class="posted"], 'EDT ')\r
-body: //div[@class="storyBody"]\r
-\r
-strip: //td[@class="AssocContentTD"]\r
-strip: //div[@id="pageTitle"]\r
-strip: //div[@class="posted"]\r
-strip: //div[@class="updated"]\r
-strip: //div[@class="js-kit-disclaimer"]\r
-strip: //table[@class="row3table"]\r
-strip: //div[@class="container2"]\r
+title: //h1[@class="Headline"]
+date: substring-after(//div[@class="posted"], 'EDT ')
+body: //div[@class="storyBody"]
+
+strip: //td[@class="AssocContentTD"]
+strip: //div[@id="pageTitle"]
+strip: //div[@class="posted"]
+strip: //div[@class="updated"]
+strip: //div[@class="js-kit-disclaimer"]
+strip: //table[@class="row3table"]
+strip: //div[@class="container2"]
 strip: //div[@id="delta"]
 test_url: http://www.theindychannel.com/news/31050840/detail.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/themarker.com.txt b/inc/3rdparty/site_config/standard/themarker.com.txt
new file mode 100755 (executable)
index 0000000..141b1a3
--- /dev/null
@@ -0,0 +1,11 @@
+title: //h1[contains(@class, 'mainTitle')]
+author: //ul[@class='author']//a[@rel='author']
+body: //div[@id='article-box']
+prune: no
+tidy: no
+strip_id_or_class: head
+strip_id_or_class: social-nav
+strip_id_or_class: rate
+strip_id_or_class: video
+
+test_url: http://www.themarker.com/markerweek/1.2093167
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e3e57fe..4d46dae
@@ -1,10 +1,10 @@
-title: /html/body/div/div[2]/div/div/div/h3\r
-\r
-body: /html/body/div/div[2]/div/div/div/div[2]\r
-\r
-strip: /html/body/div/div[2]/div/div/div/div[6]/div[3]/div/div/div\r
-\r
-tidy: no\r
-\r
+title: /html/body/div/div[2]/div/div/div/h3
+
+body: /html/body/div/div[2]/div/div/div/div[2]
+
+strip: /html/body/div/div[2]/div/div/div/div[6]/div[3]/div/div/div
+
+tidy: no
+
 # any way to get rid of this word character garbage?
 test_url: http://www.themillions.com/2010/07/at-the-movies-with-david-mitchell-the-thousand-autumns-of-jacob-de-zoet.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 518bff9..80aba44
@@ -1,7 +1,7 @@
-body: single-review\r
-strip_id_or_class: featured-review\r
-strip_id_or_class: resources\r
-strip_id_or_class: rate-the-book\r
-strip_id_or_class: write-review\r
+body: single-review
+strip_id_or_class: featured-review
+strip_id_or_class: resources
+strip_id_or_class: rate-the-book
+strip_id_or_class: write-review
 
 test_url: http://themuseumofinnocence.com/review.php?id=1179
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d88bcdd..dab17f0
@@ -1,11 +1,13 @@
-title: //h1[@class='print-title']\r
-body: //div[@class='print-content']\r
-author: //a[contains(@href, '/authors')]\r
-author: substring-before(//div[@class='print-created'], '|')\r
-date: //span[@class='article-date']\r
-date: substring-after(//div[@class='print-created'], '|')\r
-prune: no\r
-\r
-single_page_link: //ul[contains(@class, 'article-actions-bar')]//a[contains(@href, '/print/article/')]\r
-\r
+title: //h2[@property='dc:title']
+#body: //div[@class='print-content']
+body: //div[@id='wysiwyg']
+author: //a[contains(@href, '/authors')]
+author: substring-before(//div[@class='print-created'], '|')
+date: //span[@class='article-date']
+date: substring-after(//div[@class='print-created'], '|')
+prune: no
+
+#single_page_link: //ul[contains(@class, 'article-actions-bar')]//a[contains(@href, '/print/article/')]
+single_page_link: //ul[contains(@class, 'article-actions-bar')]//a[contains(@href, '?page=full')]
+
 test_url: http://www.thenation.com/article/162331/hard-against-time-roy-fisher
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 846b8a8..b7f5f0f
@@ -1,4 +1,4 @@
-body: //div[@id="beta-inner"]\r
-title: //h3[@class="entry-header"]\r
+body: //div[@id="beta-inner"]
+title: //h3[@class="entry-header"]
 
 test_url: http://thenetworkgarden.blogs.com/weblog/2011/09/microsoft-metro-and-the-next-wave-in-computing.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/thenextgeneration.org.txt b/inc/3rdparty/site_config/standard/thenextgeneration.org.txt
new file mode 100755 (executable)
index 0000000..dedd989
--- /dev/null
@@ -0,0 +1,8 @@
+title: //h1[@class='interior-page-title']
+author: //span[@class='author']/a
+date: //div[@class='byline']/time
+body: //div[@class='rich-text-body']
+
+strip: //div[@class='byline']
+strip: //div[@class='offscreen-menu']
+test_url: http://thenextgeneration.org/blog/post/rebrand-announce/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index fdc7000..684fe82
@@ -1,12 +1,12 @@
-body: //div[@class= 'article-body']\r
-author: //div[@class='featured mb-1']//a[starts-with(@href,'/author')]\r
-\r
-strip: //div[@class = 'bargo']\r
-strip: //div[@class = 'tf']\r
-strip: //div[@class = 'article']/div[@class = 'blue-box']\r
-strip_id_or_class: respond\r
-\r
-tidy: no\r
-next_page_link: //div[@class='pages-wrapper']//span/following-sibling::a/@href\r
-\r
+body: //div[@class= 'article-body']
+author: //div[@class='featured mb-1']//a[starts-with(@href,'/author')]
+
+strip: //div[@class = 'bargo']
+strip: //div[@class = 'tf']
+strip: //div[@class = 'article']/div[@class = 'blue-box']
+strip_id_or_class: respond
+
+tidy: no
+next_page_link: //div[@class='pages-wrapper']//span/following-sibling::a/@href
+
 test_url: http://thenextweb.com/apple/2011/10/12/tnw-review-a-complete-guide-to-apples-ios-5-with-icloud-an-os-14-years-in-the-making/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index c713232..c9abda7
@@ -1,3 +1,3 @@
-body: //div[@id='fullstory']\r
+body: //div[@id='fullstory']
 strip: //div[@id='page_leftbar']
 test_url: http://theoaklandpress.com/articles/2011/04/25/news/doc4db5330e0bce9220005852.txt
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 12918b8..90e8d65
@@ -1,11 +1,11 @@
-title: //h2[@class='title']\r
-date: substring-before(//p[@class='meta'], '|')\r
-body: //div[@class='story']\r
-#body: //div[@class='article_body']\r
-\r
-strip: //h2[@class='title']\r
-strip: //p[@class='meta']\r
-strip: //div[@class='ga_section']\r
-strip: //div[@id='recent_slider']\r
+title: //h2[@class='title']
+date: substring-before(//p[@class='meta'], '|')
+body: //div[@class='story']
+#body: //div[@class='article_body']
+
+strip: //h2[@class='title']
+strip: //p[@class='meta']
+strip: //div[@class='ga_section']
+strip: //div[@id='recent_slider']
 
 test_url: http://www.theonion.com/articles/pathetic-bobcats-owner-again-regaling-players-with,27572/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f89f3a8..75583cd
@@ -1,11 +1,11 @@
-title: //h1[@class='post-title']\r
-body: //div[@class='post']\r
-author: //p[@class='posted-by']\r
-date: //div[@class='sprite post-date']\r
-\r
-# The body of the post doesn't have it's own div so we have to strip out the metadata\r
-strip: //div[@class='author_avatar']\r
-strip: //div[@class='sprite post-date']\r
-strip: //h1[@class='post-title']\r
+title: //h1[@class='post-title']
+body: //div[@class='post']
+author: //p[@class='posted-by']
+date: //div[@class='sprite post-date']
+
+# The body of the post doesn't have it's own div so we have to strip out the metadata
+strip: //div[@class='author_avatar']
+strip: //div[@class='sprite post-date']
+strip: //h1[@class='post-title']
 strip: //p[@class='posted-by']
 test_url: http://thepioneerwoman.com/cooking/2011/08/pie-fats-a-comparison/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ebcc55d..5d30230
@@ -1,5 +1,8 @@
-title: //div[@id="article"]/h2\r
-author: //div[@id="article"]/p[@class="byline"]/a[1]\r
-date: //div[@id="article"]/p[@class="dateline"]/a[2]\r
-body: //div[@id="article"]/div[@id="body"]
-test_url: http://www.theregister.co.uk/2011/10/06/gas_bill_shocker/
\ No newline at end of file
+# Updated 25-Jan-2014
+single_page_link: //a[contains(@href, '/Print/')]
+
+title: //div[@id="article"]/h2
+author: //p[@class="byline"]/a
+date: //p[@class="dateline"]/a[last()]
+
+test_url: http://www.theregister.co.uk/2014/01/24/thirty_years_of_the_apple_macintosh_part_2/
old mode 100644 (file)
new mode 100755 (executable)
index ebff662..1f56316
@@ -1,3 +1,3 @@
-body: //div[@id='node-content']\r
+body: //div[@id='node-content']
 strip_id_or_class: pager
 test_url: http://www.theroot.com/views/why-i-am-male-feminist
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d01a89b..84d0e78
@@ -1,4 +1,4 @@
-title: /html/body/div/div[2]/div/div/h1\r
-\r
+title: /html/body/div/div[2]/div/div/h1
+
 body: /html/body/div/div[2]/div/div/div[2]
 test_url: http://therumpus.net/2010/07/the-rumpus-interview-with-david-means/?full=yes
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ab9a99e..68a8bc8
@@ -1,11 +1,11 @@
-#body: (//div[@class='ftr-yt-vid'])[1]\r
-body: (//blockquote[contains(@class, 'postcontent')])[1]\r
-body: (//div[starts-with(@id, 'post_message')])[1]\r
-\r
-prune: no\r
-tidy: no\r
-\r
-#replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player"\r
-#replace_string(</iframe>): </iframe>&nbsp;</div>\r
-\r
+#body: (//div[@class='ftr-yt-vid'])[1]
+body: (//blockquote[contains(@class, 'postcontent')])[1]
+body: (//div[starts-with(@id, 'post_message')])[1]
+
+prune: no
+tidy: no
+
+#replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player"
+#replace_string(</iframe>): </iframe>&nbsp;</div>
+
 test_url: http://www.thesiasat.com/showthread.php?19220-Dunya-News-HASB-E-HAAL-16-06-2012-Part-1-5
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d5c6c9e..dcdf257
@@ -1,4 +1,4 @@
-title: //h3[@class='post-title']/a[@class='post-title-link']\r
-body: //div[@class='post-content']\r
+title: //h3[@class='post-title']/a[@class='post-title-link']
+body: //div[@class='post-content']
 author: //div[@class='post-meta-under-title']/a
 test_url: http://www.thesimpledollar.com/2011/09/13/determining-the-size-of-your-emergency-fund/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e2ed1e6..ca98328
@@ -1,3 +1,3 @@
-strip: //*[(@id = "content")]/h2\r
+strip: //*[(@id = "content")]/h2
 strip: //*[(@class = "wp-notable-line")]
 test_url: http://www.thespoiler.co.uk/index.php/2010/10/21/wayne-rooney-tells-man-utd-its-not-me-its-you
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 409dc0c..f71cfb6
@@ -1,9 +1,9 @@
-title: //h1[contains(@class, 'cTitle')]\r
-body: //div[contains(@class, 'KonaBody') or @id='articleimageright']\r
-author: //meta[@name='Author']/@content\r
-date: //meta[@name='OriginalPublicationDate']/@content\r
-\r
-prune: no\r
-tidy: no\r
-\r
+title: //h1[contains(@class, 'cTitle')]
+body: //div[contains(@class, 'KonaBody') or @id='articleimageright']
+author: //meta[@name='Author']/@content
+date: //meta[@name='OriginalPublicationDate']/@content
+
+prune: no
+tidy: no
+
 test_url: http://www.thespoof.com/news/spoof.cfm?headline=s8i108389
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0f9855c..6fcf4fd
@@ -1,12 +1,12 @@
-# savage* filtering is for Savage Love, such as: http://www.thestranger.com/seattle/SavageLove?oid=5135029\r
-\r
-#other filtering are plain articles, such as: http://www.thestranger.com/seattle/the-stranger-election-control-board/Content?oid=5142885\r
-\r
-title: //div[@id='savageColumn_head']/h1\r
-title: //h1[@class="headlineLarge"]\r
-\r
-strip: //div[@id='savage_right'] | //div[@id='savageColumn_head'] | //div[@id='savageArticleRight'] | //div[@id='articleRight'] | //div[@class='savAppBanner']\r
-\r
-body: //div[@id='savageColumn']\r
+# savage* filtering is for Savage Love, such as: http://www.thestranger.com/seattle/SavageLove?oid=5135029
+
+#other filtering are plain articles, such as: http://www.thestranger.com/seattle/the-stranger-election-control-board/Content?oid=5142885
+
+title: //div[@id='savageColumn_head']/h1
+title: //h1[@class="headlineLarge"]
+
+strip: //div[@id='savage_right'] | //div[@id='savageColumn_head'] | //div[@id='savageArticleRight'] | //div[@id='articleRight'] | //div[@class='savAppBanner']
+
+body: //div[@id='savageColumn']
 body: //div[@id='story_text']
 test_url: http://www.thestranger.com/seattle/SavageLove?oid=5135029
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 5de7563..58eabf0
@@ -1,25 +1,25 @@
-title: //div[@id='storyHdr']/h1\r
-title: //div[@id='print']//h2\r
-body: //div[@class="virtualpage"]\r
-body: //div[@id='print']//div[@id='bd']\r
-author: //meta[@name="AUTHOR"]/@content\r
-author: (//div[@id='print']//div[@id='bd']/h4)[1]\r
-date: //meta[@name="DATE"]/@content\r
-date: //div[@id='print']//div[@id='dte']\r
-\r
-strip_id_or_class: articleFooter\r
-strip_id_or_class: sidebar\r
-strip_id_or_class: ie6PrintSubhead\r
-strip_id_or_class: subHdr\r
-\r
-\r
-replace_string(<P/>): </p><p>\r
-\r
-prune: no\r
-\r
-#TODO: redirects back - perhaps needs referer to work\r
-single_page_link: //div[@id='storyDetail']//a[contains(@href, '/print/')]\r
-\r
-test_url: http://www.thestreet.com/story/11386556/1/which-of-these-10-dividend-stocks-is-worth-the-risk.html\r
-# multi page\r
+title: //div[@id='storyHdr']/h1
+title: //div[@id='print']//h2
+body: //div[@class="virtualpage"]
+body: //div[@id='print']//div[@id='bd']
+author: //meta[@name="AUTHOR"]/@content
+author: (//div[@id='print']//div[@id='bd']/h4)[1]
+date: //meta[@name="DATE"]/@content
+date: //div[@id='print']//div[@id='dte']
+
+strip_id_or_class: articleFooter
+strip_id_or_class: sidebar
+strip_id_or_class: ie6PrintSubhead
+strip_id_or_class: subHdr
+
+
+replace_string(<P/>): </p><p>
+
+prune: no
+
+#TODO: redirects back - perhaps needs referer to work
+single_page_link: //div[@id='storyDetail']//a[contains(@href, '/print/')]
+
+test_url: http://www.thestreet.com/story/11386556/1/which-of-these-10-dividend-stocks-is-worth-the-risk.html
+# multi page
 test_url: http://www.thestreet.com/story/11387090/1/7-ubs-stock-picks-for-2012.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 11c5c15..1e1ce58
@@ -1,31 +1,48 @@
-title: //h1[contains(@class, "headline")]\r
-\r
-author: //p[contains(@class, "byline")]/a[contains(@class, "author")]\r
-\r
-date: substring-after(normalize-space(//p[contains(@class, "byline")]/span[contains(@class, "publish-date")]), "on ")\r
-\r
-body: //article[contains(@class, 'feature-entry')]\r
-body: //article\r
-prune: no\r
-tidy: no\r
-\r
-strip: //article/header\r
-strip: //*[@id='sticky-menu']\r
-strip: //aside\r
-strip: //nav\r
-\r
-strip_id_or_class: gallery\r
-strip_id_or_class: article-meta\r
-strip_id_or_class: story-navigation\r
-strip_id_or_class: slegend\r
-strip_id_or_class: related-product-meta\r
-strip_id_or_class: comments\r
-strip_id_or_class: ui-jump-list\r
-strip_id_or_class: pullquote\r
-\r
-strip: //q\r
-\r
-strip: //a[contains(@class, 'entry-section-title')]\r
-\r
-test_url: http://www.theverge.com/2012/2/29/2821763/lytro-review\r
-test_url: http://www.theverge.com/2011/11/3/2534861/nokia-lumia-800-review
\ No newline at end of file
+author: //p[contains(@class, "byline")]/a[contains(@class, "author")]
+
+date: //span[contains(@class, "publish-date")]/time[@pubdate]/@datetime
+
+body: //div[contains(@class, 'entry-content')]
+# for vergecasts, e.g. http://www.theverge.com/2013/8/22/4648566/the-vergecast-090-august-22th-2013-video
+body: //article
+prune: no
+#tidy: no
+
+strip: //article/header
+strip: //*[@id='sticky-menu']
+strip: //aside
+strip: //nav
+strip: //img[contains(@class, 'vox-lazy-load')]
+# deal with bad parsing
+strip: //div[contains(@class, 'story-image')]//div[contains(., 'function(')]
+
+strip_id_or_class: gallery
+strip_id_or_class: article-meta
+strip_id_or_class: story-navigation
+strip_id_or_class: slegend
+strip_id_or_class: related-product-meta
+strip_id_or_class: comments
+strip_id_or_class: ui-jump-list
+strip_id_or_class: pullquote
+strip_id_or_class: m-ad
+strip_id_or_class: social-sharing
+strip_id_or_class: m-video-entry__excerpt
+strip_id_or_class: hidden
+
+replace_string(<noscript>): <div>
+replace_string(</noscript>): </div>
+
+find_string: <script
+replace_string: <div style="display:none" 
+find_string: </script>
+replace_string: </div>
+
+strip: //q
+
+strip: //a[contains(@class, 'entry-section-title')]
+
+test_url: http://www.theverge.com/2012/2/29/2821763/lytro-review
+test_url: http://www.theverge.com/2011/11/3/2534861/nokia-lumia-800-review
+test_url: http://www.theverge.com/2013/2/24/4026114/barnes-noble-shifting-focus-away-from-nook-hardware
+test_url: http://www.theverge.com/2014/6/19/5824072/top-shelf-living-the-dream
+test_url: http://www.theverge.com/rss/frontpage
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 27281ce..f98749e
@@ -1,4 +1,4 @@
-body: //div[@class="briefingEntry"]\r
-prune: no\r
+body: //div[@class="briefingEntry"]
+prune: no
 
 test_url: http://theweek.com/article/index/215763/insider-trading-on-capitol-hill
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8934b68..1eec4e3
@@ -1,4 +1,4 @@
-author: //p[@class="byline"]/a\r
-body: //div[@class="post"]\r
+author: //p[@class="byline"]/a
+body: //div[@class="post"]
 
 test_url: http://thinkprogress.org/special/2011/11/12/367040/harvard-law-professor-criticizes-homeland-security-feel-of-overreaction-to-occupy-harvard/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 958d4b2..73b3c9e
@@ -1,2 +1,2 @@
-body: //div[@class='main-content-panel']/div[@class='img'] | //div[@id='page_content_Content9_oModuleContent_2_div_Body']\r
+body: //div[@class='main-content-panel']/div[@class='img'] | //div[@id='page_content_Content9_oModuleContent_2_div_Body']
 test_url: http://www.thisdaylive.com/articles/australia-pm-talks-human-rights-with-chinas-wen/90394/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6850b4b..70b5399
@@ -1,8 +1,8 @@
-author: //div[@class='meta clearfix']/a\r
-body: //div[@class='post']\r
-\r
-strip: //div[@class='metaCat']\r
-strip: //div[@class='post']/h1\r
-strip: //div[@class='post']/div[@class='meta clearfix']\r
+author: //div[@class='meta clearfix']/a
+body: //div[@class='post']
+
+strip: //div[@class='metaCat']
+strip: //div[@class='post']/h1
+strip: //div[@class='post']/div[@class='meta clearfix']
 strip: //div[@class='post']/div[@class='social-bar clearfix']
 test_url: http://thisismynext.com/2011/10/18/galaxy-nexus-android-ice-cream-sandwich-pictures-video-hands-on/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8bcf2ec..1950e58
@@ -1,3 +1,3 @@
-author: //span[@class='fn']\r
-date: substring-before(substring-after(//*[@id='center_ajax_sub']/div/div[3],'|'),'|')\r
+author: //span[@class='fn']
+date: substring-before(substring-after(//*[@id='center_ajax_sub']/div/div[3],'|'),'|')
 test_url: http://tidbits.com/article/12651
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index fd3fe08..f3f886b
@@ -1,14 +1,12 @@
-# 2011-10-25 - carlo@... - Initial setup.\r
-\r
-single_page_link: //li[@class='print']/a/@href\r
-\r
-title: //h1\r
-author: //meta[@name="byline"]/@content\r
-date: //meta[@name="date"]/@content\r
-\r
-strip: //span[@class="see"]\r
-strip: //div[@class="byline"]\r
-strip: //div[@id="date2"]\r
-strip: //h1\r
-\r
-test_url: http://www.time.com/time/specials/packages/article/0,28804,2094921_2094923_2094924,00.html
\ No newline at end of file
+title: //h1[contains(@class, 'article-title')]
+author: //article//span[contains(@class, 'byline')]
+date: //time[@pubdate]/@datetime
+body: //section[contains(@class, 'article-body')]
+prune: no
+tidy: no
+
+strip: //figcaption
+strip: //p[contains(., 'MORE:') and ./a]
+strip: //aside
+
+test_url: http://time.com/14478/emotions-may-not-be-so-universal-after-all/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 1729773..af1c23c
@@ -1,6 +1,6 @@
-title: //h1\r
-body: //div[@class="storytext"]\r
-strip: //div[@id="thelogin"]\r
-strip: //*[@class="hide"]\r
+title: //h1
+body: //div[@class="storytext"]
+strip: //div[@id="thelogin"]
+strip: //*[@class="hide"]
 strip: //div[@id="anchored"]
 test_url: http://www.timeshighereducation.co.uk/story.asp?sectioncode=26&storycode=416124&c=1
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9533eb0..b8474d9
@@ -1,9 +1,9 @@
-body: //div[@id='content']\r
-\r
-strip_id_or_class: featured-box\r
-strip_id_or_class: postmeta\r
-strip_id_or_class: respond\r
-\r
-author: //a[contains(@href, '/author/') and contains(@title, 'Posts by')]\r
-date: substring-before(//a[contains(@href, '/author/') and contains(@title, 'Posts by')]/.., ' by ')\r
+body: //div[@id='content']
+
+strip_id_or_class: featured-box
+strip_id_or_class: postmeta
+strip_id_or_class: respond
+
+author: //a[contains(@href, '/author/') and contains(@title, 'Posts by')]
+date: substring-before(//a[contains(@href, '/author/') and contains(@title, 'Posts by')]/.., ' by ')
 test_url: http://www.tipb.com/2011/10/17/iphone-4s-review/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 65a1899..199f5d1
@@ -1,17 +1,17 @@
-title: //div[contains(@class, 'article_detail')]/div[@class='entry_header']/h1\r
-title: //div[contains(@class, 'article_detail')]//h1\r
-title: //h1\r
-\r
-body: //div[contains(@class, 'article_detail')]\r
-\r
-author: //div[@class='article_detail']/div[@class='entry_header']/li/div[@class='author']//h3\r
-author: div[@class='author']//h3\r
-strip: //div[contains(@class, 'field-field-book-cover')]\r
-\r
-date: translate(//*[@class='post_date' and contains(., ' 20')], '|', '')\r
-\r
-prune: no\r
-\r
-single_page_link: //a[@class='print-page']\r
-\r
+title: //div[contains(@class, 'article_detail')]/div[@class='entry_header']/h1
+title: //div[contains(@class, 'article_detail')]//h1
+title: //h1
+
+body: //div[contains(@class, 'article_detail')]
+
+author: //div[@class='article_detail']/div[@class='entry_header']/li/div[@class='author']//h3
+author: div[@class='author']//h3
+strip: //div[contains(@class, 'field-field-book-cover')]
+
+date: translate(//*[@class='post_date' and contains(., ' 20')], '|', '')
+
+prune: no
+
+single_page_link: //a[@class='print-page']
+
 test_url: http://www.tnr.com/blog/jonathan-chait/92991/did-obama-get-rolled
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d8548c7..701a212
@@ -1,6 +1,6 @@
-title: //div[@id='maincontent']//div[@class='title']\r
-body: //div[@id='maincontent']//div[@class='byline'] | //div[@id='maincontent']//div[@class='meat']\r
-\r
-tidy: no\r
+title: //div[@id='maincontent']//div[@class='title']
+body: //div[@id='maincontent']//div[@class='byline'] | //div[@id='maincontent']//div[@class='meat']
+
+tidy: no
 
 test_url: http://www.tomdispatch.com/post/175436/tomgram:_noam_chomsky%2C_the_imperial_mentality_and_9_11/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 2bba6de..2b43757
@@ -1,8 +1,8 @@
-tidy: no\r
-title: //title\r
-author: //a[@itemprop = 'author']\r
-date: //time[@itemprop = 'datePublished']\r
-body: //div[@id = 'intelliTXT']\r
-\r
+tidy: no
+title: //title
+author: //a[@itemprop = 'author']
+date: //time[@itemprop = 'datePublished']
+body: //div[@id = 'intelliTXT']
+
 next_page_link: //li[@class="pagin next"]/a
 test_url: http://www.tomshardware.com/reviews/gaming-graphics-card-review,3107.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e910003..eee57cc
@@ -1,12 +1,12 @@
-body://div[@id="news-content"]/div[@id="intelliTXT"][1]\r
-\r
-author://div[@id="header-news-infos"]/a[1]\r
-\r
-date: //div[@id="header-news-infos"]/span[1]\r
-\r
-title://h1[@id="header-news-title" and @class="hardwareTitle"][1]\r
-\r
-strip://div[@id="news-content"]/div[@id="intelliTXT"]/table \r
-\r
+body://div[@id="news-content"]/div[@id="intelliTXT"][1]
+
+author://div[@id="header-news-infos"]/a[1]
+
+date: //div[@id="header-news-infos"]/span[1]
+
+title://h1[@id="header-news-title" and @class="hardwareTitle"][1]
+
+strip://div[@id="news-content"]/div[@id="intelliTXT"]/table 
+
 footnotes: no
 test_url: http://www.tomshardware.de/DDR4-DDR3-ISSCC-Samsung-Hynix,news-247133.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index dbe60b1..bb45d89
@@ -1,6 +1,6 @@
-body: //div[@class='post']\r
-\r
-strip: //div[@class='social']\r
-strip: //span[@class='next']\r
+body: //div[@class='post']
+
+strip: //div[@class='social']
+strip: //span[@class='next']
 strip: //span[@class='previous']
 test_url: http://toolsandtoys.net/noble-tonic-02/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tracks.ranea.org.txt b/inc/3rdparty/site_config/standard/tracks.ranea.org.txt
new file mode 100755 (executable)
index 0000000..5a38647
--- /dev/null
@@ -0,0 +1,14 @@
+# Metadata
+title: substring-after(//title, 'Coyote Tracks - ')
+author: //meta[@name="author"]/@content
+date: //div[@class="post_header"]/a
+
+# Content Pruning
+strip: //div[@class="column left"]
+strip: //div[@class="pages"]
+strip: //a[@class="text_title"]
+strip: //ol[@class="notes"]
+
+dissolve: //div[@class='column right']/ul
+dissolve: //li[@class='post']
+test_url: http://tracks.ranea.org/post/31431060205/the-next-big-uh-slightly-taller-thing
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/trailerzone.de.txt b/inc/3rdparty/site_config/standard/trailerzone.de.txt
new file mode 100755 (executable)
index 0000000..02151a6
--- /dev/null
@@ -0,0 +1,9 @@
+body: //div[@id='video' or @id='main']
+
+strip_id_or_class: socialshareprivacy2
+strip_id_or_class: wp_rp_first
+
+find_string: Genre</strong>
+replace_string: </strong></p><p><strong>Genre</strong>
+
+test_url: http://www.trailerzone.de/g-i-joe-2-die-abrechnung/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 96e491f..d6cfb6d
@@ -1,8 +1,8 @@
-title: //div[@class="Post-body"]//span[@class="PostHeader"]\r
-author: //div[@class="PostHeaderIcons metadata"]/a[@title="Author"]\r
-date: substring-before(//div[@class="PostHeaderIcons metadata"], '|')\r
-body: //div[@class="Post-body"]\r
-strip_id_or_class: print1\r
-strip_id_or_class: metadata\r
+title: //div[@class="Post-body"]//span[@class="PostHeader"]
+author: //div[@class="PostHeaderIcons metadata"]/a[@title="Author"]
+date: substring-before(//div[@class="PostHeaderIcons metadata"], '|')
+body: //div[@class="Post-body"]
+strip_id_or_class: print1
+strip_id_or_class: metadata
 strip_id_or_class: authorbox
 test_url: http://traningslara.se/skoinlagg-och-skador-finns-det-nagot-samband/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 82797db..663cafe
@@ -1,13 +1,13 @@
-title: //title\r
-author: //span/a\r
-date: substring-after(//small,'Published:')\r
-\r
-strip: //h1[@class='vert_class']\r
-strip: //h1[@class='headline']\r
-strip: //img[contains(@src,'logo_triblive.gif')]\r
-\r
-#strip: //h6\r
-#strip_img_src: logo_triblive.gif\r
-\r
-single_page_link: //a[@class='stprint']\r
+title: //title
+author: //span/a
+date: substring-after(//small,'Published:')
+
+strip: //h1[@class='vert_class']
+strip: //h1[@class='headline']
+strip: //img[contains(@src,'logo_triblive.gif')]
+
+#strip: //h6
+#strip_img_src: logo_triblive.gif
+
+single_page_link: //a[@class='stprint']
 test_url: http://triblive.com/sports/2819913-85/lemieux-deal-penguins-burkle-nhl-owners-team-mario-bettman-case
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e7c1a4b..9e0663b
@@ -1,10 +1,12 @@
-title: //div[@class='printbody']/h1\r
-body: //div[@class='printbody']\r
-prune: no\r
-\r
-strip: //div[@class='printbody']/a[@href='http://www.truthdig.com/']\r
-strip: //table[@class='footer']\r
-\r
-single_page_link: //div[@class='article_tools']//a[contains(@href, '/print/')]\r
-\r
-test_url: http://www.truthdig.com/report/item/the_election_march_of_the_trolls_20110829/
\ No newline at end of file
+title: //div[@class='printbody']/h1
+body: //div[@class='printbody']
+prune: no
+
+strip: //div[@class='printbody']/a[@href='http://www.truthdig.com/']
+strip: //table[@class='footer']
+strip: //h6[contains(., 'http://')]
+
+single_page_link: //a[contains(@href, '/print/')]
+
+test_url: http://www.truthdig.com/report/item/the_election_march_of_the_trolls_20110829/
+test_url: http://www.truthdig.com/dig/item/the_death_of_truth_20130505/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0dab5b0..63537c1
@@ -1,4 +1,4 @@
-title: //h2\r
-author: //a[starts-with(@href, '/AuthorStories')]\r
+title: //h2
+author: //a[starts-with(@href, '/AuthorStories')]
 body: //div[@id='storyinnerbody']
 test_url: http://www.tthfanfic.org/Story-6512/Kudra+Journeys.htm
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
old mode 100644 (file)
new mode 100755 (executable)
index b86f8cc..2af00c2
@@ -1,6 +1,6 @@
-title: //h1[@class='posttitle']\r
-author: //span[@class='author']/a\r
-date: //span[@class='timestamp']\r
-body: //div[@class='body']\r
+title: //h1[@class='posttitle']
+author: //span[@class='author']/a
+date: //span[@class='timestamp']
+body: //div[@class='body']
 
 test_url: http://www.tuaw.com/2011/10/19/apple-posts-fans-memories-of-steve-jobs/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a3946cb..6e18e3d
@@ -1,6 +1,6 @@
-title: //h1[@class='post-title']\r
-author: //div[@class='display-name']\r
-date: //div[@class='date']\r
-body: //div[@class='body']\r
-footnotes: no\r
+title: //h1[@class='post-title']
+author: //div[@class='display-name']
+date: //div[@class='date']
+body: //div[@class='body']
+footnotes: no
 test_url: http://tuckreview.com/2012/8/14/migrating-to-v6
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/tuhdo.github.io.txt b/inc/3rdparty/site_config/standard/tuhdo.github.io.txt
new file mode 100644 (file)
index 0000000..beb551f
--- /dev/null
@@ -0,0 +1,7 @@
+# Generated by FiveFilters.org's web-based selection tool
+# Place this file inside your site_config/custom/ folder
+# Source: http://siteconfig.fivefilters.org/grab.php?url=https%3A%2F%2Ftuhdo.github.io%2Femacs-tutor.html
+
+body: //div[@id='content']
+strip_id_or_class: table-of-contents
+test_url: https://tuhdo.github.io/emacs-tutor.html
old mode 100644 (file)
new mode 100755 (executable)
index 08dbba5..3cc3a9c
@@ -1,20 +1,20 @@
-# Google Custom Search\r
-strip_id_or_class: google_branding_style\r
-\r
-# Avoid double title\r
-strip_id_or_class: pagetitle\r
-\r
-# external links are labelled\r
-strip_image_src: http://static.mediatropes.info/pmwiki/pub/external_link.gif\r
-\r
-title: //div[@class="pagetitle"]\r
-body: //div[@id="wikitext"]\r
-\r
-# don't get clever.\r
-strip_comments: no\r
-prune: no\r
-\r
-# navigation in footer lives inside the wikitext div, annoyingly.\r
-strip_id_or_class: pathholder\r
+# Google Custom Search
+strip_id_or_class: google_branding_style
+
+# Avoid double title
+strip_id_or_class: pagetitle
+
+# external links are labelled
+strip_image_src: http://static.mediatropes.info/pmwiki/pub/external_link.gif
+
+title: //div[@class="pagetitle"]
+body: //div[@id="wikitext"]
+
+# don't get clever.
+strip_comments: no
+prune: no
+
+# navigation in footer lives inside the wikitext div, annoyingly.
+strip_id_or_class: pathholder
 
 test_url: http://tvtropes.org/pmwiki/pmwiki.php/Main/WithinParameters
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 12ab154..520ebd8
@@ -1,9 +1,9 @@
-title: //title\r
-body: (//p[contains(@class, 'js-tweet-text')])[1]\r
-author: (//strong[contains(@class, 'fullname')])[1]\r
-date: //span[contains(@class, 'js-short-timestamp')]/@data-time\r
-\r
-prune: no\r
-tidy: no\r
-\r
+title: //title
+body: (//p[contains(@class, 'js-tweet-text')])[1]
+author: (//strong[contains(@class, 'fullname')])[1]
+date: //span[contains(@class, 'js-short-timestamp')]/@data-time
+
+prune: no
+tidy: no
+
 test_url: https://twitter.com/medialens/status/216883678582804480
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 088d658..3469be0
@@ -1,6 +1,6 @@
-body: //div[@class='d3cmsCBody']//div[@class='pubText pubDate' or @class='newsComment' or contains(@class, 'newsPhoto') or @class='newsText']\r
-strip: //div[contains(@class, 'mpindex')]\r
-prune: no\r
-tidy: no\r
-\r
+body: //div[@class='d3cmsCBody']//div[@class='pubText pubDate' or @class='newsComment' or contains(@class, 'newsPhoto') or @class='newsText']
+strip: //div[contains(@class, 'mpindex')]
+prune: no
+tidy: no
+
 test_url: http://www.uefa.com/uefaeuropaleague/news/newsid=1617320.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 29e1956..cd9c136
@@ -1,23 +1,23 @@
-# applies to uk.ds.ign.com, uk.wii.ign.com etc.\r
-# possibly to non-UK versions, but I can&rsquo;t test that\r
-\r
-title: //h1[@class="headline"]\r
-author: //div[@class="hdr-sub byline"]/a\r
-date: //h2[@class="publish-date"]/span\r
-body: //div[@id="main-article-content"]\r
-\r
-strip: //ul[@class="lnks-readmore"]\r
-\r
-strip: //div[@class="inlineImageCaption"]\r
-# can&rsquo;t make the images appear, so remove the captions\r
-\r
-strip: //div[@style="width:468px"]\r
-# video caption links\r
-\r
-convert_double_br_tags: yes\r
-\r
-strip_comments: no\r
-# otherwise the &lsquo;Closing Comments&rsquo; are removed\r
-\r
+# applies to uk.ds.ign.com, uk.wii.ign.com etc.
+# possibly to non-UK versions, but I can&rsquo;t test that
+
+title: //h1[@class="headline"]
+author: //div[@class="hdr-sub byline"]/a
+date: //h2[@class="publish-date"]/span
+body: //div[@id="main-article-content"]
+
+strip: //ul[@class="lnks-readmore"]
+
+strip: //div[@class="inlineImageCaption"]
+# can&rsquo;t make the images appear, so remove the captions
+
+strip: //div[@style="width:468px"]
+# video caption links
+
+convert_double_br_tags: yes
+
+strip_comments: no
+# otherwise the &lsquo;Closing Comments&rsquo; are removed
+
 # Ratings box could do with some rearranging, but it&rsquo;s tricky
 test_url: http://uk.xbox360.ign.com/articles/121/1210717p1.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index cbe87d1..4a5ae34
@@ -1,17 +1,17 @@
-author: substring-before(substring-after(//div[@class='post-byline'], 'By '), ', on')\r
-date: substring-after(//div[@class='post-byline'], ', on')\r
-\r
-# for some reason, the following is producing a "no text [48]" error\r
-#title: //div[@class='post-headline']\r
-\r
-# for some reason, the following doesn't appear to isolate just the body copy\r
-body: //div[@class='post-bodycopy']\r
-\r
-# we solve the above issue by stripping out everything else we don't want\r
-# these can probably all be removed if the body: command above worked\r
-strip_id_or_class: reply\r
-strip_id_or_class: left\r
-strip_id_or_class: post-headline\r
-strip_id_or_class: post-byline\r
+author: substring-before(substring-after(//div[@class='post-byline'], 'By '), ', on')
+date: substring-after(//div[@class='post-byline'], ', on')
+
+# for some reason, the following is producing a "no text [48]" error
+#title: //div[@class='post-headline']
+
+# for some reason, the following doesn't appear to isolate just the body copy
+body: //div[@class='post-bodycopy']
+
+# we solve the above issue by stripping out everything else we don't want
+# these can probably all be removed if the body: command above worked
+strip_id_or_class: reply
+strip_id_or_class: left
+strip_id_or_class: post-headline
+strip_id_or_class: post-byline
 strip_id_or_class: footer
 test_url: http://www.uni-watch.com/2011/10/18/the-curious-case-of-steve-debergs-microphone-and-speaker/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/unwinnable.com.txt b/inc/3rdparty/site_config/standard/unwinnable.com.txt
new file mode 100755 (executable)
index 0000000..05ad86a
--- /dev/null
@@ -0,0 +1,9 @@
+title: //h1[@class='postTitle']
+author: //a[@rel='author']
+date: substring-before(//h4[@class='postAuthor'], '|')
+body: //div[@class='postContent']
+
+strip: //div[@class='simplePullQuote']
+
+wrap_in(figure): //img
+test_url: http://www.unwinnable.com/2013/04/23/gratifying-play/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/uppsalafria.se.txt b/inc/3rdparty/site_config/standard/uppsalafria.se.txt
new file mode 100755 (executable)
index 0000000..79c59ec
--- /dev/null
@@ -0,0 +1,7 @@
+body: //div[contains(@class, 'layout__inner')]//div[contains(@class, 'file-image') or contains(@class, 'node__content')]
+author: //article//div[contains(@class, 'field-byline')]
+strip_id_or_class: rekommenderade
+strip_id_or_class: disqus
+strip_id_or_class: annonser
+
+test_url: http://www.uppsalafria.se/artikel/97167
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 86061f7..385c95c
@@ -1,3 +1,3 @@
-title: //title\r
-body: //td[@id='content']
-test_url: http://www.urbandictionary.com/define.php?term=Grown-Ass
\ No newline at end of file
+title: //title
+body: //table[@id='entries']
+test_url: http://www.urbandictionary.com/define.php?term=Grown-Ass
diff --git a/inc/3rdparty/site_config/standard/usatoday.com.txt b/inc/3rdparty/site_config/standard/usatoday.com.txt
new file mode 100755 (executable)
index 0000000..710a7b3
--- /dev/null
@@ -0,0 +1,8 @@
+date: //meta[@itemprop="datePublished"]/@content
+author: //div[@itemprop="author"]
+body: //div[@itemprop='articleBody']
+
+strip_id_or_class: share-tools
+
+test_url: http://www.usatoday.com/story/news/world/2014/03/18/malaysia-plane-search/6552429/
+test_url: http://rssfeeds.usatoday.com/usatoday-NewsTopStories
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index eb10a48..30c2882
@@ -1,6 +1,6 @@
-body: //div[@id='CS_Element_maincontent']\r
-\r
-tidy: no\r
-prune: no\r
+body: //div[@id='CS_Element_maincontent']
+
+tidy: no
+prune: no
 
 test_url: http://www.usccb.org/bible/readings/072412.cfm
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f6be84c..b8511c7
@@ -1,8 +1,8 @@
-title: //h1\r
-\r
-date: substring-after(//p[@class='overline']/strong, ',')\r
-body: //div[@class="maintext"]\r
-strip: //p[@class='overline']\r
-strip: //h1\r
+title: //h1
+
+date: substring-after(//p[@class='overline']/strong, ',')
+body: //div[@class="maintext"]
+strip: //p[@class='overline']
+strip: //h1
 tidy: no
 test_url: http://www.useit.com/alertbox/mobile-startup-screen.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/usfirst.org.txt b/inc/3rdparty/site_config/standard/usfirst.org.txt
new file mode 100755 (executable)
index 0000000..f02b2d3
--- /dev/null
@@ -0,0 +1,6 @@
+title: //meta[@property='dc:title']/@content
+date: //div[@class='content']//span[@property='dc:date']/@content
+body: //div[@property='content:encoded']
+prune: no
+
+test_url: http://www.usfirst.org/roboticsprograms/frc/Photo-From-Kickoff-Filming
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/utdailybeacon.com.txt b/inc/3rdparty/site_config/standard/utdailybeacon.com.txt
new file mode 100755 (executable)
index 0000000..d37911b
--- /dev/null
@@ -0,0 +1,5 @@
+title: //h1
+author: //*[@class='byline']
+date: substring-after(//*[@class='pubdatetime'], 'Published: ')
+body: //*[@class='body-block']
+test_url: http://utdailybeacon.com/news/2012/oct/8/energy-forum-continues/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a893bda..c69f2df
@@ -1,7 +1,7 @@
-author: ("Arturo Toledo")\r
-title: //div[@class="post"]/h2\r
-body: //div[@class="entry"]\r
-\r
-# Remove Twitter button\r
+author: ("Arturo Toledo")
+title: //div[@class="post"]/h2
+body: //div[@class="entry"]
+
+# Remove Twitter button
 strip: //div[@class="entry"]/p[2]/a/img
 test_url: http://ux.artu.tv/?p=192
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index bfc47d1..efa3822
@@ -1,30 +1,30 @@
-title: //meta[@property="og:title"]/@content\r
-author: //div[contains(@class, 'byline')]//span[contains(@class, 'name')]\r
-date: //div[contains(@class, 'cn_date_time')]\r
-body: //div[contains(@class, 'pageContainers')]\r
-body: //article[@id='items-container']\r
-#body: //h2[@class='sub-header'] | //div[contains(@class, 'contributor-type') or @class='display-date' or @class='content-container']\r
-\r
-strip_id_or_class: bc\r
-strip_id_or_class: utilities\r
-strip_id_or_class: list-supporting\r
-strip_id_or_class: yrail\r
-strip_id_or_class: urail\r
-\r
-prune: no\r
-#tidy: no\r
-\r
-strip_id_or_class: super-rubric-section\r
-strip_id_or_class: cn_date_time\r
-strip_id_or_class: cn_contributors\r
-strip_id_or_class: cn_pagination_controls\r
-strip_id_or_class: cn_features_container\r
-strip_id_or_class: global-footer\r
-strip_id_or_class: cn_ecom_placement\r
-strip: //li[@class='blogNavPrev']\r
-\r
-single_page_link: //a[@title='Print this page']\r
-\r
-test_url: http://www.vanityfair.com/politics/features/2011/05/egypt-revolutionaries-201105\r
-test_url: http://www.vanityfair.com/politics/features/2008/08/hitchens200808\r
+title: //meta[@property="og:title"]/@content
+author: //div[contains(@class, 'byline')]//span[contains(@class, 'name')]
+date: //div[contains(@class, 'cn_date_time')]
+body: //div[contains(@class, 'pageContainers')]
+body: //article[@id='items-container']
+#body: //h2[@class='sub-header'] | //div[contains(@class, 'contributor-type') or @class='display-date' or @class='content-container']
+
+strip_id_or_class: bc
+strip_id_or_class: utilities
+strip_id_or_class: list-supporting
+strip_id_or_class: yrail
+strip_id_or_class: urail
+
+prune: no
+#tidy: no
+
+strip_id_or_class: super-rubric-section
+strip_id_or_class: cn_date_time
+strip_id_or_class: cn_contributors
+strip_id_or_class: cn_pagination_controls
+strip_id_or_class: cn_features_container
+strip_id_or_class: global-footer
+strip_id_or_class: cn_ecom_placement
+strip: //li[@class='blogNavPrev']
+
+single_page_link: //a[@title='Print this page']
+
+test_url: http://www.vanityfair.com/politics/features/2011/05/egypt-revolutionaries-201105
+test_url: http://www.vanityfair.com/politics/features/2008/08/hitchens200808
 test_url: http://www.vanityfair.com/style/2012/01/prisoners-of-style-201201
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6b5e0ae..c0133c9
@@ -1,5 +1,5 @@
-title: //div[@class='ArticleHeadlineDetailedView']\r
-date:  //span[@class='ArticlePublicationDateTimeDetailedView']\r
-author://span[@class='ArticleBylineDetailedView']\r
+title: //div[@class='ArticleHeadlineDetailedView']
+date:  //span[@class='ArticlePublicationDateTimeDetailedView']
+author://span[@class='ArticleBylineDetailedView']
 body: //div[@class='ArticleTextDetailedView']
 test_url: http://www.varingen.no/Nyheter/tabid/392/Default.aspx?ModuleId=56651&articleView=true
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b1db4c3..dfbf69c
@@ -1,4 +1,4 @@
-# FB comments are inside an h2. Weird. Without this, the line 'Comments' is preserved by the text parser\r
-\r
+# FB comments are inside an h2. Weird. Without this, the line 'Comments' is preserved by the text parser
+
 strip: //h2
 test_url: http://www.varsity.co.uk/reviews/2662
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/vea.gov.vn.txt b/inc/3rdparty/site_config/standard/vea.gov.vn.txt
new file mode 100755 (executable)
index 0000000..9c8420c
--- /dev/null
@@ -0,0 +1,7 @@
+title://div[@class="detail-new-title"]
+body://div[@class="innerpad"]
+strip://div[@class="ArticleUtility"]
+strip://div[@class="commentPost"]
+strip://div[@class="comment-box"]
+strip://div[@id="TinLienQuan"]
+test_url: http://vea.gov.vn/vn/tintuc/tintuchangngay/Pages/T%C4%83ng-c%C6%B0%E1%BB%9Dng-b%E1%BA%A3o-t%E1%BB%93n-%C4%91%E1%BB%99ng-v%E1%BA%ADt-hoang-d%C3%A3-%E1%BB%9F-Vi%E1%BB%87t-Nam.aspx
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ba99917..265f9fc
@@ -1,3 +1,3 @@
-title: //td[@class='second_content']/h1\r
+title: //td[@class='second_content']/h1
 body: //td[@class='second_content']/div[@class='article_text']
 test_url: http://www.vedomosti.ru/newspaper/article/259377/rasprodazha_mailru
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 14144c0..2a44c31
@@ -1,5 +1,5 @@
-author: //div[@class="blogginnleggForfatter"]\r
-date: concat(//div[@class='blogginnleggDatoDag'],' ',//div[@class='blogginnleggDatoMnd'])\r
-strip: //div[contains(@id,"bloggDelingslenker")]\r
+author: //div[@class="blogginnleggForfatter"]
+date: concat(//div[@class='blogginnleggDatoDag'],' ',//div[@class='blogginnleggDatoMnd'])
+strip: //div[contains(@id,"bloggDelingslenker")]
 strip: //div[contains(@id,"bloggDelingslenker")]
 test_url: http://veggbilder.no/blogginnlegg/fristelser
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 294ace9..d22fc5c
@@ -1,6 +1,6 @@
-title: //h2\r
-date: substring-before(//small," &bull; Permalink")\r
-author:string('Martin Hering')\r
-\r
+title: //h2
+date: substring-before(//small," &bull; Permalink")
+author:string('Martin Hering')
+
 Strip: //p/small
 test_url: http://vemedio.com/blog/posts/state-of-support-and-icloud
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 41bfa8c..d6321d7
@@ -1,6 +1,6 @@
-title: //h1[@class="entry-title"]\r
-author: //div[@class="author-name"]\r
-date: //span[@class="the-time"]\r
-body: //div[@class="entry-content"]\r
+title: //h1[@class="entry-title"]
+author: //div[@class="author-name"]
+date: //span[@class="the-time"]
+body: //div[@class="entry-content"]
 strip: //div[@class="vb-gallery"]
 test_url: http://venturebeat.com/2012/07/17/marissa-mayer-yahoo/#s:mayer-1
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 74203ca..418b83a
@@ -1,12 +1,12 @@
-title: //article/header/h1\r
-\r
-author: //article/header/section[@class='byline']/span[contains(@class, 'author')]/a\r
-date: //article/header/section[@class='byline']/span[@class='published']/span\r
-\r
-body: //article/section[@class='body']\r
-\r
-convert_double_br_tags: yes\r
-\r
-# This is required, because Tidy chokes on the HTML5 tags...\r
+title: //article/header/h1
+
+author: //article/header/section[@class='byline']/span[contains(@class, 'author')]/a
+date: //article/header/section[@class='byline']/span[@class='published']/span
+
+body: //article/section[@class='body']
+
+convert_double_br_tags: yes
+
+# This is required, because Tidy chokes on the HTML5 tags...
 tidy: no
 test_url: http://www.version2.dk/artikel/17069-amerikansk-hit-investor-er-vild-med-danske-net-ivaerksaettere
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 4cdd0c0..ad0fec6
@@ -1,7 +1,7 @@
-title: //title\r
-body: //div[contains(@class, 'printRecipe')]\r
-strip: //div[@class='recipeHeader']\r
-prune: no\r
-tidy: no\r
+title: //title
+body: //div[contains(@class, 'printRecipe')]
+strip: //div[@class='recipeHeader']
+prune: no
+tidy: no
 single_page_link: //ul[@class='printOptions']//a[contains(@href, 'detail.aspx?p=1&showphoto=true')]
 test_url: http://www.verybestbaking.com/recipes/143190/Penne-Pasta-with-Sun-dried-Tomato-Cream-Sauce/detail.aspx
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index fceeea0..bfadb4a
@@ -1,3 +1,3 @@
-body: //div[@id='artikkelspalte']\r
+body: //div[@id='artikkelspalte']
 strip_id_or_class: 'breadcrumb'
 test_url: http://www.vg.no/spill/artikkel.php?artid=10003628
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 1dca55a..5db7746
@@ -1,9 +1,9 @@
-title: concat("Video: ", //div[@id='currentVideoTitleDivId'])\r
-body: //div[@id='currentVideoDescriptionId']\r
-author: //meta[@name='author']/@content\r
-\r
-replace_string(<div id="currentVideoDescriptionId" style="display): <div id="currentVideoDescriptionId" style="displayitplease\r
-\r
-replace_string(<div id="currentVideoTitleDivId" style="display): <div id="currentVideoTitleDivId" style="displayitplease\r
-\r
+title: concat("Video: ", //div[@id='currentVideoTitleDivId'])
+body: //div[@id='currentVideoDescriptionId']
+author: //meta[@name='author']/@content
+
+replace_string(<div id="currentVideoDescriptionId" style="display): <div id="currentVideoDescriptionId" style="displayitplease
+
+replace_string(<div id="currentVideoTitleDivId" style="display): <div id="currentVideoTitleDivId" style="displayitplease
+
 test_url: http://video.forbes.com/fvn/business/wells-fargo-inside-the-bank-that-works
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a166381..d93780c
@@ -1,6 +1,6 @@
-title: //h2[@class='posttitle']\r
-date: substring-before(substring-after(//span[@class='postdate'], 'on '), ' by')\r
-date: //span[@class='postdate']\r
-author: //span[@class='postdate']/a\r
+title: //h2[@class='posttitle']
+date: substring-before(substring-after(//span[@class='postdate'], 'on '), ' by')
+date: //span[@class='postdate']
+author: //span[@class='postdate']/a
 body: //div[@class='entry line_top']
 test_url: http://videogum.com/395042/here-are-some-afternoon-links-92/list/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index df37460..36e4a2f
@@ -1,9 +1,9 @@
-title: //h2[@class='headline']\r
-\r
+title: //h2[@class='headline']
+
 body: //div[@class='ContentPrint']
-\r
-prune: no\r
-\r
-single_page_link: //a[contains(@href, '/printVersion/')]\r
-\r
+
+prune: no
+
+single_page_link: //a[contains(@href, '/printVersion/')]
+
 test_url: http://www.villagevoice.com/2010-03-16/news/new-york-s-ten-worst-landlords/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d6c6701..f36c9c5
@@ -1,17 +1,17 @@
-title: //title\r
-body: //iframe\r
-\r
-find_string: <html>&lt;iframe \r
-replace_string: <iframe id="video" \r
-\r
-find_string: &gt;&lt;/iframe&gt;</html>\r
-replace_string: ></iframe>\r
-\r
-replace_string(&quot;): "\r
-\r
-single_page_link: //link[@type='text/xml+oembed']\r
-\r
-prune: no\r
-tidy: no\r
-\r
+title: //title
+body: //iframe
+
+find_string: <html>&lt;iframe 
+replace_string: <iframe id="video" 
+
+find_string: &gt;&lt;/iframe&gt;</html>
+replace_string: ></iframe>
+
+replace_string(&quot;): "
+
+single_page_link: //link[@type='text/xml+oembed']
+
+prune: no
+tidy: no
+
 test_url: http://vimeo.com/35941909
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/viply.de.txt b/inc/3rdparty/site_config/standard/viply.de.txt
new file mode 100755 (executable)
index 0000000..e3599c9
--- /dev/null
@@ -0,0 +1,12 @@
+title: //div[@id='singletext']//h1
+body: //div[contains(@class, 'mypictureborder')] | //div[@id='singletext']
+prune: no
+
+strip_id_or_class: singletostart
+strip_id_or_class: navigation
+strip_id_or_class: social
+strip_id_or_class: single_topwrapper
+strip: //a[contains(., 'Nächster Artikel')]
+
+test_url: http://www.viply.de/?p=87973
+test_url: http://www.viply.de/?feed=rss2
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0f03198..04e0910
@@ -1,14 +1,14 @@
-# Author's name, when present, has 'skrifar:' ('writes:') appended to it.\r
-# In case of multiple authors, this would be 'skrifa:', hence only 7 characters\r
-# are stripped off.\r
-author: substring(//div[@class='paragraph']/div[@class='meta'], 0, string-length(//div[@class='paragraph']/div[@class='meta']) - 7)\r
-\r
-date: //span[@class='date']\r
-title: //h1\r
-body: //div[@class='paragraph']\r
-\r
-# Strip out author string when present\r
-strip: //div[@class='paragraph']/div[@class='meta']\r
-\r
+# Author's name, when present, has 'skrifar:' ('writes:') appended to it.
+# In case of multiple authors, this would be 'skrifa:', hence only 7 characters
+# are stripped off.
+author: substring(//div[@class='paragraph']/div[@class='meta'], 0, string-length(//div[@class='paragraph']/div[@class='meta']) - 7)
+
+date: //span[@class='date']
+title: //h1
+body: //div[@class='paragraph']
+
+# Strip out author string when present
+strip: //div[@class='paragraph']/div[@class='meta']
+
 convert_double_br_tags: yes
 test_url: http://visir.is/esb,-ipa,-bhm-og-bsrb/article/2012701319997
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8b2a300..f2d11c7
@@ -1,6 +1,6 @@
-strip: //*[(@id = "ja-search")]\r
-body: //*[(@id = "ja-mainbody")]\r
-body: //*[(@id = "content-mass-bottom")]\r
-strip://h3[contains(span,'Related Posts')]\r
+strip: //*[(@id = "ja-search")]
+body: //*[(@id = "ja-mainbody")]
+body: //*[(@id = "content-mass-bottom")]
+strip://h3[contains(span,'Related Posts')]
 strip://img
 test_url: http://vitispr.com/blog/coventry-is-a-technology-hotspot
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 23c928b..e5ebc43
@@ -1,8 +1,8 @@
-body: //div[@cpms_content]//h2[@class='Lead'] | //div[@cpms_content]//p[@class='Normal'] | //div[@cpms_content]//table\r
-strip://div[@class="box-item"]\r
-strip://div[@id="ARTICLE_BANNER"]\r
-strip://a\r
-strip://div[@class="tag-parent"]\r
-strip://div[@class="email-print txtr"]\r
-\r
+body: //div[@cpms_content]//h2[@class='Lead'] | //div[@cpms_content]//p[@class='Normal'] | //div[@cpms_content]//table
+strip://div[@class="box-item"]
+strip://div[@id="ARTICLE_BANNER"]
+strip://a
+strip://div[@class="tag-parent"]
+strip://div[@class="email-print txtr"]
+
 test_url: http://vnexpress.net/gl/xa-hoi/2011/04/tim-thay-nan-nhan-cuoi-cung-vu-sap-mo-da-o-len-co/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6bd0e85..b754aeb
@@ -1,3 +1,3 @@
-title: //h1\r
+title: //h1
 body: //div[@class='entrytext']
 test_url: http://voices.washingtonpost.com/ezra-klein/2010/10/why_isnt_monetary_policy_discr.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a39c9f4..cfb9ea1
@@ -1,3 +1,3 @@
-body: //div[contains(@class, 'KonaBody')]\r
+body: //div[contains(@class, 'KonaBody')]
 
 test_url: http://www.vworker.com/RentACoder/misc/BidRequests/ShowBidRequest.asp?lngBidRequestId=1634186
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index afcba0f..e92757d
@@ -1,4 +1,4 @@
-title: //h2[@class="title"]\r
-body: //div[@class="post"]\r
+title: //h2[@class="title"]
+body: //div[@class="post"]
 
 test_url: http://waffle.wootest.net/2011/06/22/on-reading-news/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3ab2217..c53eb0d
@@ -1,14 +1,14 @@
-title: //div[@id='pr']/h3\r
-author: //div[@class='dateline']//a[contains(@href, '/author/')]\r
-\r
-# print page\r
-body: //div[@id='prbody']\r
-# standard page\r
-body: //div[@id='pgbody']\r
-\r
-# for multi-page articles\r
-single_page_link: //div[@class='tipjar']//a[contains(@href, '/printerFriendly.php?')]\r
-\r
-prune: no\r
+title: //div[@id='pr']/h3
+author: //div[@class='dateline']//a[contains(@href, '/author/')]
+
+# print page
+body: //div[@id='prbody']
+# standard page
+body: //div[@id='pgbody']
+
+# for multi-page articles
+single_page_link: //div[@class='tipjar']//a[contains(@href, '/printerFriendly.php?')]
+
+prune: no
 
 test_url: http://www.walrusmagazine.com/articles/2011.12-memoir-kidnapped
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index a41a351..21f5635
@@ -1,3 +1,3 @@
-title: //h3\r
+title: //h3
 body: //div[@class="content_wysiwyg"]
 test_url: http://www.warnerbros.fr/game-of-thrones-un-junket-vu-de-l-interieur-268.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/washingtoninstitute.org.txt b/inc/3rdparty/site_config/standard/washingtoninstitute.org.txt
new file mode 100755 (executable)
index 0000000..17f4567
--- /dev/null
@@ -0,0 +1,6 @@
+body: //div[@class='main']//article
+
+prune: no
+
+test_url: http://www.washingtoninstitute.org/policy-analysis/view/striking-syria-lessons-from-the-israeli-experience?goback=.gde_3822158_member_273623672
+test_url: http://www.washingtoninstitute.org/rss/11/10
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index edf1642..8f8902a
@@ -1,10 +1,10 @@
-title://a[@class = 'headline-article']\r
-\r
-author: substring-after(//div[@class = 'article']/p[@class = 'author'], 'By ')\r
-date://div[@class = 'article']/span[@class = 'date']\r
-body://div[@class = 'article']\r
-single_page_link://a[@class = 'print']\r
-strip://p[@class = 'author']\r
-strip://a[@class = 'headline-article']\r
+title://a[@class = 'headline-article']
+
+author: substring-after(//div[@class = 'article']/p[@class = 'author'], 'By ')
+date://div[@class = 'article']/span[@class = 'date']
+body://div[@class = 'article']
+single_page_link://a[@class = 'print']
+strip://p[@class = 'author']
+strip://a[@class = 'headline-article']
 strip://span[@class = 'date']
 test_url: http://www.washingtonmonthly.com/magazine/julyaugust_2011/features/the_trinity_sisters030380.php
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 2931ca5..0aa9f1d
@@ -1,21 +1,32 @@
-body: //div[@class="article_body"]\r
-author://meta[@name='DC.creator']/@content\r
-title://meta[@name='title']/@content\r
-date://div[contains(@class,'byline')]//span[contains(@class,'published')]/@title\r
-date://meta[@name="DC.date.issued"]/@content\r
-strip://div[@class="relative primary-slot padding-top img-border gallery-container photo-wrapper"]\r
-strip://div[@id="wp-column six end"]\r
-strip://div[contains(@class,'hidden')]\r
-strip://div[@id='article-side-rail']\r
-strip://div[@class="module component todays-paper-module curved"]\r
-strip://div[@class="module component live-qa curved img-border"]\r
-strip://div[@class="module component newsletter-signup curved"]\r
-strip://div[@class="module featured-stories component curved img-border"]\r
-\r
-strip_id_or_class: carousel\r
-strip_id_or_class: toolbar\r
-strip_id_or_class: module\r
-\r
-test_url: http://www.washingtonpost.com/world/europe/in-europe-new-fears-of-german-might/2011/10/19/gIQA3baZ7L_story.html?hpid=z1\r
-test_url: http://www.washingtonpost.com/national/health-science/radical-theory-of-first-americans-places-stone-age-europeans-in-delmarva-20000-years-ago/2012/02/28/gIQA4mriiR_story.html\r
+# Seems to be redirecting to articles.washingtonpost.com for many users
+
+body: //div[contains(@class, "article_body")]
+# print view
+body: //div[@id='print_facet']//div[@id='body']
+
+author://meta[@name='DC.creator']/@content
+title://meta[@name='title']/@content
+date://div[contains(@class,'byline')]//span[contains(@class,'published')]/@title
+date://meta[@name="DC.date.issued"]/@content
+strip://div[@class="relative primary-slot padding-top img-border gallery-container photo-wrapper"]
+strip://div[@id="wp-column six end"]
+strip://div[contains(@class,'hidden')]
+strip://div[@id='article-side-rail']
+strip://div[@class="module component todays-paper-module curved"]
+strip://div[@class="module component live-qa curved img-border"]
+strip://div[@class="module component newsletter-signup curved"]
+strip://div[@class="module featured-stories component curved img-border"]
+
+strip_id_or_class: carousel
+strip_id_or_class: toolbar
+strip_id_or_class: module
+
+# Change gJQAwdJG4U_story.html to gJQAwdJG4U_print.html
+single_page_link: concat(substring-before(//link[@rel="canonical"]/@href, "_story.html"), "_print.html")
+
+# [OLD] Change gJQAwdJG4U_story.html to gJQAwdJG4U_story_print.html
+#single_page_link: concat(substring-before(//link[@rel="canonical"]/@href, "_story.html"), "_story_print.html")
+
+test_url: http://www.washingtonpost.com/world/europe/in-europe-new-fears-of-german-might/2011/10/19/gIQA3baZ7L_story.html?hpid=z1
+test_url: http://www.washingtonpost.com/national/health-science/radical-theory-of-first-americans-places-stone-age-europeans-in-delmarva-20000-years-ago/2012/02/28/gIQA4mriiR_story.html
 test_url: http://www.washingtonpost.com/lifestyle/magazine/the-sorry-fate-of-a-tech-pioneer-halsey-minor-and-historic-virginia-estate-carters-grove/2012/05/30/gJQAwdJG4U_story.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index dfcd008..9ed43a2
@@ -1,6 +1,6 @@
-body: //div[@id='template_article']\r
-\r
-strip_id_or_class: article_more\r
-strip: //hr\r
+body: //div[@id='template_article']
+
+strip_id_or_class: article_more
+strip: //hr
 
 test_url: http://www.web-libre.org/dossiers/jacuzzi-gonflable,8493.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9e75a8a..578ba52
@@ -1,5 +1,5 @@
-title://div[@class="post"]/h2\r
-author://p[@class="postinfo"]/a\r
-date:substring-before(substring-after(//p[@class="postinfo"],' on '),' under ')\r
+title://div[@class="post"]/h2
+author://p[@class="postinfo"]/a
+date:substring-before(substring-after(//p[@class="postinfo"],' on '),' under ')
 body://div[@class="contenttext"]
 test_url: http://weblog.bignerdranch.com/?p=304
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 3fabda0..7cfa49d
@@ -1,9 +1,9 @@
-title: //h2[@class="pageTitle"]\r
-strip: //div[@class="postfoot"]\r
-strip: //h2[@class="pageTitle"]\r
-strip: //h3[@class="pageTitle"]\r
-body: //div[@class="post"]\r
-author: substring-before(substring-after(//div[@class="postfoot"], 'by'), 'Filed')\r
-date: substring-before(substring-after(//div[@class="postfoot"], 'Published'), 'by')\r
+title: //h2[@class="pageTitle"]
+strip: //div[@class="postfoot"]
+strip: //h2[@class="pageTitle"]
+strip: //h3[@class="pageTitle"]
+body: //div[@class="post"]
+author: substring-before(substring-after(//div[@class="postfoot"], 'by'), 'Filed')
+date: substring-before(substring-after(//div[@class="postfoot"], 'Published'), 'by')
 
 test_url: http://weblogs.asp.net/scottgu/archive/2011/08/31/html-editor-smart-tasks-and-event-handler-generation-asp-net-vnext-series.aspx
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 8922b02..cea1014
@@ -1,8 +1,8 @@
-tidy: no\r
-dissolve: //div[@id="content"]/div/article/header\r
-body: //div[@id="content"]/div/article \r
-title: //div[@id="content"]/div/article/h1\r
-date: //div[@id="content"]/div/article/header/div[@id="issueSelectTrigger"]\r
-strip: //div[@id="content"]/div/article/h1\r
+tidy: no
+dissolve: //div[@id="content"]/div/article/header
+body: //div[@id="content"]/div/article 
+title: //div[@id="content"]/div/article/h1
+date: //div[@id="content"]/div/article/header/div[@id="issueSelectTrigger"]
+strip: //div[@id="content"]/div/article/h1
 
 test_url: http://webpaper.nzz.ch/2012/06/23/front/JJKMS/aphrodite-und-die-kommunisten?guest_pass=24a3ca5b6d%3AJJKMS%3Ad30e1be8628c099669671d4da56cdce4187790ba
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/webwereld.nl.txt b/inc/3rdparty/site_config/standard/webwereld.nl.txt
new file mode 100755 (executable)
index 0000000..40a5aa3
--- /dev/null
@@ -0,0 +1,8 @@
+strip: //*[@class="paginator"]
+body: //*[@id="articleText"]
+next_page_link: //a[@class="next"]
+
+# No author detection
+# No publishing date detection
+# No author and intro deduplication over multiple pages
+test_url: http://webwereld.nl/analyse/111452/de-code-van-dorifel-nader-bekeken.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 6e4f828..42e65e9
@@ -1,22 +1,22 @@
-# set body\r
-tidy: no\r
-body: //div[contains(@class, 'articleContent')]\r
-\r
-# remove clutter\r
-strip: //div[@class='advertising']\r
-strip: //div[@class='themenalarm']\r
-strip: //div[contains(@class, 'inTextTeaser')]\r
-\r
-# remove captions\r
-strip: //span[@class='copyRight']\r
-\r
-# remove photo galleries and extras\r
-strip: //div[contains(@class, 'textGallery')]\r
-strip: //div[contains(@class, 'videoGallery')]\r
-strip: //div[contains(@class, 'imageGallery')]\r
-strip: //div[contains(@class, 'openContent')]\r
-\r
-# remove comments\r
-strip: //div[@id = 'writeComment']\r
-\r
+# set body
+tidy: no
+body: //div[contains(@class, 'articleContent')]
+
+# remove clutter
+strip: //div[@class='advertising']
+strip: //div[@class='themenalarm']
+strip: //div[contains(@class, 'inTextTeaser')]
+
+# remove captions
+strip: //span[@class='copyRight']
+
+# remove photo galleries and extras
+strip: //div[contains(@class, 'textGallery')]
+strip: //div[contains(@class, 'videoGallery')]
+strip: //div[contains(@class, 'imageGallery')]
+strip: //div[contains(@class, 'openContent')]
+
+# remove comments
+strip: //div[@id = 'writeComment']
+
 test_url: http://www.welt.de/vermischtes/weltgeschehen/article11050589/27-Bergleute-in-neuseelaendischer-Mine-vermisst.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b934302..3132e98
@@ -1,6 +1,6 @@
-title: substring-before(//title, '«')\r
-\r
-body: //div[@class='entry']\r
-strip: //div[@class='sharing_label']\r
+title: substring-before(//title, '«')
+
+body: //div[@class='entry']
+strip: //div[@class='sharing_label']
 strip: //div[@class='snap_nopreview sharing robots-nocontent']
 test_url: http://www.westhamtillidie.com/2012/03/11/twelve-things-we-learned-from-the-doncaster-game/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 52c5cf1..100a8c8
@@ -1,7 +1,7 @@
-strip: //div[@class="navigation"]\r
-strip: //div[@id="sidebar"]\r
-strip: //div[@id="post-extra-content"]\r
-strip: //div[@id="footer"]\r
-strip: //div[contains(@class, "sharing")]\r
+strip: //div[@class="navigation"]
+strip: //div[@id="sidebar"]
+strip: //div[@id="post-extra-content"]
+strip: //div[@id="footer"]
+strip: //div[contains(@class, "sharing")]
 
 test_url: http://whatever.scalzi.com/2011/01/09/quick-giffords-follow-up/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index aa9783c..b9eeaa0
@@ -1,11 +1,11 @@
-body://div[contains(@class,'oAndtLyrics')]\r
-strip://div[contains(@class,'info')]\r
-strip://div[contains(@id,'romanization')]\r
-strip://div[contains(@id,'youtube')]\r
-strip://div[contains(@id,'romanizationSelector')]\r
-strip://div[contains(@id,'langSelectWrap')]\r
-strip://div[contains(@id,'requestTranslationWrap')]\r
-strip://div[contains(@id,'viewMore')]\r
-strip://div[contains(@class,'lyricsListInMainContent')]\r
+body://div[contains(@class,'oAndtLyrics')]
+strip://div[contains(@class,'info')]
+strip://div[contains(@id,'romanization')]
+strip://div[contains(@id,'youtube')]
+strip://div[contains(@id,'romanizationSelector')]
+strip://div[contains(@id,'langSelectWrap')]
+strip://div[contains(@id,'requestTranslationWrap')]
+strip://div[contains(@id,'viewMore')]
+strip://div[contains(@class,'lyricsListInMainContent')]
 strip://div[contains(@class,'descIpNoti')]
 test_url: http://wheelyric.com/lyrics/121#2
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 1f262a0..b80fe5d
@@ -1,8 +1,8 @@
-title: //h1\r
-body: //div[@id='content']\r
-strip_id_or_class: editsection\r
-strip_id_or_class: toc\r
-strip: //div[@id='siteNotice']\r
-strip: //div[@id='content']//table[last()]\r
+title: //h1
+body: //div[@id='content']
+strip_id_or_class: editsection
+strip_id_or_class: toc
+strip: //div[@id='siteNotice']
+strip: //div[@id='content']//table[last()]
 prune: no
 test_url: http://wiki.guildwars.com/wiki/Monk
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e176907..e923399
@@ -1,8 +1,8 @@
-title: //h1\r
-body: //div[@id='content']\r
-strip_id_or_class: editsection\r
-strip_id_or_class: toc\r
-strip: //div[@id='siteNotice']\r
-strip: //div[@id='content']//table[last()]\r
+title: //h1
+body: //div[@id='content']
+strip_id_or_class: editsection
+strip_id_or_class: toc
+strip: //div[@id='siteNotice']
+strip: //div[@id='content']//table[last()]
 prune: no
 test_url: http://wiki.guildwars2.com/wiki/Guardian
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/wikihow.com.txt b/inc/3rdparty/site_config/standard/wikihow.com.txt
new file mode 100755 (executable)
index 0000000..fe95d3f
--- /dev/null
@@ -0,0 +1,15 @@
+# ...&printable=yes
+body: //div[@id='bodycontents']
+prune: no
+tidy: no
+strip_id_or_class: gatEditSection
+strip_id_or_class: relatedwikihows
+#strip: //div[contains(@class, 'step_num')]
+
+replace_string(<script ): <div style="display: none" 
+replace_string(</script>): </div>
+
+single_page_link: //a[@id='gatPrintView']
+single_page_link: concat(//link[@rel='canonical']/@href, '?printable=yes')
+
+test_url: http://www.wikihow.com/Start-Your-Own-Country
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index da5bd0b..1f32a37
@@ -1,14 +1,14 @@
-# copied from .wikipedia.org.txt\r
-title: //h1[@id='firstHeading' or @class='firstHeading']\r
-body: //div[@id = 'bodyContent']\r
-strip_id_or_class: editsection\r
-#strip_id_or_class: toc\r
-strip_id_or_class: vertical-navbox\r
-strip: //table[@id='toc'] | //div[@id='p-toc']\r
-strip: //div[@id='catlinks' or @id='contentSub']\r
-strip: //div[@id='jump-to-nav']\r
-strip: //div[@class='thumbcaption']//div[@class='magnify']\r
-strip: //table[@class='navbox']\r
-prune: no\r
+# copied from .wikipedia.org.txt
+title: //h1[@id='firstHeading' or @class='firstHeading']
+body: //div[@id = 'bodyContent']
+strip_id_or_class: editsection
+#strip_id_or_class: toc
+strip_id_or_class: vertical-navbox
+strip: //table[@id='toc'] | //div[@id='p-toc']
+strip: //div[@id='catlinks' or @id='contentSub']
+strip: //div[@id='jump-to-nav']
+strip: //div[@class='thumbcaption']//div[@class='magnify']
+strip: //table[@class='navbox']
+prune: no
 tidy: no
 test_url: http://wikitravel.org/wiki/en/index.php?title=Bangkok&printable=yes
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 24467c2..394f9ca
@@ -1,4 +1,4 @@
-strip: //div[@class="widget-area"]\r
-title: //*[@class="entry-title"]\r
+strip: //div[@class="widget-area"]
+title: //*[@class="entry-title"]
 date: //time[@class="entry-date"]
 test_url: http://will-self.com/2012/02/01/real-meals-dominos-pizza/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index fb5f92e..cefabec
@@ -1,3 +1,3 @@
-title: substring-after(//span[@class='itemTitle'], ':') \r
+title: substring-after(//span[@class='itemTitle'], ':') 
 body: //div[@id='content']
 test_url: http://www.williampfaff.com/modules/news/article.php?storyid=491
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index bc93637..dddc6f9
@@ -1,12 +1,12 @@
-title: //h1/span\r
-\r
-body: //div[@id="news_content"]\r
-\r
-author: //div[@class="bookmarks_btm"]/p[1]/a[1]/text()\r
-\r
-date: //span[@class='date']\r
-\r
-# Rubrikenbild entfernen\r
-strip: //div[@id="news_content"]/a[1]\r
+title: //h1/span
+
+body: //div[@id="news_content"]
+
+author: //div[@class="bookmarks_btm"]/p[1]/a[1]/text()
+
+date: //span[@class='date']
+
+# Rubrikenbild entfernen
+strip: //div[@id="news_content"]/a[1]
 
 test_url: http://winfuture.de/news,69672.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index cedb439..f25f9c9
@@ -1,6 +1,6 @@
-title: //h1[@class='page-heading']\r
-author: //small/strong/a\r
-#their date string is relative, so if you save the page 2 hours after it is posted it may say 'two hours ago, instead of providing a useful date/time'\r
-date: substring-before(substring-after(//small,'on'),'with')\r
-body: //div[@class='entry']\r
+title: //h1[@class='page-heading']
+author: //small/strong/a
+#their date string is relative, so if you save the page 2 hours after it is posted it may say 'two hours ago' instead of providing a useful date/time
+date: substring-before(substring-after(//small,'on'),'with')
+body: //div[@class='entry']
 test_url: http://www.winrumors.com/chinese-windows-phone-launch-still-on-track-for-early-2012/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index db6a6fc..f725b67
@@ -1,3 +1,3 @@
-date: //*[@class='kicker']\r
-body: //*[@class='KonaBody']\r
+date: //*[@class='kicker']
+body: //*[@class='KonaBody']
 test_url: http://www.winsupersite.com/article/paul-thurrotts-wininfo/android-malware-surges-separate-studies-141364
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 69bbf5b..f5a72d1
@@ -1,22 +1,25 @@
-title: //meta[@property="og:title"]/@content \r
-title: //h1\r
-title: //*[@class='posttitle']\r
-author: //*[@class='entryAuthor']/a[1]\r
-author://*[@class='member-title']\r
-author://li[@class='author']/a[contains(@href, '/author/')]\r
-date: substring-after(//div[@class='entryAuthor'], '·')\r
-date: substring-before(//*[@class='entryDate'], '|')\r
-body: //div[@class='entry']\r
-strip: //span[contains(@class, 'nextprev')]\r
-#strip_id_or_class: ngg-galleryoverview \r
-# ngg-galleryoverview is the whole content sometimes, e.g. http://www.wired.com/underwire/2011/12/best-mixtapes-of-2011/?pid=5736&viewall=true\r
-\r
-strip: //p[span[contains(@class, 'contentjump')]]\r
-strip: //text()[contains(., 'nextpage')]\r
-\r
-prune: no\r
-\r
-single_page_link: //a[contains(@href, '/all/1') and contains(@class, 'contentjumpall')]\r
-\r
-test_url: http://www.wired.com/cloudline/2011/10/meet-arms-cortex-a15-the-future-of-the-ipad-and-possibly-the-macbook-air/\r
-test_url: http://www.wired.com/threatlevel/2012/05/ff_counterfeiter/all/1
\ No newline at end of file
+title: //meta[@name='Title']/@content
+author: //meta[@name='Author']/@content
+date: //meta[@name='DisplayDate']/@content
+body: //div[@class='entry']
+strip: //p[contains(., 'Pages:') and contains(., 'View All')]
+strip: //p[@class='caption']
+strip: //div[@class='desc' or @class='slide' or @id='slide-info']
+
+strip_id_or_class: pullquote
+strip_id_or_class: left_rail
+strip_id_or_class: related-container
+strip_id_or_class: radvert-caption-wrap
+
+# Remove gallery?
+strip_id_or_class: wpgallery
+
+#strip: //text()[contains(., 'nextpage')]
+
+prune: no
+
+single_page_link: //a[.='View All' and contains(@href, '/all/')]
+
+test_url: http://www.wired.com/cloudline/2011/10/meet-arms-cortex-a15-the-future-of-the-ipad-and-possibly-the-macbook-air/
+test_url: http://www.wired.com/wiredenterprise/2013/09/docker/
+test_url: http://www.wired.com/threatlevel/2012/05/ff_counterfeiter/all/
old mode 100644 (file)
new mode 100755 (executable)
index ffb6b2d..1d403a9
@@ -1,13 +1,13 @@
-title: //div[@class="bodyText"]/h1/text()\r
-body: //div[@class="bodyText"]\r
-\r
-# author and date are separated by only a newline\r
-# can't figure out how to tokenize that yet\r
-author: //div[@class="bodyText"]/span[@class="info"]/text()\r
-date: //div[@class="bodyText"]/span[@class="info"]/text()\r
-\r
-# strip metdata from body text\r
-strip: //div[@class="bodyText"]/h1/text()\r
-strip: //div[@class="bodyText"]/span[@class="info"]\r
+title: //div[@class="bodyText"]/h1/text()
+body: //div[@class="bodyText"]
+
+# author and date are separated by only a newline
+# can't figure out how to tokenize that yet
+author: //div[@class="bodyText"]/span[@class="info"]/text()
+date: //div[@class="bodyText"]/span[@class="info"]/text()
+
+# strip metadata from body text
+strip: //div[@class="bodyText"]/h1/text()
+strip: //div[@class="bodyText"]/span[@class="info"]
 strip: //div[@class="bodyText"]/span[@class="info"]
 test_url: http://www.wmnf.org/news_stories/light-rail-advocates-join-forces-to-combat-opposition-in-pinellas
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d9011d2..70168fb
@@ -1,4 +1,4 @@
-date://*[@class="entry-date"]\r
-author://*[@class="author vcard"]\r
+date://*[@class="entry-date"]
+author://*[@class="author vcard"]
 strip://*[@style="position:relative;left:72px;top:2px;"]|//*[@id="authorbox"]
 test_url: http://wmpoweruser.com/breaking-nokia-announces-nfc-support-in-lumia-610-windows-phone-device/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/wordyard.com.txt b/inc/3rdparty/site_config/standard/wordyard.com.txt
new file mode 100644 (file)
index 0000000..d8c753d
--- /dev/null
@@ -0,0 +1,8 @@
+# Generated by FiveFilters.org's web-based selection tool
+# Place this file inside your site_config/custom/ folder
+# Source: http://siteconfig.fivefilters.org/grab.php?url=http%3A%2F%2Fwww.wordyard.com%2F2014%2F09%2F26%2Fremove-blindfold-before-embarking-to-utopia%2F
+
+body: //div[contains(concat(' ',normalize-space(@class),' '),' entry-content ')]
+strip_id_or_class: robots-nocontent
+strip_id_or_class: post-revisions
+test_url: http://www.wordyard.com/2014/09/26/remove-blindfold-before-embarking-to-utopia/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0e42ca5..b88f927
@@ -1,5 +1,5 @@
-title: //div[@class="content article"]/h1\r
-date: substring-after(//*[@class='date'], '//')\r
-body: //*[@class='article-content']\r
+title: //div[@class="content article"]/h1
+date: substring-after(//*[@class='date'], '//')
+body: //*[@class='article-content']
 strip: //*[@id='nomodal']
 test_url: http://www.worldpoultry.net/news/kyrgyzstan-restricts-poultry-imports-from-russia-and-kazakhstan-9332.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 733d607..4682e0d
@@ -1,4 +1,4 @@
-title: //p[@id='content']\r
-\r
+title: //p[@id='content']
+
 body: //div[@class='contentblock']
 test_url: http://www.worldwidewords.org/weirdwords/ww-gro1.htm
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 759fb81..44add9c
@@ -1,6 +1,6 @@
-title: //h2[@class="posttitle"]\r
-body: //div[@class="post"]\r
-strip: //h2[@class="posttitle"]\r
-strip: //p[@class="filed-under"]\r
+title: //h2[@class="posttitle"]
+body: //div[@class="post"]
+strip: //h2[@class="posttitle"]
+strip: //p[@class="filed-under"]
 convert_double_br_tags: yes
 test_url: http://wow.joystiq.com/2011/06/20/the-overachiever-guide-to-midsummer-festival-2011-achievements/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/wpmayor.com.txt b/inc/3rdparty/site_config/standard/wpmayor.com.txt
new file mode 100755 (executable)
index 0000000..bb4fffc
--- /dev/null
@@ -0,0 +1,8 @@
+body: //div[@id='nrelate_flyout_placeholder']
+
+strip_id_or_class: share
+
+prune: no
+
+test_url: http://www.wpmayor.com/themes/wordpress-portfolio-resume-themes/
+test_url: http://www.wpmayor.com/feed/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/wtatennis.com.txt b/inc/3rdparty/site_config/standard/wtatennis.com.txt
new file mode 100755 (executable)
index 0000000..1000ab2
--- /dev/null
@@ -0,0 +1,7 @@
+title: //h1[contains(@class, 'header-2')]
+body: //article//*[contains(@class, 'teaserText') or contains(@class, 'lastUpdated') or contains(@class, 'image') or contains(@class, 'body')]
+strip_id_or_class: articleIndex
+prune: no
+
+test_url: http://www.wtatennis.com/news/article/3190914
+test_url: http://www.wtatennis.com/news/article/3190244
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 0846be2..97a5c19
@@ -1,15 +1,15 @@
-body://div[@id='articleNew']\r
-strip://div[@id='articleBy']\r
-strip://div[@id='articleDate']\r
-strip://td[@class='articleGraphicCredit']\r
-strip://h1\r
-strip://div[@id='articleEnd']\r
-strip://p[@class='tagline']\r
-strip://div[@class='openBox adslibraryArticle']\r
-strip_id_or_class:ad-180x150-1\r
-\r
-\r
-title: //div[@id="articleNew"]/h1\r
-author: //div[@id="articleBy"]/p/b\r
-date: substring-before(//div[@id="articleDate"], "-")\r
+body://div[@id='articleNew']
+strip://div[@id='articleBy']
+strip://div[@id='articleDate']
+strip://td[@class='articleGraphicCredit']
+strip://h1
+strip://div[@id='articleEnd']
+strip://p[@class='tagline']
+strip://div[@class='openBox adslibraryArticle']
+strip_id_or_class:ad-180x150-1
+
+
+title: //div[@id="articleNew"]/h1
+author: //div[@id="articleBy"]/p/b
+date: substring-before(//div[@id="articleDate"], "-")
 test_url: http://www1.folha.uol.com.br/mundo/1115805-ex-ditador-argentino-videla-e-condenado-a-50-anos-de-prisao.shtml
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index f99467c..638583d
@@ -1,11 +1,9 @@
-title:h1
-author: //*[@class = 'author']
-date: //*[@class = 'date']
-body: //*[@id = 'art']
-next_page_link: //*[@id='Str']/a[contains(text(), 'nastepne')]
-strip: //*[@class = 'rel_zdjTOP']
-strip: //*[@id = 'rel']
-strip: //*[@class = 'txt_upl']
-strip: //*[@id='Str']
-strip: //*[@id='source']
-test_url: http://wyborcza.pl/1,123455,11536088,Gdy_peknie_fejs__obryzga_wszystko.html?as=1&startsz=x
\ No newline at end of file
+body: //div[@id='article']
+strip: //div[@class='head']
+
+strip_id_or_class: txt_upl
+
+single_page_link: //div[@id='gazeta_article_tools']//a[contains(@class, 'print')]
+
+test_url: http://wyborcza.pl/1,123455,11536088,Gdy_peknie_fejs__obryzga_wszystko.html?as=1&startsz=x
+test_url: http://wyborcza.pl/1,75478,14880255,Biskup_Dydycz_o_pedofilii_i_tajemnicy_spowiedzi__Zamiast.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index d8c8713..bd7ecf2
@@ -1,3 +1,3 @@
-body: //div[@class='article-body']\r
+body: //div[@class='article-body']
 title: //h1
 test_url: http://wyctim.com/icloud-sync-regebbi-rendszereken/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index fbc1d3d..5b2be74
@@ -1,5 +1,5 @@
-title://h1\r
-\r
-date://p[@class='articleDate']\r
+title://h1
+
+date://p[@class='articleDate']
 body://div[@class='articleBody wzStandardArticle']
 test_url: http://www.wz-newsline.de/home/sport/tennis/federer-zum-vierten-mal-sieger-in-indian-wells-1.938050
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/xfgjls.com.txt b/inc/3rdparty/site_config/standard/xfgjls.com.txt
new file mode 100755 (executable)
index 0000000..2dc247a
--- /dev/null
@@ -0,0 +1,11 @@
+# This filter is tested on:
+# http://www.xfgjls.com/magazine/html/?131.html
+# http://www.xfgjls.com/magazine/html/?170.html
+
+body://h3/following-sibling::div
+title: //h3
+date: substring-before(//h3/following-sibling::div/p, ' ')
+author: substring-before(substring-after(//h3/following-sibling::div/p, '作者:'), '来源')
+wrap_in(strong)://span[contains(@style, "FONT-WEIGHT: bold")]
+dissolve://span[@style="FONT-FAMILY: '宋体'; FONT-SIZE: 10.5pt; FONT-WEIGHT: bold; mso-spacerun: 'yes'"]
+test_url: http://www.xfgjls.com/magazine/html/?170.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index e02960e..c09fa4d
@@ -1,4 +1,4 @@
-title: //h1[@class="entry-title"]\r
-author: //span[@class="fn"]\r
+title: //h1[@class="entry-title"]
+author: //span[@class="fn"]
 date: //p[@class="meta"]
 test_url: http://xoeb.us/blog/2012/03/16/my-mistakes-with-our-first-release/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
diff --git a/inc/3rdparty/site_config/standard/ynet.co.il.txt b/inc/3rdparty/site_config/standard/ynet.co.il.txt
new file mode 100755 (executable)
index 0000000..aa86566
--- /dev/null
@@ -0,0 +1,26 @@
+body: //span[@id='article_content' or @class='text16g']
+
+# ads
+strip: //div[.//div[contains(@id, 'ads.')]]
+# related content heading
+strip: //p[contains(., 'עוד בערוץ החדשות של ynet:')]
+strip: //p[contains(., 'כותרות אחרונות מהעולם בחדשות ynet:')]
+strip: //div[contains(., 'אינציקלופדיית ynet:')]
+# related content links
+strip: //a[@class='bluelink']
+# strip image bullets
+strip_image_src: ynet_manual_bullet.png
+
+prune: no
+tidy: no
+
+# prevent JS issues
+find_string: <script type='text/javascript'>
+replace_string: <div style="display:none;">
+find_string: </script>
+replace_string: </div>
+
+test_url: http://www.ynet.co.il/articles/0,7340,L-4354266,00.html
+test_url: http://www.ynet.co.il/articles/0,7340,L-4354268,00.html
+#feed
+test_url: http://www.ynet.co.il/Integration/StoryRss2.xml
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 9e24db3..2aeb7e0
@@ -1,5 +1,5 @@
-title://div[@class='entry-title']\r
-body://div[@class='entry-content']\r
-strip_comments:yes\r
+title://div[@class='entry-title']
+body://div[@class='entry-content']
+strip_comments:yes
 convert_double_br_tags:yes
 test_url: http://www.yostivanich.com/2010/07/11/wired-com-with-world-watching-wikileaks-falls-into-disrepair/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/yourerie.com.txt b/inc/3rdparty/site_config/standard/yourerie.com.txt
new file mode 100755 (executable)
index 0000000..b46b09e
--- /dev/null
@@ -0,0 +1,2 @@
+body: //div[@class="nxFullTextData"]
+test_url: http://yourerie.com/fulltext?nxd_id=306552
old mode 100644 (file)
new mode 100755 (executable)
index d52b735..b0d95f1
@@ -1,15 +1,15 @@
-title: //title\r
-body: //iframe\r
-\r
-find_string: <html>&lt;iframe \r
-replace_string: <iframe id="video" \r
-\r
-find_string: &gt;&lt;/iframe&gt;</html>\r
-replace_string: ></iframe>\r
-\r
-single_page_link: //link[@type='text/xml+oembed']\r
-\r
-prune: no\r
-tidy: no\r
-\r
+title: //title
+body: //iframe
+
+find_string: <html>&lt;iframe 
+replace_string: <iframe id="video" 
+
+find_string: &gt;&lt;/iframe&gt;</html>
+replace_string: ></iframe>
+
+single_page_link: //link[@type='text/xml+oembed']
+
+prune: no
+tidy: no
+
 test_url: http://www.youtube.com/watch?v=F6gLH0r3iVU
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/zcommunications.org.txt b/inc/3rdparty/site_config/standard/zcommunications.org.txt
new file mode 100755 (executable)
index 0000000..4deb49b
--- /dev/null
@@ -0,0 +1,7 @@
+title: //h1[@id='view_title']
+author: //div[contains(@class, 'content_authors')]//a
+body: //div[@id='view_body']
+
+prune: no
+
+test_url: http://www.zcommunications.org/orwellian-language-update-by-edward-s-herman.html
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index b244b22..939fb0e
@@ -1,10 +1,10 @@
-title: //h1[@class="h s-1"]\r
-author: substring-before(substring-after(//p[@class="meta s-10"], 'By'), '|')\r
-author: substring-after(//div[@class="bio"]//h3, 'About ')\r
-date: substring-after(//p[@class="meta s-10"], '|')\r
-date: substring-after(//p[@class="meta"], '|')\r
-body: //div[@class="content-1 entry space-1 clear"]\r
-body: //div[@class="storyBody"]\r
-\r
-test_url: http://www.zdnet.com/blog/microsoft/the-bing-back-end-more-on-cosmos-tiger-and-scope/10920\r
+title: //h1[@class="h s-1"]
+author: substring-before(substring-after(//p[@class="meta s-10"], 'By'), '|')
+author: substring-after(//div[@class="bio"]//h3, 'About ')
+date: substring-after(//p[@class="meta s-10"], '|')
+date: substring-after(//p[@class="meta"], '|')
+body: //div[@class="content-1 entry space-1 clear"]
+body: //div[@class="storyBody"]
+
+test_url: http://www.zdnet.com/blog/microsoft/the-bing-back-end-more-on-cosmos-tiger-and-scope/10920
 test_url: http://www.zdnet.com/researchers-find-web-tracking-up-privacy-down-7000000358/
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 66a7f1a..9815d47
@@ -1,44 +1,45 @@
-# 2012-12-23 [carlo@...] fixed half-assed headlines in articles, removed inline author profiles, adjusted picture captions\r
-# 2012-03-17 [dkless@...] Cut metadata parts in the beginning and the ends of the content block; copyright entries for pictures removed; Author fixed, not sure if old entries still valid (I left them); Weird problems with some pages addressed (see last section for removing hidden section)\r
-# 2011-12-09 [carlo@...] Removed "related articles" block\r
-# 2011-08-23 [carlo@...] changed single page link to use print version: page works better, less ambiguity. Related cleanups and simplifications.\r
-# 2011-08-20 [carlo@...] added author, fixed date\r
-\r
-\r
-single_page_link: //a[@title='Druckversion']\r
-tidy: no\r
-\r
-title: //title\r
-date: substring-before( //li[@class="date"], " " )\r
-author: //li[@class="author"]/a/text() | //li[@class="author first"]/a/text()\r
-author: substring-after(//li[@class='source first '], 'Quelle: ')\r
-\r
-strip_id_or_class: articleheader\r
-strip: //div[@id="comments"] | //div[@class="pagination block"] | //p[@class="ressortbacklink"] | //div[@id="relatedArticles"]  |  // div[@class="inline portrait"]\r
-\r
-#Removes author and date from the start\r
-strip: //ul[@class="tools"]\r
-#Removes copyright statement - often disturb as first line of the news\r
-strip: //p[@class="copyright"]\r
-strip: //div[@class="copyright"]\r
-#Removes pagination links at the end\r
-strip: //div[@class="pagination"]\r
-\r
-# Fix picture captions\r
-wrap_in(small): //p[@class="caption"]/text()\r
-\r
-# Fix sub-headlines\r
-wrap_in(h2): //p/strong\r
-dissolve: //h2/strong\r
-\r
-#Sometimes things are embedded in the print version that are not displayed on the web, but will be displayed in the mobilized versions and lead even to problems. These sections are removed here.\r
-strip_id_or_class:"informatives"\r
-strip_id_or_class:"bottom"\r
-strip_id_or_class:"teasermosaic"\r
-strip_id_or_class:"comments"\r
-strip_id_or_class:"articlefooter af"\r
-strip_id_or_class:"relateds"\r
-strip_id_or_class:"pagination"\r
-\r
-footnotes: no\r
-test_url: http://www.zeit.de/kultur/film/2012-12/Kurzfilmtag
\ No newline at end of file
+# 2013.10.30 [rezor92] fixed single_page_link
+# 2012-12-23 [carlo@...] fixed half-assed headlines in articles, removed inline author profiles, adjusted picture captions
+# 2012-03-17 [dkless@...] Cut metadata parts in the beginning and the ends of the content block; copyright entries for pictures removed; Author fixed, not sure if old entries still valid (I left them); Weird problems with some pages addressed (see last section for removing hidden section)
+# 2011-12-09 [carlo@...] Removed "related articles" block
+# 2011-08-23 [carlo@...] changed single page link to use print version: page works better, less ambiguity. Related cleanups and simplifications.
+# 2011-08-20 [carlo@...] added author, fixed date
+
+
+single_page_link: //a[@title='Auf einer Seite']
+tidy: no
+
+title: //title
+date: substring-before( //li[@class="date"], " " )
+author: //li[@class="author"]/a/text() | //li[@class="author first"]/a/text()
+author: substring-after(//li[@class='source first '], 'Quelle: ')
+
+strip_id_or_class: articleheader
+strip: //div[@id="comments"] | //div[@class="pagination block"] | //p[@class="ressortbacklink"] | //div[@id="relatedArticles"]  |  // div[@class="inline portrait"]
+
+#Removes author and date from the start
+strip: //ul[@class="tools"]
+#Removes copyright statement - often disturb as first line of the news
+strip: //p[@class="copyright"]
+strip: //div[@class="copyright"]
+#Removes pagination links at the end
+strip: //div[@class="pagination"]
+
+# Fix picture captions
+wrap_in(small): //p[@class="caption"]/text()
+
+# Fix sub-headlines
+wrap_in(h2): //p/strong
+dissolve: //h2/strong
+
+#Sometimes things are embedded in the print version that are not displayed on the web, but will be displayed in the mobilized versions and can even lead to problems. These sections are removed here.
+strip_id_or_class:"informatives"
+strip_id_or_class:"bottom"
+strip_id_or_class:"teasermosaic"
+strip_id_or_class:"comments"
+strip_id_or_class:"articlefooter af"
+strip_id_or_class:"relateds"
+strip_id_or_class:"pagination"
+
+footnotes: no
+test_url: http://www.zeit.de/kultur/film/2012-12/Kurzfilmtag
diff --git a/inc/3rdparty/site_config/standard/zerohedge.com.txt b/inc/3rdparty/site_config/standard/zerohedge.com.txt
new file mode 100755 (executable)
index 0000000..7e76aee
--- /dev/null
@@ -0,0 +1,10 @@
+author: //span[@class='submitted']/a
+strip: //div[@class='clear-block clr']
+strip: //div[@class='picture']
+strip: //span[@class='submitted']
+strip: //div[@class='breadcrumb']
+strip: //div[@class='fivestar-static-form-item']
+strip: //div[@class='js-links']
+strip: //div[@class='links clear-block clear']
+strip: //div[@class='block block-block']
+test_url: http://www.zerohedge.com/news/bernankes-columbus-voyage-end-monetary-policy-world
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index ea9132a..afa964d
@@ -1,3 +1,3 @@
-title: //h1\r
+title: //h1
 body: //div[@id="primarycontent"]
 test_url: http://zerokspot.com/weblog/2011/06/26/europython2011/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/zhihu.com.txt b/inc/3rdparty/site_config/standard/zhihu.com.txt
new file mode 100755 (executable)
index 0000000..3c9d8c1
--- /dev/null
@@ -0,0 +1,19 @@
+# This filter is tested on:
+# http://www.zhihu.com/question/19587406
+# http://www.zhihu.com/question/20649035
+# http://www.zhihu.com/question/20637942
+
+author: //h3[@class='zm-item-answer-author-wrap']
+title://h2[@class='zm-item-title']
+date://a[@class='answer-date-link meta-item']
+convert_double_br_tags: yes
+
+wrap_in(blockquote)://div[@class='zm-editable-content']
+wrap_in(blockquote)://sup/text()
+dissolve://sup
+
+strip://div[@class='zh-answers-title']
+strip:///div[@class='zm-item-vote-info ']
+strip://div[@class='zm-item-answer-author-info']
+strip://div[@class='zu-blue-info-board zg-r3px']
+test_url: http://www.zhihu.com/question/20637942
\ No newline at end of file
old mode 100644 (file)
new mode 100755 (executable)
index 2a2f58a..188d4dd
@@ -1,3 +1,3 @@
-title: substring-after(id, 'post')/h2\r
+title: substring-after(id, 'post')/h2
 body://div[@class = 'entry']
 test_url: http://www.zingtrain.com/category/ontrack/january-2007/
\ No newline at end of file
index 11cccb7238d9d6a6e16695386152a25fc6789fa8..b5dd21203a9a667872303bc71a0b97498f862769 100755 (executable)
@@ -5,23 +5,28 @@
  * @category   wallabag
  * @author     Nicolas Lœuillet <nicolas@loeuillet.org>
  * @copyright  2013
- * @license    http://www.wtfpl.net/ see COPYING file
+ * @license    http://opensource.org/licenses/MIT see COPYING file
  */
 
 class Database {
+
     var $handle;
-    private $order = array(
-      'ia' => 'ORDER BY entries.id',
-      'id' => 'ORDER BY entries.id DESC',
-      'ta' => 'ORDER BY lower(entries.title)',
-      'td' => 'ORDER BY lower(entries.title) DESC',
-      'default' => 'ORDER BY entries.id'
+    private $order = array (
+        'ia' => 'ORDER BY entries.id',
+        'id' => 'ORDER BY entries.id DESC',
+        'ta' => 'ORDER BY lower(entries.title)',
+        'td' => 'ORDER BY lower(entries.title) DESC',
+        'default' => 'ORDER BY entries.id'
     );
 
     function __construct()
     {
         switch (STORAGE) {
             case 'sqlite':
+                // Check if /db is writeable
+                if ( !is_writable(STORAGE_SQLITE) || !is_writable(dirname(STORAGE_SQLITE))) {\r
+                       die('An error occured: "db" directory must be writeable for your web server user!');
+                }
                 $db_path = 'sqlite:' . STORAGE_SQLITE;
                 $this->handle = new PDO($db_path);
                 break;
@@ -38,28 +43,17 @@ class Database {
         }
 
         $this->handle->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
+        $this->_checkTags();
         Tools::logm('storage type ' . STORAGE);
     }
 
-    private function getHandle() {
+    private function getHandle()
+    {
         return $this->handle;
     }
 
-    public function isInstalled() {
-        $sql = "SELECT username FROM users";
-        $query = $this->executeQuery($sql, array());
-        if ($query == false) {
-            die(STORAGE . ' database looks empty. You have to create it (you can find database structure in install folder).');
-        }
-        $hasAdmin = count($query->fetchAll());
-
-        if ($hasAdmin == 0)
-            return false;
-
-        return true;
-    }
-
-    public function checkTags() {
+    private function _checkTags()
+    {
 
         if (STORAGE == 'sqlite') {
             $sql = '
@@ -123,9 +117,10 @@ class Database {
         $query = $this->executeQuery($sql, array());
     }
 
-    public function install($login, $password) {
+    public function install($login, $password, $email = '')
+    {
         $sql = 'INSERT INTO users ( username, password, name, email) VALUES (?, ?, ?, ?)';
-        $params = array($login, $password, $login, ' ');
+        $params = array($login, $password, $login, $email);
         $query = $this->executeQuery($sql, $params);
 
         $sequence = '';
@@ -150,7 +145,8 @@ class Database {
         return TRUE;
     }
 
-    public function getConfigUser($id) {
+    public function getConfigUser($id)
+    {
         $sql = "SELECT * FROM users_config WHERE user_id = ?";
         $query = $this->executeQuery($sql, array($id));
         $result = $query->fetchAll();
@@ -163,7 +159,8 @@ class Database {
         return $user_config;
     }
 
-    public function userExists($username) {
+    public function userExists($username)
+    {
         $sql = "SELECT * FROM users WHERE username=?";
         $query = $this->executeQuery($sql, array($username));
         $login = $query->fetchAll();
@@ -174,13 +171,14 @@ class Database {
         }
     }
 
-    public function login($username, $password, $isauthenticated=false) {
+    public function login($username, $password, $isauthenticated = FALSE)
+    {
         if ($isauthenticated) {
-          $sql = "SELECT * FROM users WHERE username=?";
-          $query = $this->executeQuery($sql, array($username));
+            $sql = "SELECT * FROM users WHERE username=?";
+            $query = $this->executeQuery($sql, array($username));
         } else {
-          $sql = "SELECT * FROM users WHERE username=? AND password=?";
-          $query = $this->executeQuery($sql, array($username, $password));
+            $sql = "SELECT * FROM users WHERE username=? AND password=?";
+            $query = $this->executeQuery($sql, array($username, $password));
         }
         $login = $query->fetchAll();
 
@@ -204,7 +202,8 @@ class Database {
         $query = $this->executeQuery($sql_update, $params_update);
     }
 
-    public function updateUserConfig($userId, $key, $value) {
+    public function updateUserConfig($userId, $key, $value)
+    {
         $config = $this->getConfigUser($userId);
 
         if (! isset($config[$key])) {
@@ -218,7 +217,8 @@ class Database {
         $query = $this->executeQuery($sql, $params);
     }
 
-    private function executeQuery($sql, $params) {
+    private function executeQuery($sql, $params)
+    {
         try
         {
             $query = $this->getHandle()->prepare($sql);
@@ -232,28 +232,32 @@ class Database {
         }
     }
     
-    public function listUsers($username=null) {
+    /**
+     * Counts the rows of the users table, optionally restricted to one username.
+     *
+     * @param string|null $username when truthy, only rows with this exact username are counted
+     * @return mixed first column of the count(*) row as returned by the driver
+     */
+    public function listUsers($username = NULL)
+    {
         $sql = 'SELECT count(*) FROM users'.( $username ? ' WHERE username=?' : '');
         $query = $this->executeQuery($sql, ( $username ? array($username) : array()));
         list($count) = $query->fetch();
         return $count;
     }
     
-    public function getUserPassword($userID) {
+    /**
+     * Reads the stored password column of a user.
+     *
+     * @param integer $userID value matched against users.id
+     * @return mixed the 'password' column of the first matching row, or null when there is none
+     */
+    public function getUserPassword($userID)
+    {
         $sql = "SELECT * FROM users WHERE id=?";
         $query = $this->executeQuery($sql, array($userID));
         $password = $query->fetchAll();
         return isset($password[0]['password']) ? $password[0]['password'] : null;
     }
     
-    public function deleteUserConfig($userID) {
+    /**
+     * Deletes every users_config row belonging to a user.
+     *
+     * @param integer $userID value matched against users_config.user_id
+     * @return mixed whatever executeQuery() returned for the DELETE
+     */
+    public function deleteUserConfig($userID)
+    {
         $sql_action = 'DELETE from users_config WHERE user_id=?';
         $params_action = array($userID);
         $query = $this->executeQuery($sql_action, $params_action);
         return $query;
     }
     
-    public function deleteTagsEntriesAndEntries($userID) {
+    public function deleteTagsEntriesAndEntries($userID)
+    {
         $entries = $this->retrieveAll($userID);
         foreach($entries as $entryid) {
             $tags = $this->retrieveTagsByEntry($entryid);
@@ -264,20 +268,23 @@ class Database {
         }
     }
     
-    public function deleteUser($userID) {
+    /**
+     * Deletes the users row with the given id. Rows in other tables are not
+     * touched here (see deleteUserConfig() and deleteTagsEntriesAndEntries()).
+     *
+     * @param integer $userID value matched against users.id
+     */
+    public function deleteUser($userID)
+    {
         $sql_action = 'DELETE from users WHERE id=?';
         $params_action = array($userID);
         $query = $this->executeQuery($sql_action, $params_action);
     }
 
-    public function updateContentAndTitle($id, $title, $body, $user_id) {
+    /**
+     * Overwrites the content and title of one entry owned by the given user.
+     *
+     * @param integer $id entry id
+     * @param string $title new title
+     * @param string $body new content
+     * @param integer $user_id owner of the entry; the row is only updated when it matches
+     * @return mixed whatever executeQuery() returned for the UPDATE
+     */
+    public function updateContentAndTitle($id, $title, $body, $user_id)
+    {
         $sql_action = 'UPDATE entries SET content = ?, title = ? WHERE id=? AND user_id=?';
         $params_action = array($body, $title, $id, $user_id);
         $query = $this->executeQuery($sql_action, $params_action);
         return $query;
     }
 
-    public function retrieveUnfetchedEntries($user_id, $limit) {
+    public function retrieveUnfetchedEntries($user_id, $limit)
+    {
 
         $sql_limit = "LIMIT 0,".$limit;
         if (STORAGE == 'postgres') {
@@ -291,7 +298,8 @@ class Database {
         return $entries;
     }
 
-    public function retrieveUnfetchedEntriesCount($user_id) {
+    public function retrieveUnfetchedEntriesCount($user_id)
+    {
       $sql        = "SELECT count(*) FROM entries WHERE (content = '' OR content IS NULL) AND title LIKE 'Untitled - Import%' AND user_id=?";
       $query      = $this->executeQuery($sql, array($user_id));
       list($count) = $query->fetch();
@@ -299,7 +307,8 @@ class Database {
       return $count;
     }
 
-    public function retrieveAll($user_id) {
+    public function retrieveAll($user_id)
+    {
         $sql        = "SELECT * FROM entries WHERE user_id=? ORDER BY id";
         $query      = $this->executeQuery($sql, array($user_id));
         $entries    = $query->fetchAll();
@@ -307,7 +316,8 @@ class Database {
         return $entries;
     }
 
-    public function retrieveOneById($id, $user_id) {
+    public function retrieveOneById($id, $user_id)
+    {
         $entry  = NULL;
         $sql    = "SELECT * FROM entries WHERE id=? AND user_id=?";
         $params = array(intval($id), $user_id);
@@ -317,7 +327,8 @@ class Database {
         return isset($entry[0]) ? $entry[0] : null;
     }
 
-    public function retrieveOneByURL($url, $user_id) {
+    public function retrieveOneByURL($url, $user_id)
+    {
         $entry  = NULL;
         $sql    = "SELECT * FROM entries WHERE url=? AND user_id=?";
         $params = array($url, $user_id);
@@ -327,13 +338,15 @@ class Database {
         return isset($entry[0]) ? $entry[0] : null;
     }
 
-    public function reassignTags($old_entry_id, $new_entry_id) {
+    /**
+     * Moves all tag links from one entry to another by rewriting tags_entries.entry_id.
+     *
+     * @param integer $old_entry_id entry currently referenced by the links
+     * @param integer $new_entry_id entry the links should reference afterwards
+     */
+    public function reassignTags($old_entry_id, $new_entry_id)
+    {
         $sql    = "UPDATE tags_entries SET entry_id=? WHERE entry_id=?";
         $params = array($new_entry_id, $old_entry_id);
         $query  = $this->executeQuery($sql, $params);
     }
 
-    public function getEntriesByView($view, $user_id, $limit = '', $tag_id = 0) {
+    public function getEntriesByView($view, $user_id, $limit = '', $tag_id = 0)
+    {
         switch ($view) {
             case 'archive':
                 $sql    = "SELECT * FROM entries WHERE user_id=? AND is_read=? ";
@@ -361,9 +374,10 @@ class Database {
                 $entries = $query->fetchAll();
 
                 return $entries;
-        }
+    }
 
-    public function getEntriesByViewCount($view, $user_id, $tag_id = 0) {
+    public function getEntriesByViewCount($view, $user_id, $tag_id = 0)
+    {
         switch ($view) {
             case 'archive':
                     $sql    = "SELECT count(*) FROM entries WHERE user_id=? AND is_read=? ";
@@ -391,7 +405,8 @@ class Database {
         return $count;
     }
 
-    public function updateContent($id, $content, $user_id) {
+    public function updateContent($id, $content, $user_id)
+    {
         $sql_action = 'UPDATE entries SET content = ? WHERE id=? AND user_id=?';
         $params_action = array($content, $id, $user_id);
         $query = $this->executeQuery($sql_action, $params_action);
@@ -406,7 +421,8 @@ class Database {
      * @param integer $user_id
      * @return integer $id of inserted record
      */
-    public function add($url, $title, $content, $user_id, $isFavorite=0, $isRead=0) {
+    public function add($url, $title, $content, $user_id, $isFavorite=0, $isRead=0)
+    {
         $sql_action = 'INSERT INTO entries ( url, title, content, user_id, is_fav, is_read ) VALUES (?, ?, ?, ?, ?, ?)';
         $params_action = array($url, $title, $content, $user_id, $isFavorite, $isRead);
 
@@ -419,36 +435,42 @@ class Database {
         return $id;
     }
 
-    public function deleteById($id, $user_id) {
+    /**
+     * Deletes one entry, provided it belongs to the given user.
+     *
+     * @param integer $id entry id
+     * @param integer $user_id owner of the entry
+     * @return mixed whatever executeQuery() returned for the DELETE
+     */
+    public function deleteById($id, $user_id)
+    {
         $sql_action     = "DELETE FROM entries WHERE id=? AND user_id=?";
         $params_action  = array($id, $user_id);
         $query          = $this->executeQuery($sql_action, $params_action);
         return $query;
     }
 
-    public function favoriteById($id, $user_id) {
+    /**
+     * Toggles the is_fav flag of one entry (is_fav = NOT is_fav).
+     *
+     * @param integer $id entry id
+     * @param integer $user_id owner of the entry
+     */
+    public function favoriteById($id, $user_id)
+    {
         $sql_action     = "UPDATE entries SET is_fav=NOT is_fav WHERE id=? AND user_id=?";
         $params_action  = array($id, $user_id);
         $query          = $this->executeQuery($sql_action, $params_action);
     }
 
-    public function archiveById($id, $user_id) {
+    /**
+     * Toggles the is_read flag of one entry (is_read = NOT is_read).
+     *
+     * @param integer $id entry id
+     * @param integer $user_id owner of the entry
+     */
+    public function archiveById($id, $user_id)
+    {
         $sql_action     = "UPDATE entries SET is_read=NOT is_read WHERE id=? AND user_id=?";
         $params_action  = array($id, $user_id);
         $query          = $this->executeQuery($sql_action, $params_action);
     }
 
-    public function archiveAll($user_id) {
+    /**
+     * Marks every unread entry (is_read = 0) of the given user as read (is_read = 1).
+     *
+     * @param integer $user_id owner of the entries to archive
+     */
+    public function archiveAll($user_id)
+    {
         $sql_action     = "UPDATE entries SET is_read=? WHERE user_id=? AND is_read=?";
-        $params_action  = array($user_id, 1, 0);
+        // bound in placeholder order: new is_read value, user_id, current is_read value
+        $params_action  = array(1, $user_id, 0);
         $query          = $this->executeQuery($sql_action, $params_action);
     }
 
-    public function getLastId($column = '') {
+    /**
+     * Returns the id generated by the last INSERT on the current handle.
+     *
+     * @param string $column passed through unchanged to lastInsertId()
+     *                       (presumably a sequence name for drivers that need one; confirm against callers)
+     * @return mixed result of lastInsertId()
+     */
+    public function getLastId($column = '')
+    {
         return $this->getHandle()->lastInsertId($column);
     }
 
-    public function search($term, $user_id, $limit = '') {
+    public function search($term, $user_id, $limit = '')
+    {
         $search = '%'.$term.'%';
         $sql_action = "SELECT * FROM entries WHERE user_id=? AND (content LIKE ? OR title LIKE ? OR url LIKE ?) "; //searches in content, title and URL
         $sql_action .= $this->getEntriesOrder().' ' . $limit;
@@ -457,7 +479,8 @@ class Database {
         return $query->fetchAll();
        }
 
-    public function retrieveAllTags($user_id, $term = null) {
+    public function retrieveAllTags($user_id, $term = NULL)
+    {
         $sql = "SELECT DISTINCT tags.*, count(entries.id) AS entriescount FROM tags
           LEFT JOIN tags_entries ON tags_entries.tag_id=tags.id
           LEFT JOIN entries ON tags_entries.entry_id=entries.id
@@ -471,7 +494,8 @@ class Database {
         return $tags;
     }
 
-    public function retrieveTag($id, $user_id) {
+    public function retrieveTag($id, $user_id)
+    {
         $tag  = NULL;
         $sql    = "SELECT DISTINCT tags.* FROM tags
           LEFT JOIN tags_entries ON tags_entries.tag_id=tags.id
@@ -481,10 +505,11 @@ class Database {
         $query  = $this->executeQuery($sql, $params);
         $tag  = $query->fetchAll();
 
-        return isset($tag[0]) ? $tag[0] : null;
+        return isset($tag[0]) ? $tag[0] : NULL;
     }
 
-    public function retrieveEntriesByTag($tag_id, $user_id) {
+    public function retrieveEntriesByTag($tag_id, $user_id)
+    {
         $sql =
             "SELECT entries.* FROM entries
             LEFT JOIN tags_entries ON tags_entries.entry_id=entries.id
@@ -495,7 +520,8 @@ class Database {
         return $entries;
     }
 
-    public function retrieveTagsByEntry($entry_id) {
+    public function retrieveTagsByEntry($entry_id)
+    {
         $sql =
             "SELECT tags.* FROM tags
             LEFT JOIN tags_entries ON tags_entries.tag_id=tags.id
@@ -506,14 +532,16 @@ class Database {
         return $tags;
     }
 
-    public function removeTagForEntry($entry_id, $tag_id) {
+    /**
+     * Removes the link between one tag and one entry from tags_entries.
+     * The tag row itself is left in place.
+     *
+     * @param integer $entry_id entry side of the link
+     * @param integer $tag_id tag side of the link
+     * @return mixed whatever executeQuery() returned for the DELETE
+     */
+    public function removeTagForEntry($entry_id, $tag_id)
+    {
         $sql_action     = "DELETE FROM tags_entries WHERE tag_id=? AND entry_id=?";
         $params_action  = array($tag_id, $entry_id);
         $query          = $this->executeQuery($sql_action, $params_action);
         return $query;
     }
     
-    public function cleanUnusedTag($tag_id) {
+    public function cleanUnusedTag($tag_id)
+    {
         $sql_action = "SELECT tags.* FROM tags JOIN tags_entries ON tags_entries.tag_id=tags.id WHERE tags.id=?";
         $query = $this->executeQuery($sql_action,array($tag_id));
         $tagstokeep = $query->fetchAll();
@@ -532,7 +560,8 @@ class Database {
         
     }
 
-    public function retrieveTagByValue($value) {
+    public function retrieveTagByValue($value)
+    {
         $tag  = NULL;
         $sql    = "SELECT * FROM tags WHERE value=?";
         $params = array($value);
@@ -542,27 +571,29 @@ class Database {
         return isset($tag[0]) ? $tag[0] : null;
     }
 
-    public function createTag($value) {
+    /**
+     * Inserts a new row into tags.
+     *
+     * @param string $value text stored in tags.value
+     * @return mixed whatever executeQuery() returned for the INSERT
+     */
+    public function createTag($value)
+    {
         $sql_action = 'INSERT INTO tags ( value ) VALUES (?)';
         $params_action = array($value);
         $query = $this->executeQuery($sql_action, $params_action);
         return $query;
     }
 
-    public function setTagToEntry($tag_id, $entry_id) {
+    /**
+     * Links a tag to an entry by inserting a tags_entries row.
+     *
+     * @param integer $tag_id tag side of the link
+     * @param integer $entry_id entry side of the link
+     * @return mixed whatever executeQuery() returned for the INSERT
+     */
+    public function setTagToEntry($tag_id, $entry_id)
+    {
         $sql_action = 'INSERT INTO tags_entries ( tag_id, entry_id ) VALUES (?, ?)';
         $params_action = array($tag_id, $entry_id);
         $query = $this->executeQuery($sql_action, $params_action);
         return $query;
     }
 
-        private function getEntriesOrder() {
-            if (isset($_SESSION['sort']) and array_key_exists($_SESSION['sort'], $this->order)) {
-                return $this->order[$_SESSION['sort']];
-            }
-            else {
-                return $this->order['default'];
-            }
+    /**
+     * Picks the SQL fragment that search() appends to its query before the limit.
+     *
+     * The session value 'sort' is only used as a key into $this->order, so an
+     * unknown or missing value falls back to the 'default' entry.
+     *
+     * @return mixed the matching element of $this->order
+     */
+    private function getEntriesOrder()
+    {
+        if (isset($_SESSION['sort']) and array_key_exists($_SESSION['sort'], $this->order)) {
+            return $this->order[$_SESSION['sort']];
         }
-
+        else {
+            return $this->order['default'];
+        }
+    }
 }
diff --git a/inc/poche/Language.class.php b/inc/poche/Language.class.php
new file mode 100644 (file)
index 0000000..8d3912f
--- /dev/null
@@ -0,0 +1,113 @@
+<?php
+/**
+ * wallabag, self hostable application allowing you to not miss any content anymore
+ *
+ * @category   wallabag
+ * @author     Nicolas Lœuillet <nicolas@loeuillet.org>
+ * @copyright  2013
+ * @license    http://opensource.org/licenses/MIT see COPYING file
+ */
+
+/**
+ * Activates the gettext locale for the current request and lets a user
+ * list the installed locales and switch between them.
+ */
+class Language
+{
+    /**
+     * @var Poche application object; used for its messages, store and user
+     */
+    protected $wallabag;
+
+    /**
+     * @var string locale identifier activated by the constructor
+     */
+    private $currentLanguage;
+
+    /**
+     * @var array human readable names keyed by locale directory name
+     */
+    private $languageNames = array(
+        'cs_CZ.utf8' => 'čeština',
+        'de_DE.utf8' => 'German',
+        'en_EN.utf8' => 'English',
+        'es_ES.utf8' => 'Español',
+        'fa_IR.utf8' => 'فارسی',
+        'fr_FR.utf8' => 'Français',
+        'it_IT.utf8' => 'Italiano',
+        'pl_PL.utf8' => 'Polski',
+        'pt_BR.utf8' => 'Português (Brasil)',
+        'ru_RU.utf8' => 'Pусский',
+        'sl_SI.utf8' => 'Slovenščina',
+        'uk_UA.utf8' => 'Українська',
+    );
+
+    /**
+     * Resolves the locale and configures gettext for it.
+     *
+     * The LANG constant is used when the session holds no 'poche_user';
+     * otherwise the user's 'language' config value is used.
+     *
+     * @param Poche $wallabag application object
+     */
+    public function __construct(Poche $wallabag)
+    {
+        $this->wallabag = $wallabag;
+        $pocheUser = Session::getParam('poche_user');
+        $language  = (is_null($pocheUser) ? LANG : $pocheUser->getConfigValue('language'));
+
+        @putenv('LC_ALL=' . $language);
+        setlocale(LC_ALL, $language);
+        bindtextdomain($language, LOCALE);
+        textdomain($language);
+
+        $this->currentLanguage = $language;
+    }
+
+    /**
+     * @return string locale identifier activated by the constructor
+     */
+    public function getLanguage()
+    {
+        return $this->currentLanguage;
+    }
+
+    /**
+     * Lists the locales found as sub-directories of LOCALE.
+     *
+     * @return array list of array('name' => display name, 'value' => directory name,
+     *               'current' => bool); empty when LOCALE cannot be opened
+     */
+    public function getInstalledLanguages()
+    {
+        $languages = array();
+        $handle = opendir(LOCALE);
+
+        # opendir() yields false when the directory cannot be opened;
+        # report "nothing installed" instead of handing false to readdir()
+        if ($handle === false) {
+            return $languages;
+        }
+
+        while (($language = readdir($handle)) !== false) {
+            # Languages are stored in a directory, so all directory names are languages
+            # @todo move language installation data to database
+            if (! is_dir(LOCALE . '/' . $language) || in_array($language, array('..', '.', 'tools'))) {
+                continue;
+            }
+
+            $current = false;
+
+            if ($language === $this->getLanguage()) {
+                $current = true;
+            }
+
+            $languages[] = array('name' => (isset($this->languageNames[$language]) ? $this->languageNames[$language] : $language), 'value' => $language, 'current' => $current);
+        }
+
+        # release the directory handle once the listing is complete
+        closedir($handle);
+
+        return $languages;
+    }
+
+
+    /**
+     * Update language for current user
+     *
+     * Queues a message and redirects to the config view in every case. The
+     * guard clauses rely on Tools::redirect() ending the request.
+     *
+     * @param $newLanguage locale directory name requested by the user
+     */
+    public function updateLanguage($newLanguage)
+    {
+        # we are not going to change it to the current language
+        if ($newLanguage == $this->getLanguage()) {
+            # translate a constant msgid and insert the locale afterwards; a msgid
+            # containing the runtime value can never match a catalog entry
+            $this->wallabag->messages->add('w', sprintf(_('still using the "%s" language!'), $this->getLanguage()));
+            Tools::redirect('?view=config');
+        }
+
+        $languages = $this->getInstalledLanguages();
+        $actualLanguage = false;
+
+        foreach ($languages as $language) {
+            if ($language['value'] == $newLanguage) {
+                $actualLanguage = true;
+                break;
+            }
+        }
+
+        if (!$actualLanguage) {
+            $this->wallabag->messages->add('e', _('that language does not seem to be installed'));
+            Tools::redirect('?view=config');
+        }
+
+        $this->wallabag->store->updateUserConfig($this->wallabag->user->getId(), 'language', $newLanguage);
+        $this->wallabag->messages->add('s', _('you have changed your language preferences'));
+
+        # keep the user object stored in the session in step with the database
+        $currentConfig = $_SESSION['poche_user']->config;
+        $currentConfig['language'] = $newLanguage;
+
+        $_SESSION['poche_user']->setConfig($currentConfig);
+
+        Tools::emptyCache();
+        Tools::redirect('?view=config');
+    }
+}
\ No newline at end of file
index 09a9f5ff85276423a29b505312d2f1cec66ead8a..8cebafa31af026de49c59c3db8ccf6954bc4419f 100755 (executable)
  * @category   wallabag
  * @author     Nicolas Lœuillet <nicolas@loeuillet.org>
  * @copyright  2013
- * @license    http://www.wtfpl.net/ see COPYING file
+ * @license    http://opensource.org/licenses/MIT see COPYING file
  */
 
 class Poche
 {
-    public static $canRenderTemplates = true;
-    public static $configFileAvailable = true;
-
+    /**
+     * @var User
+     */
     public $user;
+    /**
+     * @var Database
+     */
     public $store;
+    /**
+     * @var Template
+     */
     public $tpl;
+    /**
+     * @var Language
+     */
+    public $language;
+    /**
+     * @var Routing
+     */
+    public $routing;
+    /**
+     * @var Messages
+     */
     public $messages;
+    /**
+     * @var Paginator
+     */
     public $pagination;
 
-    private $currentTheme = '';
-    private $currentLanguage = '';
-    private $notInstalledMessage = array();
-
-    private $language_names = array(
-      'cs_CZ.utf8' => 'čeština',
-      'de_DE.utf8' => 'German',
-      'en_EN.utf8' => 'English',
-      'es_ES.utf8' => 'Español',
-      'fa_IR.utf8' => 'فارسی',
-      'fr_FR.utf8' => 'Français',
-      'it_IT.utf8' => 'Italiano',
-      'pl_PL.utf8' => 'Polski',
-      'pt_BR.utf8' => 'Português (Brasil)',
-      'ru_RU.utf8' => 'Pусский',
-      'sl_SI.utf8' => 'Slovenščina',
-      'uk_UA.utf8' => 'Українська',
-    );
+    /**
+     * Builds the application object; all set-up work is delegated to init().
+     */
     public function __construct()
     {
-        if ($this->configFileIsAvailable()) {
-            $this->init();
-        }
-
-        if ($this->themeIsInstalled()) {
-            $this->initTpl();
-        }
-
-        if ($this->systemIsInstalled()) {
-            $this->store = new Database();
-            $this->messages = new Messages();
-            # installation
-            if (! $this->store->isInstalled()) {
-                $this->install();
-            }
-            $this->store->checkTags();
-        }
+        $this->init();
     }
 
+    /**
+     * Prepares PHP through Tools::initPhp(), resolves the current user and
+     * creates the collaborators exposed as public properties.
+     *
+     * The user comes from the 'poche_user' session parameter; when that is
+     * missing or empty a fresh User carrying the default config is used.
+     * Language, Template and Routing each receive this object.
+     */
     private function init()
     {
         Tools::initPhp();
 
-        if (isset($_SESSION['poche_user']) && $_SESSION['poche_user'] != array()) {
-            $this->user = $_SESSION['poche_user'];
+        $pocheUser = Session::getParam('poche_user');
+
+        if ($pocheUser && $pocheUser != array()) {
+            $this->user = $pocheUser;
         } else {
-            # fake user, just for install & login screens
+            // fake user, just for install & login screens
             $this->user = new User();
             $this->user->setConfig($this->getDefaultConfig());
         }
 
-        # l10n
-        $language = $this->user->getConfigValue('language');
-        @putenv('LC_ALL=' . $language);
-        setlocale(LC_ALL, $language);
-        bindtextdomain($language, LOCALE);
-        textdomain($language);
-
-        # Pagination
-        $this->pagination = new Paginator($this->user->getConfigValue('pager'), 'p');
-
-        # Set up theme
-        $themeDirectory = $this->user->getConfigValue('theme');
-
-        if ($themeDirectory === false) {
-            $themeDirectory = DEFAULT_THEME;
-        }
-
-        $this->currentTheme = $themeDirectory;
-
-        # Set up language
-        $languageDirectory = $this->user->getConfigValue('language');
-
-        if ($languageDirectory === false) {
-            $languageDirectory = DEFAULT_THEME;
-        }
-
-        $this->currentLanguage = $languageDirectory;
+        $this->pagination   = new Paginator($this->user->getConfigValue('pager'), 'p');
+        $this->language     = new Language($this);
+        $this->tpl          = new Template($this);
+        $this->store        = new Database();
+        $this->messages     = new Messages();
+        $this->routing      = new Routing($this);
     }
 
-    public function configFileIsAvailable() {
-        if (! self::$configFileAvailable) {
-            $this->notInstalledMessage[] = 'You have to copy (don\'t just rename!) inc/poche/config.inc.default.php to inc/poche/config.inc.php.';
-
-            return false;
-        }
-
-        return true;
-    }
-
-    public function themeIsInstalled() {
-        $passTheme = TRUE;
-        # Twig is an absolute requirement for Poche to function. Abort immediately if the Composer installer hasn't been run yet
-        if (! self::$canRenderTemplates) {
-            $this->notInstalledMessage[] = 'Twig does not seem to be installed. Please initialize the Composer installation to automatically fetch dependencies. You can also download <a href="http://wllbg.org/vendor">vendor.zip</a> and extract it in your wallabag folder.';
-            $passTheme = FALSE;
-        }
-
-        if (! is_writable(CACHE)) {
-            $this->notInstalledMessage[] = 'You don\'t have write access on cache directory.';
-
-            self::$canRenderTemplates = false;
-
-            $passTheme = FALSE;
-        }
-
-        # Check if the selected theme and its requirements are present
-        $theme = $this->getTheme();
-
-        if ($theme != '' && ! is_dir(THEME . '/' . $theme)) {
-            $this->notInstalledMessage[] = 'The currently selected theme (' . $theme . ') does not seem to be properly installed (Missing directory: ' . THEME . '/' . $theme . ')';
-
-            self::$canRenderTemplates = false;
-
-            $passTheme = FALSE;
-        }
-
-        $themeInfo = $this->getThemeInfo($theme);
-        if (isset($themeInfo['requirements']) && is_array($themeInfo['requirements'])) {
-            foreach ($themeInfo['requirements'] as $requiredTheme) {
-                if (! is_dir(THEME . '/' . $requiredTheme)) {
-                    $this->notInstalledMessage[] = 'The required "' . $requiredTheme . '" theme is missing for the current theme (' . $theme . ')';
-
-                    self::$canRenderTemplates = false;
-
-                    $passTheme = FALSE;
-                }
-            }
-        }
-
-        if (!$passTheme) {
-            return FALSE;
-        }
-
-
-        return true;
+    /**
+     * Handles the current request by delegating to the Routing object created in init().
+     */
+    public function run()
+    {
+        $this->routing->run();
     }
 
     /**
-     * all checks before installation.
-     * @todo move HTML to template
-     * @return boolean
+     * Creates a new user
      */
-    public function systemIsInstalled()
-    {
-        $msg = TRUE;
-
-        $configSalt = defined('SALT') ? constant('SALT') : '';
-
-        if (empty($configSalt)) {
-            $this->notInstalledMessage[] = 'You have not yet filled in the SALT value in the config.inc.php file.';
-            $msg = FALSE;
-        }
-        if (STORAGE == 'sqlite' && ! file_exists(STORAGE_SQLITE)) {
-            Tools::logm('sqlite file doesn\'t exist');
-            $this->notInstalledMessage[] = 'sqlite file doesn\'t exist, you can find it in install folder. Copy it in /db folder.';
-            $msg = FALSE;
-        }
-        if (is_dir(ROOT . '/install') && ! DEBUG_POCHE) {
-            $this->notInstalledMessage[] = 'you have to delete the /install folder before using poche.';
-            $msg = FALSE;
-        }
-        if (STORAGE == 'sqlite' && ! is_writable(STORAGE_SQLITE)) {
-            Tools::logm('you don\'t have write access on sqlite file');
-            $this->notInstalledMessage[] = 'You don\'t have write access on sqlite file.';
-            $msg = FALSE;
-        }
-
-        if (! $msg) {
-            return false;
-        }
-
-        return true;
-    }
-
-    public function getNotInstalledMessage() {
-        return $this->notInstalledMessage;
-    }
-
-    private function initTpl()
+    public function createNewUser($username, $password, $email = "")
     {
-        $loaderChain = new Twig_Loader_Chain();
-        $theme = $this->getTheme();
-
-        # add the current theme as first to the loader chain so Twig will look there first for overridden template files
-        try {
-            $loaderChain->addLoader(new Twig_Loader_Filesystem(THEME . '/' . $theme));
-        } catch (Twig_Error_Loader $e) {
-            # @todo isInstalled() should catch this, inject Twig later
-            die('The currently selected theme (' . $theme . ') does not seem to be properly installed (' . THEME . '/' . $theme .' is missing)');
-        }
-
-        # add all required themes to the loader chain
-        $themeInfo = $this->getThemeInfo($theme);
-        if (isset($themeInfo['requirements']) && is_array($themeInfo['requirements'])) {
-            foreach ($themeInfo['requirements'] as $requiredTheme) {
-                try {
-                    $loaderChain->addLoader(new Twig_Loader_Filesystem(THEME . '/' . $requiredTheme));
-                } catch (Twig_Error_Loader $e) {
-                    # @todo isInstalled() should catch this, inject Twig later
-                    die('The required "' . $requiredTheme . '" theme is missing for the current theme (' . $theme . ')');
-                }
-            }
-        }
-
-        if (DEBUG_POCHE) {
-            $twigParams = array();
-        } else {
-            $twigParams = array('cache' => CACHE);
-        }
-
-        $this->tpl = new Twig_Environment($loaderChain, $twigParams);
-        $this->tpl->addExtension(new Twig_Extensions_Extension_I18n());
-
-        # filter to display domain name of an url
-        $filter = new Twig_SimpleFilter('getDomain', 'Tools::getDomain');
-        $this->tpl->addFilter($filter);
-
-        # filter for reading time
-        $filter = new Twig_SimpleFilter('getReadingTime', 'Tools::getReadingTime');
-        $this->tpl->addFilter($filter);
-    }
-
-    public function createNewUser() {
-        if (isset($_GET['newuser'])){
-            if ($_POST['newusername'] != "" && $_POST['password4newuser'] != ""){
-                $newusername = filter_var($_POST['newusername'], FILTER_SANITIZE_STRING);
-                if (!$this->store->userExists($newusername)){
-                    if ($this->store->install($newusername, Tools::encodeString($_POST['password4newuser'] . $newusername))) {
-                        Tools::logm('The new user '.$newusername.' has been installed');
-                        $this->messages->add('s', sprintf(_('The new user %s has been installed. Do you want to <a href="?logout">logout ?</a>'),$newusername));
-                        Tools::redirect();
-                    }
-                    else {
-                        Tools::logm('error during adding new user');
-                        Tools::redirect();
-                    }
-                }
-                else {
-                    $this->messages->add('e', sprintf(_('Error : An user with the name %s already exists !'),$newusername));
-                    Tools::logm('An user with the name '.$newusername.' already exists !');
+        // $username: login name of the account to create
+        // $password: clear-text password; it is encoded together with the sanitized username
+        // $email:    optional address stored with the account
+        //
+        // Nothing happens (no message, no redirect) when $username or $password is empty().
+        // Every other path ends with Tools::redirect().
+        if (!empty($username) && !empty($password)){
+            $newUsername = filter_var($username, FILTER_SANITIZE_STRING);
+            $email = filter_var($email, FILTER_SANITIZE_STRING);
+            if (!$this->store->userExists($newUsername)){
+                if ($this->store->install($newUsername, Tools::encodeString($password . $newUsername), $email)) {
+                    Tools::logm('The new user ' . $newUsername . ' has been installed');
+                    $this->messages->add('s', sprintf(_('The new user %s has been installed. Do you want to <a href="?logout">logout ?</a>'), $newUsername));
                     Tools::redirect();
                 }
-            }
-        }
-    }
-
-    public function deleteUser(){
-        if (isset($_GET['deluser'])){
-            if ($this->store->listUsers() > 1) {
-                if (Tools::encodeString($_POST['password4deletinguser'].$this->user->getUsername()) == $this->store->getUserPassword($this->user->getId())) {
-                    $username = $this->user->getUsername();
-                    $this->store->deleteUserConfig($this->user->getId());
-                    Tools::logm('The configuration for user '. $username .' has been deleted !');
-                    $this->store->deleteTagsEntriesAndEntries($this->user->getId());
-                    Tools::logm('The entries for user '. $username .' has been deleted !');
-                    $this->store->deleteUser($this->user->getId());
-                    Tools::logm('User '. $username .' has been completely deleted !');
-                    Session::logout();
-                    Tools::logm('logout');
-                    Tools::redirect();
-                    $this->messages->add('s', sprintf(_('User %s has been successfully deleted !'),$newusername));
-                }
                 else {
-                    Tools::logm('Bad password !');
-                    $this->messages->add('e', _('Error : The password is wrong !'));
-                }
-            }
-            else {
-                Tools::logm('Only user !');
-                $this->messages->add('e', _('Error : You are the only user, you cannot delete your account !'));
-            }
-        }
-    }
-
-    private function install()
-    {
-        Tools::logm('poche still not installed');
-        echo $this->tpl->render('install.twig', array(
-            'token' => Session::getToken(),
-            'theme' => $this->getTheme(),
-            'poche_url' => Tools::getPocheUrl()
-        ));
-        if (isset($_GET['install'])) {
-            if (($_POST['password'] == $_POST['password_repeat'])
-                && $_POST['password'] != "" && $_POST['login'] != "") {
-                # let's rock, install poche baby !
-                if ($this->store->install($_POST['login'], Tools::encodeString($_POST['password'] . $_POST['login'])))
-                {
-                    Session::logout();
-                    Tools::logm('poche is now installed');
+                    Tools::logm('error during adding new user');
                     Tools::redirect();
                 }
             }
             else {
-                Tools::logm('error during installation');
+                $this->messages->add('e', sprintf(_('Error : An user with the name %s already exists !'), $newUsername));
+                Tools::logm('An user with the name ' . $newUsername . ' already exists !');
                 Tools::redirect();
             }
         }
-        exit();
-    }
-
-    public function getTheme() {
-        return $this->currentTheme;
     }
 
     /**
-     * Provides theme information by parsing theme.ini file if present in the theme's root directory.
-     * In all cases, the following data will be returned:
-     * - name: theme's name, or key if the theme is unnamed,
-     * - current: boolean informing if the theme is the current user theme.
-     *
-     * @param string $theme Theme key (directory name)
-     * @return array|boolean Theme information, or false if the theme doesn't exist.
+     * Delete an existing user
      */
-    public function getThemeInfo($theme) {
-        if (!is_dir(THEME . '/' . $theme)) {
-            return false;
-        }
-
-        $themeIniFile = THEME . '/' . $theme . '/theme.ini';
-        $themeInfo = array();
-
-        if (is_file($themeIniFile) && is_readable($themeIniFile)) {
-            $themeInfo = parse_ini_file($themeIniFile);
-        }
-
-        if ($themeInfo === false) {
-            $themeInfo = array();
-        }
-        if (!isset($themeInfo['name'])) {
-            $themeInfo['name'] = $theme;
-        }
-        $themeInfo['current'] = ($theme === $this->getTheme());
-
-        return $themeInfo;
-    }
-
-    public function getInstalledThemes() {
-        $handle = opendir(THEME);
-        $themes = array();
-
-        while (($theme = readdir($handle)) !== false) {
-            # Themes are stored in a directory, so all directory names are themes
-            # @todo move theme installation data to database
-            if (!is_dir(THEME . '/' . $theme) || in_array($theme, array('.', '..'))) {
-                continue;
-            }
-
-            $themes[$theme] = $this->getThemeInfo($theme);
-        }
-
-        ksort($themes);
-
-        return $themes;
-    }
-
-    public function getLanguage() {
-        return $this->currentLanguage;
-    }
-
-    public function getInstalledLanguages() {
-        $handle = opendir(LOCALE);
-        $languages = array();
-
-        while (($language = readdir($handle)) !== false) {
-            # Languages are stored in a directory, so all directory names are languages
-            # @todo move language installation data to database
-            if (! is_dir(LOCALE . '/' . $language) || in_array($language, array('..', '.', 'tools'))) {
-                continue;
+    public function deleteUser($password)
+    {
+        // $password: clear-text password of the logged-in user, required to confirm the deletion.
+        // The account is only removed when at least one other user exists and the
+        // encoded password matches the stored one; otherwise an error message is queued.
+        if ($this->store->listUsers() > 1) {
+            // strict comparison: with == two different hashes that both look numeric (e.g. "0e...") compare equal
+            if (Tools::encodeString($password . $this->user->getUsername()) === $this->store->getUserPassword($this->user->getId())) {
+                $username = $this->user->getUsername();
+                $this->store->deleteUserConfig($this->user->getId());
+                Tools::logm('The configuration for user '. $username .' has been deleted !');
+                $this->store->deleteTagsEntriesAndEntries($this->user->getId());
+                Tools::logm('The entries for user '. $username .' has been deleted !');
+                $this->store->deleteUser($this->user->getId());
+                Tools::logm('User '. $username .' has been completely deleted !');
+                Session::logout();
+                Tools::logm('logout');
+                // queue the message before redirecting, as createNewUser() does
+                // NOTE(review): whether it survives Session::logout() depends on how Messages stores it; confirm
+                $this->messages->add('s', sprintf(_('User %s has been successfully deleted !'), $username));
+                Tools::redirect();
             }
-
-            $current = false;
-
-            if ($language === $this->getLanguage()) {
-                $current = true;
+            else {
+                Tools::logm('Bad password !');
+                $this->messages->add('e', _('Error : The password is wrong !'));
             }
-
-            $languages[] = array('name' => (isset($this->language_names[$language]) ? $this->language_names[$language] : $language), 'value' => $language, 'current' => $current);
         }
-
-        return $languages;
+        else {
+            Tools::logm('Only user !');
+            $this->messages->add('e', _('Error : You are the only user, you cannot delete your account !'));
+        }
     }
 
     public function getDefaultConfig()
@@ -425,7 +150,7 @@ class Poche
                 $body = $content['rss']['channel']['item']['description'];
 
                 // clean content from prevent xss attack
-                $purifier = $this->getPurifier();
+                $purifier = $this->_getPurifier();
                 $title = $purifier->purify($title);
                 $body = $purifier->purify($body);
 
@@ -437,7 +162,7 @@ class Poche
                 if ( $last_id ) {
                     Tools::logm('add link ' . $url->getUrl());
                     if (DOWNLOAD_PICTURES) {
-                        $content = filtre_picture($body, $url->getUrl(), $last_id);
+                        $content = Picture::filterPicture($body, $url->getUrl(), $last_id);
                         Tools::logm('updating content article');
                         $this->store->updateContent($last_id, $content, $this->user->getId());
                     }
@@ -472,7 +197,7 @@ class Poche
                 $msg = 'delete link #' . $id;
                 if ($this->store->deleteById($id, $this->user->getId())) {
                     if (DOWNLOAD_PICTURES) {
-                        remove_directory(ABS_PATH . $id);
+                        Picture::removeDirectory(ABS_PATH . $id);
                     }
                     $this->messages->add('s', _('the link has been deleted successfully'));
                 }
@@ -590,16 +315,16 @@ class Poche
         switch ($view)
         {
             case 'config':
-                $dev_infos = $this->getPocheVersion('dev');
+                $dev_infos = $this->_getPocheVersion('dev');
                 $dev = trim($dev_infos[0]);
                 $check_time_dev = date('d-M-Y H:i', $dev_infos[1]);
-                $prod_infos = $this->getPocheVersion('prod');
+                $prod_infos = $this->_getPocheVersion('prod');
                 $prod = trim($prod_infos[0]);
                 $check_time_prod = date('d-M-Y H:i', $prod_infos[1]);
                 $compare_dev = version_compare(POCHE, $dev);
                 $compare_prod = version_compare(POCHE, $prod);
-                $themes = $this->getInstalledThemes();
-                $languages = $this->getInstalledLanguages();
+                $themes = $this->tpl->getInstalledThemes();
+                $languages = $this->language->getInstalledLanguages();
                 $token = $this->user->getConfigValue('token');
                 $http_auth = (isset($_SERVER['PHP_AUTH_USER']) || isset($_SERVER['REMOTE_USER'])) ? true : false;
                 $only_user = ($this->store->listUsers() > 1) ? false : true;
@@ -703,7 +428,7 @@ class Poche
                     'listmode' => (isset($_COOKIE['listmode']) ? true : false),
                 );
 
-                //if id is given - we retrive entries by tag: id is tag id
+                //if id is given - we retrieve entries by tag: id is tag id
                 if ($id) {
                   $tpl_vars['tag'] = $this->store->retrieveTag($id, $this->user->getId());
                   $tpl_vars['id'] = intval($id);
@@ -733,7 +458,7 @@ class Poche
      * @todo set the new password in function header like this updatePassword($newPassword)
      * @return boolean
      */
-    public function updatePassword()
+    public function updatePassword($password, $confirmPassword)
     {
         if (MODE_DEMO) {
             $this->messages->add('i', _('in demo mode, you can\'t update your password'));
@@ -741,10 +466,10 @@ class Poche
             Tools::redirect('?view=config');
         }
         else {
-            if (isset($_POST['password']) && isset($_POST['password_repeat'])) {
-                if ($_POST['password'] == $_POST['password_repeat'] && $_POST['password'] != "") {
+            if (isset($password) && isset($confirmPassword)) {
+                if ($password == $confirmPassword && !empty($password)) {
                     $this->messages->add('s', _('your password has been updated'));
-                    $this->store->updatePassword($this->user->getId(), Tools::encodeString($_POST['password'] . $this->user->getUsername()));
+                    $this->store->updatePassword($this->user->getId(), Tools::encodeString($password . $this->user->getUsername()));
                     Session::logout();
                     Tools::logm('password updated');
                     Tools::redirect();
@@ -757,102 +482,25 @@ class Poche
         }
     }
 
-    public function updateTheme()
-    {
-        # no data
-        if (empty($_POST['theme'])) {
-        }
-
-        # we are not going to change it to the current theme...
-        if ($_POST['theme'] == $this->getTheme()) {
-            $this->messages->add('w', _('still using the "' . $this->getTheme() . '" theme!'));
-            Tools::redirect('?view=config');
-        }
-
-        $themes = $this->getInstalledThemes();
-        $actualTheme = false;
-
-        foreach (array_keys($themes) as $theme) {
-            if ($theme == $_POST['theme']) {
-                $actualTheme = true;
-                break;
-            }
-        }
-
-        if (! $actualTheme) {
-            $this->messages->add('e', _('that theme does not seem to be installed'));
-            Tools::redirect('?view=config');
-        }
-
-        $this->store->updateUserConfig($this->user->getId(), 'theme', $_POST['theme']);
-        $this->messages->add('s', _('you have changed your theme preferences'));
-
-        $currentConfig = $_SESSION['poche_user']->config;
-        $currentConfig['theme'] = $_POST['theme'];
-
-        $_SESSION['poche_user']->setConfig($currentConfig);
-
-        $this->emptyCache();
-
-        Tools::redirect('?view=config');
-    }
-
-    public function updateLanguage()
-    {
-        # no data
-        if (empty($_POST['language'])) {
-        }
-
-        # we are not going to change it to the current language...
-        if ($_POST['language'] == $this->getLanguage()) {
-            $this->messages->add('w', _('still using the "' . $this->getLanguage() . '" language!'));
-            Tools::redirect('?view=config');
-        }
-
-        $languages = $this->getInstalledLanguages();
-        $actualLanguage = false;
-
-        foreach ($languages as $language) {
-            if ($language['value'] == $_POST['language']) {
-                $actualLanguage = true;
-                break;
-            }
-        }
-
-        if (! $actualLanguage) {
-            $this->messages->add('e', _('that language does not seem to be installed'));
-            Tools::redirect('?view=config');
-        }
-
-        $this->store->updateUserConfig($this->user->getId(), 'language', $_POST['language']);
-        $this->messages->add('s', _('you have changed your language preferences'));
-
-        $currentConfig = $_SESSION['poche_user']->config;
-        $currentConfig['language'] = $_POST['language'];
-
-        $_SESSION['poche_user']->setConfig($currentConfig);
-
-        $this->emptyCache();
-
-        Tools::redirect('?view=config');
-    }
     /**
-     * get credentials from differents sources
-     * it redirects the user to the $referer link
+     * Get credentials from different sources
+     * It redirects the user to the $referer link
+     *
      * @return array
      */
-    private function credentials() {
-        if(isset($_SERVER['PHP_AUTH_USER'])) {
-            return array($_SERVER['PHP_AUTH_USER'],'php_auth',true);
+    private function credentials()
+    {
+        if (isset($_SERVER['PHP_AUTH_USER'])) {
+            return array($_SERVER['PHP_AUTH_USER'], 'php_auth', true);
         }
-        if(!empty($_POST['login']) && !empty($_POST['password'])) {
-            return array($_POST['login'],$_POST['password'],false);
+        if (!empty($_POST['login']) && !empty($_POST['password'])) {
+            return array($_POST['login'], $_POST['password'], false);
         }
-        if(isset($_SERVER['REMOTE_USER'])) {
-            return array($_SERVER['REMOTE_USER'],'http_auth',true);
+        if (isset($_SERVER['REMOTE_USER'])) {
+            return array($_SERVER['REMOTE_USER'], 'http_auth', true);
         }
 
-        return array(false,false,false);
+        return array(false, false, false);
     }
 
     /**
@@ -877,6 +525,14 @@ class Poche
                 $longlastingsession = isset($_POST['longlastingsession']);
                 $passwordTest = ($isauthenticated) ? $user['password'] : Tools::encodeString($password . $login);
                 Session::login($user['username'], $user['password'], $login, $passwordTest, $longlastingsession, array('poche_user' => new User($user)));
+
+                # reload l10n
+                $language = $user['config']['language'];
+                @putenv('LC_ALL=' . $language);
+                setlocale(LC_ALL, $language);
+                bindtextdomain($language, LOCALE);
+                textdomain($language);
+
                 $this->messages->add('s', _('welcome to your wallabag'));
                 Tools::logm('login successful');
                 Tools::redirect($referer);
@@ -901,12 +557,13 @@ class Poche
     }
 
     /**
-     * import datas into your poche
+     * import data into your wallabag
      * @return boolean
      */
+
     public function import() {
 
-      if ( isset($_FILES['file']) ) {
+      if ( isset($_FILES['file']) && $_FILES['file']['tmp_name'] ) {
         Tools::logm('Import stated: parsing file');
 
         // assume, that file is in json format
@@ -935,95 +592,112 @@ class Poche
               $read = ((sizeof($data) && $read)?0:1);
             }
           }
-        }
+       }
 
-        //for readability structure
-        foreach ($data as $record) {
-          if (is_array($record)) {
-            $data[] = $record;
-            foreach ($record as $record2) {
-              if (is_array($record2)) {
-                $data[] = $record2;
-              }
+            // for readability structure
+
+            foreach($data as $record) {
+                if (is_array($record)) {
+                    $data[] = $record;
+                    foreach($record as $record2) {
+                        if (is_array($record2)) {
+                            $data[] = $record2;
+                        }
+                    }
+                }
             }
-          }
-        }
 
-        $urlsInserted = array(); //urls of articles inserted
-        foreach ($data as $record) {
-          $url = trim( isset($record['article__url']) ? $record['article__url'] : (isset($record['url']) ? $record['url'] : '') );
-          if ( $url and !in_array($url, $urlsInserted) ) {
-            $title = (isset($record['title']) ? $record['title'] :  _('Untitled - Import - ').'</a> <a href="./?import">'._('click to finish import').'</a><a>');
-            $body = (isset($record['content']) ? $record['content'] : '');
-            $isRead = (isset($record['is_read']) ? intval($record['is_read']) : (isset($record['archive'])?intval($record['archive']):0));
-            $isFavorite = (isset($record['is_fav']) ? intval($record['is_fav']) : (isset($record['favorite'])?intval($record['favorite']):0) );
-            //insert new record
-            $id = $this->store->add($url, $title, $body, $this->user->getId(), $isFavorite, $isRead);
-            if ( $id ) {
-              $urlsInserted[] = $url; //add
-
-              if ( isset($record['tags']) && trim($record['tags']) ) {
-                //@TODO: set tags
+            $urlsInserted = array(); //urls of articles inserted
+            foreach($data as $record) {
+                $url = trim(isset($record['article__url']) ? $record['article__url'] : (isset($record['url']) ? $record['url'] : ''));
+                if ($url and !in_array($url, $urlsInserted)) {
+                    $title = (isset($record['title']) ? $record['title'] : _('Untitled - Import - ') . '</a> <a href="./?import">' . _('click to finish import') . '</a><a>');
+                    $body = (isset($record['content']) ? $record['content'] : '');
+                    $isRead = (isset($record['is_read']) ? intval($record['is_read']) : (isset($record['archive']) ? intval($record['archive']) : 0));
+                    $isFavorite = (isset($record['is_fav']) ? intval($record['is_fav']) : (isset($record['favorite']) ? intval($record['favorite']) : 0));
 
-              }
+                    // insert new record
+
+                    $id = $this->store->add($url, $title, $body, $this->user->getId() , $isFavorite, $isRead);
+                    if ($id) {
+                        $urlsInserted[] = $url; //add
+                        if (isset($record['tags']) && trim($record['tags'])) {
+
+                            // @TODO: set tags
+
+                        }
+                    }
+                }
+            }
+
+            $i = sizeof($urlsInserted);
+            if ($i > 0) {
+                $this->messages->add('s', _('Articles inserted: ') . $i . _('. Please note, that some may be marked as "read".'));
             }
-          }
-        }
 
-        $i = sizeof($urlsInserted);
-        if ( $i > 0 ) {
-          $this->messages->add('s', _('Articles inserted: ').$i._('. Please note, that some may be marked as "read".'));
-        }
         Tools::logm('Import of articles finished: '.$i.' articles added (w/o content if not provided).');
       }
-      //file parsing finished here
+      else {
+        $this->messages->add('s', _('Did you forget to select a file?'));
+      }
+        // file parsing finished here
+        // now download article contents if any
+        // check if we need to download any content
 
-      //now download article contents if any
+        $recordsDownloadRequired = $this->store->retrieveUnfetchedEntriesCount($this->user->getId());
 
-      //check if we need to download any content
-      $recordsDownloadRequired = $this->store->retrieveUnfetchedEntriesCount($this->user->getId());
-      if ( $recordsDownloadRequired == 0 ) {
-        //nothing to download
-        $this->messages->add('s', _('Import finished.'));
-        Tools::logm('Import finished completely');
-        Tools::redirect();
-      }
-      else {
-        //if just inserted - don't download anything, download will start in next reload
-        if ( !isset($_FILES['file']) ) {
-          //download next batch
-          Tools::logm('Fetching next batch of articles...');
-          $items = $this->store->retrieveUnfetchedEntries($this->user->getId(), IMPORT_LIMIT);
+        if ($recordsDownloadRequired == 0) {
 
-          $purifier = $this->getPurifier();
+            // nothing to download
 
-          foreach ($items as $item) {
-            $url = new Url(base64_encode($item['url']));
-            Tools::logm('Fetching article '.$item['id']);
-            $content = Tools::getPageContent($url);
+            $this->messages->add('s', _('Import finished.'));
+            Tools::logm('Import finished completely');
+            Tools::redirect();
+        }
+        else {
 
-            $title = (($content['rss']['channel']['item']['title'] != '') ? $content['rss']['channel']['item']['title'] : _('Untitled'));
-            $body = (($content['rss']['channel']['item']['description'] != '') ? $content['rss']['channel']['item']['description'] : _('Undefined'));
+            // if just inserted - don't download anything, download will start in next reload
 
-            //clean content to prevent xss attack
-            $title = $purifier->purify($title);
-            $body = $purifier->purify($body);
+            if (!isset($_FILES['file'])) {
 
-            $this->store->updateContentAndTitle($item['id'], $title, $body, $this->user->getId());
-            Tools::logm('Article '.$item['id'].' updated.');
-          }
+                // download next batch
+
+                Tools::logm('Fetching next batch of articles...');
+                $items = $this->store->retrieveUnfetchedEntries($this->user->getId() , IMPORT_LIMIT);
+                $purifier = $this->_getPurifier();
+                foreach($items as $item) {
+                    $url = new Url(base64_encode($item['url']));
+                    Tools::logm('Fetching article ' . $item['id']);
+                    $content = Tools::getPageContent($url);
+                    $title = (($content['rss']['channel']['item']['title'] != '') ? $content['rss']['channel']['item']['title'] : _('Untitled'));
+                    $body = (($content['rss']['channel']['item']['description'] != '') ? $content['rss']['channel']['item']['description'] : _('Undefined'));
+
+                    // clean content to prevent xss attack
 
+                    $title = $purifier->purify($title);
+                    $body = $purifier->purify($body);
+                    $this->store->updateContentAndTitle($item['id'], $title, $body, $this->user->getId());
+                    Tools::logm('Article ' . $item['id'] . ' updated.');
+                }
+            }
         }
-      }
 
-      return array('includeImport'=>true, 'import'=>array('recordsDownloadRequired'=>$recordsDownloadRequired, 'recordsUnderDownload'=> IMPORT_LIMIT, 'delay'=> IMPORT_DELAY * 1000) );
+        return array(
+            'includeImport' => true,
+            'import' => array(
+                'recordsDownloadRequired' => $recordsDownloadRequired,
+                'recordsUnderDownload' => IMPORT_LIMIT,
+                'delay' => IMPORT_DELAY * 1000
+            )
+        );
     }
 
     /**
      * export poche entries in json
      * @return json all poche entries
      */
-    public function export() {
+    public function export()
+    {
       $filename = "wallabag-export-".$this->user->getId()."-".date("Y-m-d").".json";
       header('Content-Disposition: attachment; filename='.$filename);
 
@@ -1039,7 +713,7 @@ class Poche
      * @param  string $which 'prod' or 'dev'
      * @return string        latest $which version
      */
-    private function getPocheVersion($which = 'prod') {
+    private function _getPocheVersion($which = 'prod') {
       $cache_file = CACHE . '/' . $which;
       $check_time = time();
 
@@ -1054,29 +728,27 @@ class Poche
       return array($version, $check_time);
     }
 
-    public function generateToken()
+    /**
+     * Update token for current user
+     */
+    public function updateToken()
     {
-      if (ini_get('open_basedir') === '') {
-        if (strtoupper(substr(PHP_OS, 0, 3)) === 'WIN') {
-          echo 'This is a server using Windows!';
-          // alternative to /dev/urandom for Windows
-          $token = substr(base64_encode(uniqid(mt_rand(), true)), 0, 20);
-        } else {
-          $token = substr(base64_encode(file_get_contents('/dev/urandom', false, null, 0, 20)), 0, 15);
-        }
-      }
-      else {
-        $token = substr(base64_encode(uniqid(mt_rand(), true)), 0, 20);
-      }
-
-      $token = str_replace('+', '', $token);
-      $this->store->updateUserConfig($this->user->getId(), 'token', $token);
-      $currentConfig = $_SESSION['poche_user']->config;
-      $currentConfig['token'] = $token;
-      $_SESSION['poche_user']->setConfig($currentConfig);
-      Tools::redirect();
+        $token = Tools::generateToken();
+        $this->store->updateUserConfig($this->user->getId(), 'token', $token);
+        $currentConfig = $_SESSION['poche_user']->config;
+        $currentConfig['token'] = $token;
+        $_SESSION['poche_user']->setConfig($currentConfig);
+        Tools::redirect();
     }
 
+    /**
+     * Generate RSS feeds for current user
+     *
+     * @param $token
+     * @param $user_id
+     * @param $tag_id
+     * @param string $type
+     */
     public function generateFeeds($token, $user_id, $tag_id, $type = 'home')
     {
         $allowed_types = array('home', 'fav', 'archive', 'tag');
@@ -1086,10 +758,9 @@ class Poche
             die(sprintf(_('User with this id (%d) does not exist.'), $user_id));
         }
 
-        if (!in_array($type, $allowed_types) || $token != $config['token']) {
-            die(_('Uh, there is a problem while generating feeds.'));
+        if (!in_array($type, $allowed_types) || !isset($config['token']) || $token != $config['token']) {
+            die(_('Uh, there is a problem while generating feed. Wrong token used?'));
         }
-        // Check the token
 
         $feed = new FeedWriter(RSS2);
         $feed->setTitle('wallabag — ' . $type . ' feed');
@@ -1121,147 +792,22 @@ class Poche
         exit;
     }
 
-    public function emptyCache() {
-        $files = new RecursiveIteratorIterator(
-            new RecursiveDirectoryIterator(CACHE, RecursiveDirectoryIterator::SKIP_DOTS),
-            RecursiveIteratorIterator::CHILD_FIRST
-        );
-
-        foreach ($files as $fileinfo) {
-            $todo = ($fileinfo->isDir() ? 'rmdir' : 'unlink');
-            $todo($fileinfo->getRealPath());
-        }
 
-        Tools::logm('empty cache');
-        $this->messages->add('s', _('Cache deleted.'));
-        Tools::redirect();
-    }
 
     /**
-     * return new purifier object with actual config
+     * Returns new purifier object with actual config
      */
-    protected function getPurifier() {
-      $config = HTMLPurifier_Config::createDefault();
-      $config->set('Cache.SerializerPath', CACHE);
-      $config->set('HTML.SafeIframe', true);
+    private function _getPurifier()
+    {
+        $config = HTMLPurifier_Config::createDefault();
+        $config->set('Cache.SerializerPath', CACHE);
+        $config->set('HTML.SafeIframe', true);
 
-      //allow YouTube, Vimeo and dailymotion videos
-      $config->set('URI.SafeIframeRegexp', '%^(https?:)?//(www\.youtube(?:-nocookie)?\.com/embed/|player\.vimeo\.com/video/|www\.dailymotion\.com/embed/video/)%');
+        //allow YouTube, Vimeo and dailymotion videos
+        $config->set('URI.SafeIframeRegexp', '%^(https?:)?//(www\.youtube(?:-nocookie)?\.com/embed/|player\.vimeo\.com/video/|www\.dailymotion\.com/embed/video/)%');
 
-      return new HTMLPurifier($config);
+        return new HTMLPurifier($config);
     }
 
-    /**
-     * handle epub
-     */
-    public function createEpub() {
-
-        switch ($_GET['method']) {
-            case 'id':
-                $entryID = filter_var($_GET['id'],FILTER_SANITIZE_NUMBER_INT);
-                $entry = $this->store->retrieveOneById($entryID, $this->user->getId());
-                $entries = array($entry);
-                $bookTitle = $entry['title'];
-                $bookFileName = substr($bookTitle, 0, 200);
-                break;
-            case 'all':
-                $entries = $this->store->retrieveAll($this->user->getId());
-                $bookTitle = sprintf(_('All my articles on '), date(_('d.m.y'))); #translatable because each country has it's own date format system
-                $bookFileName = _('Allarticles') . date(_('dmY'));
-                break;
-            case 'tag':
-                $tag = filter_var($_GET['tag'],FILTER_SANITIZE_STRING);
-                $tags_id = $this->store->retrieveAllTags($this->user->getId(),$tag);
-                $tag_id = $tags_id[0]["id"]; // we take the first result, which is supposed to match perfectly. There must be a workaround.
-                $entries = $this->store->retrieveEntriesByTag($tag_id,$this->user->getId());
-                $bookTitle = sprintf(_('Articles tagged %s'),$tag);
-                $bookFileName = substr(sprintf(_('Tag %s'),$tag), 0, 200);
-                break;
-            case 'category':
-                $category = filter_var($_GET['category'],FILTER_SANITIZE_STRING);
-                $entries = $this->store->getEntriesByView($category,$this->user->getId());
-                $bookTitle = sprintf(_('All articles in category %s'), $category);
-                $bookFileName = substr(sprintf(_('Category %s'),$category), 0, 200);
-                break;
-            case 'search':
-                $search = filter_var($_GET['search'],FILTER_SANITIZE_STRING);
-                $entries = $this->store->search($search,$this->user->getId());
-                $bookTitle = sprintf(_('All articles for search %s'), $search);
-                $bookFileName = substr(sprintf(_('Search %s'), $search), 0, 200);
-                break;
-            case 'default':
-                die(_('Uh, there is a problem while generating epub.'));
-
-        }
-
-        $content_start =
-        "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
-        . "<html xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:epub=\"http://www.idpf.org/2007/ops\">\n"
-        . "<head>"
-        . "<meta http-equiv=\"Default-Style\" content=\"text/html; charset=utf-8\" />\n"
-        . "<title>wallabag articles book</title>\n"
-        . "</head>\n"
-        . "<body>\n";
-
-        $bookEnd = "</body>\n</html>\n";
 
-        $log = new Logger("wallabag", TRUE);
-        $fileDir = CACHE;
-        
-        $book = new EPub(EPub::BOOK_VERSION_EPUB3, DEBUG_POCHE);
-        $log->logLine("new EPub()");
-        $log->logLine("EPub class version: " . EPub::VERSION);
-        $log->logLine("EPub Req. Zip version: " . EPub::REQ_ZIP_VERSION);
-        $log->logLine("Zip version: " . Zip::VERSION);
-        $log->logLine("getCurrentServerURL: " . $book->getCurrentServerURL());
-        $log->logLine("getCurrentPageURL..: " . $book->getCurrentPageURL());
-
-        $book->setTitle(_('wallabag\'s articles'));
-        $book->setIdentifier("http://$_SERVER[HTTP_HOST]", EPub::IDENTIFIER_URI); // Could also be the ISBN number, prefered for published books, or a UUID.
-        //$book->setLanguage("en"); // Not needed, but included for the example, Language is mandatory, but EPub defaults to "en". Use RFC3066 Language codes, such as "en", "da", "fr" etc.
-        $book->setDescription(_("Some articles saved on my wallabag"));
-        $book->setAuthor("wallabag","wallabag");
-        $book->setPublisher("wallabag","wallabag"); // I hope this is a non existant address :)
-        $book->setDate(time()); // Strictly not needed as the book date defaults to time().
-        //$book->setRights("Copyright and licence information specific for the book."); // As this is generated, this _could_ contain the name or licence information of the user who purchased the book, if needed. If this is used that way, the identifier must also be made unique for the book.
-        $book->setSourceURL("http://$_SERVER[HTTP_HOST]");
-
-        $book->addDublinCoreMetadata(DublinCore::CONTRIBUTOR, "PHP");
-        $book->addDublinCoreMetadata(DublinCore::CONTRIBUTOR, "wallabag");
-
-        $cssData = "body {\n margin-left: .5em;\n margin-right: .5em;\n text-align: justify;\n}\n\np {\n font-family: serif;\n font-size: 10pt;\n text-align: justify;\n text-indent: 1em;\n margin-top: 0px;\n margin-bottom: 1ex;\n}\n\nh1, h2 {\n font-family: sans-serif;\n font-style: italic;\n text-align: center;\n background-color: #6b879c;\n color: white;\n width: 100%;\n}\n\nh1 {\n margin-bottom: 2px;\n}\n\nh2 {\n margin-top: -2px;\n margin-bottom: 2px;\n}\n";
-
-        $log->logLine("Add Cover");
-
-        $fullTitle = "<h1> " . $bookTitle . "</h1>\n";
-
-        $book->setCoverImage("Cover.png", file_get_contents("themes/baggy/img/apple-touch-icon-152.png"), "image/png", $fullTitle);
-
-        $cover = $content_start . '<div style="text-align:center;"><p>' . _('Produced by wallabag with PHPePub') . '</p><p>'. _('Please open <a href="https://github.com/wallabag/wallabag/issues" >an issue</a> if you have trouble with the display of this E-Book on your device.') . '</p></div>' . $bookEnd;
-
-        //$book->addChapter("Table of Contents", "TOC.xhtml", NULL, false, EPub::EXTERNAL_REF_IGNORE);
-        $book->addChapter("Notices", "Cover2.html", $cover);
-
-        $book->buildTOC();
-
-        foreach ($entries as $entry) { //set tags as subjects
-            $tags = $this->store->retrieveTagsByEntry($entry['id']);
-            foreach ($tags as $tag) {
-                $book->setSubject($tag['value']);
-            }
-
-            $log->logLine("Set up parameters");
-
-            $chapter = $content_start . $entry['content'] . $bookEnd;
-            $book->addChapter($entry['title'], htmlspecialchars($entry['title']) . ".html", $chapter, true, EPub::EXTERNAL_REF_ADD);
-            $log->logLine("Added chapter " . $entry['title']);
-        }
-
-        if (DEBUG_POCHE) {
-            $epuplog = $book->getLog();
-            $book->addChapter("Log", "Log.html", $content_start . $log->getLog() . "\n</pre>" . $bookEnd); // log generation
-        }
-        $book->finalize();
-        $zipData = $book->sendBook($bookFileName);
-    }
 }
diff --git a/inc/poche/Routing.class.php b/inc/poche/Routing.class.php
new file mode 100755 (executable)
index 0000000..0b37305
--- /dev/null
@@ -0,0 +1,199 @@
+<?php
+/**
+ * wallabag, self hostable application allowing you to not miss any content anymore
+ *
+ * @category   wallabag
+ * @author     Nicolas Lœuillet <nicolas@loeuillet.org>
+ * @copyright  2013
+ * @license    http://opensource.org/licenses/MIT see COPYING file
+ */
+
+/**
+ * Front controller: reads the request, triggers the matching Poche action
+ * and renders the resulting template.
+ */
+class Routing
+{
+    protected $wallabag;
+    protected $referer;
+    protected $view;
+    protected $action;
+    protected $id;
+    protected $url;
+    protected $file;
+    protected $defaultVars = array();
+    protected $vars = array();
+
+    /**
+     * @param Poche $wallabag application object the requests are dispatched to
+     */
+    public function __construct(Poche $wallabag)
+    {
+        $this->wallabag = $wallabag;
+        $this->_init();
+    }
+
+    /**
+     * Read an optional request parameter without raising an "undefined index" notice.
+     *
+     * @param array  $source  request array, e.g. $_GET or $_POST
+     * @param string $key     parameter name
+     * @param mixed  $default value returned when the parameter is absent
+     * @return mixed
+     */
+    private function _param(array $source, $key, $default = null)
+    {
+        return isset($source[$key]) ? $source[$key] : $default;
+    }
+
+    /**
+     * Parse GET & REFERER vars
+     */
+    private function _init()
+    {
+        $this->referer      = empty($_SERVER['HTTP_REFERER']) ? '' : $_SERVER['HTTP_REFERER'];
+        $this->view         = Tools::checkVar('view', 'home');
+        $this->action       = Tools::checkVar('action');
+        $this->id           = Tools::checkVar('id');
+        $_SESSION['sort']   = Tools::checkVar('sort', 'id');
+        $this->url          = new Url($this->_param($_GET, 'url', ''));
+    }
+
+    /**
+     * Handle the current request: run the requested action, pick the template
+     * and its variables, then output the rendered page.
+     */
+    public function run()
+    {
+        # vars to _always_ send to templates
+        $this->defaultVars = array(
+            'referer' => $this->referer,
+            'view' => $this->view,
+            'poche_url' => Tools::getPocheUrl(),
+            'title' => _('wallabag, a read it later open source system'),
+            'token' => \Session::getToken(),
+            'theme' => $this->wallabag->tpl->getTheme()
+        );
+
+        $this->_launchAction();
+        $this->_defineTplInformation();
+
+        # messages can be added while the actions above run, so this entry has to be added now (it can't be added before)
+        $this->vars = array_merge($this->vars, array('messages' => $this->wallabag->messages->display('all', FALSE)));
+
+        $this->_render($this->file, $this->vars);
+    }
+
+    /**
+     * Choose the template file and its variables from the authentication state.
+     */
+    private function _defineTplInformation()
+    {
+        $tplFile = array();
+        $tplVars = array();
+
+        if (\Session::isLogged()) {
+            $this->wallabag->action($this->action, $this->url, $this->id);
+            $tplFile = Tools::getTplFile($this->view);
+            $tplVars = array_merge($this->vars, $this->wallabag->displayView($this->view, $this->id));
+        } elseif (isset($_SERVER['PHP_AUTH_USER'])) {
+            list($tplFile, $tplVars) = $this->_loginHttpUser($_SERVER['PHP_AUTH_USER']);
+        } elseif (isset($_SERVER['REMOTE_USER'])) {
+            list($tplFile, $tplVars) = $this->_loginHttpUser($_SERVER['REMOTE_USER']);
+        } else {
+            $tplFile = Tools::getTplFile('login');
+            $tplVars['http_auth'] = 0;
+            \Session::logout();
+        }
+
+        $this->file = $tplFile;
+        $this->vars = array_merge($this->defaultVars, $tplVars);
+    }
+
+    /**
+     * Log in a user announced by the web server (PHP_AUTH_USER or REMOTE_USER).
+     *
+     * @param string $username user name provided by the web server
+     * @return array array($tplFile, $tplVars); both are empty arrays when the user exists
+     */
+    private function _loginHttpUser($username)
+    {
+        if ($this->wallabag->store->userExists($username)) {
+            $this->wallabag->login($this->referer);
+
+            return array(array(), array());
+        }
+
+        $this->wallabag->messages->add('e', _('login failed: user doesn\'t exist'));
+        Tools::logm('user doesn\'t exist');
+
+        return array(Tools::getTplFile('login'), array('http_auth' => 1));
+    }
+
+    /**
+     * Dispatch the action named in the query string.
+     *
+     * Login and feed generation are available to anonymous requests; every
+     * other action requires a logged in session.
+     */
+    private function _launchAction()
+    {
+        if (isset($_GET['login'])) {
+            // hello to you
+            $this->wallabag->login($this->referer);
+        } elseif (isset($_GET['feed']) && isset($_GET['user_id'])) {
+            $tag_id = (isset($_GET['tag_id']) ? intval($_GET['tag_id']) : 0);
+            $user_id = filter_var($_GET['user_id'], FILTER_SANITIZE_NUMBER_INT);
+            $this->wallabag->generateFeeds($this->_param($_GET, 'token'), $user_id, $tag_id, $this->_param($_GET, 'type'));
+        }
+
+        // allowed ONLY to logged in user
+        if (\Session::isLogged() !== true) {
+            return;
+        }
+
+        if (isset($_GET['logout'])) {
+            // see you soon !
+            $this->wallabag->logout();
+        } elseif (isset($_GET['config'])) {
+            // update password
+            $this->wallabag->updatePassword($this->_param($_POST, 'password'), $this->_param($_POST, 'password_repeat'));
+        } elseif (isset($_GET['newuser'])) {
+            $this->wallabag->createNewUser($this->_param($_POST, 'newusername'), $this->_param($_POST, 'password4newuser'));
+        } elseif (isset($_GET['deluser'])) {
+            $this->wallabag->deleteUser($this->_param($_POST, 'password4deletinguser'));
+        } elseif (isset($_GET['epub'])) {
+            $epub = new WallabagEpub($this->wallabag, $this->_param($_GET, 'method'), $this->_param($_GET, 'id'), $this->_param($_GET, 'value'));
+            $epub->run();
+        } elseif (isset($_GET['import'])) {
+            // keep the import state in $this->vars so _defineTplInformation() hands it to the template
+            $this->vars = array_merge($this->vars, $this->wallabag->import());
+        } elseif (isset($_GET['empty-cache'])) {
+            Tools::emptyCache();
+        } elseif (isset($_GET['export'])) {
+            $this->wallabag->export();
+        } elseif (isset($_GET['updatetheme'])) {
+            $this->wallabag->tpl->updateTheme($this->_param($_POST, 'theme'));
+        } elseif (isset($_GET['updatelanguage'])) {
+            $this->wallabag->language->updateLanguage($this->_param($_POST, 'language'));
+        } elseif (isset($_GET['uploadfile'])) {
+            $this->wallabag->uploadFile();
+        } elseif (isset($_GET['feed']) && isset($_GET['action']) && $_GET['action'] == 'generate') {
+            $this->wallabag->updateToken();
+        } elseif (!empty($_GET['plainurl'])) {
+            $plainUrl = new Url(base64_encode($_GET['plainurl']));
+            $this->wallabag->action('add', $plainUrl);
+        }
+    }
+
+    /**
+     * Output the rendered template.
+     *
+     * @param string $file template file name
+     * @param array  $vars variables handed to the template
+     */
+    public function _render($file, $vars)
+    {
+        echo $this->wallabag->tpl->render($file, $vars);
+    }
+}
\ No newline at end of file
diff --git a/inc/poche/Template.class.php b/inc/poche/Template.class.php
new file mode 100644 (file)
index 0000000..b686f2e
--- /dev/null
@@ -0,0 +1,235 @@
+<?php
+/**
+ * wallabag, self hostable application allowing you to not miss any content anymore
+ *
+ * @category   wallabag
+ * @author     Nicolas Lœuillet <nicolas@loeuillet.org>
+ * @copyright  2013
+ * @license    http://opensource.org/licenses/MIT see COPYING file
+ */
+
+class Template extends Twig_Environment
+{
+    protected $wallabag;
+
+    private $canRenderTemplates = TRUE;
+    private $currentTheme = '';
+
+    /**
+     * Selects the theme to use and, when the theme checks report no error,
+     * initializes the Twig environment
+     *
+     * @param Poche $wallabag
+     */
+    public function __construct(Poche $wallabag)
+    {
+        $this->wallabag = $wallabag;
+
+        // The session user's configured theme wins; the default theme is used
+        // when there is no session user or no 'theme' entry in its configuration
+        $sessionUser = Session::getParam('poche_user');
+
+        if (is_null($sessionUser)) {
+            $selectedTheme = DEFAULT_THEME;
+        } else {
+            $selectedTheme = $sessionUser->getConfigValue('theme');
+
+            if ($selectedTheme === false) {
+                $selectedTheme = DEFAULT_THEME;
+            }
+        }
+
+        $this->currentTheme = $selectedTheme;
+
+        // Twig is only set up when the checks returned an empty error list
+        $installErrors = $this->_themeIsInstalled();
+
+        if ($installErrors !== array()) {
+            return;
+        }
+
+        $this->_init();
+    }
+
+    /**
+     * Checks that templates can be rendered: Twig must be available and the
+     * directories of the selected theme and of its required themes must exist.
+     *
+     * Every problem found is also put in front of the 'errors' session parameter,
+     * and rendering is flagged as unavailable when a theme directory is missing.
+     *
+     * @return array error messages; an empty array means nothing is missing
+     */
+    private function _themeIsInstalled()
+    {
+        $problems = array();
+
+        // Twig is an absolute requirement for wallabag to function.
+        // Report it when the Composer installer hasn't been run yet
+        if (!$this->canRenderTemplates) {
+            $problems[] = 'Twig does not seem to be installed. Please initialize the Composer installation to automatically fetch dependencies. You can also download <a href="http://wllbg.org/vendor">vendor.zip</a> and extract it in your wallabag folder.';
+        }
+
+        // The directory of the selected theme has to exist...
+        $selected = $this->getTheme();
+        if ($selected != '' && !is_dir(THEME . '/' . $selected)) {
+            $problems[] = 'The currently selected theme (' . $selected . ') does not seem to be properly installed (Missing directory: ' . THEME . '/' . $selected . ')';
+            $this->canRenderTemplates = FALSE;
+        }
+
+        // ...and so do the directories of the themes it lists as requirements
+        $info = $this->getThemeInfo($selected);
+        $required = (isset($info['requirements']) && is_array($info['requirements']))
+            ? $info['requirements']
+            : array();
+
+        foreach ($required as $dependency) {
+            if (is_dir(THEME . '/' . $dependency)) {
+                continue;
+            }
+
+            $problems[] = 'The required "' . $dependency . '" theme is missing for the current theme (' . $selected . ')';
+            $this->canRenderTemplates = FALSE;
+        }
+
+        // New messages go first, errors already stored in the session follow
+        if (is_null(Session::getParam('errors'))) {
+            $previous = array();
+        } else {
+            $previous = Session::getParam('errors');
+        }
+        Session::setParam('errors', array_merge($problems, $previous));
+
+        return $problems;
+    }
+
+    /**
+     * Builds the Twig environment: a loader chain made of the current theme
+     * followed by its required themes, the i18n extension and the custom filters.
+     * The script stops when one of the theme directories cannot be loaded.
+     */
+    private function _init()
+    {
+        $theme   = $this->getTheme();
+        $loaders = new Twig_Loader_Chain();
+
+        // the current theme is added first to the loader chain
+        // so Twig will look there first for overridden template files
+        try {
+            $loaders->addLoader(new Twig_Loader_Filesystem(THEME . '/' . $theme));
+        } catch (Twig_Error_Loader $e) {
+            # @todo isInstalled() should catch this, inject Twig later
+            die('The currently selected theme (' . $theme . ') does not seem to be properly installed (' . THEME . '/' . $theme .' is missing)');
+        }
+
+        // every required theme is added after it, in the order theme.ini lists them
+        $info = $this->getThemeInfo($theme);
+        $required = (isset($info['requirements']) && is_array($info['requirements']))
+            ? $info['requirements']
+            : array();
+
+        foreach ($required as $dependency) {
+            try {
+                $loaders->addLoader(new Twig_Loader_Filesystem(THEME . '/' . $dependency));
+            } catch (Twig_Error_Loader $e) {
+                # @todo isInstalled() should catch this, inject Twig later
+                die('The required "' . $dependency . '" theme is missing for the current theme (' . $theme . ')');
+            }
+        }
+
+        // no cache option is passed to Twig in debug mode
+        $options = DEBUG_POCHE ? array() : array('cache' => CACHE);
+
+        parent::__construct($loaders, $options);
+
+        $this->addExtension(new Twig_Extensions_Extension_I18n());
+
+        # filter to display domain name of an url
+        $this->addFilter(new Twig_SimpleFilter('getDomain', 'Tools::getDomain'));
+
+        # filter for reading time
+        $this->addFilter(new Twig_SimpleFilter('getReadingTime', 'Tools::getReadingTime'));
+    }
+
+    /**
+     * Returns the key (directory name) of the theme selected in the constructor
+     *
+     * @return string
+     */
+    public function getTheme()
+    {
+        return $this->currentTheme;
+    }
+
+    /**
+     * Describes a theme, using its theme.ini file when one can be read.
+     * The returned array always holds at least:
+     * - name: the name from theme.ini, or the theme key when none is given,
+     * - current: true when the theme is the one currently selected.
+     *
+     * @param string $theme Theme key (directory name)
+     * @return array|boolean Theme information, or false if the theme directory doesn't exist.
+     */
+    public function getThemeInfo($theme)
+    {
+        $directory = THEME . '/' . $theme;
+
+        if (!is_dir($directory)) {
+            return false;
+        }
+
+        $iniPath = $directory . '/theme.ini';
+        $info    = array();
+
+        if (is_file($iniPath) && is_readable($iniPath)) {
+            $parsed = parse_ini_file($iniPath);
+
+            // a theme.ini that fails to parse is treated like a missing one
+            if ($parsed !== false) {
+                $info = $parsed;
+            }
+        }
+
+        if (!isset($info['name'])) {
+            $info['name'] = $theme;
+        }
+
+        $info['current'] = ($this->getTheme() === $theme);
+
+        return $info;
+    }
+
+    /**
+     * Returns the installed themes: every sub-directory of the themes folder,
+     * described by getThemeInfo() and sorted by theme key.
+     *
+     * @return array theme information indexed by theme key; empty when the
+     *               themes folder cannot be opened
+     */
+    public function getInstalledThemes()
+    {
+        $themes = array();
+        $handle = opendir(THEME);
+
+        // opendir() returns false on failure and readdir() must not be called
+        // with a failed handle, so report "no themes" instead
+        if ($handle === false) {
+            return $themes;
+        }
+
+        while (($theme = readdir($handle)) !== false) {
+            # Themes are stored in a directory, so all directory names are themes
+            # @todo move theme installation data to database
+            if ($theme === '.' || $theme === '..' || !is_dir(THEME . '/' . $theme)) {
+                continue;
+            }
+
+            $themes[$theme] = $this->getThemeInfo($theme);
+        }
+
+        // release the directory handle once the listing is complete
+        closedir($handle);
+
+        ksort($themes);
+
+        return $themes;
+    }
+
+    /**
+     * Changes the theme of the current user.
+     *
+     * The new theme is stored in the user configuration and in the session user,
+     * then the cache is emptied. A message is added and the config view is
+     * redirected to when the theme is already in use or is not installed.
+     *
+     * @param $newTheme key (directory name) of the theme to switch to
+     */
+    public function updateTheme($newTheme)
+    {
+        $activeTheme = $this->getTheme();
+
+        # nothing to change when the requested theme is already the current one
+        if ($newTheme == $activeTheme) {
+            $this->wallabag->messages->add('w', _('still using the "' . $activeTheme . '" theme!'));
+            Tools::redirect('?view=config');
+        }
+
+        # only the keys reported by getInstalledThemes() are accepted
+        $installedKeys = array_keys($this->getInstalledThemes());
+
+        if (!in_array($newTheme, $installedKeys)) {
+            $this->wallabag->messages->add('e', _('that theme does not seem to be installed'));
+            Tools::redirect('?view=config');
+        }
+
+        $this->wallabag->store->updateUserConfig($this->wallabag->user->getId(), 'theme', $newTheme);
+        $this->wallabag->messages->add('s', _('you have changed your theme preferences'));
+
+        # keep the user object held in the session in line with the stored value
+        $sessionUser = $_SESSION['poche_user'];
+        $config = $sessionUser->config;
+        $config['theme'] = $newTheme;
+        $sessionUser->setConfig($config);
+
+        Tools::emptyCache();
+        Tools::redirect('?view=config');
+    }
+}
\ No newline at end of file
index cc01f4030498ef8127574bad949408f6a24065c2..93ec3fc6a877bf93f00e7c0dfadfd2cdf2b412d8 100755 (executable)
@@ -5,19 +5,18 @@
  * @category   wallabag
  * @author     Nicolas Lœuillet <nicolas@loeuillet.org>
  * @copyright  2013
- * @license    http://www.wtfpl.net/ see COPYING file
+ * @license    http://opensource.org/licenses/MIT see COPYING file
  */
 
-class Tools
+final class Tools
 {
+    /**
+     * Initialize PHP environment
+     */
     public static function initPhp()
     {
         define('START_TIME', microtime(true));
 
-        if (phpversion() < 5) {
-            die(_('Oops, it seems you don\'t have PHP 5.'));
-        }
-
         function stripslashesDeep($value) {
             return is_array($value)
                 ? array_map('stripslashesDeep', $value)
@@ -34,6 +33,11 @@ class Tools
         register_shutdown_function('ob_end_flush');
     }
 
+    /**
+     * Get wallabag instance URL
+     *
+     * @return string
+     */
     public static function getPocheUrl()
     {
         $https = (!empty($_SERVER['HTTPS'])
@@ -50,6 +54,10 @@ class Tools
             || ($https && $_SERVER["SERVER_PORT"] == '443')
             || ($https && $_SERVER["SERVER_PORT"]==SSL_PORT) //Custom HTTPS port detection
             ? '' : ':' . $_SERVER["SERVER_PORT"]);
+        
+        if (isset($_SERVER["HTTP_X_FORWARDED_PORT"])) {
+            $serverport = ':' . $_SERVER["HTTP_X_FORWARDED_PORT"];
+        }
 
         $scriptname = str_replace('/index.php', '/', $_SERVER["SCRIPT_NAME"]);
 
@@ -67,6 +75,11 @@ class Tools
             . $host . $serverport . $scriptname;
     }
 
+    /**
+     * Redirects to a URL
+     *
+     * @param string $url
+     */
     public static function redirect($url = '')
     {
         if ($url === '') {
@@ -87,11 +100,18 @@ class Tools
                 $url = $ref;
             }
         }
+
         self::logm('redirect to ' . $url);
         header('Location: '.$url);
         exit();
     }
 
+    /**
+     * Returns name of the template file to display
+     *
+     * @param $view
+     * @return string
+     */
     public static function getTplFile($view)
     {
         $views = array(
@@ -99,13 +119,15 @@ class Tools
             'edit-tags', 'view', 'login', 'error'
             );
 
-        if (in_array($view, $views)) {
-            return $view . '.twig';
-        }
-
-        return 'home.twig';
+        return (in_array($view, $views) ? $view . '.twig' : 'home.twig');
     }
 
+    /**
+     * Download a file (typically, for downloading pictures on web server)
+     *
+     * @param $url
+     * @return bool|mixed|string
+     */
     public static function getFile($url)
     {
         $timeout = 15;
@@ -186,6 +208,11 @@ class Tools
         }
     }
 
+    /**
+     * Headers for JSON export
+     *
+     * @param $data
+     */
     public static function renderJson($data)
     {
         header('Cache-Control: no-cache, must-revalidate');
@@ -195,6 +222,11 @@ class Tools
         exit();
     }
 
+    /**
+     * Create new line in log file
+     *
+     * @param $message
+     */
     public static function logm($message)
     {
         if (DEBUG_POCHE && php_sapi_name() != 'cli') {
@@ -204,36 +236,57 @@ class Tools
         }
     }
 
+    /**
+     * Encode a URL by using a salt
+     *
+     * @param $string
+     * @return string
+     */
     public static function encodeString($string)
     {
         return sha1($string . SALT);
     }
 
+    /**
+     * Cleans a variable
+     *
+     * @param $var
+     * @param string $default
+     * @return string
+     */
     public static function checkVar($var, $default = '')
     {
-        return ((isset ($_REQUEST["$var"])) ? htmlentities($_REQUEST["$var"]) : $default);
+        return ((isset($_REQUEST["$var"])) ? htmlentities($_REQUEST["$var"]) : $default);
     }
 
+    /**
+     * Returns the domain name for a URL
+     *
+     * @param $url
+     * @return string
+     */
     public static function getDomain($url)
     {
         return parse_url($url, PHP_URL_HOST);
     }
 
-    public static function getReadingTime($text) {
-        $word = str_word_count(strip_tags($text));
-        $minutes = floor($word / 200);
-        $seconds = floor($word % 200 / (200 / 60));
-        $time = array('minutes' => $minutes, 'seconds' => $seconds);
-
-        return $minutes;
-    }
-
-    public static function getDocLanguage($userlanguage) {
-        $lang = explode('.', $userlanguage);
-        return str_replace('_', '-', $lang[0]);
+    /**
+     * Estimates the reading time of a text, counting 200 words per unit of time
+     *
+     * @param $text text to measure; tags are stripped before the words are counted
+     * @return float word count divided by 200, rounded down
+     */
+    public static function getReadingTime($text)
+    {
+        $wordCount = str_word_count(strip_tags($text));
+
+        return floor($wordCount / 200);
+    }
 
-    public static function status($status_code)
+    /**
+     * Returns the correct header for a status code
+     *
+     * @param $status_code
+     */
+    private static function _status($status_code)
     {
         if (strpos(php_sapi_name(), 'apache') !== false) {
 
@@ -245,29 +298,24 @@ class Tools
         }
     }
 
-    public static function download_db() {
-        header('Content-Disposition: attachment; filename="poche.sqlite.gz"');
-        self::status(200);
-
-        header('Content-Transfer-Encoding: binary');
-        header('Content-Type: application/octet-stream');
-        echo gzencode(file_get_contents(STORAGE_SQLITE));
-
-        exit;
-    }
-
+    /**
+     * Get the content for a given URL (by a call to FullTextFeed)
+     *
+     * @param Url $url
+     * @return mixed
+     */
     public static function getPageContent(Url $url)
     {
         // Saving and clearing context
         $REAL = array();
         foreach( $GLOBALS as $key => $value ) {
             if( $key != 'GLOBALS' && $key != '_SESSION' && $key != 'HTTP_SESSION_VARS' ) {
-                $GLOBALS[$key] = array();
-                $REAL[$key] = $value;
+                $GLOBALS[$key]  = array();
+                $REAL[$key]     = $value;
             }
         }
         // Saving and clearing session
-        if ( isset($_SESSION) ) {
+        if (isset($_SESSION)) {
             $REAL_SESSION = array();
             foreach( $_SESSION as $key => $value ) {
                 $REAL_SESSION[$key] = $value;
@@ -279,12 +327,12 @@ class Tools
         $scope = function() {
             extract( func_get_arg(1) );
             $_GET = $_REQUEST = array(
-                        "url" => $url->getUrl(),
-                        "max" => 5,
-                        "links" => "preserve",
-                        "exc" => "",
-                        "format" => "json",
-                        "submit" => "Create Feed"
+                "url" => $url->getUrl(),
+                "max" => 5,
+                "links" => "preserve",
+                "exc" => "",
+                "format" => "json",
+                "submit" => "Create Feed"
             );
             ob_start();
             require func_get_arg(0);
@@ -292,23 +340,26 @@ class Tools
             ob_end_clean();
             return $json;
         };
-        $json = $scope( "inc/3rdparty/makefulltextfeed.php", array("url" => $url) );
+
+        $json = $scope("inc/3rdparty/makefulltextfeed.php", array("url" => $url));
 
         // Clearing and restoring context
-        foreach( $GLOBALS as $key => $value ) {
-            if( $key != "GLOBALS" && $key != "_SESSION" ) {
+        foreach ($GLOBALS as $key => $value) {
+            if($key != "GLOBALS" && $key != "_SESSION" ) {
                 unset($GLOBALS[$key]);
             }
         }
-        foreach( $REAL as $key => $value ) {
+        foreach ($REAL as $key => $value) {
             $GLOBALS[$key] = $value;
         }
+
         // Clearing and restoring session
-        if ( isset($REAL_SESSION) ) {
-            foreach( $_SESSION as $key => $value ) {
+        if (isset($REAL_SESSION)) {
+            foreach($_SESSION as $key => $value) {
                 unset($_SESSION[$key]);
             }
-            foreach( $REAL_SESSION as $key => $value ) {
+
+            foreach($REAL_SESSION as $key => $value) {
                 $_SESSION[$key] = $value;
             }
         }
@@ -318,11 +369,48 @@ class Tools
 
     /**
      * Returns whether we handle an AJAX (XMLHttpRequest) request.
+     *
      * @return boolean whether we handle an AJAX (XMLHttpRequest) request.
      */
     public static function isAjaxRequest()
     {
-      return isset($_SERVER['HTTP_X_REQUESTED_WITH']) && $_SERVER['HTTP_X_REQUESTED_WITH']==='XMLHttpRequest';
+        return isset($_SERVER['HTTP_X_REQUESTED_WITH']) && $_SERVER['HTTP_X_REQUESTED_WITH']==='XMLHttpRequest';
+    }
+
+    /**
+     * Empties the cache folder: removes every file and sub-directory below it
+     * (children before their parent directory), logs the action and redirects
+     */
+    public static function emptyCache()
+    {
+        $entries = new RecursiveIteratorIterator(
+            new RecursiveDirectoryIterator(CACHE, RecursiveDirectoryIterator::SKIP_DOTS),
+            RecursiveIteratorIterator::CHILD_FIRST
+        );
+
+        foreach ($entries as $entry) {
+            if ($entry->isDir()) {
+                rmdir($entry->getRealPath());
+            } else {
+                unlink($entry->getRealPath());
+            }
+        }
+
+        Tools::logm('empty cache');
+        Tools::redirect();
+    }
+
+    /**
+     * Generates a token.
+     *
+     * Bytes are read from /dev/urandom when no open_basedir restriction is set
+     * and the platform is not Windows. In every other case, and when that read
+     * fails, the token is derived from uniqid() and mt_rand().
+     * '+' characters are removed from the result.
+     *
+     * @return string
+     */
+    public static function generateToken()
+    {
+        $token = '';
+
+        if (ini_get('open_basedir') === '' && strtoupper(substr(PHP_OS, 0, 3)) !== 'WIN') {
+            $bytes = file_get_contents('/dev/urandom', false, null, 0, 20);
+
+            // a failed or short read must not end up as an empty or truncated token
+            if ($bytes !== false && strlen($bytes) === 20) {
+                $token = substr(base64_encode($bytes), 0, 15);
+            }
+        }
+
+        if ($token === '') {
+            // alternative to /dev/urandom (Windows, open_basedir, unreadable device)
+            $token = substr(base64_encode(uniqid(mt_rand(), true)), 0, 20);
+        }
+
+        return str_replace('+', '', $token);
+    }
 
 }
index aba236fa5b57f24bf8d6b479fa532730ffef7dea..d9172b7d0730d9eb6c075dcabbefe19bf3ddd69d 100644 (file)
@@ -5,7 +5,7 @@
  * @category   wallabag
  * @author     Nicolas Lœuillet <nicolas@loeuillet.org>
  * @copyright  2013
- * @license    http://www.wtfpl.net/ see COPYING file
+ * @license    http://opensource.org/licenses/MIT see COPYING file
  */
 
 class Url
index cc8bec650d44dc182006d805b20b81f154ef9fb1..eaadd3e5bcc7ed343c4617f62a3663e456a0d0bf 100644 (file)
@@ -5,7 +5,7 @@
  * @category   wallabag
  * @author     Nicolas Lœuillet <nicolas@loeuillet.org>
  * @copyright  2013
- * @license    http://www.wtfpl.net/ see COPYING file
+ * @license    http://opensource.org/licenses/MIT see COPYING file
  */
 
 class User
@@ -44,7 +44,14 @@ class User
         $this->config = $config;
     }
 
-    public function getConfigValue($name) {
+    /**
+     * Returns configuration entry for a user
+     *
+     * @param $name
+     * @return mixed the configured value, or FALSE when there is no entry for $name
+     */
+    public function getConfigValue($name)
+    {
         return (isset($this->config[$name])) ? $this->config[$name] : FALSE;
     }
 }
\ No newline at end of file
diff --git a/inc/poche/WallabagEpub.class.php b/inc/poche/WallabagEpub.class.php
new file mode 100644 (file)
index 0000000..9c4d356
--- /dev/null
@@ -0,0 +1,135 @@
+<?php
+/**
+ * wallabag, self hostable application allowing you to not miss any content anymore
+ *
+ * @category   wallabag
+ * @author     Nicolas Lœuillet <nicolas@loeuillet.org>
+ * @copyright  2013
+ * @license    http://opensource.org/licenses/MIT see COPYING file
+ */
+
+class WallabagEpub
+{
+    protected $wallabag;
+    protected $method;
+    protected $value;
+
+    /**
+     * Stores what run() needs to select the entries of the book.
+     *
+     * NOTE(review): the Routing class added in this same commit calls this
+     * constructor with four arguments (wallabag, method, id, value). The fourth
+     * one is ignored here, so $value receives the 'id' request parameter;
+     * confirm which of the two is intended.
+     *
+     * @param Poche $wallabag
+     * @param $method selection method, compared by run() against 'id', 'all', 'tag', 'category' and 'search'
+     * @param $value  entry id, tag, category or search term, depending on the method
+     */
+    public function __construct(Poche $wallabag, $method, $value)
+    {
+        $this->wallabag = $wallabag;
+        $this->method   = $method;
+        $this->value    = $value;
+    }
+
+    /**
+     * Builds an ePub book from a selection of entries and hands it to EPub::sendBook().
+     *
+     * The entries are selected according to the method given to the constructor:
+     * - 'id': the single entry whose id is the constructor value,
+     * - 'all': every entry of the current user,
+     * - 'tag': the entries of the first tag matching the constructor value,
+     * - 'category': the entries of the view named by the constructor value,
+     * - 'search': the entries matching the constructor value.
+     * Any other method stops the script with an error message.
+     */
+    public function run()
+    {
+        switch ($this->method) {
+            case 'id':
+                $entryID = filter_var($this->value, FILTER_SANITIZE_NUMBER_INT);
+                $entry = $this->wallabag->store->retrieveOneById($entryID, $this->wallabag->user->getId());
+                $entries = array($entry);
+                $bookTitle = $entry['title'];
+                $bookFileName = substr($bookTitle, 0, 200);
+                break;
+            case 'all':
+                $entries = $this->wallabag->store->retrieveAll($this->wallabag->user->getId());
+                // The date is appended to the translated prefix: the message has no
+                // placeholder, so passing it through sprintf() dropped the date.
+                // The date format is translatable because each country has its own date format system
+                $bookTitle = _('All my articles on ') . date(_('d.m.y'));
+                $bookFileName = _('Allarticles') . date(_('dmY'));
+                break;
+            case 'tag':
+                $tag = filter_var($this->value, FILTER_SANITIZE_STRING);
+                $tags_id = $this->wallabag->store->retrieveAllTags($this->wallabag->user->getId(), $tag);
+                $tag_id = $tags_id[0]["id"]; // we take the first result, which is supposed to match perfectly. There must be a workaround.
+                $entries = $this->wallabag->store->retrieveEntriesByTag($tag_id, $this->wallabag->user->getId());
+                $bookTitle = sprintf(_('Articles tagged %s'), $tag);
+                $bookFileName = substr(sprintf(_('Tag %s'), $tag), 0, 200);
+                break;
+            case 'category':
+                $category = filter_var($this->value, FILTER_SANITIZE_STRING);
+                $entries = $this->wallabag->store->getEntriesByView($category, $this->wallabag->user->getId());
+                $bookTitle = sprintf(_('All articles in category %s'), $category);
+                $bookFileName = substr(sprintf(_('Category %s'), $category), 0, 200);
+                break;
+            case 'search':
+                $search = filter_var($this->value, FILTER_SANITIZE_STRING);
+                // the store is reached through the wallabag instance, as in every other
+                // branch; this class has no $store property of its own
+                $entries = $this->wallabag->store->search($search, $this->wallabag->user->getId());
+                $bookTitle = sprintf(_('All articles for search %s'), $search);
+                $bookFileName = substr(sprintf(_('Search %s'), $search), 0, 200);
+                break;
+            default:
+                // unknown method: stop here, nothing below can work without entries and a title
+                die(_('Uh, there is a problem while generating epub.'));
+        }
+
+        $content_start =
+            "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
+            . "<html xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:epub=\"http://www.idpf.org/2007/ops\">\n"
+            . "<head>"
+            . "<meta http-equiv=\"Default-Style\" content=\"text/html; charset=utf-8\" />\n"
+            . "<title>wallabag articles book</title>\n"
+            . "</head>\n"
+            . "<body>\n";
+
+        $bookEnd = "</body>\n</html>\n";
+
+        $log = new Logger("wallabag", TRUE);
+        $fileDir = CACHE;
+
+        $book = new EPub(EPub::BOOK_VERSION_EPUB3, DEBUG_POCHE);
+        $log->logLine("new EPub()");
+        $log->logLine("EPub class version: " . EPub::VERSION);
+        $log->logLine("EPub Req. Zip version: " . EPub::REQ_ZIP_VERSION);
+        $log->logLine("Zip version: " . Zip::VERSION);
+        $log->logLine("getCurrentServerURL: " . $book->getCurrentServerURL());
+        $log->logLine("getCurrentPageURL..: " . $book->getCurrentPageURL());
+
+        $book->setTitle($bookTitle);
+        $book->setIdentifier("http://$_SERVER[HTTP_HOST]", EPub::IDENTIFIER_URI); // Could also be the ISBN number, preferred for published books, or a UUID.
+        //$book->setLanguage("en"); // Not needed, but included for the example, Language is mandatory, but EPub defaults to "en". Use RFC3066 Language codes, such as "en", "da", "fr" etc.
+        $book->setDescription(_("Some articles saved on my wallabag"));
+        $book->setAuthor("wallabag", "wallabag");
+        $book->setPublisher("wallabag", "wallabag"); // I hope this is a non-existent address :)
+        $book->setDate(time()); // Strictly not needed as the book date defaults to time().
+        //$book->setRights("Copyright and licence information specific for the book."); // As this is generated, this _could_ contain the name or licence information of the user who purchased the book, if needed. If this is used that way, the identifier must also be made unique for the book.
+        $book->setSourceURL("http://$_SERVER[HTTP_HOST]");
+
+        $book->addDublinCoreMetadata(DublinCore::CONTRIBUTOR, "PHP");
+        $book->addDublinCoreMetadata(DublinCore::CONTRIBUTOR, "wallabag");
+
+        // NOTE(review): this stylesheet is built but never handed to the book in this method - confirm whether it should be
+        $cssData = "body {\n margin-left: .5em;\n margin-right: .5em;\n text-align: justify;\n}\n\np {\n font-family: serif;\n font-size: 10pt;\n text-align: justify;\n text-indent: 1em;\n margin-top: 0px;\n margin-bottom: 1ex;\n}\n\nh1, h2 {\n font-family: sans-serif;\n font-style: italic;\n text-align: center;\n background-color: #6b879c;\n color: white;\n width: 100%;\n}\n\nh1 {\n margin-bottom: 2px;\n}\n\nh2 {\n margin-top: -2px;\n margin-bottom: 2px;\n}\n";
+
+        $log->logLine("Add Cover");
+
+        $fullTitle = "<h1> " . $bookTitle . "</h1>\n";
+
+        $book->setCoverImage("Cover.png", file_get_contents("themes/baggy/img/apple-touch-icon-152.png"), "image/png", $fullTitle);
+
+        $cover = $content_start . '<div style="text-align:center;"><p>' . _('Produced by wallabag with PHPePub') . '</p><p>'. _('Please open <a href="https://github.com/wallabag/wallabag/issues" >an issue</a> if you have trouble with the display of this E-Book on your device.') . '</p></div>' . $bookEnd;
+
+        //$book->addChapter("Table of Contents", "TOC.xhtml", NULL, false, EPub::EXTERNAL_REF_IGNORE);
+        $book->addChapter("Notices", "Cover2.html", $cover);
+
+        $book->buildTOC();
+
+        foreach ($entries as $entry) { //set tags as subjects
+            $tags = $this->wallabag->store->retrieveTagsByEntry($entry['id']);
+            foreach ($tags as $tag) {
+                $book->setSubject($tag['value']);
+            }
+
+            $log->logLine("Set up parameters");
+
+            $chapter = $content_start . $entry['content'] . $bookEnd;
+            $book->addChapter($entry['title'], htmlspecialchars($entry['title']) . ".html", $chapter, true, EPub::EXTERNAL_REF_ADD);
+            $log->logLine("Added chapter " . $entry['title']);
+        }
+
+        if (DEBUG_POCHE) {
+            $book->addChapter("Log", "Log.html", $content_start . $log->getLog() . "\n</pre>" . $bookEnd); // log generation
+        }
+        $book->finalize();
+        $zipData = $book->sendBook($bookFileName);
+    }
+} 
\ No newline at end of file
index 95f727c6889f760661b63573d27ef6767d752595..2a458544f0fd49b09a4b4f4bb873486db5e3216b 100755 (executable)
@@ -5,7 +5,7 @@
  * @category   wallabag
  * @author     Nicolas Lœuillet <nicolas@loeuillet.org>
  * @copyright  2013
- * @license    http://www.wtfpl.net/ see COPYING file
+ * @license    http://opensource.org/licenses/MIT see COPYING file
  */
 
 @define ('SALT', ''); # put a strong string here
@@ -59,7 +59,7 @@
 @define ('LOCALE', ROOT . '/locale');
 @define ('CACHE', ROOT . '/cache');
 
-@define ('PAGINATION', '10');
+@define ('PAGINATION', '12');
 
 //limit for download of articles during import
 @define ('IMPORT_LIMIT', 5);
index 8cf86d03de12c930c409913e985d5de22a9b8efb..b8c487e39277e604c684c880e4b03d55d358829e 100755 (executable)
@@ -5,7 +5,7 @@
  * @category   wallabag
  * @author     Nicolas Lœuillet <nicolas@loeuillet.org>
  * @copyright  2013
- * @license    http://www.wtfpl.net/ see COPYING file
+ * @license    http://opensource.org/licenses/MIT see COPYING file
  */
 
 # the poche system root directory (/inc)
@@ -18,6 +18,11 @@ require_once INCLUDES . '/poche/Tools.class.php';
 require_once INCLUDES . '/poche/User.class.php';
 require_once INCLUDES . '/poche/Url.class.php';
 require_once INCLUDES . '/3rdparty/class.messages.php';
+require_once ROOT . '/vendor/autoload.php';
+require_once INCLUDES . '/poche/Template.class.php';
+require_once INCLUDES . '/poche/Language.class.php';
+require_once INCLUDES . '/poche/Routing.class.php';
+require_once INCLUDES . '/poche/WallabagEpub.class.php';
 require_once INCLUDES . '/poche/Poche.class.php';
 
 require_once INCLUDES . '/poche/Database.class.php';
@@ -36,25 +41,18 @@ require_once INCLUDES . '/3rdparty/libraries/PHPePub/Logger.php';
 require_once INCLUDES . '/3rdparty/libraries/PHPePub/EPub.php';
 require_once INCLUDES . '/3rdparty/libraries/PHPePub/EPubChapterSplitter.php';
 
-# Composer its autoloader for automatically loading Twig
-if (! file_exists(ROOT . '/vendor/autoload.php')) {
-    Poche::$canRenderTemplates = false;
-} else {
-    require_once ROOT . '/vendor/autoload.php';
-}
-
 # system configuration; database credentials et caetera
-if (! file_exists(INCLUDES . '/poche/config.inc.php')) {
-    Poche::$configFileAvailable = false;
-} else {
-    require_once INCLUDES . '/poche/config.inc.php';
-    require_once INCLUDES . '/poche/config.inc.default.php';
-}
+require_once INCLUDES . '/poche/config.inc.php';
+require_once INCLUDES . '/poche/config.inc.default.php';
 
-if (Poche::$configFileAvailable && DOWNLOAD_PICTURES) {
+if (DOWNLOAD_PICTURES) {
     require_once  INCLUDES . '/poche/pochePictures.php';
 }
 
 if (!ini_get('date.timezone') || !@date_default_timezone_set(ini_get('date.timezone'))) {
     date_default_timezone_set('UTC');
-}
\ No newline at end of file
+}
+
+if (defined('ERROR_REPORTING')) {
+    error_reporting(ERROR_REPORTING);
+}
index 7c319a857659862c7484afa365fad176a143bfbd..52394c703b1130fb73bbd9aa8807f17b89a574a0 100644 (file)
  * @category   wallabag
  * @author     Nicolas Lœuillet <nicolas@loeuillet.org>
  * @copyright  2013
- * @license    http://www.wtfpl.net/ see COPYING file
+ * @license    http://opensource.org/licenses/MIT see COPYING file
  */
 
-/**
- * On modifie les URLS des images dans le corps de l'article
- */
-function filtre_picture($content, $url, $id)
+
+final class Picture
 {
-    $matches = array();
-    $processing_pictures = array(); // list of processing image to avoid processing the same pictures twice
-    preg_match_all('#<\s*(img)[^>]+src="([^"]*)"[^>]*>#Si', $content, $matches, PREG_SET_ORDER);
-    foreach($matches as $i => $link) {
-        $link[1] = trim($link[1]);
-        if (!preg_match('#^(([a-z]+://)|(\#))#', $link[1])) {
-            $absolute_path = get_absolute_link($link[2],$url);
-            $filename = basename(parse_url($absolute_path, PHP_URL_PATH));
-            $directory = create_assets_directory($id);
-            $fullpath = $directory . '/' . $filename;
-            
-            if (in_array($absolute_path, $processing_pictures) === true) {
-                // replace picture's URL only if processing is OK : already processing -> go to next picture
-                continue;
+    /**
+     * Changing pictures URL in article content
+     */
+    public static function filterPicture($content, $url, $id)
+    {
+        $matches = array();
+        $processing_pictures = array(); // list of processing image to avoid processing the same pictures twice
+        preg_match_all('#<\s*(img)[^>]+src="([^"]*)"[^>]*>#Si', $content, $matches, PREG_SET_ORDER);
+        foreach($matches as $i => $link) {
+            $link[1] = trim($link[1]);
+            if (!preg_match('#^(([a-z]+://)|(\#))#', $link[1])) {
+                $absolute_path = self::_getAbsoluteLink($link[2], $url);
+                $filename = basename(parse_url($absolute_path, PHP_URL_PATH));
+                $directory = self::_createAssetsDirectory($id);
+                $fullpath = $directory . '/' . $filename;
+
+                if (in_array($absolute_path, $processing_pictures) === true) {
+                    // this picture's URL is already in the processed list: skip it and go to the next picture
+                    continue;
+                }
+
+                if (self::_downloadPictures($absolute_path, $fullpath) === true) {
+                    $content = str_replace($matches[$i][2], Tools::getPocheUrl() . $fullpath, $content);
+                }
+
+                $processing_pictures[] = $absolute_path;
             }
-            
-            if (download_pictures($absolute_path, $fullpath) === true) {
-                $content = str_replace($matches[$i][2], $fullpath, $content);
-            }
-            
-            $processing_pictures[] = $absolute_path;
         }
 
+        return $content;
     }
 
-    return $content;
-}
+    /**
+     * Get absolute URL
+     */
+    private static function _getAbsoluteLink($relativeLink, $url)
+    {
+        /* return if already absolute URL */
+        if (parse_url($relativeLink, PHP_URL_SCHEME) != '') return $relativeLink;
 
-/**
- * Retourne le lien absolu
- */
-function get_absolute_link($relative_link, $url) {
-    /* return if already absolute URL */
-    if (parse_url($relative_link, PHP_URL_SCHEME) != '') return $relative_link;
+        /* queries and anchors */
+        if ($relativeLink[0]=='#' || $relativeLink[0]=='?') return $url . $relativeLink;
 
-    /* queries and anchors */
-    if ($relative_link[0]=='#' || $relative_link[0]=='?') return $url . $relative_link;
+        /* parse base URL and convert to local variables:
+           $scheme, $host, $path */
+        extract(parse_url($url));
 
-    /* parse base URL and convert to local variables:
-       $scheme, $host, $path */
-    extract(parse_url($url));
+        /* remove non-directory element from path */
+        $path = preg_replace('#/[^/]*$#', '', $path);
 
-    /* remove non-directory element from path */
-    $path = preg_replace('#/[^/]*$#', '', $path);
+        /* destroy path if relative url points to root */
+        if ($relativeLink[0] == '/') $path = '';
 
-    /* destroy path if relative url points to root */
-    if ($relative_link[0] == '/') $path = '';
+        /* dirty absolute URL */
+        $abs = $host . $path . '/' . $relativeLink;
 
-    /* dirty absolute URL */
-    $abs = $host . $path . '/' . $relative_link;
+        /* replace '//' or '/./' or '/foo/../' with '/' */
+        $re = array('#(/\.?/)#', '#/(?!\.\.)[^/]+/\.\./#');
+        for($n=1; $n>0; $abs=preg_replace($re, '/', $abs, -1, $n)) {}
 
-    /* replace '//' or '/./' or '/foo/../' with '/' */
-    $re = array('#(/\.?/)#', '#/(?!\.\.)[^/]+/\.\./#');
-    for($n=1; $n>0; $abs=preg_replace($re, '/', $abs, -1, $n)) {}
+        /* absolute URL is ready! */
+        return $scheme.'://'.$abs;
+    }
 
-    /* absolute URL is ready! */
-    return $scheme.'://'.$abs;
-}
+    /**
+     * Downloading pictures
+     *
+     * @return bool true if the download and processing succeeded, false otherwise
+     */
+    private static function _downloadPictures($absolute_path, $fullpath)
+    {
+        $rawdata = Tools::getFile($absolute_path);
+        $fullpath = urldecode($fullpath);
+
+        if(file_exists($fullpath)) {
+            unlink($fullpath);
+        }
 
-/**
- * Téléchargement des images
- * 
- * @return bool true if the download and processing is OK, false else
- */
-function download_pictures($absolute_path, $fullpath)
-{
-    $rawdata = Tools::getFile($absolute_path);
-    $fullpath = urldecode($fullpath);
+        // check extension
+        $file_ext = strrchr($fullpath, '.');
+        $whitelist = array(".jpg",".jpeg",".gif",".png");
+        if (!(in_array($file_ext, $whitelist))) {
+            Tools::logm('processed image with not allowed extension. Skipping ' . $fullpath);
+            return false;
+        }
 
-    if(file_exists($fullpath)) {
-        unlink($fullpath);
-    }
-    
-    // check extension
-    $file_ext = strrchr($fullpath, '.');
-    $whitelist = array(".jpg",".jpeg",".gif",".png"); 
-    if (!(in_array($file_ext, $whitelist))) {
-        Tools::logm('processed image with not allowed extension. Skipping ' . $fullpath);
-        return false;
-    }
-    
-    // check headers
-    $imageinfo = getimagesize($absolute_path);
-    if ($imageinfo['mime'] != 'image/gif' && $imageinfo['mime'] != 'image/jpeg'&& $imageinfo['mime'] != 'image/jpg'&& $imageinfo['mime'] != 'image/png') {
-        Tools::logm('processed image with bad header. Skipping ' . $fullpath);
-        return false;
-    }
-    
-    // regenerate image
-    $im = imagecreatefromstring($rawdata);
-    if ($im === false) {
-        Tools::logm('error while regenerating image ' . $fullpath);
-        return false;
-    }
-    
-    switch ($imageinfo['mime']) {
-        case 'image/gif':
-            $result = imagegif($im, $fullpath);
-            break;
-        case 'image/jpeg':
-        case 'image/jpg':
-            $result = imagejpeg($im, $fullpath, REGENERATE_PICTURES_QUALITY);
-            break;
-        case 'image/png':
-            $result = imagepng($im, $fullpath, ceil(REGENERATE_PICTURES_QUALITY / 100 * 9));
-            break;
-    }
-    imagedestroy($im);
-    
-    return $result;
-}
+        // check headers
+        $imageinfo = getimagesize($absolute_path);
+        if ($imageinfo['mime'] != 'image/gif' && $imageinfo['mime'] != 'image/jpeg'&& $imageinfo['mime'] != 'image/jpg'&& $imageinfo['mime'] != 'image/png') {
+            Tools::logm('processed image with bad header. Skipping ' . $fullpath);
+            return false;
+        }
 
-/**
- * Crée un répertoire de médias pour l'article
- */
-function create_assets_directory($id)
-{
-    $assets_path = ABS_PATH;
-    if(!is_dir($assets_path)) {
-        mkdir($assets_path, 0715);
-    }
+        // regenerate image
+        $im = imagecreatefromstring($rawdata);
+        if ($im === false) {
+            Tools::logm('error while regenerating image ' . $fullpath);
+            return false;
+        }
+
+        switch ($imageinfo['mime']) {
+            case 'image/gif':
+                $result = imagegif($im, $fullpath);
+                break;
+            case 'image/jpeg':
+            case 'image/jpg':
+                $result = imagejpeg($im, $fullpath, REGENERATE_PICTURES_QUALITY);
+                break;
+            case 'image/png':
+                $result = imagepng($im, $fullpath, ceil(REGENERATE_PICTURES_QUALITY / 100 * 9));
+                break;
+        }
+        imagedestroy($im);
 
-    $article_directory = $assets_path . $id;
-    if(!is_dir($article_directory)) {
-        mkdir($article_directory, 0715);
+        return $result;
     }
 
-    return $article_directory;
-}
+    /**
+     * Create a directory for an article
+     *
+     * @param $id ID of the article
+     * @return string
+     */
+    private static function _createAssetsDirectory($id)
+    {
+        $assets_path = ABS_PATH;
+        if (!is_dir($assets_path)) {
+            mkdir($assets_path, 0715);
+        }
 
-/**
- * Suppression du répertoire d'images
- */
-function remove_directory($directory)
-{
-    if(is_dir($directory)) {
-        $files = array_diff(scandir($directory), array('.','..'));
-        foreach ($files as $file) {
-            (is_dir("$directory/$file")) ? remove_directory("$directory/$file") : unlink("$directory/$file");
+        $article_directory = $assets_path . $id;
+        if (!is_dir($article_directory)) {
+            mkdir($article_directory, 0715);
+        }
+
+        return $article_directory;
+    }
+
+    /**
+     * Remove the directory
+     *
+     * @param $directory
+     * @return bool|null result of rmdir(); nothing is returned when $directory is not a directory
+     */
+    public static function removeDirectory($directory)
+    {
+        if (is_dir($directory)) {
+            $files = array_diff(scandir($directory), array('.','..'));
+            foreach ($files as $file) {
+                (is_dir("$directory/$file")) ? self::removeDirectory("$directory/$file") : unlink("$directory/$file");
+            }
+            return rmdir($directory);
         }
-        return rmdir($directory);
     }
-}
+}
\ No newline at end of file
index cce10b50215d85710d4b19a8c4f042980b00c9de..39990d55a7c59292f1a7ba78caa5dcbc146189e3 100755 (executable)
--- a/index.php
+++ b/index.php
  * @category   wallabag
  * @author     Nicolas Lœuillet <nicolas@loeuillet.org>
  * @copyright  2013
- * @license    http://www.wtfpl.net/ see COPYING file
+ * @license    http://opensource.org/licenses/MIT see COPYING file
  */
 
-define ('POCHE', '1.7.1');
+define ('POCHE', '1.8.0');
 require 'check_essentials.php';
-require_once 'inc/poche/global.inc.php';
 require 'check_setup.php';
+require_once 'inc/poche/global.inc.php';
 
-# Set error reporting level
-if (defined('ERROR_REPORTING')) {
-       error_reporting(ERROR_REPORTING);
-}
-
-# Start session
-Session::$sessionName = 'poche';
+// Start session
+Session::$sessionName = 'wallabag';
 Session::init();
 
-# Start Poche
-$poche = new Poche();
-$notInstalledMessage = $poche -> getNotInstalledMessage();
-
-# Parse GET & REFERER vars
-$referer = empty($_SERVER['HTTP_REFERER']) ? '' : $_SERVER['HTTP_REFERER'];
-$view = Tools::checkVar('view', 'home');
-$action = Tools::checkVar('action');
-$id = Tools::checkVar('id');
-$_SESSION['sort'] = Tools::checkVar('sort', 'id');
-$url = new Url((isset ($_GET['url'])) ? $_GET['url'] : '');
-
-# vars to _always_ send to templates
-$tpl_vars = array(
-    'referer' => $referer,
-    'view' => $view,
-    'poche_url' => Tools::getPocheUrl(),
-    'title' => _('wallabag, a read it later open source system'),
-    'token' => Session::getToken(),
-    'theme' => $poche->getTheme()
-);
-
-if (! empty($notInstalledMessage)) {
-    if (! Poche::$canRenderTemplates || ! Poche::$configFileAvailable) {
-        # We cannot use Twig to display the error message
-        echo '<h1>Errors</h1><ol>';
-        foreach ($notInstalledMessage as $message) {
-            echo '<li>' . $message . '</li>';
-        }
-        echo '</ol>';
-        die();
-    } else {
-        # Twig is installed, put the error message in the template
-        $tpl_file = Tools::getTplFile('error');
-        $tpl_vars = array_merge($tpl_vars, array('msg' => $poche->getNotInstalledMessage()));
-        echo $poche->tpl->render($tpl_file, $tpl_vars);
-        exit;
-    }
-}
-
-# poche actions
-if (isset($_GET['login'])) {
-    # hello you
-    $poche->login($referer);
-} elseif (isset($_GET['logout'])) {
-    # see you soon !
-    $poche->logout();
-} elseif (isset($_GET['config'])) {
-    # Update password
-    $poche->updatePassword();
-} elseif (isset($_GET['newuser'])) {
-    $poche->createNewUser();
-} elseif (isset($_GET['deluser'])) {
-    $poche->deleteUser();
-} elseif (isset($_GET['epub'])) {
-    $poche->createEpub();
-} elseif (isset($_GET['import'])) {
-    $import = $poche->import();
-    $tpl_vars = array_merge($tpl_vars, $import);
-} elseif (isset($_GET['download'])) {
-    Tools::download_db();
-} elseif (isset($_GET['empty-cache'])) {
-    $poche->emptyCache();
-} elseif (isset($_GET['export'])) {
-    $poche->export();
-} elseif (isset($_GET['updatetheme'])) {
-    $poche->updateTheme();
-} elseif (isset($_GET['updatelanguage'])) {
-    $poche->updateLanguage();
-} elseif (isset($_GET['uploadfile'])) {
-    $poche->uploadFile();
-} elseif (isset($_GET['feed'])) {
-    if (isset($_GET['action']) && $_GET['action'] == 'generate') {
-        $poche->generateToken();
-    }
-    else {
-        $tag_id = (isset($_GET['tag_id']) ? intval($_GET['tag_id']) : 0);
-        $poche->generateFeeds($_GET['token'], filter_var($_GET['user_id'],FILTER_SANITIZE_NUMBER_INT), $tag_id, $_GET['type']);
-    }
-}
-
-elseif (isset($_GET['plainurl']) && !empty($_GET['plainurl'])) {
-    $plain_url = new Url(base64_encode($_GET['plainurl']));
-    $poche->action('add', $plain_url);
-}
-
-if (Session::isLogged()) {
-    $poche->action($action, $url, $id);
-    $tpl_file = Tools::getTplFile($view);
-    $tpl_vars = array_merge($tpl_vars, $poche->displayView($view, $id));
-} elseif(isset($_SERVER['PHP_AUTH_USER'])) {
-    if($poche->store->userExists($_SERVER['PHP_AUTH_USER'])) {
-        $poche->login($referer);
-    } else {
-        $poche->messages->add('e', _('login failed: user doesn\'t exist'));
-        Tools::logm('user doesn\'t exist');
-        $tpl_file = Tools::getTplFile('login');
-        $tpl_vars['http_auth'] = 1;
-    }
-} elseif(isset($_SERVER['REMOTE_USER'])) {
-    if($poche->store->userExists($_SERVER['REMOTE_USER'])) {
-        $poche->login($referer);
-    } else {
-        $poche->messages->add('e', _('login failed: user doesn\'t exist'));
-        Tools::logm('user doesn\'t exist');
-        $tpl_file = Tools::getTplFile('login');
-        $tpl_vars['http_auth'] = 1;
-    }
-} else {
-    $tpl_file = Tools::getTplFile('login');
-    $tpl_vars['http_auth'] = 0;
-    Session::logout();
-}
-
-# because messages can be added in $poche->action(), we have to add this entry now (we can add it before)
-$messages = $poche->messages->display('all', FALSE);
-$tpl_vars = array_merge($tpl_vars, array('messages' => $messages));
-
-# display poche
-echo $poche->tpl->render($tpl_file, $tpl_vars);
+// Let's rock !
+$wallabag = new Poche();
+$wallabag->run();
\ No newline at end of file
index bcb62708d98691344e92268dbdcb91c37bd26d95..5de2fb7b2ea6bedb838183426b7bb4cb16206922 100755 (executable)
@@ -1,4 +1,13 @@
 <?php
+/**
+ * wallabag, self hostable application allowing you to not miss any content anymore
+ *
+ * @category   wallabag
+ * @author     Nicolas Lœuillet <nicolas@loeuillet.org>
+ * @copyright  2013
+ * @license    http://opensource.org/licenses/MIT see COPYING file
+ */
+
 $errors = array();
 $successes = array();
 
@@ -147,16 +156,16 @@ else if (isset($_POST['install'])) {
                 }
 
                 // create database structure
-                $query = executeQuery($handle, $sql_structure, array());
+                $query = $handle->exec($sql_structure);
 
                 // Create user
                 $handle->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
 
-                $sql = 'INSERT INTO users (username, password, name) VALUES (?, ?, ?)';
+                $sql = "INSERT INTO users (username, password, name, email) VALUES (?, ?, ?, '')";
                 $params = array($username, $salted_password, $username);
                 $query = executeQuery($handle, $sql, $params);
 
-                $id_user = $handle->lastInsertId();
+                $id_user = (int)$handle->lastInsertId('users_id_seq');
 
                 $sql = 'INSERT INTO users_config ( user_id, name, value ) VALUES (?, ?, ?)';
                 $params = array($id_user, 'pager', '10');
@@ -302,9 +311,11 @@ php composer.phar install</code></pre></li>
                     <p>
                         <label for="password">Password</label>
                         <input type="password" required id="password" name="password" value="wallabag" />
+                        <label for="show">Show password:</label> <input style="margin-left:-80px;" name="show" id="show" type="checkbox" onchange="document.getElementById('password').type = this.checked ? 'text' : 'password'">
                     </p>
                     <p>
-                        <label for="show">Show password:</label> <input name="show" id="show" type="checkbox" onchange="document.getElementById('password').type = this.checked ? 'text' : 'password'">
+                        <label for="email">Email (not required)</label>
+                        <input type="email" id="email" name="email" />
                     </p>
                 </fieldset>
 
index bf5f69e7286a149f8f2646d2fba934698e3d5ce3..a3c984970b4279f17783368de837c0ad58d38a03 100644 (file)
Binary files a/locale/en_EN.utf8/LC_MESSAGES/en_EN.utf8.mo and b/locale/en_EN.utf8/LC_MESSAGES/en_EN.utf8.mo differ
index 119fb0601819b1ebfc93521760eee9f1ab73cab6..c589866cf0efe6c6b269c0f45f9a45fd88cbb292 100644 (file)
@@ -1,19 +1,17 @@
 msgid ""
 msgstr ""
-"Project-Id-Version: \n"
+"Project-Id-Version: wallabag\n"
 "Report-Msgid-Bugs-To: \n"
 "POT-Creation-Date: 2014-02-25 15:17+0300\n"
 "PO-Revision-Date: \n"
-"Last-Translator: Maryana <mariroz@mr.lviv.ua>\n"
+"Last-Translator: tcit <tcit@tcit.fr>\n"
 "Language-Team: \n"
-"Language: \n"
+"Language: en\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"X-Generator: Poedit 1.5.4\n"
-"X-Poedit-Language: English\n"
+"X-Generator: Poedit 1.6.9\n"
 "X-Poedit-Basepath: .\n"
-"X-Poedit-SearchPath-0: /home/mariroz/_DEV/web/wallabag/wallabag-master-testing\n"
 
 msgid "wallabag, a read it later open source system"
 msgstr "wallabag, a read it later open source system"
@@ -84,8 +82,12 @@ msgstr "A more recent development version is available."
 msgid "Feeds"
 msgstr "Feeds"
 
-msgid "Your feed token is currently empty and must first be generated to enable feeds. Click <a href='?feed&amp;action=generate'>here to generate it</a>."
-msgstr "Your feed token is currently empty and must first be generated to enable feeds. Click <a href='?feed&amp;action=generate'>here to generate it</a>."
+msgid ""
+"Your feed token is currently empty and must first be generated to enable "
+"feeds. Click <a href='?feed&amp;action=generate'>here to generate it</a>."
+msgstr ""
+"Your feed token is currently empty and must first be generated to enable "
+"feeds. Click <a href='?feed&amp;action=generate'>here to generate it</a>."
 
 msgid "Unread feed"
 msgstr "Unread feed"
@@ -102,8 +104,12 @@ msgstr "Your token:"
 msgid "Your user id:"
 msgstr "Your user id:"
 
-msgid "You can regenerate your token: <a href='?feed&amp;action=generate'>generate!</a>."
-msgstr "You can regenerate your token: <a href='?feed&amp;action=generate'>generate!</a>."
+msgid ""
+"You can regenerate your token: <a href='?feed&amp;action=generate'>generate!"
+"</a>."
+msgstr ""
+"You can regenerate your token: <a href='?feed&amp;action=generate'>generate!"
+"</a>."
 
 msgid "Change your theme"
 msgstr "Change your theme"
@@ -135,8 +141,10 @@ msgstr "Repeat your new password:"
 msgid "Import"
 msgstr "Import"
 
-msgid "Please execute the import script locally as it can take a very long time."
-msgstr "Please execute the import script locally as it can take a very long time."
+msgid ""
+"Please execute the import script locally as it can take a very long time."
+msgstr ""
+"Please execute the import script locally as it can take a very long time."
 
 msgid "More info in the official documentation:"
 msgstr "More info in the official documentation:"
@@ -184,8 +192,12 @@ msgstr "return to article"
 msgid "plop"
 msgstr "plop"
 
-msgid "You can <a href='wallabag_compatibility_test.php'>check your configuration here</a>."
-msgstr "You can <a href='wallabag_compatibility_test.php'>check your configuration here</a>."
+msgid ""
+"You can <a href='wallabag_compatibility_test.php'>check your configuration "
+"here</a>."
+msgstr ""
+"You can <a href='wallabag_compatibility_test.php'>check your configuration "
+"here</a>."
 
 msgid "favoris"
 msgstr "favoris"
@@ -247,8 +259,14 @@ msgstr "installation"
 msgid "install your wallabag"
 msgstr "install your wallabag"
 
-msgid "wallabag is still not installed. Please fill the below form to install it. Don't hesitate to <a href='http://doc.wallabag.org/'>read the documentation on wallabag website</a>."
-msgstr "wallabag is still not installed. Please fill the below form to install it. Don't hesitate to <a href='http://doc.wallabag.org/'>read the documentation on wallabag website</a>."
+msgid ""
+"wallabag is still not installed. Please fill the below form to install it. "
+"Don't hesitate to <a href='http://doc.wallabag.org/'>read the documentation "
+"on wallabag website</a>."
+msgstr ""
+"wallabag is still not installed. Please fill the below form to install it. "
+"Don't hesitate to <a href='http://doc.wallabag.org/'>read the documentation "
+"on wallabag website</a>."
 
 msgid "Login"
 msgstr "Login"
@@ -401,7 +419,8 @@ msgid "a more recent development version is available."
 msgstr "a more recent development version is available."
 
 msgid "Please execute the import script locally, it can take a very long time."
-msgstr "Please execute the import script locally, it can take a very long time."
+msgstr ""
+"Please execute the import script locally, it can take a very long time."
 
 msgid "More infos in the official doc:"
 msgstr "More infos in the official doc:"
@@ -448,8 +467,12 @@ msgstr "in demo mode, you can't update your password"
 msgid "your password has been updated"
 msgstr "your password has been updated"
 
-msgid "the two fields have to be filled & the password must be the same in the two fields"
-msgstr "the two fields have to be filled & the password must be the same in the two fields"
+msgid ""
+"the two fields have to be filled & the password must be the same in the two "
+"fields"
+msgstr ""
+"the two fields have to be filled & the password must be the same in the two "
+"fields"
 
 msgid "still using the \""
 msgstr "still using the \""
@@ -505,6 +528,49 @@ msgstr "Cache deleted."
 msgid "Oops, it seems you don't have PHP 5."
 msgstr "Oops, it seems you don't have PHP 5."
 
+msgid "Add user"
+msgstr "Add user"
+
+msgid "Add a new user :"
+msgstr "Add a new user:"
+
+msgid "Login for new user"
+msgstr "Login for new user"
+
+msgid "Password for new user"
+msgstr "Password for new user"
+
+msgid "Email for new user (not required)"
+msgstr "Email for new user (not required)"
+
+msgid "Send"
+msgstr "Send"
+
+msgid "Delete account"
+msgstr "Delete account"
+
+msgid "You can delete your account by entering your password and validating."
+msgstr "You can delete your account by entering your password and validating."
+
+msgid "Be careful, data will be erased forever (that is a very long time)."
+msgstr "Be careful, data will be erased forever (that is a very long time)."
+
+msgid "Type here your password"
+msgstr "Type your password here"
+
+msgid "You are the only user, you cannot delete your own account."
+msgstr "You are the only user, you cannot delete your own account."
+
+msgid ""
+"To completely remove wallabag, delete the wallabag folder on your web server "
+"(and eventual databases)."
+msgstr ""
+"To completely remove wallabag, delete the wallabag folder on your web server "
+"(and any associated databases)."
+
+msgid "Enter your search here"
+msgstr "Enter your search here"
+
 #~ msgid "poche it!"
 #~ msgstr "poche it!"
 
index b625e346c7ea82c3760705371100a4d25a5d10ac..f4a28e728795c9fd86866eff27a5a531ef8fdccb 100644 (file)
Binary files a/locale/fr_FR.utf8/LC_MESSAGES/fr_FR.utf8.mo and b/locale/fr_FR.utf8/LC_MESSAGES/fr_FR.utf8.mo differ
index 0343bfec63fe6e54b6758f0591bcad57462362d3..0912b8722f84b36088354ee288390f1ede13bad1 100644 (file)
@@ -4,7 +4,7 @@ msgstr ""
 "Report-Msgid-Bugs-To: \n"
 "POT-Creation-Date: 2014-05-10 20:09+0100\n"
 "PO-Revision-Date: \n"
-"Last-Translator: Gilles Wittezaële <gilles.wittezaele@laposte.net>\n"
+"Last-Translator: tcit <tcit@tcit.fr>\n"
 "Language-Team: \n"
 "Language: fr_FR\n"
 "MIME-Version: 1.0\n"
@@ -12,7 +12,7 @@ msgstr ""
 "Content-Transfer-Encoding: 8bit\n"
 "X-Poedit-KeywordsList: _;gettext;gettext_noop\n"
 "X-Poedit-SourceCharset: UTF-8\n"
-"X-Generator: Poedit 1.5.4\n"
+"X-Generator: Poedit 1.6.9\n"
 "Plural-Forms: nplurals=2; plural=(n != 1);\n"
 
 msgid "wallabag, a read it later open source system"
@@ -298,6 +298,9 @@ msgstr "Nom d'utilisateur"
 msgid "Password for new user"
 msgstr "Mot de passe du nouvel utilisateur"
 
+msgid "Email for new user (not required)"
+msgstr "E-mail pour le nouvel utilisateur (facultatif)"
+
 msgid "Send"
 msgstr "Envoyer"
 
@@ -320,10 +323,12 @@ msgstr ""
 "Vous êtes l'unique utilisateur, vous ne pouvez pas supprimer votre compte."
 
 msgid ""
-"To completely remove wallabag, delete the wallabag folder on your web server."
+"To completely remove wallabag, delete the wallabag folder on your web server "
+"(and eventual databases)."
 msgstr ""
 "Pour désinstaller complètement wallabag, supprimez le répertoire "
-"<code>wallabag</code> de votre serveur Web."
+"<code>wallabag</code> de votre serveur Web (ainsi que les bases de données "
+"éventuelles)."
 
 msgid "Save a link"
 msgstr "Ajouter un lien"
index f1b75cd50056099c333fe48970ca90c8479a7b48..59b6a46b408e486827e90cf5745d02c6ed44b286 100644 (file)
@@ -1,6 +1,6 @@
             <button id="menu" class="icon icon-menu desktopHide"><span>Menu</span></button>
             <ul id="links" class="links">
-                <li><a href="./" {% if view == 'home' %}class="current"{% endif %}>{% trans "home" %}</a></li>
+                <li><a href="./" {% if view == 'home' %}class="current"{% endif %}>{% trans "unread" %}</a></li>
                 <li><a href="./?view=fav" {% if view == 'fav' %}class="current"{% endif %}>{% trans "favorites" %}</a></li>
                 <li><a href="./?view=archive" {% if view == 'archive' %}class="current"{% endif %}>{% trans "archive" %}</a></li>
                 <li><a href="./?view=tags" {% if view == 'tags' %}class="current"{% endif %}>{% trans "tags" %}</a></li>
index 505ec368e30754fe71858d8a8fdf0bb3161b56de..57a928c0b4c2a135b5bc0e716ecdc1dcb9501d89 100755 (executable)
@@ -4,7 +4,7 @@
         {% trans "Save a link" %}</h2>
         <input type="hidden" name="autoclose" value="1" />
         <input required placeholder="example.com/article" class="addurl" id="plainurl" name="plainurl" type="url" />
+        <span id="add-link-result"></span>
         <input type="submit" value="{% trans "save link!" %}" />
-        <div id="add-link-result"></div>
     </form>
 </div>
index b8ac3bfa6576db0efba9ceaf9fba262e148c1be9..7b2c70824016366bde13e2d77399e58c10f37ac3 100644 (file)
@@ -1,7 +1,7 @@
 <div id="search-form" class="messages info">
 <form method="get" action="index.php">
         <input type="hidden" name="view" value="search"></input>
-        <label><a href="javascript: void(null);" id="search-form-close">X</a>{% trans "Search" %}</label> : <input type="text" name="search" />
+        <label><a href="javascript: void(null);" id="search-form-close">X</a>{% trans "Search" %}</label> : <input required placeholder="{% trans "Enter your search here" %}" type="text" name="search" id="searchfield" />
         <input id="submit-search" type="submit" value="{% trans "Search" %} !"></input>
 </form>
 </div>
\ No newline at end of file
index 46735f0744d1e45816df071a4e5a92b9588eb739..3523cd0847365c2cd8945a1653883a25ffcd3b22 100755 (executable)
                 <fieldset class="w500p">
                     <div class="row">
                         <label class="col w150p" for="file">{% trans "File:" %}</label>
-                        <input class="col" type="file" id="file" name="file" tabindex="4">
+                        <input class="col" type="file" id="file" name="file" tabindex="4" required="required">
                     </div>
                     <div class="row mts txtcenter">
                         <button class="bouton" type="submit" tabindex="4">{% trans "Import" %}</button>
             <p><a href="?import">{% trans "You can click here to fetch content for articles with no content." %}</a></p>
             
             <h2>{% trans "Export your wallabag data" %}</h2>
-            {% if constant('STORAGE') == 'sqlite' %}
-            <p><a href="?download" target="_blank">{% trans "Click here" %}</a> {% trans "to download your database." %}</p>{% endif %}
             <p><a href="?export" target="_blank">{% trans "Click here" %}</a> {% trans "to export your wallabag data." %}</p>
             
             <h2>{% trans "Fancy an E-Book ?" %}</h2>
                 <fieldset class="w500p">
                 <div class="row">
                     <label class="col w150p" for="newusername">{% trans 'Login for new user' %}</label>
-                    <input class="col" type="text" id="newusername" name="newusername" placeholder="{% trans 'Login' %}">
+                    <input class="col" type="text" id="newusername" name="newusername" placeholder="{% trans 'Login' %}" required>
                 </div>
                 <div class="row">
                     <label class="col w150p" for="password4newuser">{% trans "Password for new user" %}</label>
-                    <input class="col" type="password" id="password4newuser" name="password4newuser" placeholder="{% trans "Password" %}">
+                    <input class="col" type="password" id="password4newuser" name="password4newuser" placeholder="{% trans 'Password' %}" required>
+                </div>
+                <div class="row">
+                    <label class="col w150p" for="newuseremail">{% trans 'Email for new user (not required)' %}</label>
+                    <input class="col" type="email" id="newuseremail" name="newuseremail" placeholder="{% trans 'Email' %}">
                 </div>
                 <div class="row mts txtcenter">
                     <button type="submit">{% trans "Send" %}</button>  
             </div>
             </form>
             {% else %}<p>{% trans "You are the only user, you cannot delete your own account." %}<br />
-            {% trans "To completely remove wallabag, delete the wallabag folder on your web server." %}</p>{% endif %}
+            {% trans "To completely remove wallabag, delete the wallabag folder on your web server (and eventual databases)." %}</p>{% endif %}
 {% endblock %}
index f8238744928be9f1c15d48d8178174bf19228074..b775a2916251ae40edbc21f6a7d8de654519d7dc 100755 (executable)
@@ -89,7 +89,7 @@ form fieldset {
   margin: 0;
 }
 
-form input[type="text"], select, form input[type="password"], form input[type="url"] {
+form input[type="text"], select, form input[type="password"], form input[type="url"], form input[type="email"] {
   border: 1px solid #999;
   padding: 0.5em 1em;
   min-width: 12em;
@@ -630,7 +630,7 @@ a.add-to-wallabag-link-after:after {
 
 #add-link-result {
   font-weight: bold;
-  margin-top: 10px;
+  font-size: 0.9em;
 }
 
 /* ==========================================================================
index 3942d3bf8aef4396db5f0dc4ee1541ce60e48b0e..e788b588f88c2ac681fba1b84c4c3286ef6201df 100755 (executable)
@@ -61,9 +61,9 @@
 
             {% if search_term is defined %}<a title="{% trans %} Apply the tag {{ search_term }} to this search {% endtrans %}" href="./?action=add_tag&search={{ search_term }}">{% trans %} Apply the tag {{ search_term }} to this search {% endtrans %}</a>{% endif %}
             
-            {% if tag %}<a title="{% trans "Download the articles from this tag in an epub" %}" href="./?epub&amp;method=tag&amp;tag={{ tag.value }}">{% trans "Download the articles from this tag in an epub" %}</a>
-            {% elseif search_term is defined %}<a title="{% trans "Download the articles from this search in an epub" %}" href="./?epub&amp;method=search&amp;search={{ search_term }}">{% trans "Download the articles from this search in an epub" %}</a>
-            {% else %}<a title="{% trans "Download the articles from this category in an epub" %}" href="./?epub&amp;method=category&amp;category={{ view }}">{% trans "Download the articles from this category in an epub" %}</a>{% endif %}
+            {% if tag %}<a title="{% trans "Download the articles from this tag in an epub" %}" href="./?epub&amp;method=tag&amp;value={{ tag.value }}">{% trans "Download the articles from this tag in an epub" %}</a>
+            {% elseif search_term is defined %}<a title="{% trans "Download the articles from this search in an epub" %}" href="./?epub&amp;method=search&amp;value={{ search_term }}">{% trans "Download the articles from this search in an epub" %}</a>
+            {% else %}<a title="{% trans "Download the articles from this category in an epub" %}" href="./?epub&amp;method=category&amp;value={{ view }}">{% trans "Download the articles from this category in an epub" %}</a>{% endif %}
             
             {% endif %}
 {% endblock %}
index 703ce5be1f766f6260406d850b61defd77b571d0..53a9ee69223cbf5fcd6602443b27b13276f26b48 100755 (executable)
@@ -17,7 +17,7 @@
                 {% if constant('SHARE_SHAARLI') == 1 %}<li><a href="{{ constant('SHAARLI_URL') }}/index.php?post={{ entry.url|url_encode }}&amp;title={{ entry.title|url_encode }}" target="_blank" class="tool shaarli" title="{% trans "shaarli" %}"><span>{% trans "shaarli" %}</span></a></li>{% endif %}
                 {% if constant('FLATTR') == 1 %}{% if flattr.status == constant('FLATTRABLE') %}<li><a href="http://flattr.com/submit/auto?url={{ entry.url }}" class="tool flattr icon icon-flattr" target="_blank" title="{% trans "flattr" %}"><span>{% trans "flattr" %}</span></a></li>{% elseif flattr.status == constant('FLATTRED') %}<li><a href="{{ flattr.flattrItemURL }}" class="tool flattr icon icon-flattr" target="_blank" title="{% trans "flattr" %}"><span>{% trans "flattr" %}</span> ({{ flattr.numflattrs }})</a></li>{% endif %}{% endif %}
                 {% if constant('SHOW_PRINTLINK') == 1 %}<li><a title="{% trans "Print" %}" class="tool icon icon-print" href="javascript: window.print();"><span>{% trans "Print" %}</span></a></li>{% endif %}
-                <li><a href="./?epub&amp;method=id&amp;id={{ entry.id|e }}" title="Generate epub file">EPUB</a></li>
+                <li><a href="./?epub&amp;method=id&amp;value={{ entry.id|e }}" title="Generate epub file">EPUB</a></li>
                 <li><a href="mailto:hello@wallabag.org?subject=Wrong%20display%20in%20wallabag&amp;body={{ entry.url|url_encode }}" title="{% trans "Does this article appear wrong?" %}" class="tool bad-display icon icon-delete"><span>{% trans "Does this article appear wrong?" %}</span></a></li>
             </ul>
         </div>
index 5e73729980140f6b6a88e3325b9446804d7203d2..0e37660ada361fa6e7c7e9e4ebefabbf11299054 100755 (executable)
@@ -1,6 +1,6 @@
             <div id="menuContainer">
               <ul id="links">
-                  <li><a href="./" {% if view == 'home' %}class="current"{% endif %}>{% trans "home" %}</a></li>
+                  <li><a href="./" {% if view == 'home' %}class="current"{% endif %}>{% trans "unread" %}</a></li>
                   <li><a href="./?view=fav" {% if view == 'fav' %}class="current"{% endif %}>{% trans "favorites" %}</a></li>
                   <li><a href="./?view=archive" {% if view == 'archive' %}class="current"{% endif %}>{% trans "archive" %}</a></li>
                   <li><a href="./?view=tags" {% if view == 'tags' %}class="current"{% endif %}>{% trans "tags" %}</a></li>
index 25479a3d79d8830350ab3823685bdb1766a39253..c5c916c047bf4fba3d589a1f353605182ea64436 100755 (executable)
@@ -12,7 +12,7 @@
                 {% if constant('SHARE_MAIL') == 1 %}<li><a href="mailto:?subject={{ entry.title|url_encode }}&amp;body={{ entry.url|url_encode }}%20via%20@wallabagapp" class="tool email" title="{% trans "email" %}"><span>{% trans "email" %}</span></a></li>{% endif %}
                 {% if constant('SHARE_SHAARLI') == 1 %}<li><a href="{{ constant('SHAARLI_URL') }}/index.php?post={{ entry.url|url_encode }}&amp;title={{ entry.title|url_encode }}" target="_blank" class="tool shaarli" title="{% trans "shaarli" %}"><span>{% trans "shaarli" %}</span></a></li>{% endif %}
                 {% if constant('FLATTR') == 1 %}{% if flattr.status == constant('FLATTRABLE') %}<li class="flattrli"><a href="http://flattr.com/submit/auto?url={{ entry.url }}" class="tool flattr" target="_blank" title="{% trans "flattr" %}"><span>{% trans "flattr" %}</span></a></li>{% elseif flattr.status == constant('FLATTRED') %}<li><a href="{{ flattr.flattrItemURL }}" class="tool flattr" target="_blank" title="{% trans "flattr" %}"><span>{% trans "flattr" %}</span>{{ flattr.numflattrs }}</a></li>{% endif %}{% endif %}
-                <li><a href="./?epub&amp;method=id&amp;id={{ entry.id|e }}" title="Generate epub file">EPUB</a></li>
+                <li><a href="./?epub&amp;method=id&amp;value={{ entry.id|e }}" title="Generate epub file">EPUB</a></li>
                 <li><a href="mailto:hello@wallabag.org?subject=Wrong%20display%20in%20wallabag&amp;body={{ entry.url|url_encode }}" title="{% trans "this article appears wrong?" %}" class="tool bad-display"><span>{% trans "this article appears wrong?" %}</span></a></li>
             </ul>
         </div>
index 9ab58461abe2dadd38722e7fe782e1904376f8ad..c49337942adf78d6de75cb69e94aa6d51e9dae96 100755 (executable)
                     <label class="col w150p" for="password4newuser">{% trans "Password for new user" %}</label>
                     <input class="col" type="password" id="password4newuser" name="password4newuser" placeholder="{% trans "Password" %}">
                 </div>
+                <div class="row">
+                    <label class="col w150p" for="newuseremail">{% trans 'Email for new user (not required)' %}</label>
+                    <input class="col" type="email" id="newuseremail" name="newuseremail" placeholder="{% trans 'Email' %}">
+                </div>
                 <div class="row mts txtcenter">
                     <button type="submit">{% trans "Send" %}</button>  
                 </div>
             </div>
             </form>
             {% else %}<p>{% trans "You are the only user, you cannot delete your own account." %}<br />
-            {% trans "To completely remove wallabag, delete the wallabag folder on your web server." %}</p>{% endif %}
+            {% trans "To completely remove wallabag, delete the wallabag folder on your web server (and eventual databases)." %}</p>{% endif %}
         </div>
 {% endblock %}
index 401f3f20d96fc6a074c5e31689c813efdda97ba7..811298eb58f00a0e3a4e0a699494c59e35fcaf2e 100755 (executable)
@@ -53,9 +53,9 @@
             
             {{ block('pager') }}
             
-            {% if tag %}<a title="{% trans "Download the articles from this tag in an epub" %}" href="./?epub&amp;method=tag&amp;tag={{ tag.value }}">{% trans "Download the articles from this tag in an epub" %}</a>
-            {% elseif search_term is defined %}<a title="{% trans "Download the articles from this search in an epub" %}" href="./?epub&amp;method=search&amp;search={{ search_term }}">{% trans "Download the articles from this search in an epub" %}</a>
-            {% else %}<a title="{% trans "Download the articles from this category in an epub" %}" href="./?epub&amp;method=category&amp;category={{ view }}">{% trans "Download the articles from this category in an epub" %}</a>{% endif %}
+            {% if tag %}<a title="{% trans "Download the articles from this tag in an epub" %}" href="./?epub&amp;method=tag&amp;value={{ tag.value }}">{% trans "Download the articles from this tag in an epub" %}</a>
+            {% elseif search_term is defined %}<a title="{% trans "Download the articles from this search in an epub" %}" href="./?epub&amp;method=search&amp;value={{ search_term }}">{% trans "Download the articles from this search in an epub" %}</a>
+            {% else %}<a title="{% trans "Download the articles from this category in an epub" %}" href="./?epub&amp;method=category&amp;value={{ view }}">{% trans "Download the articles from this category in an epub" %}</a>{% endif %}
             
             {% endif %}
 
index 0daa0b03d5ca8f6ef23a40e33214c2e39ee9395f..9b9a60626176877b6f38a3566dcb357212c2297e 100644 (file)
@@ -1,5 +1,5 @@
             <ul id="links">
-                <li><a href="./" {% if view == 'home' %}class="current"{% endif %}>{% trans "home" %}</a></li>
+                <li><a href="./" {% if view == 'home' %}class="current"{% endif %}>{% trans "unread" %}</a></li>
                 <li><a href="./?view=fav" {% if view == 'fav' %}class="current"{% endif %}>{% trans "favorites" %}</a></li>
                 <li><a href="./?view=archive" {% if view == 'archive' %}class="current"{% endif %}>{% trans "archive" %}</a></li>
                 <li><a href="./?view=tags" {% if view == 'tags' %}class="current"{% endif %}>{% trans "tags" %}</a></li>
index 33bea20d79422c3e1749dd1b6ce1637d6575473f..0cf6097a5964f559bfc2ed76cbd2c60bee7ec570 100755 (executable)
@@ -2,7 +2,7 @@
 <form method="get" action="index.php">
     <p>
                <input type="hidden" name="view" value="search"></input>
-        <label>{% trans "Search" %}</label> : <input type="text" placeholder="{% trans "Enter your search here" %}" name="search" />
+        <label>{% trans "Search" %}</label> : <input type="text" required placeholder="{% trans "Enter your search here" %}" name="search" id="searchfield" />
                <input type="submit" value="{% trans "Search" %} !"></input>
     </p>
 </form>
index 160f6046a46426f5590603bfd2ddb0d5de1f42d3..36b66e8841532d7e2cd5a1bcc0929d7b5a29015f 100755 (executable)
                 <fieldset class="w500p">
                     <div class="row">
                         <label class="col w150p" for="file">{% trans "File:" %}</label>
-                        <input class="col" type="file" id="file" name="file" tabindex="4">
+                        <input class="col" type="file" id="file" name="file" tabindex="4" required="required">
                     </div>
                     <div class="row mts txtcenter">
                         <button class="bouton" type="submit" tabindex="4">{% trans "Import" %}</button>
             <p><a href="?import">{% trans "You can click here to fetch content for articles with no content." %}</a></p>
 
             <h2>{% trans "Export your wallabag data" %}</h2>
-            {% if constant('STORAGE') == 'sqlite' %}
-            <p><a href="?download" target="_blank">{% trans "Click here" %}</a> {% trans "to download your database." %}</p>{% endif %}
             <p><a href="?export" target="_blank">{% trans "Click here" %}</a> {% trans "to export your wallabag data." %}</p>
 
             <h2>{% trans "Cache" %}</h2>
                     <label class="col w150p" for="password4newuser">{% trans "Password for new user" %}</label>
                     <input class="col" type="password" id="password4newuser" name="password4newuser" placeholder="{% trans "Password" %}">
                 </div>
+                <div class="row">
+                    <label class="col w150p" for="newuseremail">{% trans 'Email for new user (not required)' %}</label>
+                    <input class="col" type="email" id="newuseremail" name="newuseremail" placeholder="{% trans 'Email' %}">
+                </div>
                 <div class="row mts txtcenter">
                     <button type="submit">{% trans "Send" %}</button>  
                 </div>
             </div>
             </form>
             {% else %}<p>{% trans "You are the only user, you cannot delete your own account." %}<br />
-            {% trans "To completely remove wallabag, delete the wallabag folder on your web server." %}</p>{% endif %}
+            {% trans "To completely remove wallabag, delete the wallabag folder on your web server (and eventual databases)." %}</p>{% endif %}
 {% endblock %}
index 46b547956244eb3f84d64b292a33120323c179ec..3ad4e34e50e69035e753d2795172d3755575647b 100644 (file)
@@ -8,6 +8,11 @@
     border-radius: 4px;
 }
 
+/* Search form message needs a little more width, depending on translations */
+#search-form {
+    width: 420px;
+}
+
 .messages a.closeMessage {
     display: none;
     float: right;
index e6c781f58e53331c56826517f6cc4c689521d201..093c2dc5e20b04076dc767143b89ec1b01c9de6b 100755 (executable)
@@ -60,9 +60,9 @@
             
             {% if view == 'home' %}{% if nb_results > 1 %}<a title="{% trans "mark all the entries as read" %}"  href="./?action=archive_all">{% trans "mark all the entries as read" %}</a>{% endif %}{% endif %}
             
-            {% if tag %}<a title="{% trans "Download the articles from this tag in an epub" %}" href="./?epub&amp;method=tag&amp;tag={{ tag.value }}">{% trans "Download the articles from this tag in an epub" %}</a>
-            {% elseif search_term is defined %}<a title="{% trans "Download the articles from this search in an epub" %}" href="./?epub&amp;method=search&amp;search={{ search_term }}">{% trans "Download the articles from this search in an epub" %}</a>
-            {% else %}<a title="{% trans "Download the articles from this category in an epub" %}" href="./?epub&amp;method=category&amp;category={{ view }}">{% trans "Download the articles from this category in an epub" %}</a>{% endif %}
+            {% if tag %}<a title="{% trans "Download the articles from this tag in an epub" %}" href="./?epub&amp;method=tag&amp;value={{ tag.value }}">{% trans "Download the articles from this tag in an epub" %}</a>
+            {% elseif search_term is defined %}<a title="{% trans "Download the articles from this search in an epub" %}" href="./?epub&amp;method=search&amp;value={{ search_term }}">{% trans "Download the articles from this search in an epub" %}</a>
+            {% else %}<a title="{% trans "Download the articles from this category in an epub" %}" href="./?epub&amp;method=category&amp;value={{ view }}">{% trans "Download the articles from this category in an epub" %}</a>{% endif %}
             
             {% endif %}
 {% endblock %}
index 06be3f0cdced6fd923d72af5b3e9d1cdb49e6641..a32e6e70aa9f0471d659f169c368a43598ea2642 100644 (file)
@@ -10,6 +10,7 @@ $(document).ready(function() {
 
     $("#search").click(function(){
         closeSearch();
+        $('#searchfield').focus();
     });
 
     $("#search-form-close").click(function(){
index 88d1407964d2b843236099ff23384be6a09c2a90..dbbbde3c05a22da4d57f20d9b2d4c2c59e3e2336 100755 (executable)
@@ -16,7 +16,7 @@
                 {% if constant('SHARE_SHAARLI') == 1 %}<li><a href="{{ constant('SHAARLI_URL') }}/index.php?post={{ entry.url|url_encode }}&amp;title={{ entry.title|url_encode }}" target="_blank" class="tool shaarli" title="{% trans "shaarli" %}"><span>{% trans "shaarli" %}</span></a></li>{% endif %}
                 {% if constant('FLATTR') == 1 %}{% if flattr.status == constant('FLATTRABLE') %}<li><a href="http://flattr.com/submit/auto?url={{ entry.url }}" class="tool flattr" target="_blank" title="{% trans "flattr" %}"><span>{% trans "flattr" %}</span></a></li>{% elseif flattr.status == constant('FLATTRED') %}<li><a href="{{ flattr.flattrItemURL }}" class="tool flattr" target="_blank" title="{% trans "flattr" %}"><span>{% trans "flattr" %}</span>{{ flattr.numflattrs }}</a></li>{% endif %}{% endif %}
                 {% if constant('SHOW_PRINTLINK') == 1 %}<li><a title="{% trans "Print" %}" class="tool print" href="javascript: window.print();"><span>{% trans "Print" %}</span></a></li>{% endif %}
-                <li><a href="./?epub&amp;method=id&amp;id={{ entry.id|e }}" title="Generate epub file">EPUB</a></li>
+                <li><a href="./?epub&amp;method=id&amp;value={{ entry.id|e }}" title="Generate epub file">EPUB</a></li>
                 <li><a href="mailto:hello@wallabag.org?subject=Wrong%20display%20in%20wallabag&amp;body={{ entry.url|url_encode }}" title="{% trans "Does this article appear wrong?" %}" class="tool bad-display"><span>{% trans "Does this article appear wrong?" %}</span></a></li>
                 {% if constant('SHOW_READPERCENT') == 1 %}<li><div id="readLeftPercent">0%</div></li>{% endif %}
             </ul>
index d6f221564c6a27550143e61f75d4f98555652b5b..3f9ff59f92d3937b576fcc25213941c46b80d37e 100644 (file)
@@ -1,10 +1,20 @@
 <?php
+/**
+ * wallabag, self hostable application allowing you to not miss any content anymore
+ *
+ * @category   wallabag
+ * @author     Nicolas Lœuillet <nicolas@loeuillet.org>
+ * @copyright  2013
+ * @license    http://opensource.org/licenses/MIT see COPYING file
+ */
+
 $app_name = 'wallabag';
 
 $php_ok = (function_exists('version_compare') && version_compare(phpversion(), '5.3.3', '>='));
 $pcre_ok = extension_loaded('pcre');
 $zlib_ok = extension_loaded('zlib');
 $mbstring_ok = extension_loaded('mbstring');
+$dom_ok = extension_loaded('DOM');
 $iconv_ok = extension_loaded('iconv');
 $tidy_ok = function_exists('tidy_parse_string');
 $curl_ok = function_exists('curl_exec');
@@ -13,6 +23,8 @@ $parallel_ok = ((extension_loaded('http') && class_exists('HttpRequestPool')) ||
 $allow_url_fopen_ok = (bool)ini_get('allow_url_fopen');
 $filter_ok = extension_loaded('filter');
 $gettext_ok = function_exists("gettext");
+$gd_ok = extension_loaded('gd');
+
 
 if (extension_loaded('xmlreader')) {
        $xml_ok = true;
@@ -223,10 +235,20 @@ if (isset($_GET['from'])){
                                                <td>Enabled</td>
                                                <?php echo ($iconv_ok) ? '<td class="good">Enabled' : '<td class="bad">Disabled'; ?></td>
                                        </tr> -->
+                                       <tr class="<?php echo ($dom_ok) ? 'enabled' : 'disabled'; ?>">
+                                               <td><a href="http://php.net/manual/en/book.dom.php">DOM / XML extension</a></td>
+                                               <td>Enabled</td>
+                                               <?php echo ($dom_ok) ? '<td class="good">Enabled' : '<td class="bad">Disabled'; ?></td>
+                                       </tr>
                                        <tr class="<?php echo ($filter_ok) ? 'enabled' : 'disabled'; ?>">
                                                <td><a href="http://uk.php.net/manual/en/book.filter.php">Data filtering</a></td>
                                                <td>Enabled</td>
                                                <?php echo ($filter_ok) ? '<td class="good">Enabled' : '<td class="pass">Disabled'; ?></td>
+                                       </tr>
+                                       <tr class="<?php echo ($gd_ok) ? 'enabled' : 'disabled'; ?>">
+                                               <td><a href="http://php.net/manual/en/book.image.php">GD</a></td>
+                                               <td>Enabled</td>
+                                               <?php echo ($gd_ok) ? '<td class="good">Enabled' : '<td class="pass">Disabled'; ?></td>
                                        </tr>                                   
                                        <tr class="<?php echo ($tidy_ok) ? 'enabled' : 'disabled'; ?>">
                                                <td><a href="http://php.net/tidy">Tidy</a></td>
@@ -266,7 +288,7 @@ if (isset($_GET['from'])){
                        <h3>What does this mean?</h3>
                        <ol>
                                <?php //if ($php_ok && $xml_ok && $pcre_ok && $mbstring_ok && $iconv_ok && $filter_ok && $zlib_ok && $tidy_ok && $curl_ok && $parallel_ok && $allow_url_fopen_ok): ?>
-                               <?php if ($php_ok && $xml_ok && $pcre_ok && $filter_ok && $tidy_ok && $curl_ok && $parallel_ok && $allow_url_fopen_ok && $gettext_ok && $parse_ini_ok): ?>
+                               <?php if ($php_ok && $xml_ok && $pcre_ok && $dom_ok && $filter_ok && $gd_ok && $tidy_ok && $curl_ok && $parallel_ok && $allow_url_fopen_ok && $gettext_ok && $parse_ini_ok): ?>
                                <li><em>You have everything you need to run <?php echo $app_name; ?> properly!  Congratulations!</em></li>
                                <?php else: ?>
                                        <?php if ($php_ok): ?>
@@ -283,48 +305,62 @@ if (isset($_GET['from'])){
                                                                                <li><strong>Gettext:</strong> You have <code>gettext</code> enabled. <em>No problems here.</em></li>
                                         
                                         <?php if ($parse_ini_ok): ?>
+                                               <li><strong>Parse ini:</strong> You can parse <em>ini</em> files. <em>No problems here.</em></li>
+
+                                               <?php if ($dom_ok): ?>
+                                                       <li><strong>DOM/XML:</strong> You can use the PHP built-in DOM to operate on XML documents. <em>No problems here.</em></li>
                                         
-                                            <?php if ($filter_ok): ?>
-                                                <li><strong>Data filtering:</strong> You have the PHP filter extension enabled. <em>No problems here.</em></li>
-        
-                                                <?php if ($zlib_ok): ?>
-                                                    <li><strong>Zlib:</strong> You have <code>Zlib</code> enabled.  This allows SimplePie to support GZIP-encoded feeds.  <em>No problems here.</em></li>
-                                                <?php else: ?>
-                                                    <li><strong>Zlib:</strong> The <code>Zlib</code> extension is not available.  SimplePie will ignore any GZIP-encoding, and instead handle feeds as uncompressed text.</li>
-                                                <?php endif; ?>
-                                                
-                                                <?php if ($mbstring_ok && $iconv_ok): ?>
-                                                    <li><strong>mbstring and iconv:</strong> You have both <code>mbstring</code> and <code>iconv</code> installed!  This will allow <?php echo $app_name; ?> to handle the greatest number of languages. <em>No problems here.</em></li>
-                                                <?php elseif ($mbstring_ok): ?>
-                                                    <li><strong>mbstring:</strong> <code>mbstring</code> is installed, but <code>iconv</code> is not.</li>
-                                                <?php elseif ($iconv_ok): ?>
-                                                    <li><strong>iconv:</strong> <code>iconv</code> is installed, but <code>mbstring</code> is not.</li>
-                                                <?php else: ?>
-                                                    <li><strong>mbstring and iconv:</strong> <em>You do not have either of the extensions installed.</em> This will significantly impair your ability to read non-English feeds, as well as even some English ones.</li>
-                                                <?php endif; ?>
-
-                                                <?php if ($tidy_ok): ?>
-                                                    <li><strong>Tidy:</strong> You have <code>Tidy</code> support installed.  <em>No problems here.</em></li>
-                                                <?php else: ?>
-                                                    <li><strong>Tidy:</strong> The <code>Tidy</code> extension is not available.  <?php echo $app_name; ?> should still work with most feeds, but you may experience problems with some.</li>
-                                                <?php endif; ?>
-                                            
-                                                <?php if ($curl_ok): ?>
-                                                    <li><strong>cURL:</strong> You have <code>cURL</code> support installed.  <em>No problems here.</em></li>
-                                                <?php else: ?>
-                                                    <li><strong>cURL:</strong> The <code>cURL</code> extension is not available.  SimplePie will use <code>fsockopen()</code> instead.</li>
-                                                <?php endif; ?>
-                
-                                                <?php if ($parallel_ok): ?>
-                                                    <li><strong>Parallel URL fetching:</strong> You have <code>HttpRequestPool</code> or <code>curl_multi</code> support installed.  <em>No problems here.</em></li>
-                                                <?php else: ?>
-                                                    <li><strong>Parallel URL fetching:</strong> <code>HttpRequestPool</code> or <code>curl_multi</code> support is not available.  <?php echo $app_name; ?> will use <code>file_get_contents()</code> instead to fetch URLs sequentially rather than in parallel.</li>
-                                                <?php endif; ?>
-
-                                            <?php else: ?>
-                                                <li><strong>Data filtering:</strong> Your PHP configuration has the filter extension disabled.  <strong><?php echo $app_name; ?> will not work here.</strong></li>
-                                            <?php endif; ?>
-                                        
+                                                   <?php if ($filter_ok): ?>
+                                                       <li><strong>Data filtering:</strong> You have the PHP filter extension enabled. <em>No problems here.</em></li>
+               
+                                                       <?php if ($zlib_ok): ?>
+                                                           <li><strong>Zlib:</strong> You have <code>Zlib</code> enabled.  This allows SimplePie to support GZIP-encoded feeds.  <em>No problems here.</em></li>
+                                                       <?php else: ?>
+                                                           <li><strong>Zlib:</strong> The <code>Zlib</code> extension is not available.  SimplePie will ignore any GZIP-encoding, and instead handle feeds as uncompressed text.</li>
+                                                       <?php endif; ?>
+                                                       
+                                                       <?php if ($mbstring_ok && $iconv_ok): ?>
+                                                           <li><strong>mbstring and iconv:</strong> You have both <code>mbstring</code> and <code>iconv</code> installed!  This will allow <?php echo $app_name; ?> to handle the greatest number of languages. <em>No problems here.</em></li>
+                                                       <?php elseif ($mbstring_ok): ?>
+                                                           <li><strong>mbstring:</strong> <code>mbstring</code> is installed, but <code>iconv</code> is not.</li>
+                                                       <?php elseif ($iconv_ok): ?>
+                                                           <li><strong>iconv:</strong> <code>iconv</code> is installed, but <code>mbstring</code> is not.</li>
+                                                       <?php else: ?>
+                                                           <li><strong>mbstring and iconv:</strong> <em>You do not have either of the extensions installed.</em> This will significantly impair your ability to read non-English feeds, as well as even some English ones.</li>
+                                                       <?php endif; ?>
+
+                                                       <?php if ($gd_ok): ?>
+                                                           <li><strong>GD:</strong> You have <code>GD</code> support installed.  <em>No problems here.</em></li>
+                                                       <?php else: ?>
+                                                           <li><strong>GD:</strong> The <code>GD</code> extension is not available.  <?php echo $app_name; ?> will not be able to download pictures locally on your server.</li>
+                                                       <?php endif; ?>
+
+                                                       <?php if ($tidy_ok): ?>
+                                                           <li><strong>Tidy:</strong> You have <code>Tidy</code> support installed.  <em>No problems here.</em></li>
+                                                       <?php else: ?>
+                                                           <li><strong>Tidy:</strong> The <code>Tidy</code> extension is not available.  <?php echo $app_name; ?> should still work with most feeds, but you may experience problems with some.</li>
+                                                       <?php endif; ?>
+                                                   
+                                                       <?php if ($curl_ok): ?>
+                                                           <li><strong>cURL:</strong> You have <code>cURL</code> support installed.  <em>No problems here.</em></li>
+                                                       <?php else: ?>
+                                                           <li><strong>cURL:</strong> The <code>cURL</code> extension is not available.  SimplePie will use <code>fsockopen()</code> instead.</li>
+                                                       <?php endif; ?>
+                       
+                                                       <?php if ($parallel_ok): ?>
+                                                           <li><strong>Parallel URL fetching:</strong> You have <code>HttpRequestPool</code> or <code>curl_multi</code> support installed.  <em>No problems here.</em></li>
+                                                       <?php else: ?>
+                                                           <li><strong>Parallel URL fetching:</strong> <code>HttpRequestPool</code> or <code>curl_multi</code> support is not available.  <?php echo $app_name; ?> will use <code>file_get_contents()</code> instead to fetch URLs sequentially rather than in parallel.</li>
+                                                       <?php endif; ?>
+
+                                                       <?php else: ?>
+                                                           <li><strong>Data filtering:</strong> Your PHP configuration has the filter extension disabled.  <strong><?php echo $app_name; ?> will not work here.</strong></li>
+                                                       <?php endif; ?>
+
+                                                   <?php else: ?>
+                                                       <li><strong>DOM/XML:</strong> Your PHP configuration isn't standard, you're missing PHP-DOM.  You may try to install a package or recompile PHP. <strong><?php echo $app_name; ?> will not work here.</strong></li>
+                                               <?php endif; ?>
+
                                         <?php else : ?>
                                             <li><strong>Parse ini files function :</strong> Bad luck : your webhost has decided to block the use of the <em>parse_ini_file</em> function. <strong><?php echo $app_name; ?> will not work here.</strong>
                                         <?php endif; ?>