From 4e067ceabd705201a16b4c92cf4b23f3b990326c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20L=C5=93uillet?= <nicolas@loeuillet.org> Date: Sun, 13 Jul 2014 10:15:40 +0200 Subject: updated specific configuration for parsing --- inc/3rdparty/site_config/standard/24ways.org.txt | 8 +- inc/3rdparty/site_config/standard/36kr.com.txt | 8 ++ .../site_config/standard/37signals.com.txt | 8 +- .../site_config/standard/3quarksdaily.com.txt | 16 +-- .../site_config/standard/3voor12.vpro.nl.txt | 0 .../site_config/standard/43folders.com.txt | 4 +- inc/3rdparty/site_config/standard/500px.com.txt | 50 +++---- .../site_config/standard/512pixels.net.txt | 0 inc/3rdparty/site_config/standard/5by5.tv.txt | 14 +- .../site_config/standard/7newsbelize.com.txt | 7 + inc/3rdparty/site_config/standard/944.com.txt | 14 +- inc/3rdparty/site_config/standard/README.md | 38 ++++++ .../standard/aachener-nachrichten.de.txt | 18 +-- .../site_config/standard/aachener-zeitung.de.txt | 18 +-- inc/3rdparty/site_config/standard/abc.es.txt | 10 +- inc/3rdparty/site_config/standard/abc.net.au.txt | 26 ++-- .../site_config/standard/abcnews.go.com.txt | 52 ++++---- .../site_config/standard/accesstoinsight.org.txt | 16 +-- inc/3rdparty/site_config/standard/acidcow.com.txt | 4 +- inc/3rdparty/site_config/standard/acquia.com.txt | 14 +- inc/3rdparty/site_config/standard/acroswing.fr.txt | 6 +- .../site_config/standard/aftenposten.no.txt | 5 + .../site_config/standard/aftonbladet.se.txt | 13 ++ .../site_config/standard/aht.seriouseats.com.txt | 26 ++-- inc/3rdparty/site_config/standard/albayan.ae.txt | 6 + .../site_config/standard/alex.mullr.net.txt | 0 .../site_config/standard/alexduner.com.txt | 4 + .../standard/alexduner.squarespace.com.txt | 4 + .../site_config/standard/alistapart.com.txt | 20 +-- .../site_config/standard/aljazeera.com.txt | 14 +- .../site_config/standard/allrecipes.com.txt | 24 ++-- .../site_config/standard/allthingsd.com.txt | 21 +-- inc/3rdparty/site_config/standard/allyou.com.txt | 
12 +- .../site_config/standard/alphabeta.argaam.com.txt | 18 +-- inc/3rdparty/site_config/standard/alriyadh.com.txt | 16 +-- inc/3rdparty/site_config/standard/alseraj.net.txt | 0 inc/3rdparty/site_config/standard/alt1040.com.txt | 0 inc/3rdparty/site_config/standard/alternet.org.txt | 4 + inc/3rdparty/site_config/standard/altfoto.com.txt | 0 .../site_config/standard/alumni.stanford.edu.txt | 16 +-- .../site_config/standard/amandala.com.bz.txt | 6 + inc/3rdparty/site_config/standard/amazon.com.txt | 36 ++--- .../site_config/standard/americandrink.net.txt | 8 +- .../site_config/standard/americascup.com.txt | 18 +-- .../standard/americastestkitchenfeed.com.txt | 6 +- inc/3rdparty/site_config/standard/amptoons.com.txt | 8 ++ .../site_config/standard/anandtech.com.txt | 20 +-- .../site_config/standard/androidpolice.com.txt | 5 + .../site_config/standard/andyrutledge.com.txt | 16 +-- .../standard/annatravelling.wordpress.com.txt | 14 +- .../site_config/standard/applature.com.txt | 34 ++--- inc/3rdparty/site_config/standard/apple.com.txt | 12 +- .../site_config/standard/appledaily.com.tw.txt | 4 + .../site_config/standard/appleinsider.com.txt | 34 +++-- .../site_config/standard/appleweblog.com.txt | 0 .../site_config/standard/archdaily.com.txt | 6 +- .../site_config/standard/archiveofourown.org.txt | 38 +++--- .../site_config/standard/arstechnica.com.txt | 33 ++--- .../site_config/standard/articles.boston.com.txt | 8 +- .../site_config/standard/articles.courant.com.txt | 18 +-- .../standard/articles.washingtonpost.com.txt | 11 ++ inc/3rdparty/site_config/standard/asahi.com.txt | 2 +- inc/3rdparty/site_config/standard/ascarter.net.txt | 6 +- .../site_config/standard/astronews.com.txt | 10 +- inc/3rdparty/site_config/standard/asymco.com.txt | 12 +- inc/3rdparty/site_config/standard/autoblog.com.txt | 8 +- inc/3rdparty/site_config/standard/avclub.com.txt | 4 +- .../site_config/standard/baltimoresun.com.txt | 20 +-- .../standard/baseballprospectus.com.txt | 13 ++ 
.../site_config/standard/basicthinking.de.txt | 10 +- inc/3rdparty/site_config/standard/bb.is.txt | 22 +-- inc/3rdparty/site_config/standard/bbc.co.uk.txt | 74 ++++++----- .../site_config/standard/bbcgoodfood.com.txt | 16 +++ .../site_config/standard/benoitmaison.org.txt | 28 ++-- .../site_config/standard/berlingske.dk.txt | 2 +- inc/3rdparty/site_config/standard/bernama.com.txt | 5 + inc/3rdparty/site_config/standard/betabeat.com.txt | 0 inc/3rdparty/site_config/standard/betanews.com.txt | 10 +- .../site_config/standard/biography.com.txt | 12 +- inc/3rdparty/site_config/standard/bitelia.com.txt | 0 .../site_config/standard/bizjournals.com.txt | 13 ++ inc/3rdparty/site_config/standard/bjango.com.txt | 10 +- .../site_config/standard/blog.arsln.org.txt | 12 +- .../site_config/standard/blog.asmartbear.com.txt | 10 +- .../site_config/standard/blog.cloudflare.com.txt | 14 +- inc/3rdparty/site_config/standard/blog.fefe.de.txt | 6 +- .../site_config/standard/blog.instagram.com.txt | 18 +-- .../site_config/standard/blog.instapaper.com.txt | 9 ++ .../site_config/standard/blog.jaysalvat.com.txt | 4 +- .../site_config/standard/blog.kaelig.fr.txt | 6 +- .../site_config/standard/blog.naver.com.txt | 8 +- .../site_config/standard/blog.pchome.net.txt | 20 +-- .../site_config/standard/blog.pinboard.in.txt | 8 +- .../site_config/standard/blog.renren.com.txt | 11 ++ .../site_config/standard/blog.sina.com.cn.txt | 50 +++---- inc/3rdparty/site_config/standard/blog.spu.edu.txt | 0 .../site_config/standard/blog.wells.ee.txt | 10 +- .../site_config/standard/blogs.aljazeera.net.txt | 12 +- .../site_config/standard/blogs.forbes.com.txt | 0 .../site_config/standard/blogs.hbr.org.txt | 6 +- .../site_config/standard/blogs.msdn.com.txt | 8 +- .../site_config/standard/blogs.reuters.com.txt | 2 +- .../standard/blogs.scientificamerican.com.txt | 28 ++-- .../standard/blogs.smithsonianmag.com.txt | 26 ++-- .../site_config/standard/blogs.technet.com.txt | 13 +- 
.../site_config/standard/bluetouff.com.txt | 4 +- .../site_config/standard/boagworld.com.txt | 12 +- .../site_config/standard/boingboing.net.txt | 18 +-- .../site_config/standard/boldizsar.palotas.eu.txt | 2 +- .../site_config/standard/book.douban.com.txt | 8 +- .../site_config/standard/bookforum.com.txt | 34 ++--- .../site_config/standard/borderhouseblog.com.txt | 10 +- .../site_config/standard/bostonglobe.com.txt | 28 ++-- .../site_config/standard/bostonreview.net.txt | 26 ++-- .../site_config/standard/boundlessline.org.txt | 6 +- .../site_config/standard/bowdoinorient.com.txt | 6 + .../site_config/standard/brainfacts.org.txt | 16 +-- inc/3rdparty/site_config/standard/brandeins.de.txt | 10 +- .../standard/brandingstrategyinsider.com.txt | 2 +- .../site_config/standard/brasil.elpais.com.txt | 23 ++++ .../site_config/standard/brettterpstra.com.txt | 6 +- .../site_config/standard/brisbanetimes.com.au.txt | 0 .../site_config/standard/brookings.edu.txt | 22 +-- .../site_config/standard/brooksreview.net.txt | 10 +- inc/3rdparty/site_config/standard/bt.no.txt | 12 ++ inc/3rdparty/site_config/standard/buffed.de.txt | 14 ++ inc/3rdparty/site_config/standard/buquad.com.txt | 12 +- .../standard/business2community.com.txt | 5 + .../site_config/standard/businessinsider.com.txt | 28 ++-- .../site_config/standard/businessnews.com.tn.txt | 22 +-- .../site_config/standard/businessweek.com.txt | 58 ++++---- inc/3rdparty/site_config/standard/buzzfeed.com.txt | 26 ++-- .../site_config/standard/bygonebureau.com.txt | 8 +- inc/3rdparty/site_config/standard/cable.co.uk.txt | 11 ++ .../standard/cardboardconnection.com.txt | 12 +- .../site_config/standard/carpeaqua.com.txt | 8 +- inc/3rdparty/site_config/standard/cars.com.txt | 7 + inc/3rdparty/site_config/standard/catb.org.txt | 10 +- inc/3rdparty/site_config/standard/cbc.ca.txt | 6 +- inc/3rdparty/site_config/standard/cbn.com.txt | 8 ++ inc/3rdparty/site_config/standard/cbsnews.com.txt | 29 ++-- 
.../site_config/standard/cedarrepublican.com.txt | 2 + inc/3rdparty/site_config/standard/chareidi.org.txt | 0 .../site_config/standard/chinamining.org.txt | 16 +-- inc/3rdparty/site_config/standard/chomsky.info.txt | 8 +- inc/3rdparty/site_config/standard/chrisltd.com.txt | 6 + .../site_config/standard/christianitytoday.com.txt | 22 +-- .../site_config/standard/christianpf.com.txt | 6 +- .../site_config/standard/christies.com.txt | 8 +- .../site_config/standard/chrome.google.com.txt | 14 +- .../site_config/standard/chronicle.com.txt | 30 ++--- .../site_config/standard/ciaosamin.com.txt | 4 + inc/3rdparty/site_config/standard/cicero.de.txt | 62 ++++----- .../site_config/standard/ciperchile.cl.txt | 4 +- inc/3rdparty/site_config/standard/cjr.org.txt | 10 +- .../site_config/standard/classyllama.com.txt | 6 + inc/3rdparty/site_config/standard/clientk.com.txt | 8 +- inc/3rdparty/site_config/standard/clubic.com.txt | 20 +-- inc/3rdparty/site_config/standard/cmswire.com.txt | 8 +- .../site_config/standard/cn.engadget.com.txt | 5 + .../site_config/standard/cn.reuters.com.txt | 5 + inc/3rdparty/site_config/standard/cnet.com.txt | 30 ++--- inc/3rdparty/site_config/standard/cnn.com.txt | 42 +++--- inc/3rdparty/site_config/standard/cnnsi.com.txt | 50 +++---- .../site_config/standard/code.activestate.com.txt | 18 +-- .../site_config/standard/code.fivefilters.org.txt | 1 + .../site_config/standard/code.google.com.txt | 8 +- .../site_config/standard/codeproject.com.txt | 3 + .../site_config/standard/codinghorror.com.txt | 26 ++-- .../site_config/standard/collegehumor.com.txt | 24 ++-- .../standard/communities-dominate.blogs.com.txt | 0 .../standard/community.service-now.com.txt | 12 +- inc/3rdparty/site_config/standard/computer.org.txt | 6 +- .../site_config/standard/computerbase.de.txt | 34 ++--- .../site_config/standard/computerworld.com.txt | 42 +++--- .../site_config/standard/computerworld.dk.txt | 8 +- .../site_config/standard/contemporist.com.txt | 14 +- 
.../standard/conversaciones.nokia.com.txt | 12 +- inc/3rdparty/site_config/standard/cooper.com.txt | 4 + inc/3rdparty/site_config/standard/core77.com.txt | 10 +- .../site_config/standard/counterpunch.org.txt | 10 +- .../site_config/standard/crazybutable.com.txt | 2 +- .../site_config/standard/crimemagazine.com.txt | 0 .../site_config/standard/crimethinc.com.txt | 2 +- inc/3rdparty/site_config/standard/crn.de.txt | 2 +- .../site_config/standard/csmonitor.com.txt | 32 ++--- .../site_config/standard/csnbayarea.com.txt | 10 +- .../site_config/standard/csnphilly.com.txt | 42 +++--- .../site_config/standard/css-tricks.com.txt | 6 + .../site_config/standard/cucharasonica.com.txt | 0 inc/3rdparty/site_config/standard/cw.com.tw.txt | 14 ++ .../site_config/standard/da.feedsportal.com.txt | 6 +- inc/3rdparty/site_config/standard/dagogtid.no.txt | 4 + inc/3rdparty/site_config/standard/dailydot.com.txt | 4 +- inc/3rdparty/site_config/standard/dailykos.com.txt | 20 +-- .../site_config/standard/dailymail.co.uk.txt | 22 +-- .../site_config/standard/dailystar.com.lb.txt | 6 + inc/3rdparty/site_config/standard/danleech.com.txt | 6 + inc/3rdparty/site_config/standard/dansdata.com.txt | 6 +- .../site_config/standard/dantri.com.vn.txt | 7 + .../site_config/standard/daringfireball.net.txt | 12 +- inc/3rdparty/site_config/standard/datanami.com.txt | 4 +- inc/3rdparty/site_config/standard/dcurt.is.txt | 14 +- .../site_config/standard/defomicron.net.txt | 9 ++ .../site_config/standard/delong.typepad.com.txt | 4 +- .../site_config/standard/democracynow.org.txt | 5 + .../site_config/standard/derstandard.at.txt | 22 +-- .../site_config/standard/designtagebuch.de.txt | 18 +-- .../site_config/standard/desitvforum.net.txt | 8 +- inc/3rdparty/site_config/standard/details.com.txt | 14 +- .../standard/developers.facebook.com.txt | 2 +- .../standard/devlinsangle.blogspot.co.at.txt | 8 +- .../standard/dictionary.reference.com.txt | 12 +- .../site_config/standard/diepresse.com.txt | 8 +- 
.../standard/digiphoto.techbang.com.txt | 12 +- .../standard/digital-photography-school.com.txt | 10 +- .../site_config/standard/digitalspy.co.uk.txt | 6 +- inc/3rdparty/site_config/standard/dilbert.com.txt | 17 ++- .../site_config/standard/dinamalar.com.txt | 34 ++--- inc/3rdparty/site_config/standard/dn.se.txt | 54 ++++---- .../site_config/standard/dobreprogramy.pl.txt | 6 + inc/3rdparty/site_config/standard/doctac.com.txt | 12 +- inc/3rdparty/site_config/standard/domusweb.it.txt | 38 +++--- inc/3rdparty/site_config/standard/dou.ua.txt | 12 +- inc/3rdparty/site_config/standard/douban.com.txt | 40 +++--- inc/3rdparty/site_config/standard/dpreview.com.txt | 14 +- inc/3rdparty/site_config/standard/dr.dk.txt | 14 +- .../site_config/standard/dramasonline.com.txt | 18 +-- inc/3rdparty/site_config/standard/drdobbs.com.txt | 0 inc/3rdparty/site_config/standard/drive2.ru.txt | 20 +-- inc/3rdparty/site_config/standard/dropbox.com.txt | 1 + inc/3rdparty/site_config/standard/drupal.org.txt | 12 +- .../standard/dukebasketballreport.com.txt | 18 +-- .../site_config/standard/dushumashang.com.txt | 17 +++ inc/3rdparty/site_config/standard/dvice.com.txt | 14 +- .../site_config/standard/eamesinerudition.com.txt | 12 +- .../site_config/standard/eandt.theiet.org.txt | 12 +- .../site_config/standard/eastoftheweb.com.txt | 32 ++--- inc/3rdparty/site_config/standard/ebay.com.txt | 8 +- inc/3rdparty/site_config/standard/ecetia.com.txt | 0 .../site_config/standard/econlog.econlib.org.txt | 8 +- .../standard/economia.estadao.com.br.txt | 10 +- .../site_config/standard/economist.com.txt | 16 +-- .../site_config/standard/edge-online.com.txt | 24 ++-- inc/3rdparty/site_config/standard/edge.org.txt | 8 +- .../standard/edition.channel5belize.com.txt | 9 ++ .../site_config/standard/edition.cnn.com.txt | 25 ++-- inc/3rdparty/site_config/standard/eetimes.com.txt | 8 ++ inc/3rdparty/site_config/standard/ekultura.hu.txt | 18 +-- inc/3rdparty/site_config/standard/elance.com.txt | 2 +- 
.../standard/elderscrollsonline.com.txt | 22 +++ .../site_config/standard/elektroniknet.de.txt | 50 +++---- .../site_config/standard/elmalpensante.com.txt | 4 +- inc/3rdparty/site_config/standard/elpais.com.txt | 40 +++--- .../site_config/standard/emaratalyoum.com.txt | 7 + .../site_config/standard/en.espnf1.com.txt | 16 +-- inc/3rdparty/site_config/standard/engadget.com.txt | 12 +- .../standard/engineering.tumblr.com.txt | 10 +- .../site_config/standard/english.aljazeera.net.txt | 12 +- inc/3rdparty/site_config/standard/enikos.gr.txt | 16 +-- .../standard/entertainment.timesonline.co.uk.txt | 16 +-- inc/3rdparty/site_config/standard/ericsuh.com.txt | 4 + inc/3rdparty/site_config/standard/es.hu.txt | 18 +-- .../site_config/standard/escapistmagazine.com.txt | 8 +- inc/3rdparty/site_config/standard/espn.go.com.txt | 20 +-- inc/3rdparty/site_config/standard/esquire.com.txt | 21 +-- .../standard/essentialpublicradio.org.txt | 8 +- inc/3rdparty/site_config/standard/etc.se.txt | 10 +- .../site_config/standard/eternabuenosaires.com.txt | 0 .../site_config/standard/eurogamer.net.txt | 14 +- inc/3rdparty/site_config/standard/evo.co.uk.txt | 18 +-- inc/3rdparty/site_config/standard/expressen.se.txt | 19 +-- .../site_config/standard/extracine.com.txt | 0 inc/3rdparty/site_config/standard/f1actual.com.txt | 0 inc/3rdparty/site_config/standard/facebook.com.txt | 5 + inc/3rdparty/site_config/standard/facta.co.jp.txt | 2 +- inc/3rdparty/site_config/standard/falter.at.txt | 32 ++--- .../site_config/standard/fanfiction.net.txt | 8 +- .../site_config/standard/fastcompany.com.txt | 30 ++--- inc/3rdparty/site_config/standard/faz.net.txt | 66 ++++----- inc/3rdparty/site_config/standard/fertigung.de.txt | 23 ++++ .../site_config/standard/fictionpress.com.txt | 6 +- inc/3rdparty/site_config/standard/ficwad.com.txt | 20 +-- .../site_config/standard/finance.yahoo.com.txt | 22 +-- .../standard/findtheswagger.tumblr.com.txt | 16 +-- .../site_config/standard/firstthings.com.txt | 12 +- 
.../site_config/standard/fivechapters.com.txt | 0 .../site_config/standard/fivefilters.org.txt | 0 .../site_config/standard/fivethirtyeight.com.txt | 10 +- .../standard/flyingmachinestudios.com.txt | 2 + inc/3rdparty/site_config/standard/fm4.orf.at.txt | 10 +- inc/3rdparty/site_config/standard/fnal.gov.txt | 26 ++-- inc/3rdparty/site_config/standard/focus.de.txt | 34 ++--- inc/3rdparty/site_config/standard/folklore.org.txt | 4 + inc/3rdparty/site_config/standard/food.com.txt | 11 ++ inc/3rdparty/site_config/standard/fool.com.txt | 20 +-- inc/3rdparty/site_config/standard/forbes.com.txt | 43 +++--- .../site_config/standard/foreignaffairs.com.txt | 34 +++++ .../site_config/standard/foreignpolicy.com.txt | 26 ++-- inc/3rdparty/site_config/standard/forsvaret.no.txt | 14 +- inc/3rdparty/site_config/standard/foxnews.com.txt | 16 +-- .../site_config/standard/freelancer.com.txt | 2 +- .../site_config/standard/freytag-film.com.txt | 6 +- inc/3rdparty/site_config/standard/fria.nu.txt | 8 ++ .../site_config/standard/friatidningen.se.txt | 7 + .../site_config/standard/friendskorner.com.txt | 20 +-- inc/3rdparty/site_config/standard/ft.com.txt | 6 +- .../site_config/standard/ftchinese.com.txt | 18 +++ inc/3rdparty/site_config/standard/ftd.de.txt | 6 +- inc/3rdparty/site_config/standard/fubiz.net.txt | 2 +- .../site_config/standard/futurezone.at.txt | 18 +-- .../site_config/standard/gamasutra.com.txt | 36 ++--- inc/3rdparty/site_config/standard/gameblog.fr.txt | 18 +-- .../site_config/standard/gamechurch.com.txt | 10 ++ inc/3rdparty/site_config/standard/gamer.no.txt | 11 ++ .../site_config/standard/gamereactor.no.txt | 11 ++ .../site_config/standard/garythink.com.txt | 4 +- .../site_config/standard/gasteroprod.com.txt | 4 +- .../site_config/standard/gatopardo.com.txt | 12 +- inc/3rdparty/site_config/standard/gawker.com.txt | 10 +- .../site_config/standard/geeksofdoom.com.txt | 2 +- inc/3rdparty/site_config/standard/geenstijl.nl.txt | 2 +- 
inc/3rdparty/site_config/standard/getnews.jp.txt | 2 +- .../site_config/standard/giantbomb.com.txt | 18 +-- inc/3rdparty/site_config/standard/giga.de.txt | 36 ++--- inc/3rdparty/site_config/standard/gigaom.com.txt | 29 ++-- inc/3rdparty/site_config/standard/gihyo.jp.txt | 2 +- .../site_config/standard/gist.github.com.txt | 10 +- .../standard/givemesomethingtoread.com.txt | 2 +- .../site_config/standard/gizmodo.co.uk.txt | 12 +- inc/3rdparty/site_config/standard/gizmodo.com.txt | 18 ++- .../site_config/standard/gizmodo.uol.com.br.txt | 6 + .../site_config/standard/gizmologia.com.txt | 0 inc/3rdparty/site_config/standard/gizmovil.com.txt | 0 inc/3rdparty/site_config/standard/global.txt | 22 ++- .../site_config/standard/globalissues.org.txt | 28 ++-- .../standard/globoesporte.globo.com.txt | 25 ++++ .../site_config/standard/gloswielkopolski.pl.txt | 8 ++ inc/3rdparty/site_config/standard/goal.com.txt | 30 ++--- inc/3rdparty/site_config/standard/golem.de.txt | 48 +++---- inc/3rdparty/site_config/standard/good.is.txt | 6 +- inc/3rdparty/site_config/standard/goodfil.ms.txt | 2 + inc/3rdparty/site_config/standard/gossip-tv.gr.txt | 26 ++-- .../site_config/standard/goteborgsfria.se.txt | 7 + .../site_config/standard/gothamist.com.txt | 10 +- .../site_config/standard/gotomanager.com.txt | 38 +++--- inc/3rdparty/site_config/standard/gov.ky.txt | 4 + inc/3rdparty/site_config/standard/gp.se.txt | 11 ++ inc/3rdparty/site_config/standard/gq.com.txt | 14 +- .../site_config/standard/grantland.com.txt | 38 +++--- .../standard/greatergreaterwashington.org.txt | 18 +-- .../site_config/standard/groups.drupal.org.txt | 6 +- inc/3rdparty/site_config/standard/gulfnews.com.txt | 8 +- inc/3rdparty/site_config/standard/guokr.com.txt | 42 +++--- inc/3rdparty/site_config/standard/haberler.com.txt | 6 +- inc/3rdparty/site_config/standard/hackmake.org.txt | 7 + .../site_config/standard/halo.bungie.org.txt | 6 +- .../standard/hammers.theoffside.com.txt | 10 +- 
.../site_config/standard/handelsblatt.com.txt | 31 +++++ .../site_config/standard/hanselman.com.txt | 4 +- inc/3rdparty/site_config/standard/hardware.fr.txt | 8 +- inc/3rdparty/site_config/standard/hardware.no.txt | 16 +++ inc/3rdparty/site_config/standard/hbr.org.txt | 13 +- .../site_config/standard/headrush.typepad.com.txt | 14 ++ .../site_config/standard/heise-online.mobi.txt | 2 +- inc/3rdparty/site_config/standard/heise.de.txt | 12 +- inc/3rdparty/site_config/standard/hemmings.com.txt | 9 ++ inc/3rdparty/site_config/standard/heroturko.me.txt | 6 + inc/3rdparty/site_config/standard/hespress.com.txt | 12 +- inc/3rdparty/site_config/standard/hiamag.com.txt | 3 + .../site_config/standard/highscalability.com.txt | 2 +- inc/3rdparty/site_config/standard/hiperpop.com.txt | 0 .../site_config/standard/hiphopleeft.nl.txt | 4 +- .../site_config/standard/historytoday.com.txt | 18 +-- inc/3rdparty/site_config/standard/hmercer.com.txt | 6 +- .../site_config/standard/hollywoodlife.com.txt | 22 +++ .../site_config/standard/hometheaterreview.com.txt | 4 +- .../site_config/standard/hosted.ap.org.txt | 8 +- .../site_config/standard/howtogeek.com.txt | 11 ++ inc/3rdparty/site_config/standard/hs.fi.txt | 2 +- inc/3rdparty/site_config/standard/ht.ly.txt | 4 +- .../site_config/standard/huffingtonpost.com.txt | 37 +++--- .../site_config/standard/humantransit.org.txt | 6 +- .../site_config/standard/hurriyet.com.tr.txt | 12 +- inc/3rdparty/site_config/standard/hvg.hu.txt | 14 +- .../site_config/standard/hypebeast.com.txt | 18 +-- .../site_config/standard/icannabis.tumblr.com.txt | 9 ++ .../site_config/standard/idealog.co.nz.txt | 12 ++ .../site_config/standard/idlewords.com.txt | 10 +- .../site_config/standard/igeneration.fr.txt | 6 +- .../standard/ignoredbydinosaurs.com.txt | 10 +- inc/3rdparty/site_config/standard/ilounge.com.txt | 22 +-- .../site_config/standard/ilyabirman.ru.txt | 6 +- inc/3rdparty/site_config/standard/inc.com.txt | 40 +++--- 
.../site_config/standard/independent.co.uk.txt | 14 +- .../site_config/standard/indiatimes.com.txt | 10 +- .../site_config/standard/inessential.com.txt | 6 +- .../site_config/standard/info.abril.com.br.txt | 4 +- inc/3rdparty/site_config/standard/infoq.com.txt | 24 ++-- .../site_config/standard/informador.com.mx.txt | 14 +- .../site_config/standard/information.dk.txt | 10 +- .../standard/informationarchitects.net.txt | 16 +-- .../standard/informationclearinghouse.info.txt | 10 +- inc/3rdparty/site_config/standard/informit.com.txt | 12 +- .../site_config/standard/infoworld.com.txt | 20 +-- inc/3rdparty/site_config/standard/infzm.com.txt | 14 +- .../site_config/standard/inhabitat.com.txt | 12 +- inc/3rdparty/site_config/standard/instagr.am.txt | 8 +- .../site_config/standard/interest.co.nz.txt | 0 .../site_config/standard/iolanguage.com.txt | 0 inc/3rdparty/site_config/standard/ipadclub.nl.txt | 10 +- .../site_config/standard/ipadplanet.nl.txt | 10 +- .../site_config/standard/iphoneclub.nl.txt | 12 +- .../site_config/standard/iphonehacks.com.txt | 16 +-- .../site_config/standard/iplaysoft.com.txt | 0 inc/3rdparty/site_config/standard/isource.com.txt | 8 +- inc/3rdparty/site_config/standard/itavisen.no.txt | 8 +- .../site_config/standard/itmedia.co.jp.txt | 8 ++ .../site_config/standard/itstactical.com.txt | 20 +-- inc/3rdparty/site_config/standard/itwire.com.txt | 5 + inc/3rdparty/site_config/standard/itworld.com.txt | 6 +- inc/3rdparty/site_config/standard/izismile.com.txt | 6 +- inc/3rdparty/site_config/standard/jalopnik.com.txt | 0 inc/3rdparty/site_config/standard/jandan.net.txt | 8 +- .../site_config/standard/jetzt.sueddeutsche.de.txt | 40 +++--- inc/3rdparty/site_config/standard/jjahnke.net.txt | 4 +- .../site_config/standard/jobbank.gc.ca.txt | 8 +- .../site_config/standard/joelonsoftware.com.txt | 38 +++--- inc/3rdparty/site_config/standard/jouire.com.txt | 2 +- inc/3rdparty/site_config/standard/joystiq.com.txt | 12 +- .../standard/juedische-allgemeine.de.txt 
| 36 ++--- inc/3rdparty/site_config/standard/juppy.org.txt | 12 +- inc/3rdparty/site_config/standard/kachestvo.ru.txt | 2 +- .../site_config/standard/kachiblog.com.txt | 7 + .../site_config/standard/kathimerini.gr.txt | 4 + .../site_config/standard/kenrockwell.com.txt | 10 +- inc/3rdparty/site_config/standard/kicker.de.txt | 38 +++--- .../site_config/standard/kickstarter.com.txt | 10 +- .../site_config/standard/kingarthurflour.com.txt | 4 +- inc/3rdparty/site_config/standard/kotaku.com.txt | 0 inc/3rdparty/site_config/standard/kottke.org.txt | 10 +- .../site_config/standard/kumailplus.com.txt | 2 +- inc/3rdparty/site_config/standard/kumb.com.txt | 16 +-- .../site_config/standard/kwerfeldein.de.txt | 14 +- .../site_config/standard/landetsfria.se.txt | 7 + .../site_config/standard/laphamsquarterly.org.txt | 24 ++-- .../site_config/standard/laprensagrafica.com.txt | 2 +- .../site_config/standard/laquadrature.net.txt | 18 +-- .../site_config/standard/lareviewofbooks.org.txt | 24 ++-- inc/3rdparty/site_config/standard/latimes.com.txt | 18 +-- .../site_config/standard/laughingsquid.com.txt | 2 +- inc/3rdparty/site_config/standard/leancrew.com.txt | 14 +- inc/3rdparty/site_config/standard/lefigaro.fr.txt | 14 +- inc/3rdparty/site_config/standard/lemonde.fr.txt | 31 +++-- .../site_config/standard/lesnumeriques.com.txt | 16 +-- inc/3rdparty/site_config/standard/letemps.ch.txt | 2 +- inc/3rdparty/site_config/standard/libcom.org.txt | 7 + .../site_config/standard/lifeandculture.fr.txt | 2 +- .../site_config/standard/lifehacker.com.txt | 89 +++++++------ .../standard/lifestyle.inquirer.net.txt | 7 + .../site_config/standard/lifeweek.com.cn.txt | 23 ++++ inc/3rdparty/site_config/standard/linkedin.com.txt | 0 .../site_config/standard/livescience.com.txt | 20 +++ inc/3rdparty/site_config/standard/longform.org.txt | 2 +- .../site_config/standard/loopinsight.com.txt | 16 +-- .../site_config/standard/lostgarden.com.txt | 2 +- inc/3rdparty/site_config/standard/lovefm.com.txt | 6 + 
.../site_config/standard/lovetv.com.bz.txt | 9 ++ inc/3rdparty/site_config/standard/lrb.co.uk.txt | 20 +-- .../standard/luminous-landscape.com.txt | 8 +- inc/3rdparty/site_config/standard/luxuo.com.txt | 4 + inc/3rdparty/site_config/standard/m.bbc.co.uk.txt | 12 +- inc/3rdparty/site_config/standard/m.douban.com.txt | 13 ++ .../site_config/standard/m.vanityfair.com.txt | 11 ++ inc/3rdparty/site_config/standard/mac4ever.com.txt | 6 +- .../site_config/standard/macdrifter.com.txt | 2 +- .../standard/macformat.techradar.com.txt | 14 +- .../site_config/standard/macgeneration.com.txt | 6 +- .../site_config/standard/macmagazine.com.br.txt | 38 +++--- .../site_config/standard/macrumors.com.txt | 20 +-- .../site_config/standard/macstories.net.txt | 14 +- .../site_config/standard/mactalk.com.au.txt | 4 +- .../site_config/standard/mactechnews.de.txt | 2 +- inc/3rdparty/site_config/standard/macworld.com.txt | 46 +++---- inc/3rdparty/site_config/standard/mainichi.jp.txt | 2 +- inc/3rdparty/site_config/standard/mainpost.de.txt | 52 ++++---- .../site_config/standard/makeuseof.com.txt | 10 +- .../site_config/standard/manager.co.th.txt | 26 ++++ inc/3rdparty/site_config/standard/marco.org.txt | 14 +- .../site_config/standard/marksdailyapple.com.txt | 0 .../site_config/standard/martinfowler.com.txt | 12 +- inc/3rdparty/site_config/standard/mashable.com.txt | 13 +- .../site_config/standard/matt.might.net.txt | 5 + .../site_config/standard/mattcutts.com.txt | 0 inc/3rdparty/site_config/standard/mbl.is.txt | 0 .../site_config/standard/medialens.org.txt | 4 +- inc/3rdparty/site_config/standard/medium.com.txt | 7 + inc/3rdparty/site_config/standard/megamp3.eu.txt | 8 ++ .../site_config/standard/menshealth.com.txt | 30 ++--- .../site_config/standard/metafilter.com.txt | 8 ++ .../site_config/standard/mforum.cari.com.my.txt | 6 + inc/3rdparty/site_config/standard/mikeash.com.txt | 6 +- .../site_config/standard/mikeindustries.com.txt | 14 +- .../standard/minnesota.publicradio.org.txt | 16 +-- 
inc/3rdparty/site_config/standard/minnpost.com.txt | 6 +- .../site_config/standard/mirrorfootball.co.uk.txt | 2 +- inc/3rdparty/site_config/standard/mises.org.txt | 6 +- inc/3rdparty/site_config/standard/mlb.mlb.com.txt | 26 ++-- .../site_config/standard/mlb.sbnation.com.txt | 24 ++-- .../site_config/standard/mlssoccer.com.txt | 8 +- .../site_config/standard/mmo-champion.com.txt | 6 +- inc/3rdparty/site_config/standard/mnn.com.txt | 18 +-- inc/3rdparty/site_config/standard/mno.hu.txt | 24 ++-- .../site_config/standard/mobile.nytimes.com.txt | 4 + .../site_config/standard/mobile.slate.com.txt | 6 +- .../standard/mobileopportunity.blogspot.com.txt | 18 +-- .../site_config/standard/modernghana.com.txt | 14 +- .../site_config/standard/money.cnn.com.txt | 46 +++---- .../site_config/standard/monkeyzen.com.txt | 0 inc/3rdparty/site_config/standard/moonsault.de.txt | 22 +-- .../standard/moreintelligentlife.com.txt | 12 +- .../site_config/standard/motherboard.vice.com.txt | 8 +- .../site_config/standard/mothering.com.txt | 10 +- .../site_config/standard/motherjones.com.txt | 26 ++-- .../site_config/standard/motorfull.com.txt | 0 .../site_config/standard/movie.douban.com.txt | 12 ++ .../site_config/standard/msdn.microsoft.com.txt | 2 +- .../site_config/standard/msnbc.msn.com.txt | 38 +++--- .../site_config/standard/myfoxatlanta.com.txt | 5 + .../site_config/standard/myfoxboston.com.txt | 6 +- .../site_config/standard/myrecipes.com.txt | 22 +-- inc/3rdparty/site_config/standard/narenji.ir.txt | 0 inc/3rdparty/site_config/standard/nasa.gov.txt | 12 +- inc/3rdparty/site_config/standard/nbweekly.com.txt | 16 +-- inc/3rdparty/site_config/standard/neh.gov.txt | 30 ++--- inc/3rdparty/site_config/standard/neomoney.co.txt | 2 +- .../site_config/standard/net-security.org.txt | 10 +- .../site_config/standard/netmagazine.com.txt | 28 ++-- .../site_config/standard/netzpolitik.org.txt | 8 +- .../site_config/standard/newleftproject.org.txt | 3 + .../site_config/standard/newmatilda.com.txt 
| 14 +- .../site_config/standard/newrepublic.com.txt | 8 ++ .../site_config/standard/news-gazette.com.txt | 12 +- .../site_config/standard/news.cnet.com.txt | 20 +-- .../site_config/standard/news.detik.com.txt | 12 +- .../site_config/standard/news.kanaloco.jp.txt | 14 +- .../site_config/standard/news.mynavi.jp.txt | 18 +-- inc/3rdparty/site_config/standard/news.orf.at.txt | 18 +-- .../site_config/standard/news.rambler.ru.txt | 14 +- .../site_config/standard/news.techmeme.com.txt | 6 +- .../site_config/standard/news.yahoo.com.txt | 20 +-- .../site_config/standard/news.ycombinator.com.txt | 2 +- inc/3rdparty/site_config/standard/news.zing.vn.txt | 3 + inc/3rdparty/site_config/standard/news247.gr.txt | 6 + inc/3rdparty/site_config/standard/newsbomb.gr.txt | 16 +-- inc/3rdparty/site_config/standard/newsle.com.txt | 0 inc/3rdparty/site_config/standard/newsmill.se.txt | 22 +-- .../site_config/standard/newsunspun.org.txt | 18 +-- inc/3rdparty/site_config/standard/newsweek.com.txt | 6 + inc/3rdparty/site_config/standard/newswise.com.txt | 17 +++ .../site_config/standard/newyorker.com.txt | 21 +-- inc/3rdparty/site_config/standard/next-gen.biz.txt | 28 ++-- inc/3rdparty/site_config/standard/nfl.com.txt | 18 +-- .../standard/ngm.nationalgeographic.com.txt | 10 +- inc/3rdparty/site_config/standard/nhk.or.jp.txt | 0 .../standard/nintendoworldreport.com.txt | 22 +-- .../site_config/standard/nojesguiden.se.txt | 6 +- .../site_config/standard/northumberlandview.ca.txt | 20 +-- inc/3rdparty/site_config/standard/nosalty.hu.txt | 6 + .../site_config/standard/nplusonemag.com.txt | 8 +- inc/3rdparty/site_config/standard/npr.org.txt | 66 ++++----- inc/3rdparty/site_config/standard/nybooks.com.txt | 24 ++-- inc/3rdparty/site_config/standard/nymag.com.txt | 14 +- inc/3rdparty/site_config/standard/nyteknik.se.txt | 12 +- inc/3rdparty/site_config/standard/nytimes.com.txt | 85 +++++++----- inc/3rdparty/site_config/standard/nzz.ch.txt | 22 +-- 
inc/3rdparty/site_config/standard/observer.com.txt | 12 +- inc/3rdparty/site_config/standard/off.net.mk.txt | 10 +- inc/3rdparty/site_config/standard/omaha.com.txt | 0 inc/3rdparty/site_config/standard/omiliya.org.txt | 14 +- inc/3rdparty/site_config/standard/on.net.mk.txt | 6 +- .../site_config/standard/online.wsj.com.txt | 48 +++---- .../site_config/standard/onlinewelten.com.txt | 0 .../site_config/standard/onstartups.com.txt | 0 .../site_config/standard/ontologicalgeek.com.txt | 8 ++ .../site_config/standard/opensource.org.txt | 0 .../site_config/standard/openthemagazine.com.txt | 4 +- inc/3rdparty/site_config/standard/openwebx.org.txt | 6 +- inc/3rdparty/site_config/standard/orf.at.txt | 18 +-- inc/3rdparty/site_config/standard/origo.hu.txt | 32 ++--- inc/3rdparty/site_config/standard/oschina.net.txt | 3 + .../site_config/standard/pakistantvdekho.com.txt | 20 +-- inc/3rdparty/site_config/standard/pakmedia.tv.txt | 17 +++ inc/3rdparty/site_config/standard/pandagon.net.txt | 6 +- .../site_config/standard/pandodaily.com.txt | 6 +- inc/3rdparty/site_config/standard/panic.com.txt | 2 +- .../site_config/standard/papodehomem.com.br.txt | 6 + .../site_config/standard/parislemon.com.txt | 8 +- .../site_config/standard/parliament.uk.txt | 2 +- inc/3rdparty/site_config/standard/pastebin.com.txt | 8 +- .../standard/pastepad.fivefilters.org.txt | 6 +- inc/3rdparty/site_config/standard/pathawks.com.txt | 12 +- inc/3rdparty/site_config/standard/pcast.me.txt | 0 inc/3rdparty/site_config/standard/pcmag.com.txt | 16 +-- inc/3rdparty/site_config/standard/pcworld.com.txt | 36 ++--- .../site_config/standard/penny-arcade.com.txt | 44 +++--- .../site_config/standard/pentaxforums.com.txt | 0 .../standard/philadelphiaeagles.com.txt | 8 +- inc/3rdparty/site_config/standard/philly.com.txt | 16 +-- .../site_config/standard/photo.tutsplus.com.txt | 8 +- inc/3rdparty/site_config/standard/php.net.txt | 8 +- .../site_config/standard/physicstoday.org.txt | 10 +- 
.../site_config/standard/pinterest.com.txt | 5 + .../site_config/standard/pitchfork.com.txt | 28 ++-- inc/3rdparty/site_config/standard/pittnews.com.txt | 12 +- .../standard/pittsburgh.pirates.mlb.com.txt | 26 ++-- .../site_config/standard/pittsburghlive.com.txt | 10 +- .../standard/pittsburghmagazine.com.txt | 12 +- .../standard/pittsburghpanthers.com.txt | 4 +- .../site_config/standard/pittscriptblog.com.txt | 12 +- .../site_config/standard/planetvita.de.txt | 5 + inc/3rdparty/site_config/standard/playboy.com.txt | 8 +- .../site_config/standard/plus.google.com.txt | 32 ++--- .../site_config/standard/plzkthxbai.com.txt | 4 +- .../standard/pogue.blogs.nytimes.com.txt | 4 +- inc/3rdparty/site_config/standard/politico.com.txt | 26 ++-- .../site_config/standard/politifact.com.txt | 4 +- inc/3rdparty/site_config/standard/politiken.dk.txt | 22 +-- inc/3rdparty/site_config/standard/polygon.com.txt | 34 +++++ .../site_config/standard/popularmechanics.com.txt | 12 +- .../site_config/standard/portertech.ca.txt | 3 + .../site_config/standard/positioningmag.com.txt | 34 ++--- .../site_config/standard/post-gazette.com.txt | 50 +++---- inc/3rdparty/site_config/standard/posta.com.tr.txt | 26 ++-- inc/3rdparty/site_config/standard/prb.org.txt | 12 +- .../site_config/standard/prog21.dadgum.com.txt | 14 +- inc/3rdparty/site_config/standard/prolost.com.txt | 4 +- .../site_config/standard/propublica.org.txt | 18 +-- inc/3rdparty/site_config/standard/prosa.dk.txt | 4 +- .../standard/prospectmagazine.co.uk.txt | 50 +++---- .../site_config/standard/protothema.gr.txt | 6 + .../site_config/standard/psychologytoday.com.txt | 14 +- .../standard/publications.parliament.uk.txt | 4 +- inc/3rdparty/site_config/standard/publico.pt.txt | 12 ++ .../site_config/standard/purpleplanetmedia.com.txt | 4 +- inc/3rdparty/site_config/standard/qctimes.com.txt | 5 + .../site_config/standard/quantumdiaries.org.txt | 24 ++-- inc/3rdparty/site_config/standard/queerty.com.txt | 2 +- 
inc/3rdparty/site_config/standard/quepasa.cl.txt | 8 +- inc/3rdparty/site_config/standard/quora.com.txt | 30 ++--- .../site_config/standard/racjonalista.pl.txt | 5 + .../site_config/standard/radar.oreilly.com.txt | 2 +- .../site_config/standard/radionz.co.nz.txt | 2 +- .../site_config/standard/randsinrepose.com.txt | 20 +-- .../site_config/standard/readability.com.txt | 2 +- .../site_config/standard/readwriteweb.com.txt | 14 +- inc/3rdparty/site_config/standard/real.gr.txt | 4 +- inc/3rdparty/site_config/standard/recipe.com.txt | 18 +-- .../site_config/standard/red-hot-girls.com.txt | 8 +- inc/3rdparty/site_config/standard/reddit.com.txt | 36 ++--- .../site_config/standard/redmondpie.com.txt | 24 ++-- .../site_config/standard/redtape.msnbc.msn.com.txt | 36 ++--- inc/3rdparty/site_config/standard/reflets.info.txt | 6 +- .../site_config/standard/renenekuda.cz.txt | 2 +- inc/3rdparty/site_config/standard/resume.se.txt | 9 ++ .../site_config/standard/retrieverweekly.com.txt | 8 +- inc/3rdparty/site_config/standard/reuters.com.txt | 18 +-- .../standard/revistapiaui.estadao.com.br.txt | 16 +-- .../site_config/standard/rezeptwelt.de.txt | 5 + .../standard/richardmuscat.wordpress.com.txt | 6 +- .../site_config/standard/ritemail.blogspot.com.txt | 8 +- inc/3rdparty/site_config/standard/ritholtz.com.txt | 5 + .../standard/robertsspaceindustries.com.txt | 4 + .../site_config/standard/robots.thoughtbot.com.txt | 5 + .../site_config/standard/rockpapershotgun.com.txt | 12 +- .../site_config/standard/rodrigo.sharpcube.com.txt | 10 +- .../site_config/standard/rogerebert.com.txt | 12 +- .../site_config/standard/rolfinjapan.nl.txt | 10 +- .../site_config/standard/rollingstone.com.txt | 0 .../site_config/standard/rottentomatoes.com.txt | 18 +-- .../site_config/standard/roughtype.com.txt | 6 +- inc/3rdparty/site_config/standard/roy.gbiv.com.txt | 0 inc/3rdparty/site_config/standard/rpgsite.net.txt | 6 +- inc/3rdparty/site_config/standard/rubysfera.pl.txt | 14 +- 
inc/3rdparty/site_config/standard/ruhlman.com.txt | 8 +- inc/3rdparty/site_config/standard/ruttloff.org.txt | 2 +- inc/3rdparty/site_config/standard/salon.com.txt | 20 +-- inc/3rdparty/site_config/standard/salzburg.com.txt | 10 +- .../site_config/standard/sanpedrosun.com.txt | 10 ++ .../site_config/standard/saveyourself.ca.txt | 46 +++---- inc/3rdparty/site_config/standard/sayidaty.net.txt | 4 + inc/3rdparty/site_config/standard/sbnation.com.txt | 52 ++++---- inc/3rdparty/site_config/standard/schneier.com.txt | 48 +++---- .../site_config/standard/science.orf.at.txt | 18 +-- .../site_config/standard/scienceblogs.de.txt | 20 +-- .../site_config/standard/scienceticker.info.txt | 18 +-- .../standard/scientificamerican.com.txt | 48 +++---- inc/3rdparty/site_config/standard/scilogs.de.txt | 15 +++ .../site_config/standard/scotusblog.com.txt | 12 +- inc/3rdparty/site_config/standard/scraplab.net.txt | 2 +- .../site_config/standard/scripting.com.txt | 12 +- .../site_config/standard/sct.temple.edu.txt | 6 +- .../standard/searchenginejournal.com.txt | 5 + .../site_config/standard/searchengineland.com.txt | 36 ++--- .../standard/seattletransitblog.com.txt | 0 inc/3rdparty/site_config/standard/sebbo.net.txt | 4 +- .../site_config/standard/select.yeeyan.org.txt | 18 +++ .../site_config/standard/seriouseats.com.txt | 26 ++-- .../site_config/standard/sf.curbed.com.txt | 10 +- inc/3rdparty/site_config/standard/sf.eater.com.txt | 10 +- inc/3rdparty/site_config/standard/sfgate.com.txt | 20 +-- inc/3rdparty/site_config/standard/sfweekly.com.txt | 2 +- inc/3rdparty/site_config/standard/shabayek.com.txt | 2 +- .../site_config/standard/shawnblanc.net.txt | 20 +-- .../site_config/standard/shifteleven.com.txt | 8 +- inc/3rdparty/site_config/standard/siasat.pk.txt | 20 +-- .../site_config/standard/signalscv.com.txt | 10 ++ .../site_config/standard/simonwillison.net.txt | 6 +- .../singaporeanstocksinvestor.blogspot.com.txt | 6 +- .../site_config/standard/singularityhub.com.txt | 0 
.../site_config/standard/sintagoulis.gr.txt | 10 +- inc/3rdparty/site_config/standard/sivers.org.txt | 6 + .../site_config/standard/skanesfria.se.txt | 7 + .../site_config/standard/slashfilm.com.txt | 28 ++-- inc/3rdparty/site_config/standard/slate.com.txt | 36 ++--- .../site_config/standard/slice.seriouseats.com.txt | 26 ++-- .../site_config/standard/slog.thestranger.com.txt | 4 +- .../site_config/standard/smartinvestor.de.txt | 6 +- inc/3rdparty/site_config/standard/sme.sk.txt | 2 +- .../site_config/standard/smithsonianmag.com.txt | 36 ++--- .../site_config/standard/smokingapples.com.txt | 8 +- .../site_config/standard/somethingawful.com.txt | 17 +++ .../site_config/standard/songshuhui.net.txt | 7 + .../site_config/standard/sourcebooks.com.txt | 6 +- .../site_config/standard/spectator.co.uk.txt | 10 +- .../site_config/standard/spectrum.ieee.org.txt | 2 +- inc/3rdparty/site_config/standard/speirs.org.txt | 0 inc/3rdparty/site_config/standard/spiegel.de.txt | 148 ++++++++++----------- .../site_config/standard/spiked-online.com.txt | 7 + inc/3rdparty/site_config/standard/spin.com.txt | 6 +- inc/3rdparty/site_config/standard/splatf.com.txt | 6 +- .../site_config/standard/splitsider.com.txt | 4 +- .../site_config/standard/sport.detik.com.txt | 12 +- inc/3rdparty/site_config/standard/sport.orf.at.txt | 18 +-- inc/3rdparty/site_config/standard/sport365.fr.txt | 8 ++ .../site_config/standard/sports.espn.go.com.txt | 22 +-- .../site_config/standard/sports.yahoo.com.txt | 14 +- .../site_config/standard/sportschau.de.txt | 40 +++--- .../standard/sportsillustrated.cnn.com.txt | 50 +++---- .../site_config/standard/sprengsatz.de.txt | 6 +- inc/3rdparty/site_config/standard/sqlite.org.txt | 12 +- .../site_config/standard/squashed.tumblr.com.txt | 4 +- .../site_config/standard/stackoverflow.com.txt | 24 ++-- .../site_config/standard/stalbansreview.co.uk.txt | 26 ++-- .../site_config/standard/standard.co.uk.txt | 28 ++-- .../site_config/standard/staradvertiser.com.txt | 20 +-- 
.../site_config/standard/stephenfry.com.txt | 12 +- .../site_config/standard/stlbeacon.org.txt | 6 +- .../site_config/standard/stockholm.etc.se.txt | 6 +- .../site_config/standard/stockholmsfria.nu.txt | 7 + .../site_config/standard/straightdope.com.txt | 6 + .../site_config/standard/streetsblog.net.txt | 10 +- inc/3rdparty/site_config/standard/stuff.co.nz.txt | 42 +++--- .../site_config/standard/stumbleupon.com.txt | 6 +- .../site_config/standard/subtraction.com.txt | 32 ++--- .../site_config/standard/sueddeutsche.de.txt | 34 ++--- inc/3rdparty/site_config/standard/summify.com.txt | 0 inc/3rdparty/site_config/standard/suntimes.com.txt | 24 ++-- inc/3rdparty/site_config/standard/svd.se.txt | 16 ++- inc/3rdparty/site_config/standard/svt.se.txt | 16 +++ .../site_config/standard/sydsvenskan.se.txt | 29 ++-- .../site_config/standard/symmetrymagazine.org.txt | 20 +-- .../standard/sz-magazin.sueddeutsche.de.txt | 26 ++-- inc/3rdparty/site_config/standard/sz.de.txt | 18 +++ .../site_config/standard/tagesschau.de.txt | 42 +++--- inc/3rdparty/site_config/standard/tampabay.com.txt | 6 +- .../site_config/standard/taptaptap.com.txt | 4 +- .../site_config/standard/tasteofhome.com.txt | 26 ++-- inc/3rdparty/site_config/standard/taz.de.txt | 12 +- inc/3rdparty/site_config/standard/tbray.org.txt | 6 +- inc/3rdparty/site_config/standard/tcmanila.tk.txt | 7 + inc/3rdparty/site_config/standard/tcng.org.txt | 4 +- .../site_config/standard/tech.fortune.cnn.com.txt | 4 +- .../site_config/standard/tech.gilt.com.txt | 5 + .../site_config/standard/tech.sina.com.cn.txt | 18 +-- .../site_config/standard/techcrunch.com.txt | 34 ++--- inc/3rdparty/site_config/standard/techdirt.com.txt | 20 +-- inc/3rdparty/site_config/standard/techhive.com.txt | 18 +++ inc/3rdparty/site_config/standard/techmeme.com.txt | 4 +- .../standard/technicallyjordan.tumblr.com.txt | 14 +- .../site_config/standard/technologizer.com.txt | 5 + .../site_config/standard/technologyreview.com.txt | 30 ++--- 
.../site_config/standard/techpinions.com.txt | 10 +- .../site_config/standard/techradar.com.txt | 20 +-- inc/3rdparty/site_config/standard/telegraaf.nl.txt | 14 +- .../site_config/standard/telegraph.co.uk.txt | 16 +-- .../site_config/standard/thanhnien.com.vn.txt | 4 + .../site_config/standard/the-magazine.org.txt | 3 + .../site_config/standard/theage.com.au.txt | 5 + .../standard/theamericanscholar.org.txt | 13 ++ .../site_config/standard/theappleblog.com.txt | 2 +- .../site_config/standard/theatlantic.com.txt | 36 ++--- .../site_config/standard/theatlanticcities.com.txt | 17 +++ .../site_config/standard/thebostonchannel.com.txt | 12 +- .../site_config/standard/thebrowser.com.txt | 16 +-- .../site_config/standard/thecarton.net.txt | 18 +-- inc/3rdparty/site_config/standard/thedaily.com.txt | 46 +++---- .../site_config/standard/thedailybeast.com.txt | 12 +- .../site_config/standard/thedailymash.co.uk.txt | 26 ++-- .../site_config/standard/thedisneyblog.com.txt | 7 + .../standard/theeuropean-magazine.com.txt | 17 +++ .../site_config/standard/thefilmexperience.net.txt | 0 .../standard/thegamedesignforum.com.txt | 14 ++ .../site_config/standard/theglobalmail.org.txt | 78 +++++------ .../site_config/standard/theglobeandmail.com.txt | 6 +- .../standard/thegreatdiscontent.com.txt | 6 + .../site_config/standard/theguardian.com.txt | 13 ++ .../site_config/standard/theindychannel.com.txt | 22 +-- .../site_config/standard/themarker.com.txt | 11 ++ .../site_config/standard/themillions.com.txt | 16 +-- .../standard/themuseumofinnocence.com.txt | 10 +- .../site_config/standard/thenation.com.txt | 22 +-- .../standard/thenetworkgarden.blogs.com.txt | 4 +- .../site_config/standard/thenextgeneration.org.txt | 8 ++ .../site_config/standard/thenextweb.com.txt | 22 +-- .../site_config/standard/theoaklandpress.com.txt | 2 +- inc/3rdparty/site_config/standard/theonion.com.txt | 18 +-- .../site_config/standard/thepioneerwoman.com.txt | 18 +-- 
.../site_config/standard/theregister.co.uk.txt | 13 +- inc/3rdparty/site_config/standard/theroot.com.txt | 2 +- .../site_config/standard/therumpus.net.txt | 4 +- .../site_config/standard/thesiasat.com.txt | 20 +-- .../site_config/standard/thesimpledollar.com.txt | 4 +- .../site_config/standard/thespoiler.co.uk.txt | 2 +- inc/3rdparty/site_config/standard/thespoof.com.txt | 16 +-- .../site_config/standard/thestranger.com.txt | 20 +-- .../site_config/standard/thestreet.com.txt | 48 +++---- .../site_config/standard/thethaovanhoa.vn.txt | 0 inc/3rdparty/site_config/standard/theverge.com.txt | 79 ++++++----- inc/3rdparty/site_config/standard/theweek.com.txt | 4 +- .../site_config/standard/thinkprogress.org.txt | 4 +- .../site_config/standard/thisdaylive.com.txt | 2 +- .../site_config/standard/thisismynext.com.txt | 12 +- inc/3rdparty/site_config/standard/tidbits.com.txt | 4 +- inc/3rdparty/site_config/standard/time.com.txt | 26 ++-- .../standard/timeshighereducation.co.uk.txt | 8 +- inc/3rdparty/site_config/standard/tipb.com.txt | 16 +-- inc/3rdparty/site_config/standard/tnr.com.txt | 32 ++--- .../site_config/standard/tomdispatch.com.txt | 8 +- .../site_config/standard/tomshardware.com.txt | 12 +- .../site_config/standard/tomshardware.de.txt | 20 +-- .../site_config/standard/toolsandtoys.net.txt | 8 +- .../site_config/standard/tracks.ranea.org.txt | 14 ++ .../site_config/standard/trailer.web-view.net.txt | 0 .../site_config/standard/trailerzone.de.txt | 9 ++ .../site_config/standard/traningslara.se.txt | 12 +- inc/3rdparty/site_config/standard/triblive.com.txt | 24 ++-- inc/3rdparty/site_config/standard/truthdig.com.txt | 22 +-- .../site_config/standard/tthfanfic.org.txt | 4 +- inc/3rdparty/site_config/standard/tthor.com.txt | 0 inc/3rdparty/site_config/standard/tuaw.com.txt | 8 +- .../site_config/standard/tuckreview.com.txt | 10 +- inc/3rdparty/site_config/standard/tvtropes.org.txt | 36 ++--- inc/3rdparty/site_config/standard/twitter.com.txt | 16 +-- 
inc/3rdparty/site_config/standard/uefa.com.txt | 10 +- .../site_config/standard/uk.xbox360.ign.com.txt | 42 +++--- .../site_config/standard/uni-watch.com.txt | 30 ++--- .../site_config/standard/unwinnable.com.txt | 9 ++ .../site_config/standard/uppsalafria.se.txt | 7 + .../site_config/standard/urbandictionary.com.txt | 6 +- inc/3rdparty/site_config/standard/usatoday.com.txt | 8 ++ inc/3rdparty/site_config/standard/usccb.org.txt | 8 +- inc/3rdparty/site_config/standard/useit.com.txt | 12 +- inc/3rdparty/site_config/standard/usfirst.org.txt | 6 + .../site_config/standard/utdailybeacon.com.txt | 5 + inc/3rdparty/site_config/standard/ux.artu.tv.txt | 10 +- .../standard/uzivatelsketestovani.cz.txt | 0 .../site_config/standard/vanityfair.com.txt | 58 ++++---- inc/3rdparty/site_config/standard/varingen.no.txt | 6 +- .../site_config/standard/varsity.co.uk.txt | 4 +- inc/3rdparty/site_config/standard/vea.gov.vn.txt | 7 + inc/3rdparty/site_config/standard/vedomosti.ru.txt | 2 +- .../site_config/standard/veggbilder.no.txt | 6 +- inc/3rdparty/site_config/standard/vemedio.com.txt | 8 +- .../site_config/standard/venturebeat.com.txt | 8 +- inc/3rdparty/site_config/standard/version2.dk.txt | 20 +-- .../site_config/standard/verybestbaking.com.txt | 10 +- inc/3rdparty/site_config/standard/vg.no.txt | 2 +- .../site_config/standard/video.forbes.com.txt | 16 +-- inc/3rdparty/site_config/standard/videogum.com.txt | 8 +- .../site_config/standard/villagevoice.com.txt | 14 +- inc/3rdparty/site_config/standard/vimeo.com.txt | 32 ++--- inc/3rdparty/site_config/standard/viply.de.txt | 12 ++ inc/3rdparty/site_config/standard/visir.is.txt | 24 ++-- inc/3rdparty/site_config/standard/vitispr.com.txt | 8 +- .../site_config/standard/vivirmexico.com.txt | 0 .../site_config/standard/vnexpress.net.txt | 14 +- .../standard/voices.washingtonpost.com.txt | 2 +- inc/3rdparty/site_config/standard/vworker.com.txt | 2 +- .../site_config/standard/waffle.wootest.net.txt | 4 +- 
.../site_config/standard/walrusmagazine.com.txt | 24 ++-- .../site_config/standard/warnerbros.fr.txt | 2 +- .../standard/washingtoninstitute.org.txt | 6 + .../site_config/standard/washingtonmonthly.com.txt | 16 +-- .../site_config/standard/washingtonpost.com.txt | 51 ++++--- .../site_config/standard/web-libre.org.txt | 8 +- .../standard/weblog.bignerdranch.com.txt | 6 +- .../site_config/standard/weblogs.asp.net.txt | 14 +- .../site_config/standard/webpaper.nzz.ch.txt | 12 +- inc/3rdparty/site_config/standard/webwereld.nl.txt | 8 ++ inc/3rdparty/site_config/standard/welt.de.txt | 42 +++--- .../site_config/standard/westhamtillidie.com.txt | 8 +- .../site_config/standard/what-if.xkcd.com.txt | 0 .../site_config/standard/whatever.scalzi.com.txt | 10 +- .../site_config/standard/wheelyric.com.txt | 18 +-- .../site_config/standard/wiki.guildwars.com.txt | 12 +- .../site_config/standard/wiki.guildwars2.com.txt | 12 +- inc/3rdparty/site_config/standard/wikihow.com.txt | 15 +++ .../site_config/standard/wikitravel.org.txt | 24 ++-- .../site_config/standard/will-self.com.txt | 4 +- .../site_config/standard/williampfaff.com.txt | 2 +- inc/3rdparty/site_config/standard/winfuture.de.txt | 20 +-- .../site_config/standard/winrumors.com.txt | 10 +- .../site_config/standard/winsupersite.com.txt | 4 +- inc/3rdparty/site_config/standard/wired.com.txt | 47 ++++--- inc/3rdparty/site_config/standard/wmnf.org.txt | 22 +-- .../site_config/standard/wmpoweruser.com.txt | 4 +- .../site_config/standard/worldpoultry.net.txt | 6 +- .../site_config/standard/worldwidewords.org.txt | 4 +- .../site_config/standard/wow.joystiq.com.txt | 8 +- inc/3rdparty/site_config/standard/wpmayor.com.txt | 8 ++ .../site_config/standard/wtatennis.com.txt | 7 + .../site_config/standard/www1.folha.uol.com.br.txt | 28 ++-- .../site_config/standard/www3.imperial.ac.uk.txt | 0 inc/3rdparty/site_config/standard/wyborcza.pl.txt | 20 ++- inc/3rdparty/site_config/standard/wyctim.com.txt | 2 +- 
.../site_config/standard/wz-newsline.de.txt | 6 +- inc/3rdparty/site_config/standard/xfgjls.com.txt | 11 ++ inc/3rdparty/site_config/standard/xoeb.us.txt | 4 +- inc/3rdparty/site_config/standard/yated.com.txt | 0 inc/3rdparty/site_config/standard/ynet.co.il.txt | 26 ++++ .../site_config/standard/yostivanich.com.txt | 6 +- inc/3rdparty/site_config/standard/yourerie.com.txt | 2 + inc/3rdparty/site_config/standard/youtube.com.txt | 28 ++-- .../site_config/standard/zcommunications.org.txt | 7 + inc/3rdparty/site_config/standard/zdnet.com.txt | 18 +-- inc/3rdparty/site_config/standard/zeit.de.txt | 89 +++++++------ .../site_config/standard/zerohedge.com.txt | 10 ++ .../site_config/standard/zerokspot.com.txt | 2 +- inc/3rdparty/site_config/standard/zhihu.com.txt | 19 +++ .../site_config/standard/zingtrain.com.txt | 2 +- 951 files changed, 7577 insertions(+), 5674 deletions(-) mode change 100644 => 100755 inc/3rdparty/site_config/standard/24ways.org.txt create mode 100755 inc/3rdparty/site_config/standard/36kr.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/37signals.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/3quarksdaily.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/3voor12.vpro.nl.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/43folders.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/500px.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/512pixels.net.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/5by5.tv.txt create mode 100755 inc/3rdparty/site_config/standard/7newsbelize.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/944.com.txt create mode 100755 inc/3rdparty/site_config/standard/README.md mode change 100644 => 100755 inc/3rdparty/site_config/standard/aachener-nachrichten.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/aachener-zeitung.de.txt mode change 100644 => 
100755 inc/3rdparty/site_config/standard/abc.es.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/abc.net.au.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/abcnews.go.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/accesstoinsight.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/acidcow.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/acquia.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/acroswing.fr.txt create mode 100755 inc/3rdparty/site_config/standard/aftenposten.no.txt create mode 100755 inc/3rdparty/site_config/standard/aftonbladet.se.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/aht.seriouseats.com.txt create mode 100755 inc/3rdparty/site_config/standard/albayan.ae.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/alex.mullr.net.txt create mode 100755 inc/3rdparty/site_config/standard/alexduner.com.txt create mode 100755 inc/3rdparty/site_config/standard/alexduner.squarespace.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/alistapart.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/aljazeera.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/allrecipes.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/allthingsd.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/allyou.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/alphabeta.argaam.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/alriyadh.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/alseraj.net.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/alt1040.com.txt create mode 100755 inc/3rdparty/site_config/standard/alternet.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/altfoto.com.txt mode change 100644 => 100755 
inc/3rdparty/site_config/standard/alumni.stanford.edu.txt create mode 100755 inc/3rdparty/site_config/standard/amandala.com.bz.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/amazon.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/americandrink.net.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/americascup.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/americastestkitchenfeed.com.txt create mode 100755 inc/3rdparty/site_config/standard/amptoons.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/anandtech.com.txt create mode 100755 inc/3rdparty/site_config/standard/androidpolice.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/andyrutledge.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/annatravelling.wordpress.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/applature.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/apple.com.txt create mode 100755 inc/3rdparty/site_config/standard/appledaily.com.tw.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/appleinsider.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/appleweblog.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/archdaily.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/archiveofourown.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/arstechnica.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/articles.boston.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/articles.courant.com.txt create mode 100755 inc/3rdparty/site_config/standard/articles.washingtonpost.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/asahi.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/ascarter.net.txt mode change 100644 => 100755 
inc/3rdparty/site_config/standard/astronews.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/asymco.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/autoblog.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/avclub.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/baltimoresun.com.txt create mode 100755 inc/3rdparty/site_config/standard/baseballprospectus.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/basicthinking.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/bb.is.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/bbc.co.uk.txt create mode 100755 inc/3rdparty/site_config/standard/bbcgoodfood.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/benoitmaison.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/berlingske.dk.txt create mode 100755 inc/3rdparty/site_config/standard/bernama.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/betabeat.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/betanews.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/biography.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/bitelia.com.txt create mode 100755 inc/3rdparty/site_config/standard/bizjournals.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/bjango.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/blog.arsln.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/blog.asmartbear.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/blog.cloudflare.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/blog.fefe.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/blog.instagram.com.txt create mode 100755 inc/3rdparty/site_config/standard/blog.instapaper.com.txt mode change 100644 => 100755 
inc/3rdparty/site_config/standard/blog.jaysalvat.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/blog.kaelig.fr.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/blog.naver.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/blog.pchome.net.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/blog.pinboard.in.txt create mode 100755 inc/3rdparty/site_config/standard/blog.renren.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/blog.sina.com.cn.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/blog.spu.edu.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/blog.wells.ee.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/blogs.aljazeera.net.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/blogs.forbes.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/blogs.hbr.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/blogs.msdn.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/blogs.reuters.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/blogs.scientificamerican.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/blogs.smithsonianmag.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/blogs.technet.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/bluetouff.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/boagworld.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/boingboing.net.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/boldizsar.palotas.eu.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/book.douban.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/bookforum.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/borderhouseblog.com.txt mode change 100644 
=> 100755 inc/3rdparty/site_config/standard/bostonglobe.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/bostonreview.net.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/boundlessline.org.txt create mode 100755 inc/3rdparty/site_config/standard/bowdoinorient.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/brainfacts.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/brandeins.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/brandingstrategyinsider.com.txt create mode 100755 inc/3rdparty/site_config/standard/brasil.elpais.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/brettterpstra.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/brisbanetimes.com.au.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/brookings.edu.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/brooksreview.net.txt create mode 100755 inc/3rdparty/site_config/standard/bt.no.txt create mode 100755 inc/3rdparty/site_config/standard/buffed.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/buquad.com.txt create mode 100755 inc/3rdparty/site_config/standard/business2community.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/businessinsider.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/businessnews.com.tn.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/businessweek.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/buzzfeed.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/bygonebureau.com.txt create mode 100755 inc/3rdparty/site_config/standard/cable.co.uk.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/cardboardconnection.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/carpeaqua.com.txt create mode 100755 inc/3rdparty/site_config/standard/cars.com.txt mode 
change 100644 => 100755 inc/3rdparty/site_config/standard/catb.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/cbc.ca.txt create mode 100755 inc/3rdparty/site_config/standard/cbn.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/cbsnews.com.txt create mode 100755 inc/3rdparty/site_config/standard/cedarrepublican.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/chareidi.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/chinamining.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/chomsky.info.txt create mode 100755 inc/3rdparty/site_config/standard/chrisltd.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/christianitytoday.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/christianpf.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/christies.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/chrome.google.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/chronicle.com.txt create mode 100755 inc/3rdparty/site_config/standard/ciaosamin.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/cicero.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/ciperchile.cl.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/cjr.org.txt create mode 100755 inc/3rdparty/site_config/standard/classyllama.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/clientk.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/clubic.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/cmswire.com.txt create mode 100755 inc/3rdparty/site_config/standard/cn.engadget.com.txt create mode 100755 inc/3rdparty/site_config/standard/cn.reuters.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/cnet.com.txt mode change 100644 => 100755 
inc/3rdparty/site_config/standard/cnn.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/cnnsi.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/code.activestate.com.txt create mode 100755 inc/3rdparty/site_config/standard/code.fivefilters.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/code.google.com.txt create mode 100755 inc/3rdparty/site_config/standard/codeproject.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/codinghorror.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/collegehumor.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/communities-dominate.blogs.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/community.service-now.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/computer.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/computerbase.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/computerworld.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/computerworld.dk.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/contemporist.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/conversaciones.nokia.com.txt create mode 100755 inc/3rdparty/site_config/standard/cooper.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/core77.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/counterpunch.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/crazybutable.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/crimemagazine.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/crimethinc.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/crn.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/csmonitor.com.txt mode change 100644 => 100755 
inc/3rdparty/site_config/standard/csnbayarea.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/csnphilly.com.txt create mode 100755 inc/3rdparty/site_config/standard/css-tricks.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/cucharasonica.com.txt create mode 100755 inc/3rdparty/site_config/standard/cw.com.tw.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/da.feedsportal.com.txt create mode 100755 inc/3rdparty/site_config/standard/dagogtid.no.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/dailydot.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/dailykos.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/dailymail.co.uk.txt create mode 100755 inc/3rdparty/site_config/standard/dailystar.com.lb.txt create mode 100755 inc/3rdparty/site_config/standard/danleech.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/dansdata.com.txt create mode 100755 inc/3rdparty/site_config/standard/dantri.com.vn.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/daringfireball.net.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/datanami.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/dcurt.is.txt create mode 100755 inc/3rdparty/site_config/standard/defomicron.net.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/delong.typepad.com.txt create mode 100755 inc/3rdparty/site_config/standard/democracynow.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/derstandard.at.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/designtagebuch.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/desitvforum.net.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/details.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/developers.facebook.com.txt mode change 100644 => 100755 
inc/3rdparty/site_config/standard/devlinsangle.blogspot.co.at.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/dictionary.reference.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/diepresse.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/digiphoto.techbang.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/digital-photography-school.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/digitalspy.co.uk.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/dilbert.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/dinamalar.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/dn.se.txt create mode 100755 inc/3rdparty/site_config/standard/dobreprogramy.pl.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/doctac.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/domusweb.it.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/dou.ua.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/douban.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/dpreview.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/dr.dk.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/dramasonline.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/drdobbs.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/drive2.ru.txt create mode 100755 inc/3rdparty/site_config/standard/dropbox.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/drupal.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/dukebasketballreport.com.txt create mode 100755 inc/3rdparty/site_config/standard/dushumashang.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/dvice.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/eamesinerudition.com.txt 
mode change 100644 => 100755 inc/3rdparty/site_config/standard/eandt.theiet.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/eastoftheweb.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/ebay.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/ecetia.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/econlog.econlib.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/economia.estadao.com.br.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/economist.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/edge-online.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/edge.org.txt create mode 100755 inc/3rdparty/site_config/standard/edition.channel5belize.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/edition.cnn.com.txt create mode 100755 inc/3rdparty/site_config/standard/eetimes.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/ekultura.hu.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/elance.com.txt create mode 100755 inc/3rdparty/site_config/standard/elderscrollsonline.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/elektroniknet.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/elmalpensante.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/elpais.com.txt create mode 100755 inc/3rdparty/site_config/standard/emaratalyoum.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/en.espnf1.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/engadget.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/engineering.tumblr.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/english.aljazeera.net.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/enikos.gr.txt mode change 100644 => 100755 
inc/3rdparty/site_config/standard/entertainment.timesonline.co.uk.txt create mode 100755 inc/3rdparty/site_config/standard/ericsuh.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/es.hu.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/escapistmagazine.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/espn.go.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/esquire.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/essentialpublicradio.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/etc.se.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/eternabuenosaires.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/eurogamer.net.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/evo.co.uk.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/expressen.se.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/extracine.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/f1actual.com.txt create mode 100755 inc/3rdparty/site_config/standard/facebook.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/facta.co.jp.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/falter.at.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/fanfiction.net.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/fastcompany.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/faz.net.txt create mode 100755 inc/3rdparty/site_config/standard/fertigung.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/fictionpress.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/ficwad.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/finance.yahoo.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/findtheswagger.tumblr.com.txt mode change 
100644 => 100755 inc/3rdparty/site_config/standard/firstthings.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/fivechapters.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/fivefilters.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/fivethirtyeight.com.txt create mode 100755 inc/3rdparty/site_config/standard/flyingmachinestudios.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/fm4.orf.at.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/fnal.gov.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/focus.de.txt create mode 100755 inc/3rdparty/site_config/standard/folklore.org.txt create mode 100755 inc/3rdparty/site_config/standard/food.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/fool.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/forbes.com.txt create mode 100755 inc/3rdparty/site_config/standard/foreignaffairs.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/foreignpolicy.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/forsvaret.no.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/foxnews.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/freelancer.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/freytag-film.com.txt create mode 100755 inc/3rdparty/site_config/standard/fria.nu.txt create mode 100755 inc/3rdparty/site_config/standard/friatidningen.se.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/friendskorner.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/ft.com.txt create mode 100755 inc/3rdparty/site_config/standard/ftchinese.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/ftd.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/fubiz.net.txt mode change 100644 => 100755 
inc/3rdparty/site_config/standard/futurezone.at.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/gamasutra.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/gameblog.fr.txt create mode 100755 inc/3rdparty/site_config/standard/gamechurch.com.txt create mode 100755 inc/3rdparty/site_config/standard/gamer.no.txt create mode 100755 inc/3rdparty/site_config/standard/gamereactor.no.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/garythink.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/gasteroprod.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/gatopardo.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/gawker.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/geeksofdoom.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/geenstijl.nl.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/getnews.jp.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/giantbomb.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/giga.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/gigaom.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/gihyo.jp.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/gist.github.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/givemesomethingtoread.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/gizmodo.co.uk.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/gizmodo.com.txt create mode 100755 inc/3rdparty/site_config/standard/gizmodo.uol.com.br.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/gizmologia.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/gizmovil.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/global.txt mode change 100644 => 100755 
inc/3rdparty/site_config/standard/globalissues.org.txt create mode 100755 inc/3rdparty/site_config/standard/globoesporte.globo.com.txt create mode 100755 inc/3rdparty/site_config/standard/gloswielkopolski.pl.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/goal.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/golem.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/good.is.txt create mode 100755 inc/3rdparty/site_config/standard/goodfil.ms.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/gossip-tv.gr.txt create mode 100755 inc/3rdparty/site_config/standard/goteborgsfria.se.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/gothamist.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/gotomanager.com.txt create mode 100755 inc/3rdparty/site_config/standard/gov.ky.txt create mode 100755 inc/3rdparty/site_config/standard/gp.se.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/gq.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/grantland.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/greatergreaterwashington.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/groups.drupal.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/gulfnews.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/guokr.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/haberler.com.txt create mode 100755 inc/3rdparty/site_config/standard/hackmake.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/halo.bungie.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/hammers.theoffside.com.txt create mode 100755 inc/3rdparty/site_config/standard/handelsblatt.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/hanselman.com.txt mode change 100644 => 100755 
inc/3rdparty/site_config/standard/hardware.fr.txt create mode 100755 inc/3rdparty/site_config/standard/hardware.no.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/hbr.org.txt create mode 100755 inc/3rdparty/site_config/standard/headrush.typepad.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/heise-online.mobi.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/heise.de.txt create mode 100755 inc/3rdparty/site_config/standard/hemmings.com.txt create mode 100755 inc/3rdparty/site_config/standard/heroturko.me.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/hespress.com.txt create mode 100755 inc/3rdparty/site_config/standard/hiamag.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/highscalability.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/hiperpop.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/hiphopleeft.nl.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/historytoday.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/hmercer.com.txt create mode 100755 inc/3rdparty/site_config/standard/hollywoodlife.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/hometheaterreview.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/hosted.ap.org.txt create mode 100755 inc/3rdparty/site_config/standard/howtogeek.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/hs.fi.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/ht.ly.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/huffingtonpost.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/humantransit.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/hurriyet.com.tr.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/hvg.hu.txt mode change 100644 => 100755 
inc/3rdparty/site_config/standard/hypebeast.com.txt create mode 100755 inc/3rdparty/site_config/standard/icannabis.tumblr.com.txt create mode 100755 inc/3rdparty/site_config/standard/idealog.co.nz.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/idlewords.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/igeneration.fr.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/ignoredbydinosaurs.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/ilounge.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/ilyabirman.ru.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/inc.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/independent.co.uk.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/indiatimes.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/inessential.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/info.abril.com.br.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/infoq.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/informador.com.mx.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/information.dk.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/informationarchitects.net.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/informationclearinghouse.info.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/informit.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/infoworld.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/infzm.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/inhabitat.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/instagr.am.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/interest.co.nz.txt mode change 100644 => 100755 
inc/3rdparty/site_config/standard/iolanguage.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/ipadclub.nl.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/ipadplanet.nl.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/iphoneclub.nl.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/iphonehacks.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/iplaysoft.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/isource.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/itavisen.no.txt create mode 100755 inc/3rdparty/site_config/standard/itmedia.co.jp.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/itstactical.com.txt create mode 100755 inc/3rdparty/site_config/standard/itwire.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/itworld.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/izismile.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/jalopnik.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/jandan.net.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/jetzt.sueddeutsche.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/jjahnke.net.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/jobbank.gc.ca.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/joelonsoftware.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/jouire.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/joystiq.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/juedische-allgemeine.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/juppy.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/kachestvo.ru.txt create mode 100755 inc/3rdparty/site_config/standard/kachiblog.com.txt create mode 100755 
inc/3rdparty/site_config/standard/kathimerini.gr.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/kenrockwell.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/kicker.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/kickstarter.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/kingarthurflour.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/kotaku.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/kottke.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/kumailplus.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/kumb.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/kwerfeldein.de.txt create mode 100755 inc/3rdparty/site_config/standard/landetsfria.se.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/laphamsquarterly.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/laprensagrafica.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/laquadrature.net.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/lareviewofbooks.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/latimes.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/laughingsquid.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/leancrew.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/lefigaro.fr.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/lemonde.fr.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/lesnumeriques.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/letemps.ch.txt create mode 100755 inc/3rdparty/site_config/standard/libcom.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/lifeandculture.fr.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/lifehacker.com.txt 
create mode 100755 inc/3rdparty/site_config/standard/lifestyle.inquirer.net.txt create mode 100755 inc/3rdparty/site_config/standard/lifeweek.com.cn.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/linkedin.com.txt create mode 100755 inc/3rdparty/site_config/standard/livescience.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/longform.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/loopinsight.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/lostgarden.com.txt create mode 100755 inc/3rdparty/site_config/standard/lovefm.com.txt create mode 100755 inc/3rdparty/site_config/standard/lovetv.com.bz.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/lrb.co.uk.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/luminous-landscape.com.txt create mode 100755 inc/3rdparty/site_config/standard/luxuo.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/m.bbc.co.uk.txt create mode 100755 inc/3rdparty/site_config/standard/m.douban.com.txt create mode 100755 inc/3rdparty/site_config/standard/m.vanityfair.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/mac4ever.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/macdrifter.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/macformat.techradar.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/macgeneration.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/macmagazine.com.br.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/macrumors.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/macstories.net.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/mactalk.com.au.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/mactechnews.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/macworld.com.txt mode change 100644 => 
100755 inc/3rdparty/site_config/standard/mainichi.jp.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/mainpost.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/makeuseof.com.txt create mode 100755 inc/3rdparty/site_config/standard/manager.co.th.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/marco.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/marksdailyapple.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/martinfowler.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/mashable.com.txt create mode 100755 inc/3rdparty/site_config/standard/matt.might.net.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/mattcutts.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/mbl.is.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/medialens.org.txt create mode 100755 inc/3rdparty/site_config/standard/medium.com.txt create mode 100755 inc/3rdparty/site_config/standard/megamp3.eu.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/menshealth.com.txt create mode 100755 inc/3rdparty/site_config/standard/metafilter.com.txt create mode 100755 inc/3rdparty/site_config/standard/mforum.cari.com.my.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/mikeash.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/mikeindustries.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/minnesota.publicradio.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/minnpost.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/mirrorfootball.co.uk.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/mises.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/mlb.mlb.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/mlb.sbnation.com.txt mode change 100644 => 100755 
inc/3rdparty/site_config/standard/mlssoccer.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/mmo-champion.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/mnn.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/mno.hu.txt create mode 100755 inc/3rdparty/site_config/standard/mobile.nytimes.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/mobile.slate.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/mobileopportunity.blogspot.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/modernghana.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/money.cnn.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/monkeyzen.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/moonsault.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/moreintelligentlife.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/motherboard.vice.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/mothering.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/motherjones.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/motorfull.com.txt create mode 100755 inc/3rdparty/site_config/standard/movie.douban.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/msdn.microsoft.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/msnbc.msn.com.txt create mode 100755 inc/3rdparty/site_config/standard/myfoxatlanta.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/myfoxboston.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/myrecipes.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/narenji.ir.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/nasa.gov.txt mode change 100644 => 100755 
inc/3rdparty/site_config/standard/nbweekly.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/neh.gov.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/neomoney.co.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/net-security.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/netmagazine.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/netzpolitik.org.txt create mode 100755 inc/3rdparty/site_config/standard/newleftproject.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/newmatilda.com.txt create mode 100755 inc/3rdparty/site_config/standard/newrepublic.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/news-gazette.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/news.cnet.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/news.detik.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/news.kanaloco.jp.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/news.mynavi.jp.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/news.orf.at.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/news.rambler.ru.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/news.techmeme.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/news.yahoo.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/news.ycombinator.com.txt create mode 100755 inc/3rdparty/site_config/standard/news.zing.vn.txt create mode 100755 inc/3rdparty/site_config/standard/news247.gr.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/newsbomb.gr.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/newsle.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/newsmill.se.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/newsunspun.org.txt create mode 100755 
inc/3rdparty/site_config/standard/newsweek.com.txt create mode 100755 inc/3rdparty/site_config/standard/newswise.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/newyorker.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/next-gen.biz.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/nfl.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/ngm.nationalgeographic.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/nhk.or.jp.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/nintendoworldreport.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/nojesguiden.se.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/northumberlandview.ca.txt create mode 100755 inc/3rdparty/site_config/standard/nosalty.hu.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/nplusonemag.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/npr.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/nybooks.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/nymag.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/nyteknik.se.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/nytimes.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/nzz.ch.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/observer.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/off.net.mk.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/omaha.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/omiliya.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/on.net.mk.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/online.wsj.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/onlinewelten.com.txt mode change 100644 => 100755 
inc/3rdparty/site_config/standard/onstartups.com.txt create mode 100755 inc/3rdparty/site_config/standard/ontologicalgeek.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/opensource.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/openthemagazine.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/openwebx.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/orf.at.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/origo.hu.txt create mode 100755 inc/3rdparty/site_config/standard/oschina.net.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/pakistantvdekho.com.txt create mode 100755 inc/3rdparty/site_config/standard/pakmedia.tv.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/pandagon.net.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/pandodaily.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/panic.com.txt create mode 100755 inc/3rdparty/site_config/standard/papodehomem.com.br.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/parislemon.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/parliament.uk.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/pastebin.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/pastepad.fivefilters.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/pathawks.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/pcast.me.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/pcmag.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/pcworld.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/penny-arcade.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/pentaxforums.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/philadelphiaeagles.com.txt mode change 100644 => 
100755 inc/3rdparty/site_config/standard/philly.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/photo.tutsplus.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/php.net.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/physicstoday.org.txt create mode 100755 inc/3rdparty/site_config/standard/pinterest.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/pitchfork.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/pittnews.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/pittsburgh.pirates.mlb.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/pittsburghlive.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/pittsburghmagazine.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/pittsburghpanthers.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/pittscriptblog.com.txt create mode 100755 inc/3rdparty/site_config/standard/planetvita.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/playboy.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/plus.google.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/plzkthxbai.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/pogue.blogs.nytimes.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/politifact.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/politiken.dk.txt create mode 100755 inc/3rdparty/site_config/standard/polygon.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/popularmechanics.com.txt create mode 100755 inc/3rdparty/site_config/standard/portertech.ca.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/positioningmag.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/post-gazette.com.txt mode change 100644 => 100755 
inc/3rdparty/site_config/standard/posta.com.tr.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/prb.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/prog21.dadgum.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/prolost.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/propublica.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/prosa.dk.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/prospectmagazine.co.uk.txt create mode 100755 inc/3rdparty/site_config/standard/protothema.gr.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/psychologytoday.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/publications.parliament.uk.txt create mode 100755 inc/3rdparty/site_config/standard/publico.pt.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/purpleplanetmedia.com.txt create mode 100755 inc/3rdparty/site_config/standard/qctimes.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/quantumdiaries.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/queerty.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/quepasa.cl.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/quora.com.txt create mode 100755 inc/3rdparty/site_config/standard/racjonalista.pl.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/radar.oreilly.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/radionz.co.nz.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/randsinrepose.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/readability.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/readwriteweb.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/real.gr.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/recipe.com.txt mode change 100644 
=> 100755 inc/3rdparty/site_config/standard/red-hot-girls.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/reddit.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/redmondpie.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/redtape.msnbc.msn.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/reflets.info.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/renenekuda.cz.txt create mode 100755 inc/3rdparty/site_config/standard/resume.se.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/retrieverweekly.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/reuters.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/revistapiaui.estadao.com.br.txt create mode 100644 inc/3rdparty/site_config/standard/rezeptwelt.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/richardmuscat.wordpress.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/ritemail.blogspot.com.txt create mode 100755 inc/3rdparty/site_config/standard/ritholtz.com.txt create mode 100755 inc/3rdparty/site_config/standard/robertsspaceindustries.com.txt create mode 100755 inc/3rdparty/site_config/standard/robots.thoughtbot.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/rockpapershotgun.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/rodrigo.sharpcube.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/rogerebert.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/rolfinjapan.nl.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/rollingstone.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/rottentomatoes.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/roughtype.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/roy.gbiv.com.txt mode change 100644 => 100755 
inc/3rdparty/site_config/standard/rpgsite.net.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/rubysfera.pl.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/ruhlman.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/ruttloff.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/salon.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/salzburg.com.txt create mode 100755 inc/3rdparty/site_config/standard/sanpedrosun.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/saveyourself.ca.txt create mode 100755 inc/3rdparty/site_config/standard/sayidaty.net.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/sbnation.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/schneier.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/science.orf.at.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/scienceblogs.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/scienceticker.info.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/scientificamerican.com.txt create mode 100755 inc/3rdparty/site_config/standard/scilogs.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/scotusblog.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/scraplab.net.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/scripting.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/sct.temple.edu.txt create mode 100755 inc/3rdparty/site_config/standard/searchenginejournal.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/searchengineland.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/seattletransitblog.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/sebbo.net.txt create mode 100755 inc/3rdparty/site_config/standard/select.yeeyan.org.txt mode change 100644 => 
100755 inc/3rdparty/site_config/standard/seriouseats.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/sf.curbed.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/sf.eater.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/sfgate.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/sfweekly.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/shabayek.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/shawnblanc.net.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/shifteleven.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/siasat.pk.txt create mode 100755 inc/3rdparty/site_config/standard/signalscv.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/simonwillison.net.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/singaporeanstocksinvestor.blogspot.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/singularityhub.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/sintagoulis.gr.txt create mode 100755 inc/3rdparty/site_config/standard/sivers.org.txt create mode 100755 inc/3rdparty/site_config/standard/skanesfria.se.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/slashfilm.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/slate.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/slice.seriouseats.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/slog.thestranger.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/smartinvestor.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/sme.sk.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/smithsonianmag.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/smokingapples.com.txt create mode 100755 
inc/3rdparty/site_config/standard/somethingawful.com.txt create mode 100755 inc/3rdparty/site_config/standard/songshuhui.net.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/sourcebooks.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/spectator.co.uk.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/spectrum.ieee.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/speirs.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/spiegel.de.txt create mode 100755 inc/3rdparty/site_config/standard/spiked-online.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/spin.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/splatf.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/splitsider.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/sport.detik.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/sport.orf.at.txt create mode 100755 inc/3rdparty/site_config/standard/sport365.fr.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/sports.espn.go.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/sports.yahoo.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/sportschau.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/sportsillustrated.cnn.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/sprengsatz.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/sqlite.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/squashed.tumblr.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/stackoverflow.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/stalbansreview.co.uk.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/standard.co.uk.txt mode change 100644 => 100755 
inc/3rdparty/site_config/standard/staradvertiser.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/stephenfry.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/stlbeacon.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/stockholm.etc.se.txt create mode 100755 inc/3rdparty/site_config/standard/stockholmsfria.nu.txt create mode 100755 inc/3rdparty/site_config/standard/straightdope.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/streetsblog.net.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/stuff.co.nz.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/stumbleupon.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/subtraction.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/sueddeutsche.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/summify.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/suntimes.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/svd.se.txt create mode 100755 inc/3rdparty/site_config/standard/svt.se.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/sydsvenskan.se.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/symmetrymagazine.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/sz-magazin.sueddeutsche.de.txt create mode 100755 inc/3rdparty/site_config/standard/sz.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/tagesschau.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/tampabay.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/taptaptap.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/tasteofhome.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/taz.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/tbray.org.txt create mode 100755 
inc/3rdparty/site_config/standard/tcmanila.tk.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/tcng.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/tech.fortune.cnn.com.txt create mode 100755 inc/3rdparty/site_config/standard/tech.gilt.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/tech.sina.com.cn.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/techcrunch.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/techdirt.com.txt create mode 100755 inc/3rdparty/site_config/standard/techhive.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/techmeme.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/technicallyjordan.tumblr.com.txt create mode 100755 inc/3rdparty/site_config/standard/technologizer.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/technologyreview.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/techpinions.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/techradar.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/telegraaf.nl.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/telegraph.co.uk.txt create mode 100755 inc/3rdparty/site_config/standard/thanhnien.com.vn.txt create mode 100755 inc/3rdparty/site_config/standard/the-magazine.org.txt create mode 100755 inc/3rdparty/site_config/standard/theage.com.au.txt create mode 100755 inc/3rdparty/site_config/standard/theamericanscholar.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/theappleblog.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/theatlantic.com.txt create mode 100755 inc/3rdparty/site_config/standard/theatlanticcities.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/thebostonchannel.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/thebrowser.com.txt mode change 
100644 => 100755 inc/3rdparty/site_config/standard/thecarton.net.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/thedaily.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/thedailybeast.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/thedailymash.co.uk.txt create mode 100755 inc/3rdparty/site_config/standard/thedisneyblog.com.txt create mode 100755 inc/3rdparty/site_config/standard/theeuropean-magazine.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/thefilmexperience.net.txt create mode 100755 inc/3rdparty/site_config/standard/thegamedesignforum.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/theglobalmail.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/theglobeandmail.com.txt create mode 100755 inc/3rdparty/site_config/standard/thegreatdiscontent.com.txt create mode 100755 inc/3rdparty/site_config/standard/theguardian.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/theindychannel.com.txt create mode 100755 inc/3rdparty/site_config/standard/themarker.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/themillions.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/themuseumofinnocence.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/thenation.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/thenetworkgarden.blogs.com.txt create mode 100755 inc/3rdparty/site_config/standard/thenextgeneration.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/thenextweb.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/theoaklandpress.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/theonion.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/thepioneerwoman.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/theregister.co.uk.txt mode change 100644 => 
100755 inc/3rdparty/site_config/standard/theroot.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/therumpus.net.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/thesiasat.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/thesimpledollar.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/thespoiler.co.uk.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/thespoof.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/thestranger.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/thestreet.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/thethaovanhoa.vn.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/theverge.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/theweek.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/thinkprogress.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/thisdaylive.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/thisismynext.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/tidbits.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/time.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/timeshighereducation.co.uk.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/tipb.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/tnr.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/tomdispatch.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/tomshardware.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/tomshardware.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/toolsandtoys.net.txt create mode 100755 inc/3rdparty/site_config/standard/tracks.ranea.org.txt mode change 100644 => 100755 
inc/3rdparty/site_config/standard/trailer.web-view.net.txt create mode 100755 inc/3rdparty/site_config/standard/trailerzone.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/traningslara.se.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/triblive.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/truthdig.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/tthfanfic.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/tthor.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/tuaw.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/tuckreview.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/tvtropes.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/twitter.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/uefa.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/uk.xbox360.ign.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/uni-watch.com.txt create mode 100755 inc/3rdparty/site_config/standard/unwinnable.com.txt create mode 100755 inc/3rdparty/site_config/standard/uppsalafria.se.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/urbandictionary.com.txt create mode 100755 inc/3rdparty/site_config/standard/usatoday.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/usccb.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/useit.com.txt create mode 100755 inc/3rdparty/site_config/standard/usfirst.org.txt create mode 100755 inc/3rdparty/site_config/standard/utdailybeacon.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/ux.artu.tv.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/uzivatelsketestovani.cz.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/vanityfair.com.txt mode change 100644 => 100755 
inc/3rdparty/site_config/standard/varingen.no.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/varsity.co.uk.txt create mode 100755 inc/3rdparty/site_config/standard/vea.gov.vn.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/vedomosti.ru.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/veggbilder.no.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/vemedio.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/venturebeat.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/version2.dk.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/verybestbaking.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/vg.no.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/video.forbes.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/videogum.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/villagevoice.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/vimeo.com.txt create mode 100755 inc/3rdparty/site_config/standard/viply.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/visir.is.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/vitispr.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/vivirmexico.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/vnexpress.net.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/voices.washingtonpost.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/vworker.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/waffle.wootest.net.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/walrusmagazine.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/warnerbros.fr.txt create mode 100755 inc/3rdparty/site_config/standard/washingtoninstitute.org.txt mode change 100644 => 
100755 inc/3rdparty/site_config/standard/washingtonmonthly.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/washingtonpost.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/web-libre.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/weblog.bignerdranch.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/weblogs.asp.net.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/webpaper.nzz.ch.txt create mode 100755 inc/3rdparty/site_config/standard/webwereld.nl.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/welt.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/westhamtillidie.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/what-if.xkcd.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/whatever.scalzi.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/wheelyric.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/wiki.guildwars.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/wiki.guildwars2.com.txt create mode 100755 inc/3rdparty/site_config/standard/wikihow.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/wikitravel.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/will-self.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/williampfaff.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/winfuture.de.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/winrumors.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/winsupersite.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/wired.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/wmnf.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/wmpoweruser.com.txt mode change 100644 => 100755 
inc/3rdparty/site_config/standard/worldpoultry.net.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/worldwidewords.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/wow.joystiq.com.txt create mode 100755 inc/3rdparty/site_config/standard/wpmayor.com.txt create mode 100755 inc/3rdparty/site_config/standard/wtatennis.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/www1.folha.uol.com.br.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/www3.imperial.ac.uk.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/wyborcza.pl.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/wyctim.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/wz-newsline.de.txt create mode 100755 inc/3rdparty/site_config/standard/xfgjls.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/xoeb.us.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/yated.com.txt create mode 100755 inc/3rdparty/site_config/standard/ynet.co.il.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/yostivanich.com.txt create mode 100755 inc/3rdparty/site_config/standard/yourerie.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/youtube.com.txt create mode 100755 inc/3rdparty/site_config/standard/zcommunications.org.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/zdnet.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/zeit.de.txt create mode 100755 inc/3rdparty/site_config/standard/zerohedge.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/zerokspot.com.txt create mode 100755 inc/3rdparty/site_config/standard/zhihu.com.txt mode change 100644 => 100755 inc/3rdparty/site_config/standard/zingtrain.com.txt (limited to 'inc/3rdparty/site_config') diff --git a/inc/3rdparty/site_config/standard/24ways.org.txt b/inc/3rdparty/site_config/standard/24ways.org.txt old mode 100644 
new mode 100755 index 03bd1950..86c9e077 --- a/inc/3rdparty/site_config/standard/24ways.org.txt +++ b/inc/3rdparty/site_config/standard/24ways.org.txt @@ -1,6 +1,6 @@ -title: //div[@class='meta']/h2/a -author: //div[@class='meta']/h2/following-sibling::p/a/text() -date://div[@class='meta']/h2/strong -body: //div[@id='article'] +title: //div[@class='meta']/h2/a +author: //div[@class='meta']/h2/following-sibling::p/a/text() +date://div[@class='meta']/h2/strong +body: //div[@id='article'] strip: //div[@class='domore'] test_url: http://24ways.org/2011/composing-the-new-canon \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/36kr.com.txt b/inc/3rdparty/site_config/standard/36kr.com.txt new file mode 100755 index 00000000..d73d7de5 --- /dev/null +++ b/inc/3rdparty/site_config/standard/36kr.com.txt @@ -0,0 +1,8 @@ +title: //h1[contains(@class, 'entry-title')] +date: //meta[@name='weibo: article:create_at']/@content +body: //div[contains(@class, 'mainContent')] +strip_id_or_class: related_topics + +prune: no + +test_url: http://www.36kr.com/p/207879.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/37signals.com.txt b/inc/3rdparty/site_config/standard/37signals.com.txt old mode 100644 new mode 100755 index 43a10ae5..531cac1e --- a/inc/3rdparty/site_config/standard/37signals.com.txt +++ b/inc/3rdparty/site_config/standard/37signals.com.txt @@ -1,6 +1,6 @@ -title: //div[@class='post_header']//h2/a -author: //span[@class='author'] -date: //span[@class='date'] -body: //div[@id='Content'] +title: //div[@class='post_header']//h2/a +author: //span[@class='author'] +date: //span[@class='date'] +body: //div[@id='Content'] test_url: http://37signals.com/svn/posts/2785-the-end-of-the-it-department \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/3quarksdaily.com.txt b/inc/3rdparty/site_config/standard/3quarksdaily.com.txt old mode 100644 new mode 100755 index c4e7940f..80a3958f --- 
a/inc/3rdparty/site_config/standard/3quarksdaily.com.txt +++ b/inc/3rdparty/site_config/standard/3quarksdaily.com.txt @@ -1,9 +1,9 @@ -body: //div[@class='content'] -date: //div[@class='content']/h2 -strip: //div[@class='content']/h2 -title: //div[@class='content']/h3 - -strip: //div[@id='postmenu'] -strip: //div[@class='trackback'] -tidy: no +body: //div[@class='content'] +date: //div[@class='content']/h2 +strip: //div[@class='content']/h2 +title: //div[@class='content']/h3 + +strip: //div[@id='postmenu'] +strip: //div[@class='trackback'] +tidy: no test_url: http://www.3quarksdaily.com/3quarksdaily/2012/01/martin-luther-king-i-have-a-dream.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/3voor12.vpro.nl.txt b/inc/3rdparty/site_config/standard/3voor12.vpro.nl.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/43folders.com.txt b/inc/3rdparty/site_config/standard/43folders.com.txt old mode 100644 new mode 100755 index e8073f6f..3777c66f --- a/inc/3rdparty/site_config/standard/43folders.com.txt +++ b/inc/3rdparty/site_config/standard/43folders.com.txt @@ -1,4 +1,4 @@ -body: //*[@class = 'content'] -author: //*[@class = 'submitted']/a +body: //*[@class = 'content'] +author: //*[@class = 'submitted']/a date: substring-after(//*[@class = 'submitted']/text(), '|') test_url: http://www.43folders.com/2011/04/22/cranking \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/500px.com.txt b/inc/3rdparty/site_config/standard/500px.com.txt old mode 100644 new mode 100755 index 68e6b2d0..b9b7e9dd --- a/inc/3rdparty/site_config/standard/500px.com.txt +++ b/inc/3rdparty/site_config/standard/500px.com.txt @@ -1,27 +1,27 @@ -# very loose setup for both 500px.com/photo/* and 500px.com/blog/* -# photo page example: http://500px.com/photo/4181666 -# blog page example: http://500px.com/blog/110 - -# avoid "no text" error -tidy:no -prune:no - -# reorganize photo page elements 
-#body://div[contains(@class,'container')] -move_into(body)://div[contains(@id,'thephoto')] -move_into(body)://div[contains(@id,'description')] -move_into(body)://div[contains(@id,'tags')] -move_into(body)://div[contains(@id,'photo-info')] - -# clean photo page info -strip://span[contains(@id,'copyright')] -strip://*[contains(@id,'store')] -strip://*[contains(@id,'user-info')] -strip://*[contains(@id,'photo-stats')] -strip://*[contains(@id,'voting_controls_container')] -strip://*[contains(@id,'more-photos')] -strip://*[contains(@id,'embed-photo')] - -# clean blog page side bar +# very loose setup for both 500px.com/photo/* and 500px.com/blog/* +# photo page example: http://500px.com/photo/4181666 +# blog page example: http://500px.com/blog/110 + +# avoid "no text" error +tidy:no +prune:no + +# reorganize photo page elements +#body://div[contains(@class,'container')] +move_into(body)://div[contains(@id,'thephoto')] +move_into(body)://div[contains(@id,'description')] +move_into(body)://div[contains(@id,'tags')] +move_into(body)://div[contains(@id,'photo-info')] + +# clean photo page info +strip://span[contains(@id,'copyright')] +strip://*[contains(@id,'store')] +strip://*[contains(@id,'user-info')] +strip://*[contains(@id,'photo-stats')] +strip://*[contains(@id,'voting_controls_container')] +strip://*[contains(@id,'more-photos')] +strip://*[contains(@id,'embed-photo')] + +# clean blog page side bar strip://*[contains(@class,'col d3 clearafter')] test_url: http://500px.com/photo/3641041?from=editors \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/512pixels.net.txt b/inc/3rdparty/site_config/standard/512pixels.net.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/5by5.tv.txt b/inc/3rdparty/site_config/standard/5by5.tv.txt old mode 100644 new mode 100755 index dce0df4e..59b70a99 --- a/inc/3rdparty/site_config/standard/5by5.tv.txt +++ b/inc/3rdparty/site_config/standard/5by5.tv.txt @@ -1,9 +1,9 @@ -body: 
//*[@id="episode"] -prune: no -tidy: no - -autodetect_next_page: no -strip_id_or_class: player - +body: //*[@id="episode"] +prune: no +tidy: no + +autodetect_next_page: no +strip_id_or_class: player + strip://*[@id="header"] test_url: http://5by5.tv/buildanalyze/60 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/7newsbelize.com.txt b/inc/3rdparty/site_config/standard/7newsbelize.com.txt new file mode 100755 index 00000000..46d09f8e --- /dev/null +++ b/inc/3rdparty/site_config/standard/7newsbelize.com.txt @@ -0,0 +1,7 @@ +title: //*[@id='sstitle'] +body: //div[@id='sstory'] +strip_id_or_class: newsoptions +prune: no + +test_url: http://www.7newsbelize.com/sstory.php?nid=25654 +test_url: http://www.7newsbelize.com/7news.xml \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/944.com.txt b/inc/3rdparty/site_config/standard/944.com.txt old mode 100644 new mode 100755 index 84380e79..8bf6a4c2 --- a/inc/3rdparty/site_config/standard/944.com.txt +++ b/inc/3rdparty/site_config/standard/944.com.txt @@ -1,9 +1,9 @@ -title: //h2[@class='border'] -body: //div[@class='padding'] - -convert_double_br_tags: yes - -strip: //div[@id='social_sharing'] -strip: //div[@class='socialLinks'] +title: //h2[@class='border'] +body: //div[@class='padding'] + +convert_double_br_tags: yes + +strip: //div[@id='social_sharing'] +strip: //div[@class='socialLinks'] test_url: http://www.944.com/articles/mild-obsessions-frock-la-get-to-know-victoria-tik-s-haute-sustainable-fashion-line/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/README.md b/inc/3rdparty/site_config/standard/README.md new file mode 100755 index 00000000..9040ba85 --- /dev/null +++ b/inc/3rdparty/site_config/standard/README.md @@ -0,0 +1,38 @@ +Full-Text RSS site config files +================ + +[Full-Text RSS](http://fivefilters.org/content-only/), our article extraction tool, makes use of site-specific extraction rules to improve results. 
Each time a URL is processed, it checks to see if there are extraction rules for the site being processed. If there are no site patterns, it tries to detect the content block automatically. + +This repository contains the site config files we use in Full-Text RSS. + +### Contributing changes + +We chose GitHub for this set of files because they offer one feature which we hope will make contributing changes easier: [file editing](https://github.com/blog/844-forking-with-the-edit-button) through the web interface. + +You can now make changes to any of our site config files and request that your changes be pulled into the main set we maintain. This is what GitHub calls the Fork and Pull model: + +> The Fork & Pull Model lets anyone fork an existing repository and push changes to their personal fork without requiring access be granted to the source repository. The changes must then be pulled into the source repository by the project maintainer. This model reduces the amount of friction for new contributors and is popular with open source projects because it allows people to work independently without upfront coordination. + +When we receive a pull request we'll review the changes and if everything's okay we'll update our copy. + +If a site is not in our set, you can create a file for it in the same way. See [Creating files on GitHub](https://github.com/blog/1327-creating-files-on-github). + +### How to write a site config file + +The quickest and simplest way is to use our [point-and-click interface](http://siteconfig.fivefilters.org). It's a simple tool only intended to create a rule to extract the correct content block. + +For further refinements, e.g. selecting the title, stripping elements, dealing with multi-page articles, please see our [help page](http://help.fivefilters.org/customer/portal/articles/223153-site-patterns). 
+ +### Instapaper + +When we introduced site patterns, we chose to adopt the [same format](http://blog.instapaper.com/post/730281947) used by Instapaper. This allows us to make use of the existing extraction rules contributed by Instapaper users. + +Marco, Instapaper's creator, graciously opened up the database of contributions to everyone: + +> And, recognizing that your efforts could be useful to a wide range of other tools and services, I'll make the list of all of these site-specific configurations available to the public, free, with no strings attached. + +Most of the extraction rules in our set are borrowed from Instapaper. You can see the list maintained by Instapaper at [instapaper.com/bodytext/](http://instapaper.com/bodytext/) (login required). + +### Testing site config files + +Currently you will have to have a copy of Full-Text RSS to test changes to the site config files. In the future we will try to make this process easier. diff --git a/inc/3rdparty/site_config/standard/aachener-nachrichten.de.txt b/inc/3rdparty/site_config/standard/aachener-nachrichten.de.txt old mode 100644 new mode 100755 index 379592e0..b60c15de --- a/inc/3rdparty/site_config/standard/aachener-nachrichten.de.txt +++ b/inc/3rdparty/site_config/standard/aachener-nachrichten.de.txt @@ -1,10 +1,10 @@ -title: //meta[@property='og:title']/@content -body: //*[@class='fliesstext_detail' or @class='detail_fliesstext'] | //img[@itemprop="image" and starts-with(@src, "/sixcms/media.php/")] - -strip_id_or_class: socialshareprivacy1 -strip_id_or_class: zvaFacebookButton - -tidy: no -prune: no - +title: //meta[@property='og:title']/@content +body: //*[@class='fliesstext_detail' or @class='detail_fliesstext'] | //img[@itemprop="image" and starts-with(@src, "/sixcms/media.php/")] + +strip_id_or_class: socialshareprivacy1 +strip_id_or_class: zvaFacebookButton + +tidy: no +prune: no + test_url: http://www.aachener-nachrichten.de/lokales/aachen-detail-an/2517757 \ No newline at end of file diff 
--git a/inc/3rdparty/site_config/standard/aachener-zeitung.de.txt b/inc/3rdparty/site_config/standard/aachener-zeitung.de.txt old mode 100644 new mode 100755 index 4d76fac7..013afa4c --- a/inc/3rdparty/site_config/standard/aachener-zeitung.de.txt +++ b/inc/3rdparty/site_config/standard/aachener-zeitung.de.txt @@ -1,10 +1,10 @@ -title: //meta[@property='og:title']/@content -body: //*[@class='fliesstext_detail' or @class='detail_fliesstext'] | //img[@itemprop="image" and starts-with(@src, "/sixcms/media.php/")] - -strip_id_or_class: socialshareprivacy1 -strip_id_or_class: zvaFacebookButton - -tidy: no -prune: no - +title: //meta[@property='og:title']/@content +body: //*[@class='fliesstext_detail' or @class='detail_fliesstext'] | //img[@itemprop="image" and starts-with(@src, "/sixcms/media.php/")] + +strip_id_or_class: socialshareprivacy1 +strip_id_or_class: zvaFacebookButton + +tidy: no +prune: no + test_url: http://www.aachener-zeitung.de/sixcms/detail.php?template=az_detail&id=2552718 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/abc.es.txt b/inc/3rdparty/site_config/standard/abc.es.txt old mode 100644 new mode 100755 index a99833de..43aadc49 --- a/inc/3rdparty/site_config/standard/abc.es.txt +++ b/inc/3rdparty/site_config/standard/abc.es.txt @@ -1,7 +1,7 @@ -title: //meta[@property='og:title']/@content -body: //div[@class='datosi' or @class='date' or @class='photo-alt1' or @class='text'] -strip_id_or_class: colB - -prune: no +title: //meta[@property='og:title']/@content +body: //div[@class='datosi' or @class='date' or @class='photo-alt1' or @class='text' or @itemprop='articleBody'] +strip_id_or_class: colB + +prune: no test_url: http://www.abc.es/20120209/tv-series/abci-house-ultima-temporada-201202090936.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/abc.net.au.txt b/inc/3rdparty/site_config/standard/abc.net.au.txt old mode 100644 new mode 100755 index 5e6269cb..22b3a0f4 --- 
a/inc/3rdparty/site_config/standard/abc.net.au.txt +++ b/inc/3rdparty/site_config/standard/abc.net.au.txt @@ -1,10 +1,18 @@ -title: //h1 -author: //div[@class="byline"]/a -date: //span[@class="timestamp"] - -strip: //p[@class="topics"] -strip: //h1 -strip: //div[@class="byline"] -strip: //p[@class="published"] +title: //div[@class='article section']//h1 +author: //div[@class="byline"]/a +date: //span[@class="timestamp"] +body: //div[@class="page section"] + +strip: //a[@class="inline-caption"] +strip: //p[@class="ticker section noprint"] +strip: //p[@class="topics"] +strip: //h1 +strip: //div[@class="byline"] +strip: //p[@class="published"] strip: //div[contains(@class,"featured-scroller")] -test_url: http://www.abc.net.au/news/2011-11-08/crabb-carbon-legislation-abbott-demolition/3652544 \ No newline at end of file +strip_id_or_class: footer + +tidy: no + +test_url: http://www.abc.net.au/news/2013-03-27/open-speed-highways-change-clp-giles/4597892 +test_url: http://www.abc.net.au/news/2013-04-30/credit-growth-remains-subdued/4660054?section=business diff --git a/inc/3rdparty/site_config/standard/abcnews.go.com.txt b/inc/3rdparty/site_config/standard/abcnews.go.com.txt old mode 100644 new mode 100755 index c515d3e4..8d367351 --- a/inc/3rdparty/site_config/standard/abcnews.go.com.txt +++ b/inc/3rdparty/site_config/standard/abcnews.go.com.txt @@ -1,27 +1,27 @@ -title: //h1[@class='headline'] -body: //div[@id='storyText'] -# for video entries -body: //img[@id='ff-img'] | //div[@id='meta']//div[contains(@class, 'overview')] -author: //div[@class='byline'] -date: //div[@class='date'] -strip: //*[@id='date_partner'] - -strip: //div[@class='breadcrumb'] -strip: //div[contains(@class,'show_tools')] -strip: //div[@id='sponsoredByAd'] -strip: //div[contains(@class,'rel_container')] -strip: //p[a[starts-with(@href, 'http://www.twitter.com')]] -strip: //p[a[starts-with(@href, 'http://www.facebook.com')]] -strip: //p[contains(., 'Click here to return to')] -#strip_id_or_class: 
media -strip_id_or_class: mediaplayer - -replace_string(<link rel="image_src" href="http): <img id="ff-img" src="http - -prune: no - -single_page_link: concat(//li[@class='pager']//a/@href, '&singlePage=true') - -test_url: http://abcnews.go.com/Politics/newt-gingrich-rocky-rollout-presidential-campaign-recover/story?id=13632744 -# multi-page +title: //h1[@class='headline'] +body: //div[@id='storyText'] +# for video entries +body: //img[@id='ff-img'] | //div[@id='meta']//div[contains(@class, 'overview')] +author: //div[@class='byline'] +date: //div[@class='date'] +strip: //*[@id='date_partner'] + +strip: //div[@class='breadcrumb'] +strip: //div[contains(@class,'show_tools')] +strip: //div[@id='sponsoredByAd'] +strip: //div[contains(@class,'rel_container')] +strip: //p[a[starts-with(@href, 'http://www.twitter.com')]] +strip: //p[a[starts-with(@href, 'http://www.facebook.com')]] +strip: //p[contains(., 'Click here to return to')] +#strip_id_or_class: media +strip_id_or_class: mediaplayer + +replace_string(<link rel="image_src" href="http): <img id="ff-img" src="http + +prune: no + +single_page_link: concat(//li[@class='pager']//a/@href, '&singlePage=true') + +test_url: http://abcnews.go.com/Politics/newt-gingrich-rocky-rollout-presidential-campaign-recover/story?id=13632744 +# multi-page test_url: http://abcnews.go.com/Blotter/family-freed-american-hostage-somalia-seals-obama/story?id=15439544 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/accesstoinsight.org.txt b/inc/3rdparty/site_config/standard/accesstoinsight.org.txt old mode 100644 new mode 100755 index b5d85079..45d66533 --- a/inc/3rdparty/site_config/standard/accesstoinsight.org.txt +++ b/inc/3rdparty/site_config/standard/accesstoinsight.org.txt @@ -1,9 +1,9 @@ -title: //div[@id='H_docTitle'] - -body: //div[@id='H_meta' or @id='H_content' or @id='F_footer'] - -strip_id_or_class: F_toenail - -prune: no - +title: //div[@id='H_docTitle'] + +body: //div[@id='H_meta' or @id='H_content' 
or @id='F_footer'] + +strip_id_or_class: F_toenail + +prune: no + test_url: http://www.accesstoinsight.org/lib/authors/nyanaponika/wheel026.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/acidcow.com.txt b/inc/3rdparty/site_config/standard/acidcow.com.txt old mode 100644 new mode 100755 index 60ede6a6..21958651 --- a/inc/3rdparty/site_config/standard/acidcow.com.txt +++ b/inc/3rdparty/site_config/standard/acidcow.com.txt @@ -1,3 +1,3 @@ -body: //div[starts-with(@id, 'news-id-')] - +body: //div[starts-with(@id, 'news-id-')] + test_url: http://acidcow.com/fun/20933-acid-picdump-83-pics.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/acquia.com.txt b/inc/3rdparty/site_config/standard/acquia.com.txt old mode 100644 new mode 100755 index 5ddf542e..2803611f --- a/inc/3rdparty/site_config/standard/acquia.com.txt +++ b/inc/3rdparty/site_config/standard/acquia.com.txt @@ -1,9 +1,9 @@ -title://h1[@class="title"] -author://div[@class="submitted"]/span/a -date://div[@class="submitted"]/span -body://div[@class="content-wrapper"] - -strip://div[@id="skip-link"] -strip://div[@id="region-content-3-3"] +title://h1[@class="title"] +author://div[@class="submitted"]/span/a +date://div[@class="submitted"]/span +body://div[@class="content-wrapper"] + +strip://div[@id="skip-link"] +strip://div[@id="region-content-3-3"] strip://div[@id="section-footer"] test_url: https://www.acquia.com/blog/drupals-long-warmth-toward-third-party-code \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/acroswing.fr.txt b/inc/3rdparty/site_config/standard/acroswing.fr.txt old mode 100644 new mode 100755 index 57d86d2f..6b1d67fe --- a/inc/3rdparty/site_config/standard/acroswing.fr.txt +++ b/inc/3rdparty/site_config/standard/acroswing.fr.txt @@ -1,5 +1,5 @@ -tidy:no -date: //time[@class='updated'] -dissolve: //ul[@class='video-gallery']/li +tidy:no +date: //time[@class='updated'] +dissolve: //ul[@class='video-gallery']/li 
dissolve: //ul[@class='video-gallery'] test_url: http://www.acroswing.fr/actualites/competition_rock/selectif_bellegarde_sur_valserine__2012-02-26.php \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/aftenposten.no.txt b/inc/3rdparty/site_config/standard/aftenposten.no.txt new file mode 100755 index 00000000..8a69c357 --- /dev/null +++ b/inc/3rdparty/site_config/standard/aftenposten.no.txt @@ -0,0 +1,5 @@ +title: //h1[@class='articleTitle '] +body: //div[@class='bodyText widget storyContent'] +strip: //p/span[@class='quote']/.. +strip_id_or_class: 'pull1' +test_url: https://www.aftenposten.no/meninger/spaltister/Portrett-av-scenekunstneren-som-ung-mann-7167959.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/aftonbladet.se.txt b/inc/3rdparty/site_config/standard/aftonbladet.se.txt new file mode 100755 index 00000000..b6c576a8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/aftonbladet.se.txt @@ -0,0 +1,13 @@ +author: //article//address[contains(@class, 'author')] +body: //article[.//div[contains(@class, 'abBodyText')]]//*[contains(@class, 'abLeadText') or contains(@class, 'abBodyText') or contains(@class, 'abImageBlock') or contains(@class, 'abIGSatellite')] + +strip: //address//img +strip: //footer +strip_id_or_class: abSticky + +prune: no + +test_url: http://www.aftonbladet.se/sportbladet/hockey/sverige/allsvenskan/article17498194.ab +test_url: http://www.aftonbladet.se/debatt/article16207536.ab +test_url: http://www.aftonbladet.se/debatt/debattamnen/politik/article17483377.ab +test_url: http://www.aftonbladet.se/rss.xml \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/aht.seriouseats.com.txt b/inc/3rdparty/site_config/standard/aht.seriouseats.com.txt old mode 100644 new mode 100755 index 408e9099..b2d88a05 --- a/inc/3rdparty/site_config/standard/aht.seriouseats.com.txt +++ b/inc/3rdparty/site_config/standard/aht.seriouseats.com.txt @@ -1,15 +1,15 @@ -body: 
//div[@id='content'] - -# clean up recipe pages -strip: //h2[@class='fn'] | //h2[@class='double-lined'] | //h3 | //div[@id='threeColumn2'] | //div[@id='threeColumn3'] - -#recipe pages -strip_id_or_class: "recipe-feedback" -strip_id_or_class: "comments" -strip_id_or_class: "procedure-number" -strip_id_or_class: "more-with-author" - -#slice -strip_id_or_class: "inner" +body: //div[@id='content'] + +# clean up recipe pages +strip: //h2[@class='fn'] | //h2[@class='double-lined'] | //h3 | //div[@id='threeColumn2'] | //div[@id='threeColumn3'] + +#recipe pages +strip_id_or_class: "recipe-feedback" +strip_id_or_class: "comments" +strip_id_or_class: "procedure-number" +strip_id_or_class: "more-with-author" + +#slice +strip_id_or_class: "inner" test_url: http://aht.seriouseats.com/archives/2009/12/the-burger-lab-salting-ground-beef.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/albayan.ae.txt b/inc/3rdparty/site_config/standard/albayan.ae.txt new file mode 100755 index 00000000..f6c093d2 --- /dev/null +++ b/inc/3rdparty/site_config/standard/albayan.ae.txt @@ -0,0 +1,6 @@ +body: //div[@id='main-column']//div[@class='content'] + +prune: no + +test_url: http://www.albayan.ae/across-the-uae/education/2013-08-29-1.1949645 +test_url: http://www.albayan.ae/1.448?ot=ot.AjaxPageLayout \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/alex.mullr.net.txt b/inc/3rdparty/site_config/standard/alex.mullr.net.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/alexduner.com.txt b/inc/3rdparty/site_config/standard/alexduner.com.txt new file mode 100755 index 00000000..bd9de9d7 --- /dev/null +++ b/inc/3rdparty/site_config/standard/alexduner.com.txt @@ -0,0 +1,4 @@ +body: //section[@class='content'] +date: //span[1] +author: //h1[@id='sitetitle'] +test_url: https://alexduner.com/blog/2013/1/something-i-learned-today \ No newline at end of file diff --git 
a/inc/3rdparty/site_config/standard/alexduner.squarespace.com.txt b/inc/3rdparty/site_config/standard/alexduner.squarespace.com.txt new file mode 100755 index 00000000..875405e4 --- /dev/null +++ b/inc/3rdparty/site_config/standard/alexduner.squarespace.com.txt @@ -0,0 +1,4 @@ +body: //section[@class='content'] +date: //span[1] +author: //h1[@id='sitetitle'] +test_url: https://alexduner.squarespace.com/blog/2013/1/tech-culture-from-the-outside-looking-in \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/alistapart.com.txt b/inc/3rdparty/site_config/standard/alistapart.com.txt old mode 100644 new mode 100755 index 090f7eb1..7a7096e2 --- a/inc/3rdparty/site_config/standard/alistapart.com.txt +++ b/inc/3rdparty/site_config/standard/alistapart.com.txt @@ -1,12 +1,12 @@ -title: //h1[@class='title'] -author: //h3[@class='byline']/a -date: //div[@class='ishinfo'] - -body: //*[@id='articletext'] -strip_id_or_class: 'ishinfo' -strip_id_or_class: 'metastuff' -strip_id_or_class: 'learnmore' -strip_id_or_class: 'discuss' - +title: //h1[@class='title'] +author: //h3[@class='byline']/a +date: //div[@class='ishinfo'] + +body: //*[@id='articletext'] +strip_id_or_class: 'ishinfo' +strip_id_or_class: 'metastuff' +strip_id_or_class: 'learnmore' +strip_id_or_class: 'discuss' + prune: no test_url: http://www.alistapart.com/articles/organizing-mobile/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/aljazeera.com.txt b/inc/3rdparty/site_config/standard/aljazeera.com.txt old mode 100644 new mode 100755 index 4f0148f4..d3bf4014 --- a/inc/3rdparty/site_config/standard/aljazeera.com.txt +++ b/inc/3rdparty/site_config/standard/aljazeera.com.txt @@ -1,8 +1,8 @@ -title: //span[@id='DetailedTitle'] -body: //td[@id='tdTextContent'] -strip_id_or_class: Skyscrapper_Body -date: //span[@id='ctl00_cphBody_lblDate'] -author: //div[@id="dvAuthorInfo"]//a/text() -strip: //table[ tbody/tr/td/object ] -prune: no +title: //span[@id='DetailedTitle'] +body: 
//td[@id='tdTextContent'] +strip_id_or_class: Skyscrapper_Body +date: //span[@id='ctl00_cphBody_lblDate'] +author: //div[@id="dvAuthorInfo"]//a/text() +strip: //table[ tbody/tr/td/object ] +prune: no test_url: http://www.aljazeera.com/indepth/opinion/2012/01/2012114121925380575.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/allrecipes.com.txt b/inc/3rdparty/site_config/standard/allrecipes.com.txt old mode 100644 new mode 100755 index e9767bda..85dc2a5a --- a/inc/3rdparty/site_config/standard/allrecipes.com.txt +++ b/inc/3rdparty/site_config/standard/allrecipes.com.txt @@ -1,14 +1,14 @@ -title: //h1[@id='itemTitle'] -body: //img[@id="ctl00_CenterColumnPlaceHolder_recipe_photoStuff_imgPhoto"] | //div[@id='ctl00_CenterColumnPlaceHolder_recipe_divSubmitter'] | //div[contains(@class, 'recipe-details-content')] -strip: //div[@class='top-left' or @class='top-right' or @class='bot-left' or @class='bot-right'] -strip: //div[contains(@class, 'rightcoltoolsdiv')] -strip: //div[contains(@class, 'servings-form')] -strip: //p[@class='nutritional-information'] -strip: //a[contains(@class, 'nutritional-information') or contains(@class, 'nutritionanchor')] -strip: //div[@id='nutri-info']/div[contains(@class, 'title')] -strip: //img[@id='ctl00_CenterColumnPlaceHolder_recipe_imgSubmitter'] -strip_id_or_class: eshaAttribute -strip_id_or_class: eshaParagraph -prune: no +title: //h1[@id='itemTitle'] +body: //img[@id="ctl00_CenterColumnPlaceHolder_recipe_photoStuff_imgPhoto"] | //div[@id='ctl00_CenterColumnPlaceHolder_recipe_divSubmitter'] | //div[contains(@class, 'recipe-details-content')] +strip: //div[@class='top-left' or @class='top-right' or @class='bot-left' or @class='bot-right'] +strip: //div[contains(@class, 'rightcoltoolsdiv')] +strip: //div[contains(@class, 'servings-form')] +strip: //p[@class='nutritional-information'] +strip: //a[contains(@class, 'nutritional-information') or contains(@class, 'nutritionanchor')] +strip: 
//div[@id='nutri-info']/div[contains(@class, 'title')] +strip: //img[@id='ctl00_CenterColumnPlaceHolder_recipe_imgSubmitter'] +strip_id_or_class: eshaAttribute +strip_id_or_class: eshaParagraph +prune: no test_url: http://allrecipes.com/Recipe/Taco-Pie/Detail.aspx?src=rotd \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/allthingsd.com.txt b/inc/3rdparty/site_config/standard/allthingsd.com.txt old mode 100644 new mode 100755 index cd52498f..f8c67d02 --- a/inc/3rdparty/site_config/standard/allthingsd.com.txt +++ b/inc/3rdparty/site_config/standard/allthingsd.com.txt @@ -1,10 +1,13 @@ -title://div[@class="article-title"]/h1[@class="title"] -date: //p[@class="article-date"] -body://*[@class="article-body article-text"] -# Trim out related posts at bottom of article -strip://blockquote[@class="memo"] - -# Yup, no idea why author won't work... -author://div[@class="page-header article-header clearfix"]/p[@class="title"] +title://div[@class="article-title"]/h1[@class="title"] +date: //p[@class="article-date"] +body://div[contains(@class, "article-body")] +# Trim out related posts at bottom of article +strip://blockquote[@class="memo"] + +tidy: no + +# Yup, no idea why author won't work... +author://div[@class="page-header article-header clearfix"]/p[@class="title"] # [Marco:] Author won't work here because the page defines the "home" link under the author's name as rel="author", which always gets priority if the page has defined it. 
-test_url: http://allthingsd.com/20120513/exclusive-yahoos-thompson-out-levinsohn-in-board-settlement-with-loeb-nears-completion/ \ No newline at end of file +test_url: http://allthingsd.com/20120513/exclusive-yahoos-thompson-out-levinsohn-in-board-settlement-with-loeb-nears-completion/ +test_url: http://allthingsd.com/20131010/google-cio-ben-fried-on-how-google-works/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/allyou.com.txt b/inc/3rdparty/site_config/standard/allyou.com.txt old mode 100644 new mode 100755 index 3c26c682..a13a7252 --- a/inc/3rdparty/site_config/standard/allyou.com.txt +++ b/inc/3rdparty/site_config/standard/allyou.com.txt @@ -1,8 +1,8 @@ -title: //div[@id='pageHdr']//h1 -body: //div[@id='pageHdr']/*[@class='dek'] | //div[@id='printArticle' or @id='slideShowPrint'] -strip: //div[contains(@class, 'infoBox') or @id='infoBox'] -single_page_link: //li[@id='print']/a - +title: //div[@id='pageHdr']//h1 +body: //div[@id='pageHdr']/*[@class='dek'] | //div[@id='printArticle' or @id='slideShowPrint'] +strip: //div[contains(@class, 'infoBox') or @id='infoBox'] +single_page_link: //li[@id='print']/a + prune: no - + test_url: http://www.allyou.com/budget-home/money-shopping/freebies-online-00400000066392/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/alphabeta.argaam.com.txt b/inc/3rdparty/site_config/standard/alphabeta.argaam.com.txt old mode 100644 new mode 100755 index f5865f89..da1a67bc --- a/inc/3rdparty/site_config/standard/alphabeta.argaam.com.txt +++ b/inc/3rdparty/site_config/standard/alphabeta.argaam.com.txt @@ -1,11 +1,11 @@ -body: //div[@class = 'entry'] -date: substring-after(//p[@class="date"],'بتاريخ ') -strip_id_or_class: date -strip_id_or_class: follow-single -strip_id_or_class: ratingblock -strip_id_or_class: newRatingHolder -strip_id_or_class: postmetadata -strip_id_or_class: addthis_toolbox -strip_id_or_class: addthis_default_style +body: //div[@class = 'entry'] +date: 
substring-after(//p[@class="date"],'بتاريخ ') +strip_id_or_class: date +strip_id_or_class: follow-single +strip_id_or_class: ratingblock +strip_id_or_class: newRatingHolder +strip_id_or_class: postmetadata +strip_id_or_class: addthis_toolbox +strip_id_or_class: addthis_default_style strip_id_or_class: size-full test_url: http://alphabeta.argaam.com/?p=35657 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/alriyadh.com.txt b/inc/3rdparty/site_config/standard/alriyadh.com.txt old mode 100644 new mode 100755 index d0060000..be7c43d5 --- a/inc/3rdparty/site_config/standard/alriyadh.com.txt +++ b/inc/3rdparty/site_config/standard/alriyadh.com.txt @@ -1,9 +1,9 @@ -body: //div[@id = "article-view"] -body: //div[contains(@class, 'article')]//div[contains(@class, 'photo_bg')] -author: //p[@class = "author"] -strip: //h1 -strip: //h2 -strip_id_or_class: author -prune: no -test_url: http://www.alriyadh.com/2011/10/10/article674357.html +body: //div[@id = "article-view"] +body: //div[contains(@class, 'article')]//div[contains(@class, 'photo_bg')] +author: //p[@class = "author"] +strip: //h1 +strip: //h2 +strip_id_or_class: author +prune: no +test_url: http://www.alriyadh.com/2011/10/10/article674357.html test_url: http://www.alriyadh.com/net/article/780935 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/alseraj.net.txt b/inc/3rdparty/site_config/standard/alseraj.net.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/alt1040.com.txt b/inc/3rdparty/site_config/standard/alt1040.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/alternet.org.txt b/inc/3rdparty/site_config/standard/alternet.org.txt new file mode 100755 index 00000000..e92252eb --- /dev/null +++ b/inc/3rdparty/site_config/standard/alternet.org.txt @@ -0,0 +1,4 @@ +single_page_link: //div[contains(@class, 'story_tools')]//a[contains(@href, '/print/')] + +test_url: 
http://www.alternet.org/civil-liberties/noam-chomsky-surveillance-state-beyond-imagination-being-created-one-freest +test_url: http://feeds.feedblitz.com/alternet \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/altfoto.com.txt b/inc/3rdparty/site_config/standard/altfoto.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/alumni.stanford.edu.txt b/inc/3rdparty/site_config/standard/alumni.stanford.edu.txt old mode 100644 new mode 100755 index 7fd47193..a5bd03bf --- a/inc/3rdparty/site_config/standard/alumni.stanford.edu.txt +++ b/inc/3rdparty/site_config/standard/alumni.stanford.edu.txt @@ -1,10 +1,10 @@ -title: //h1 - -author: substring-after(//div[@class="enableBullets"]/preceding-sibling::p[1], "By ") - -date: //div/a[contains (@href, "issue")] - -move_into(//div[@class="enableBullets"]/p): (//div[@id="content"]//img)[1] - +title: //h1 + +author: substring-after(//div[@class="enableBullets"]/preceding-sibling::p[1], "By ") + +date: //div/a[contains (@href, "issue")] + +move_into(//div[@class="enableBullets"]/p): (//div[@id="content"]//img)[1] + body: //div[@class="enableBullets"] test_url: http://alumni.stanford.edu/get/page/magazine/article/?article_id=54819 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/amandala.com.bz.txt b/inc/3rdparty/site_config/standard/amandala.com.bz.txt new file mode 100755 index 00000000..fb0e21b8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/amandala.com.bz.txt @@ -0,0 +1,6 @@ +body: //div[@id='content']//div[contains(@class, 'content')] +strip_id_or_class: widget +strip: //a[contains(@href, 'upm_export=')] + +test_url: http://amandala.com.bz/news/feed/ +test_url: http://amandala.com.bz/news/poor-pse-results-30-raise/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/amazon.com.txt b/inc/3rdparty/site_config/standard/amazon.com.txt old mode 100644 new mode 100755 index 1a23c4b7..cd7ad159 --- 
a/inc/3rdparty/site_config/standard/amazon.com.txt +++ b/inc/3rdparty/site_config/standard/amazon.com.txt @@ -1,19 +1,19 @@ -title: //span[@id = 'btAsinTitle'] -body: (//*[@id='prodImageCell']//a)[1] | //div[@id = 'ps-content'] | //span[@id='actualPriceValue'] | //h2[.='Product Details']/following-sibling::div | //div[@class='h2' and .='Product Description']/following-sibling::div -#strip_id_or_class: quantityDropdownDiv -#strip_id_or_class: addToCartSpan -#strip_id_or_class: oneClickDiv -strip_id_or_class: nocontent -strip_id_or_class: masDynamicConten -strip_id_or_class: dynamic-content -prune: no - -find_string: <span id="actualPriceValue"> -replace_string: <span id="actualPriceValue"><br />Price: - -strip_id_or_class: collapsePS -strip_id_or_class: expandPS -strip_id_or_class: psPlaceHolde -strip: //li[contains(., 'update product info') or contains(., 'give feedback on images')] - +title: //span[@id = 'btAsinTitle'] +body: (//*[@id='prodImageCell']//a)[1] | //div[@id = 'ps-content'] | //span[@id='actualPriceValue'] | //h2[.='Product Details']/following-sibling::div | //div[@class='h2' and .='Product Description']/following-sibling::div +#strip_id_or_class: quantityDropdownDiv +#strip_id_or_class: addToCartSpan +#strip_id_or_class: oneClickDiv +strip_id_or_class: nocontent +strip_id_or_class: masDynamicConten +strip_id_or_class: dynamic-content +prune: no + +find_string: <span id="actualPriceValue"> +replace_string: <span id="actualPriceValue"><br />Price: + +strip_id_or_class: collapsePS +strip_id_or_class: expandPS +strip_id_or_class: psPlaceHolde +strip: //li[contains(., 'update product info') or contains(., 'give feedback on images')] + test_url: http://www.amazon.com/Common-Sense-Forestry-Living-Mother/dp/1931498210/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/americandrink.net.txt b/inc/3rdparty/site_config/standard/americandrink.net.txt old mode 100644 new mode 100755 index dee0e868..7145f3ff --- 
a/inc/3rdparty/site_config/standard/americandrink.net.txt +++ b/inc/3rdparty/site_config/standard/americandrink.net.txt @@ -1,6 +1,6 @@ -title: //div[@class='head']/h2/a -author: //div[@class='head']/a -date: //div[@class='head']/p[@class='date']/a -body: //div[@class='copy'] +title: //div[@class='head']/h2/a +author: //div[@class='head']/a +date: //div[@class='head']/p[@class='date']/a +body: //div[@class='copy'] strip: //p[@class='meta'] test_url: http://americandrink.net/post/10567188712/free-the-hooch \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/americascup.com.txt b/inc/3rdparty/site_config/standard/americascup.com.txt old mode 100644 new mode 100755 index b1673b6a..31723f81 --- a/inc/3rdparty/site_config/standard/americascup.com.txt +++ b/inc/3rdparty/site_config/standard/americascup.com.txt @@ -1,10 +1,10 @@ -title: //div[@class="editorial-content"]/h3 -body: //div[@class="hero-image" or @class="editorial-content"] - -strip: //ul[@class="hero-caption"] -strip_id_or_class: footer - -prune: no -tidy: no - +title: //div[@class="editorial-content"]/h3 +body: //div[@class="hero-image" or @class="editorial-content"] + +strip: //ul[@class="hero-caption"] +strip_id_or_class: footer + +prune: no +tidy: no + test_url: http://www.americascup.com/en/Latest/News/2012/3/Coutts-and-Peyron-tell-transformative-tale-at-Global-Sports-Forum/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/americastestkitchenfeed.com.txt b/inc/3rdparty/site_config/standard/americastestkitchenfeed.com.txt old mode 100644 new mode 100755 index 8bf31ec2..c2b62b5a --- a/inc/3rdparty/site_config/standard/americastestkitchenfeed.com.txt +++ b/inc/3rdparty/site_config/standard/americastestkitchenfeed.com.txt @@ -1,5 +1,5 @@ -title: //h1[@class="post-title"] -author: //span[@class="author"]/a -date: //span[@class="date"] +title: //h1[@class="post-title"] +author: //span[@class="author"]/a +date: //span[@class="date"] body: 
//div[@class="post-content main"] test_url: http://www.americastestkitchenfeed.com/gadgets-and-gear/2012/07/chill-out-with-tovolos-king-cube-silicone-ice-cube-tray/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/amptoons.com.txt b/inc/3rdparty/site_config/standard/amptoons.com.txt new file mode 100755 index 00000000..87547c63 --- /dev/null +++ b/inc/3rdparty/site_config/standard/amptoons.com.txt @@ -0,0 +1,8 @@ +title: //title + +body: //div[@class="entry-content"] + +author: //span[@class="author vcard"] + +date: //span[@class="entry-date"] +test_url: http://www.amptoons.com/blog/2013/03/14/open-thread-and-link-farm-i-hate-being-sick-edition/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/anandtech.com.txt b/inc/3rdparty/site_config/standard/anandtech.com.txt old mode 100644 new mode 100755 index 8067e03c..7d804918 --- a/inc/3rdparty/site_config/standard/anandtech.com.txt +++ b/inc/3rdparty/site_config/standard/anandtech.com.txt @@ -1,11 +1,11 @@ -author: //a[@class='b'][1] -date: substring-after(substring-before(//div, 'Posted in'), ' on ') -strip_image_src: /content/images/globals/ -strip: //h2[. = 'Page 1']/preceding::p -strip: //h2 - -prune: no - -single_page_link: concat('http://www.anandtech.com/print/', substring-after(//meta[@property='og:url']/@content, '/show/')) - +author: //a[@class='b'][1] +date: substring-after(substring-before(//div, 'Posted in'), ' on ') +strip_image_src: /content/images/globals/ +strip: //h2[. 
= 'Page 1']/preceding::p +strip: //h2 + +prune: no + +single_page_link: concat('http://www.anandtech.com/print/', substring-after(//meta[@property='og:url']/@content, '/show/')) + test_url: http://www.anandtech.com/show/5812/eurocom-monster-10-clevos-little-monster/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/androidpolice.com.txt b/inc/3rdparty/site_config/standard/androidpolice.com.txt new file mode 100755 index 00000000..8f9b1a21 --- /dev/null +++ b/inc/3rdparty/site_config/standard/androidpolice.com.txt @@ -0,0 +1,5 @@ +body: //div[@class='post_content'] +date: //div[@class='date_day'] | div[@class='date_month'] + +test_url: http://www.androidpolice.com/2014/03/30/music-boss-for-pebble-can-now-control-playback-and-volume-on-chromecast-content-from-your-smartwatch/ + diff --git a/inc/3rdparty/site_config/standard/andyrutledge.com.txt b/inc/3rdparty/site_config/standard/andyrutledge.com.txt old mode 100644 new mode 100755 index f9ffd3c3..ce31fcf5 --- a/inc/3rdparty/site_config/standard/andyrutledge.com.txt +++ b/inc/3rdparty/site_config/standard/andyrutledge.com.txt @@ -1,9 +1,9 @@ -title: //h2 -author: string('Andy Rutledge') -date: //div[@class='articledate'] -body: //div[@class='copybody'] - -strip: //*[@class='space'] -strip: //*[@class='articleFoot'] - +title: //h2 +author: string('Andy Rutledge') +date: //div[@class='articledate'] +body: //div[@class='copybody'] + +strip: //*[@class='space'] +strip: //*[@class='articleFoot'] + test_url: http://www.andyrutledge.com/hungry-for-a-better-menu.php \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/annatravelling.wordpress.com.txt b/inc/3rdparty/site_config/standard/annatravelling.wordpress.com.txt old mode 100644 new mode 100755 index a5c7c08a..2d8937f7 --- a/inc/3rdparty/site_config/standard/annatravelling.wordpress.com.txt +++ b/inc/3rdparty/site_config/standard/annatravelling.wordpress.com.txt @@ -1,9 +1,9 @@ -title: //h1[@class="title"] - -author: ("Anna 
Manasova") -# is ignored, unfortunately - -date: //p[@class="date"] - +title: //h1[@class="title"] + +author: ("Anna Manasova") +# is ignored, unfortunately + +date: //p[@class="date"] + body: //div[@class="entry"] test_url: http://annatravelling.wordpress.com/2011/11/07/a-day-of-cooking-thai/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/applature.com.txt b/inc/3rdparty/site_config/standard/applature.com.txt old mode 100644 new mode 100755 index a78a6150..a820bba4 --- a/inc/3rdparty/site_config/standard/applature.com.txt +++ b/inc/3rdparty/site_config/standard/applature.com.txt @@ -1,18 +1,18 @@ -title: //h1[contains(@class, 'title')# -body: //div[@id='mainContent']//div[contains(@class, 'section_content')] | //ul[@class='section_footer'] -date: //div[@class='date'] - -strip_id_or_class: sharethis -strip_id_or_class: stats -strip_id_or_class: apply_form -strip_id_or_class: job_map -strip_id_or_class: respond -strip: //h1//span[@class='type'] -strip: //li[@class='print' or @class='map'] - -replace_string(<ul class="section_footer" style="display): <ul class="section_footer" style="display-bla - -prune: no -tidy: no - +title: //h1[contains(@class, 'title')# +body: //div[@id='mainContent']//div[contains(@class, 'section_content')] | //ul[@class='section_footer'] +date: //div[@class='date'] + +strip_id_or_class: sharethis +strip_id_or_class: stats +strip_id_or_class: apply_form +strip_id_or_class: job_map +strip_id_or_class: respond +strip: //h1//span[@class='type'] +strip: //li[@class='print' or @class='map'] + +replace_string(<ul class="section_footer" style="display): <ul class="section_footer" style="display-bla + +prune: no +tidy: no + test_url: http://applature.com/mining-jobs/jobs/nickel-west-leinster-analytical-laboratory-technician/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/apple.com.txt b/inc/3rdparty/site_config/standard/apple.com.txt old mode 100644 new mode 100755 index 4c483955..a54dccc8 --- 
a/inc/3rdparty/site_config/standard/apple.com.txt +++ b/inc/3rdparty/site_config/standard/apple.com.txt @@ -1,7 +1,7 @@ -strip: //p[@class='sosumi'] -# Aren't they witty? - -# I can't work out what causes the before the title. -title: //h1[@class='title'] -strip: //h1[@class='title'] +strip: //p[@class='sosumi'] +# Aren't they witty? + +# I can't work out what causes the before the title. +title: //h1[@class='title'] +strip: //h1[@class='title'] test_url: http://www.apple.com/pr/library/2011/02/15appstore.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/appledaily.com.tw.txt b/inc/3rdparty/site_config/standard/appledaily.com.tw.txt new file mode 100755 index 00000000..82d6f376 --- /dev/null +++ b/inc/3rdparty/site_config/standard/appledaily.com.tw.txt @@ -0,0 +1,4 @@ +body: //div[contains(@class, 'articulum')] + +test_url: http://www.appledaily.com.tw/realtimenews/article/new/20140120/330479 +test_url: http://www.appledaily.com.tw/rss/create/kind/rnews/type/new/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/appleinsider.com.txt b/inc/3rdparty/site_config/standard/appleinsider.com.txt old mode 100644 new mode 100755 index 279fbce1..5ae1050b --- a/inc/3rdparty/site_config/standard/appleinsider.com.txt +++ b/inc/3rdparty/site_config/standard/appleinsider.com.txt @@ -1,11 +1,23 @@ -title: //p[@class='title'] - -author: //p[text() = 'By ']/a/text() -strip: //p[text() = 'By '] - -body: //td[@class='bod'] -strip_id_or_class: title -strip_id_or_class: minor - -strip_id_or_class: multipagefooter -test_url: http://www.appleinsider.com/articles/12/02/29/inside_os_x_108_mountain_lion_safari_52_gets_a_simplified_user_interface_with_new_sharing_features.html \ No newline at end of file +title: //h1[@class="art-head"] + +author: //p[contains(@class, 'byline')]/a +#author: //p[text() = 'By ']/a/text() +#strip: //p[text() = 'By '] + +date: //p[contains(@class, 'date-header')] + +body: //div[@class="article"] 
+strip_id_or_class: lazy +#strip_id_or_class: minor +strip_id_or_class: multipagefooter +strip_id_or_class: date-header +strip_id_or_class: byline + +find_string: <noscript> +replace_string: <div> +find_string: </noscript> +replace_string: </div> + +test_url: http://www.appleinsider.com/articles/12/02/29/inside_os_x_108_mountain_lion_safari_52_gets_a_simplified_user_interface_with_new_sharing_features.html +test_url: http://appleinsider.com/articles/13/10/03/goldee-companion-app-for-philips-hue-bulbs-offers-shifting-dynamic-light-scenes +test_url: http://appleinsider.com/appleinsider.rss \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/appleweblog.com.txt b/inc/3rdparty/site_config/standard/appleweblog.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/archdaily.com.txt b/inc/3rdparty/site_config/standard/archdaily.com.txt old mode 100644 new mode 100755 index 9476cf56..0178639e --- a/inc/3rdparty/site_config/standard/archdaily.com.txt +++ b/inc/3rdparty/site_config/standard/archdaily.com.txt @@ -1,5 +1,5 @@ -date: //div[@class='post_date'] - -body: //div[@class='post_content'] +date: //div[@class='post_date'] + +body: //div[@class='post_content'] test_url: http://www.archdaily.com/185325/p10-mixed-use-building-studio-up \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/archiveofourown.org.txt b/inc/3rdparty/site_config/standard/archiveofourown.org.txt old mode 100644 new mode 100755 index 50ff632d..579de517 --- a/inc/3rdparty/site_config/standard/archiveofourown.org.txt +++ b/inc/3rdparty/site_config/standard/archiveofourown.org.txt @@ -1,18 +1,22 @@ -# Description: Fix XPaths to include ALL chapters on 'view_full_work' pages. -# Include: work meta, summary, chapter information, and notes which Instapaper strips out on default. -# Exclude: header, footer, navigation, comments. -# Notes: User is a newbie with XPaths. 
- -title: //h2[@class='title'] -author: //h3[@class='byline'] -author: //a[@class='login author'] - -strip_id_or_class:header -strip_id_or_class:navigation -strip_id_or_class:feedback -strip_id_or_class:kudos -strip_id_or_class:add_comment_placeholder -strip_id_or_class:add_comment -strip_id_or_class:globalize +# Description: Fix XPaths to include ALL chapters on 'view_full_work' pages. +# Include: work meta, summary, chapter information, and notes which Instapaper strips out on default. +# Exclude: header, footer, navigation, comments. +# Notes: User is a newbie with XPaths. + +title: //h2[@class='title'] +author: //h3[@class='byline'] +author: //a[@class='login author'] + +strip_id_or_class:header +strip_id_or_class:navigation +strip_id_or_class:feedback +strip_id_or_class:kudos +strip_id_or_class:add_comment_placeholder +strip_id_or_class:add_comment +strip_id_or_class:globalize strip_id_or_class:footer -test_url: http://archiveofourown.org/works/229402?view_full_work=true \ No newline at end of file + +single_page_link: //div[@id='main']//a[contains(@href, 'view_adult=true')] + +test_url: http://archiveofourown.org/works/229402?view_full_work=true +test_url: http://archiveofourown.org/works/750111/chapters/1399929 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/arstechnica.com.txt b/inc/3rdparty/site_config/standard/arstechnica.com.txt old mode 100644 new mode 100755 index 49bb3dbc..767f6800 --- a/inc/3rdparty/site_config/standard/arstechnica.com.txt +++ b/inc/3rdparty/site_config/standard/arstechnica.com.txt @@ -1,16 +1,17 @@ -author: //p[@class='byline']/a -body: //div[contains(@class,'article-content')] -strip: //h2[@class='title'] -strip_id_or_class: byline -prune: no - -date: //div[@class='byline']/span[@class='posted']//abbr/@original-title -date: //div[@class='byline']/span[@class='posted']//abbr - -title: //div[@id='story']//h2[@class='title'] - -strip: //div[@class='pager'] -next_page_link: //nav//a[span/@class='next']/@href 
- -test_url: http://arstechnica.com/tech-policy/news/2012/02/gigabit-internet-for-80-the-unlikely-success-of-californias-sonicnet.ars -test_url: http://arstechnica.com/apple/2005/04/macosx-10-4/ \ No newline at end of file +author: //p[@class='byline']/a +body: //div[contains(@class,'article-content')] +strip: //h2[@class='title'] +strip_id_or_class: byline +strip_id_or_class: story-sidebar +prune: no + +date: //div[@class='byline']/span[@class='posted']//abbr/@original-title +date: //div[@class='byline']/span[@class='posted']//abbr + +title: //div[@id='story']//h2[@class='title'] + +strip: //div[@class='pager'] +next_page_link: //nav//a[span/@class='next']/@href + +test_url: http://arstechnica.com/tech-policy/news/2012/02/gigabit-internet-for-80-the-unlikely-success-of-californias-sonicnet.ars +test_url: http://arstechnica.com/apple/2005/04/macosx-10-4/ diff --git a/inc/3rdparty/site_config/standard/articles.boston.com.txt b/inc/3rdparty/site_config/standard/articles.boston.com.txt old mode 100644 new mode 100755 index e54423be..73bcdb4e --- a/inc/3rdparty/site_config/standard/articles.boston.com.txt +++ b/inc/3rdparty/site_config/standard/articles.boston.com.txt @@ -1,6 +1,6 @@ -title: //div[@class="mod-bostonarticleheader mod-articleheader"]/h1 -author: substring-after(//div[@class="mod-bostonarticlebyline mod-articlebyline"]/span[3],"By ") -date: //div[@class="mod-bostonarticlebyline mod-articlebyline"]/span[@class="pubdate"] - +title: //div[@class="mod-bostonarticleheader mod-articleheader"]/h1 +author: substring-after(//div[@class="mod-bostonarticlebyline mod-articlebyline"]/span[3],"By ") +date: //div[@class="mod-bostonarticlebyline mod-articlebyline"]/span[@class="pubdate"] + strip_id_or_class: mod-pagination test_url: http://articles.boston.com/2011-10-23/news/30313691_1_bigfoot-free-speech-monadnock-state-park \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/articles.courant.com.txt 
b/inc/3rdparty/site_config/standard/articles.courant.com.txt old mode 100644 new mode 100755 index a08f2041..984d81de --- a/inc/3rdparty/site_config/standard/articles.courant.com.txt +++ b/inc/3rdparty/site_config/standard/articles.courant.com.txt @@ -1,11 +1,11 @@ -title: //div[@class="mod-courantarticleheader mod-articleheader"]/h1 -date: //div[@class="mod-courantarticlebyline mod-articlebyline"]/span[@class="pubdate"] -author: //div[@class="mod-courantarticlebyline mod-articlebyline"]/span[3] - -strip_id_or_class: mod-article-byline -strip_id_or_class: mod-article-header -strip_id_or_class: mod-article-subtitle -#This leaves some crud after the article, but it's better than nothing. -#It would be ideal if we could set the body to every element matching //div[contains(@class, "mod-articletext")]/p, but it seems like body only takes the first matching element. +title: //div[@class="mod-courantarticleheader mod-articleheader"]/h1 +date: //div[@class="mod-courantarticlebyline mod-articlebyline"]/span[@class="pubdate"] +author: //div[@class="mod-courantarticlebyline mod-articlebyline"]/span[3] + +strip_id_or_class: mod-article-byline +strip_id_or_class: mod-article-header +strip_id_or_class: mod-article-subtitle +#This leaves some crud after the article, but it's better than nothing. +#It would be ideal if we could set the body to every element matching //div[contains(@class, "mod-articletext")]/p, but it seems like body only takes the first matching element. 
test_url: http://articles.courant.com/2011-10-22/news/hc-green-drugsearch--1022-20111022_1_drugs-in-student-lockers-police-dogs-lockdown \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/articles.washingtonpost.com.txt b/inc/3rdparty/site_config/standard/articles.washingtonpost.com.txt new file mode 100755 index 00000000..a76c2d02 --- /dev/null +++ b/inc/3rdparty/site_config/standard/articles.washingtonpost.com.txt @@ -0,0 +1,11 @@ +body: //div[contains(@class, "article_body")] +# print view +body: //div[@id='print_facet']//div[@id='body'] + +tidy: no +prune: no + +single_page_link: concat(substring-before(//div[@id="echo_container_a"]/@guid, '_story.html'), '_print.html') + +test_url: http://articles.washingtonpost.com/2011-10-22/world/35279694_1_germany-acts-german-leaders-chancellor-angela-merkel +test_url: http://articles.washingtonpost.com/2013-05-31/opinions/39658000_1_chemical-weapons-mass-destruction-cartels \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/asahi.com.txt b/inc/3rdparty/site_config/standard/asahi.com.txt old mode 100644 new mode 100755 index 2562edb9..b4eec7bd --- a/inc/3rdparty/site_config/standard/asahi.com.txt +++ b/inc/3rdparty/site_config/standard/asahi.com.txt @@ -1,3 +1,3 @@ -body: //div[@id='HeadLine'] +body: //div[@id='HeadLine'] strip: //div[@id='utility_right'] test_url: http://www.asahi.com/culture/update/0520/TKY201105200321.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/ascarter.net.txt b/inc/3rdparty/site_config/standard/ascarter.net.txt old mode 100644 new mode 100755 index 5236d09e..0327e846 --- a/inc/3rdparty/site_config/standard/ascarter.net.txt +++ b/inc/3rdparty/site_config/standard/ascarter.net.txt @@ -1,5 +1,5 @@ -title: //h1[@class='article_title'] -author: //span[@class='author'] -date: //h2[@class='dateline'] +title: //h1[@class='article_title'] +author: //span[@class='author'] +date: //h2[@class='dateline'] body: 
//div[@class='article_body'] test_url: http://ascarter.net/2012/02/20/enough-is-enough.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/astronews.com.txt b/inc/3rdparty/site_config/standard/astronews.com.txt old mode 100644 new mode 100755 index 33e8153d..8de22270 --- a/inc/3rdparty/site_config/standard/astronews.com.txt +++ b/inc/3rdparty/site_config/standard/astronews.com.txt @@ -1,7 +1,7 @@ -title: //span[@class='titel'] -author: //span[@class='metadaten_C']/a//span[@class='metadaten_C'] -date: substring-after(//span[@class='metadaten_C'],'astronews.com') -strip: //span[@class='bu'] -strip_image_src: '/_images/' +title: //span[@class='titel'] +author: //span[@class='metadaten_C']/a//span[@class='metadaten_C'] +date: substring-after(//span[@class='metadaten_C'],'astronews.com') +strip: //span[@class='bu'] +strip_image_src: '/_images/' test_url: http://www.astronews.com/news/artikel/2011/10/1110-021.shtml \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/asymco.com.txt b/inc/3rdparty/site_config/standard/asymco.com.txt old mode 100644 new mode 100755 index adad5f18..f639b048 --- a/inc/3rdparty/site_config/standard/asymco.com.txt +++ b/inc/3rdparty/site_config/standard/asymco.com.txt @@ -1,8 +1,8 @@ -# Johannes St�hler - -title://h2 -author://span[@class='meta-content'] -date://abbr[@class='date published']/@title -body://div[@class='entry-content'] +# Johannes Stühler + +title://h2 +author://span[@class='meta-content'] +date://abbr[@class='date published']/@title +body://div[@class='entry-content'] test_url: http://www.asymco.com/2011/01/14/is-android-more-efficient-than-ios-at-generating-search-revenue/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/autoblog.com.txt b/inc/3rdparty/site_config/standard/autoblog.com.txt old mode 100644 new mode 100755 index 58681bf9..291db992 --- a/inc/3rdparty/site_config/standard/autoblog.com.txt +++ 
b/inc/3rdparty/site_config/standard/autoblog.com.txt @@ -1,6 +1,6 @@ -prune: no -body: //div[@class='post-body'] -author: //p[@class='byline']//a -date: substring-after(//div[@class='about']/p[2], 'Posted') +prune: no +body: //div[@class='post-body'] +author: //p[@class='byline']//a +date: substring-after(//div[@class='about']/p[2], 'Posted') strip: //div[@class='body']/div[@class='meta'] test_url: http://www.autoblog.com/2012/01/17/next-gen-bmw-x5-caught-again/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/avclub.com.txt b/inc/3rdparty/site_config/standard/avclub.com.txt old mode 100644 new mode 100755 index 776ee108..c365a7aa --- a/inc/3rdparty/site_config/standard/avclub.com.txt +++ b/inc/3rdparty/site_config/standard/avclub.com.txt @@ -1,4 +1,4 @@ -author: //*[@id="article_wrapper"]/div[1]/a[1] -body: //*[@id="article_wrapper"]/div[2] +author: //*[@id="article_wrapper"]/div[1]/a[1] +body: //*[@id="article_wrapper"]/div[2] date: //*[@id="article_wrapper"]/div[1]/text()[2] test_url: http://www.avclub.com/articles/forgetmenot,70904 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/baltimoresun.com.txt b/inc/3rdparty/site_config/standard/baltimoresun.com.txt old mode 100644 new mode 100755 index 32adff8d..35b62427 --- a/inc/3rdparty/site_config/standard/baltimoresun.com.txt +++ b/inc/3rdparty/site_config/standard/baltimoresun.com.txt @@ -1,12 +1,12 @@ -single_page_link: //div[@class='toppaginate']//a[@rel='nofollow'] -convert_double_br_tags: yes - -title: //div[@class="story"]/h1 -body: //div[@id="story-body-text"] -author: //span[@class="byline"] -date: //p[@class="date"] - -strip: //*[@class='all'] -strip: //*[@class='articlerail'] +single_page_link: //div[@class='toppaginate']//a[@rel='nofollow'] +convert_double_br_tags: yes + +title: //div[@class="story"]/h1 +body: //div[@id="story-body-text"] +author: //span[@class="byline"] +date: //p[@class="date"] + +strip: //*[@class='all'] +strip: 
//*[@class='articlerail'] test_url: http://www.baltimoresun.com/news/maryland/bs-md-omalley-budget-2-20120116,0,5340585.story \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/baseballprospectus.com.txt b/inc/3rdparty/site_config/standard/baseballprospectus.com.txt new file mode 100755 index 00000000..1207b343 --- /dev/null +++ b/inc/3rdparty/site_config/standard/baseballprospectus.com.txt @@ -0,0 +1,13 @@ +title: //h1[@class='title'] +author: //p[@class="author"]/a[1] +body: //div[@class="article"] +date: //p[@class="date"] + +# remove user tools +strip: //div[@class='tools'] +strip: //h1 +strip: //h2[@class='subtitle'] +strip: //p[@class='author'] +strip: //p[@class='date'] + +test_url: http://www.baseballprospectus.com/article.php?articleid=18463 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/basicthinking.de.txt b/inc/3rdparty/site_config/standard/basicthinking.de.txt old mode 100644 new mode 100755 index ab583145..f08c1f26 --- a/inc/3rdparty/site_config/standard/basicthinking.de.txt +++ b/inc/3rdparty/site_config/standard/basicthinking.de.txt @@ -1,7 +1,7 @@ -title: //h2 -date: //span[@class='date'] -body: //div[@class='entry'] - -strip: //div[@class='zusatz'] +title: //h2 +date: //span[@class='date'] +body: //div[@class='entry'] + +strip: //div[@class='zusatz'] test_url: http://www.basicthinking.de/blog/2011/12/13/sagt-social-networks-adieu-begrust-private-networks/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/bb.is.txt b/inc/3rdparty/site_config/standard/bb.is.txt old mode 100644 new mode 100755 index eaafaf18..57f7fdfa --- a/inc/3rdparty/site_config/standard/bb.is.txt +++ b/inc/3rdparty/site_config/standard/bb.is.txt @@ -1,13 +1,13 @@ -author: substring(//h3[@class='headlines']/span[@class='dates'],0,string-length(//h3[@class='headlines']/span[@class='dates'])-20) - - -date: 
substring((//h3[@class='headlines']/span[@class='dates']),string-length(//h3[@class='headlines']/span[@class='dates'])-18,12) - - -body: //div[@class='first-article-big'] -strip: //table[@class='newsimagecontainer'] -strip: //h3[@class='headlines'] -strip: //iframe[@class='headlines'] -strip: //a[@class='newslink'] +author: substring(//h3[@class='headlines']/span[@class='dates'],0,string-length(//h3[@class='headlines']/span[@class='dates'])-20) + + +date: substring((//h3[@class='headlines']/span[@class='dates']),string-length(//h3[@class='headlines']/span[@class='dates'])-18,12) + + +body: //div[@class='first-article-big'] +strip: //table[@class='newsimagecontainer'] +strip: //h3[@class='headlines'] +strip: //iframe[@class='headlines'] +strip: //a[@class='newslink'] convert_double_br_tags: yes test_url: http://bb.is/Pages/82?NewsID=174119 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/bbc.co.uk.txt b/inc/3rdparty/site_config/standard/bbc.co.uk.txt old mode 100644 new mode 100755 index 9c5c3419..ef1f491a --- a/inc/3rdparty/site_config/standard/bbc.co.uk.txt +++ b/inc/3rdparty/site_config/standard/bbc.co.uk.txt @@ -1,32 +1,42 @@ -body: //div[@class="story-body"] -title: //h1[@class="story-header"] -date: //span[@class="story-date"]/span[@class='date'] - -# recipes, e.g. 
http://www.bbc.co.uk/food/recipes/mymincepies_71055 -body: //div[contains(@class, 'hrecipe')]//div[@id='subcolumn-1'] - -#strip: //div[@class="story-feature narrow"] -#strip: //div[@class="story-feature wide"] -#strip: //div[@class="story-feature dslideshow-enclosure"] -strip: //div[contains(@class, "story-feature")] -strip: //span[@class="story-date"] -#strip: //div[@class="caption body-narrow-width"] -strip: //div[@class="warning"]//p -strip: //div[@id='page-bookmark-links-head'] -strip: //object -strip: //div[contains(@class, "bbccom_advert_placeholder")] -strip: //div[contains(@class, "embedded-hyper")] -strip: //div[contains(@class, 'market-data')] -strip: //a[contains(@class, 'hidden')] -strip: //div[contains(@class, 'hypertabs')] -strip: //div[contains(@class, 'related')] -strip: //form[@id='comment-form'] -strip: //div[contains(@class, 'comment-introduction')] - -replace_string(<noscript>): <div> -replace_string(</noscript>): </div> - -prune: no - -dissolve: //h2 -test_url: http://www.bbc.co.uk/news/business-15060862 \ No newline at end of file +body: //div[@class="story-body"] +# for video entries +body: //div[contains(@class, "videoInStory") or @id="meta-information"] +title: //h1[@class="story-header"] +date: //span[@class="story-date"]/span[@class='date'] +# for sport site +date: //meta[@name='DCTERMS.created']/@content +author: //div[@id='headline']//span[@class='byline-name'] + +# recipes, e.g. 
http://www.bbc.co.uk/food/recipes/mymincepies_71055 +body: //div[contains(@class, 'hrecipe')]//div[@id='subcolumn-1'] + +#strip: //div[@class="story-feature narrow"] +#strip: //div[@class="story-feature wide"] +#strip: //div[@class="story-feature dslideshow-enclosure"] +strip: //div[contains(@class, "story-feature")] +strip: //span[@class="story-date"] +#strip: //div[@class="caption body-narrow-width"] +strip: //div[@class="warning"]//p +strip: //div[@id='page-bookmark-links-head'] +strip: //object +strip: //div[contains(@class, "bbccom_advert_placeholder")] +strip: //div[contains(@class, "embedded-hyper")] +strip: //div[contains(@class, 'market-data')] +strip: //a[contains(@class, 'hidden')] +strip: //div[contains(@class, 'hypertabs')] +strip: //div[contains(@class, 'related')] +strip: //form[@id='comment-form'] +strip: //div[contains(@class, 'comment-introduction')] +strip: //div[contains(@class, 'share-tools')] +strip: //div[@id='also-related-links'] + +replace_string(<noscript>): <div> +replace_string(</noscript>): </div> + +prune: no + +dissolve: //h2 +test_url: http://www.bbc.co.uk/sport/0/football/23224017 +test_url: http://www.bbc.co.uk/news/business-15060862 +# video entry +test_url: http://www.bbc.co.uk/news/world-asia-22056933 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/bbcgoodfood.com.txt b/inc/3rdparty/site_config/standard/bbcgoodfood.com.txt new file mode 100755 index 00000000..1547d625 --- /dev/null +++ b/inc/3rdparty/site_config/standard/bbcgoodfood.com.txt @@ -0,0 +1,16 @@ +title: //header//h1 +#body: //article[contains(@class, 'node-full')] +body: //div[contains(@class, 'recipe-details') or contains(@class, 'tips-carousel')] | //section[@id='recipe-ingredients' or @id='recipe-method'] + +strip_id_or_class: recipe-rating-wrapper +strip_id_or_class: magazine-subcribe-header +strip_id_or_class: hide +strip_id_or_class: recipe-actions +strip_id_or_class: buy-ingredients +strip_id_or_class: related-content 
+strip_id_or_class: recipe-magazine-ad +strip_id_or_class: copy-right + +prune: no + +test_url: http://www.bbcgoodfood.com/recipes/1131634/minced-beef-wellington \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/benoitmaison.org.txt b/inc/3rdparty/site_config/standard/benoitmaison.org.txt old mode 100644 new mode 100755 index f341d593..72c1baed --- a/inc/3rdparty/site_config/standard/benoitmaison.org.txt +++ b/inc/3rdparty/site_config/standard/benoitmaison.org.txt @@ -1,16 +1,16 @@ -body: //div[@class="entry-content"] - -# Remove text ‘Tweet’ -strip: //div[@class="entry-content"]/div[last()] - -title: h1[@class="entry-title"] - -# If the Instapaper text parser worked with HTML5 tags, we would use: -date: //time[@class="entry-date"] - -# But since it does not, use this more complicated rule: -date: //div[@class="entry-meta"]/a[@rel="bookmark"] - -# Unfortunately, the following rule is overridden by the automatically found author. +body: //div[@class="entry-content"] + +# Remove text ‘Tweet’ +strip: //div[@class="entry-content"]/div[last()] + +title: h1[@class="entry-title"] + +# If the Instapaper text parser worked with HTML5 tags, we would use: +date: //time[@class="entry-date"] + +# But since it does not, use this more complicated rule: +date: //div[@class="entry-meta"]/a[@rel="bookmark"] + +# Unfortunately, the following rule is overridden by the automatically found author. 
author: ("Benoit Maison") test_url: http://www.benoitmaison.org/2011/12/06/why-siri-had-to-start-in-beta/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/berlingske.dk.txt b/inc/3rdparty/site_config/standard/berlingske.dk.txt old mode 100644 new mode 100755 index 607c998d..9f8c41c6 --- a/inc/3rdparty/site_config/standard/berlingske.dk.txt +++ b/inc/3rdparty/site_config/standard/berlingske.dk.txt @@ -1,3 +1,3 @@ -title: //h1[@class='headline'] +title: //h1[@class='headline'] body: //div[contains(@class, 'article-wrapper')] test_url: http://www.berlingske.dk/danmark/festen-er-flyttet-nordpaa \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/bernama.com.txt b/inc/3rdparty/site_config/standard/bernama.com.txt new file mode 100755 index 00000000..fdc04b7f --- /dev/null +++ b/inc/3rdparty/site_config/standard/bernama.com.txt @@ -0,0 +1,5 @@ +body: //div[contains(@class, "NewsText"] +prune: no + +test_url: http://www.bernama.com/bernama/v7/rss/english.php +test_url: http://www.bernama.com/bernama/v7/newsindex.php?id=943513 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/betabeat.com.txt b/inc/3rdparty/site_config/standard/betabeat.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/betanews.com.txt b/inc/3rdparty/site_config/standard/betanews.com.txt old mode 100644 new mode 100755 index 0eaf085e..90a54a23 --- a/inc/3rdparty/site_config/standard/betanews.com.txt +++ b/inc/3rdparty/site_config/standard/betanews.com.txt @@ -1,7 +1,7 @@ -# some articles at this site like this one doesn't -# seem to pick up the article body via normal -# processing, other articles come through fine -# http://www.betanews.com/joewilcox/article -# /Google-is-a-marketing-sensation/1309708375 +# some articles at this site like this one doesn't +# seem to pick up the article body via normal +# processing, other articles come through fine +# http://www.betanews.com/joewilcox/article +# 
/Google-is-a-marketing-sensation/1309708375 body: //*[@id="article"] test_url: http://www.betanews.com/joewilcox/article/Google-is-a-marketing-sensation/1309708375 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/biography.com.txt b/inc/3rdparty/site_config/standard/biography.com.txt old mode 100644 new mode 100755 index dc071299..e431037a --- a/inc/3rdparty/site_config/standard/biography.com.txt +++ b/inc/3rdparty/site_config/standard/biography.com.txt @@ -1,8 +1,8 @@ -title: //div[contains(@class, 'main-content')]//h1 -body: //div[@class='summary-column'] | //div[contains(@class, 'main-content')] - -prune: no - -single_page_link: //div[@id='biography-action-links']//a[contains(@href, '/print/')] +title: //div[contains(@class, 'main-content')]//h1 +body: //div[@class='summary-column'] | //div[contains(@class, 'main-content')] + +prune: no + +single_page_link: //div[@id='biography-action-links']//a[contains(@href, '/print/')] test_url: http://www.biography.com/print/profile/martin-luther-9389283 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/bitelia.com.txt b/inc/3rdparty/site_config/standard/bitelia.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/bizjournals.com.txt b/inc/3rdparty/site_config/standard/bizjournals.com.txt new file mode 100755 index 00000000..cfba766f --- /dev/null +++ b/inc/3rdparty/site_config/standard/bizjournals.com.txt @@ -0,0 +1,13 @@ +date: //meta[@name='publish-date']/@content +body: //div[contains(@class, 'articleContentWrapper')] +prune: no + +strip: //div[contains(@class, 'staff_info')]//dd[contains(., 'Twitter')] + +strip_id_or_class: related_content +strip_id_or_class: enlarge +strip_id_or_class: photoBy +strip_id_or_class: older + +test_url: http://www.bizjournals.com/cincinnati/news/2013/10/03/harris-teeter-shareholders-vote-on.html +test_url: http://feeds.bizjournals.com/industry_20?format=xml \ No newline at end of file diff --git 
a/inc/3rdparty/site_config/standard/bjango.com.txt b/inc/3rdparty/site_config/standard/bjango.com.txt old mode 100644 new mode 100755 index 6cb04631..0fed5526 --- a/inc/3rdparty/site_config/standard/bjango.com.txt +++ b/inc/3rdparty/site_config/standard/bjango.com.txt @@ -1,7 +1,7 @@ -title: //h1[@class='articlehead'] -body: //div[@class='column'] -strip: //h1 -strip: //div[@class='help'] - +title: //h1[@class='articlehead'] +body: //div[@class='column'] +strip: //h1 +strip: //div[@class='help'] + #no author or date/time provided in current layout test_url: http://bjango.com/articles/actions/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/blog.arsln.org.txt b/inc/3rdparty/site_config/standard/blog.arsln.org.txt old mode 100644 new mode 100755 index 1f43f490..7ac8cc11 --- a/inc/3rdparty/site_config/standard/blog.arsln.org.txt +++ b/inc/3rdparty/site_config/standard/blog.arsln.org.txt @@ -1,8 +1,8 @@ -tidy: no -prune: no -date: //article/header/h6/time -title: //article/header/h3 -author: //meta[@name='author']/@content -body: //article//post +tidy: no +prune: no +date: //article/header/h6/time +title: //article/header/h3 +author: //meta[@name='author']/@content +body: //article//post test_url: http://blog.arsln.org/aska-ayip-oluyor/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/blog.asmartbear.com.txt b/inc/3rdparty/site_config/standard/blog.asmartbear.com.txt old mode 100644 new mode 100755 index 81c3bda6..78d7f516 --- a/inc/3rdparty/site_config/standard/blog.asmartbear.com.txt +++ b/inc/3rdparty/site_config/standard/blog.asmartbear.com.txt @@ -1,7 +1,7 @@ -title: //title -author: //span[@class='author vcard']/a -date: //p[@class='headline_meta']/abbr[@class='published'] -body: //div[@class='format_text entry-content'] - +title: //title +author: //span[@class='author vcard']/a +date: //p[@class='headline_meta']/abbr[@class='published'] +body: //div[@class='format_text entry-content'] + strip: 
//div[@id='dd_ajax_float'] test_url: http://blog.asmartbear.com/how-to-get-quality-freelance-graphics-design-work-on-a-budget.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/blog.cloudflare.com.txt b/inc/3rdparty/site_config/standard/blog.cloudflare.com.txt old mode 100644 new mode 100755 index a4c5aaea..db80a35f --- a/inc/3rdparty/site_config/standard/blog.cloudflare.com.txt +++ b/inc/3rdparty/site_config/standard/blog.cloudflare.com.txt @@ -1,9 +1,9 @@ -# Instapaper gets this back to front and only gets the blog title instead of the article title. -title: substring-before(//title, '-') - -author: //a[ contains(@href, '/people') ] - -body: //div[ @class='post' ] - +# Instapaper gets this back to front and only gets the blog title instead of the article title. +title: substring-before(//title, '-') + +author: //a[ contains(@href, '/people') ] + +body: //div[ @class='post' ] + # Date is impossible to retrieve since they use those stupid "fuzzy" dates, inserted through javascript, at posterous. 
test_url: http://blog.cloudflare.com/understanding-analytics-when-is-a-page-view-n \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/blog.fefe.de.txt b/inc/3rdparty/site_config/standard/blog.fefe.de.txt old mode 100644 new mode 100755 index 92272b70..97e48e69 --- a/inc/3rdparty/site_config/standard/blog.fefe.de.txt +++ b/inc/3rdparty/site_config/standard/blog.fefe.de.txt @@ -1,5 +1,5 @@ -title: //h2 -date: //h3 -body: //ul +title: //h2 +date: //h3 +body: //ul test_url: http://blog.fefe.de/?ts=b063bf55 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/blog.instagram.com.txt b/inc/3rdparty/site_config/standard/blog.instagram.com.txt old mode 100644 new mode 100755 index 3065dd80..13d1d44a --- a/inc/3rdparty/site_config/standard/blog.instagram.com.txt +++ b/inc/3rdparty/site_config/standard/blog.instagram.com.txt @@ -1,11 +1,11 @@ -# clean Instagram blog a little bit - -tidy:no -prune:no - -body://div[contains(@id,'content')] - -strip_id_or_class:meta -strip_id_or_class:notes +# clean Instagram blog a little bit + +tidy:no +prune:no + +body://div[contains(@id,'content')] + +strip_id_or_class:meta +strip_id_or_class:notes strip_id_or_class:pagination test_url: http://blog.instagram.com/post/8757832007/fromwhereistand \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/blog.instapaper.com.txt b/inc/3rdparty/site_config/standard/blog.instapaper.com.txt new file mode 100755 index 00000000..fda01b15 --- /dev/null +++ b/inc/3rdparty/site_config/standard/blog.instapaper.com.txt @@ -0,0 +1,9 @@ +author: //a[@href="http://www.marco.org/about"] +date: //span[@class="date"] + +# Remove the date from article body. +strip: //span[@class="date"] + +# Remove pagination links from article body. 
+strip: //div[@id="pagination"] +test_url: http://blog.instapaper.com/post/31303984531 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/blog.jaysalvat.com.txt b/inc/3rdparty/site_config/standard/blog.jaysalvat.com.txt old mode 100644 new mode 100755 index 4e467fe9..e89ad3a5 --- a/inc/3rdparty/site_config/standard/blog.jaysalvat.com.txt +++ b/inc/3rdparty/site_config/standard/blog.jaysalvat.com.txt @@ -1,4 +1,4 @@ -date: //span[contains(@class, 'date-links')] -author: //span[contains(@class, 'author-links')] +date: //span[contains(@class, 'date-links')] +author: //span[contains(@class, 'author-links')] body: //div[contains(@class, 'entry-content')] test_url: http://blog.jaysalvat.com/article/celui-qui-avait-refait-son-site-web \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/blog.kaelig.fr.txt b/inc/3rdparty/site_config/standard/blog.kaelig.fr.txt old mode 100644 new mode 100755 index ac18ad15..bcd3bdc9 --- a/inc/3rdparty/site_config/standard/blog.kaelig.fr.txt +++ b/inc/3rdparty/site_config/standard/blog.kaelig.fr.txt @@ -1,5 +1,5 @@ -body: //*[contains(@class, 'post_content')] -author: string('Kaelig Deloumeau-Prigent') -title: //h1[@class='title'] +body: //*[contains(@class, 'post_content')] +author: string('Kaelig Deloumeau-Prigent') +title: //h1[@class='title'] date: //span[@class='date'] test_url: http://blog.kaelig.fr/post/24877648508/preprocesseurs-css-renoncer-par-choix-ou-par \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/blog.naver.com.txt b/inc/3rdparty/site_config/standard/blog.naver.com.txt old mode 100644 new mode 100755 index 702789ad..73c30c47 --- a/inc/3rdparty/site_config/standard/blog.naver.com.txt +++ b/inc/3rdparty/site_config/standard/blog.naver.com.txt @@ -1,6 +1,6 @@ -title: //span[@class='pcol1 itemSubjectBoldfont'] -body: //div[@id='postListBody'] -date: //p[@class='date fil5 pcol2'] -single_page_link: /html/frameset/frame[1]/attribute::src +title: 
//span[@class='pcol1 itemSubjectBoldfont'] +body: //div[@id='postListBody'] +date: //p[@class='date fil5 pcol2'] +single_page_link: /html/frameset/frame[1]/attribute::src strip: //div[@class='post-btn'] test_url: http://blog.naver.com/how2invest/110135068757 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/blog.pchome.net.txt b/inc/3rdparty/site_config/standard/blog.pchome.net.txt old mode 100644 new mode 100755 index 3089001e..de81beba --- a/inc/3rdparty/site_config/standard/blog.pchome.net.txt +++ b/inc/3rdparty/site_config/standard/blog.pchome.net.txt @@ -1,12 +1,12 @@ -# PCHOME blog, a popular Chinese blog host -# Oct 15, 2011 -# - -title://*[contains(@class,'imp')]/h2 - -date://*[contains(@class,'imp')]/span -body://div[contains(@id,'blog_content')] - - +# PCHOME blog, a popular Chinese blog host +# Oct 15, 2011 +# + +title://*[contains(@class,'imp')]/h2 + +date://*[contains(@class,'imp')]/span +body://div[contains(@id,'blog_content')] + + test_url: http://blog.pchome.net/article/462502.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/blog.pinboard.in.txt b/inc/3rdparty/site_config/standard/blog.pinboard.in.txt old mode 100644 new mode 100755 index b7afe455..40f0c560 --- a/inc/3rdparty/site_config/standard/blog.pinboard.in.txt +++ b/inc/3rdparty/site_config/standard/blog.pinboard.in.txt @@ -1,6 +1,6 @@ -title: //a[@class="blog_title"] -date: //p[@class="when"]/a -body: //div[@class="blog_entry"] -strip_id_or_class:blog_title +title: //a[@class="blog_title"] +date: //p[@class="when"]/a +body: //div[@class="blog_entry"] +strip_id_or_class:blog_title strip_id_or_class:when test_url: http://blog.pinboard.in/2011/11/the_social_graph_is_neither/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/blog.renren.com.txt b/inc/3rdparty/site_config/standard/blog.renren.com.txt new file mode 100755 index 00000000..401d31e5 --- /dev/null +++ 
b/inc/3rdparty/site_config/standard/blog.renren.com.txt @@ -0,0 +1,11 @@ +# This filter is tested on: +# http://blog.renren.com/share/224959024/14260739544 +# http://blog.renren.com/share/231323504/14261768898 +# http://blog.renren.com/share/230305019/1502806705 + +title://h1[contains(@class, 'title-article')] +author://span[contains(@class, 'name')] +body://div[contains(@class, 'content-body')] + +convert_double_br_tags:yes +test_url: http://blog.renren.com/share/230305019/1502806705 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/blog.sina.com.cn.txt b/inc/3rdparty/site_config/standard/blog.sina.com.cn.txt old mode 100644 new mode 100755 index acb9ce81..4895272a --- a/inc/3rdparty/site_config/standard/blog.sina.com.cn.txt +++ b/inc/3rdparty/site_config/standard/blog.sina.com.cn.txt @@ -1,26 +1,26 @@ -# Sina blog, the most popular blog host in China. -# Its source code is horrible. -# -# Issue: -# Only the first image in the article is displayed. -# The rest images are replace by a 1x1 transparent gif by sina blog host. -# - -title://*[contains(@class,'titName SG_txta')] -author://*[contains(@id,'ownernick')] -date://*[contains(@class,'time SG_txtc')] -body://div[contains(@class,'articalContent')] - -# Remove redundant content which has span class start with "MASS" -# Example <span class="MASSf21674ffeef7"></span> -strip://span[contains(@class,'MASS')] - -# Remove comment -strip://div[contains(@class,'allComm')] - -# Remove hiden text and link -strip://ins - -tidy:no -convert_double_br_tags:yes +# Sina blog, the most popular blog host in China. +# Its source code is horrible. +# +# Issue: +# Only the first image in the article is displayed. +# The rest images are replace by a 1x1 transparent gif by sina blog host. 
+# + +title://*[contains(@class,'titName SG_txta')] +author://*[contains(@id,'ownernick')] +date://*[contains(@class,'time SG_txtc')] +body://div[contains(@class,'articalContent')] + +# Remove redundant content which has span class start with "MASS" +# Example <span class="MASSf21674ffeef7"></span> +strip://span[contains(@class,'MASS')] + +# Remove comment +strip://div[contains(@class,'allComm')] + +# Remove hiden text and link +strip://ins + +tidy:no +convert_double_br_tags:yes test_url: http://blog.sina.com.cn/s/blog_5054769e0102dtja.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/blog.spu.edu.txt b/inc/3rdparty/site_config/standard/blog.spu.edu.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/blog.wells.ee.txt b/inc/3rdparty/site_config/standard/blog.wells.ee.txt old mode 100644 new mode 100755 index 8c8b3838..eae6982b --- a/inc/3rdparty/site_config/standard/blog.wells.ee.txt +++ b/inc/3rdparty/site_config/standard/blog.wells.ee.txt @@ -1,6 +1,6 @@ -title: //h2/a[@class="no-link title"] -author: //h2[@id="blog_owner"] -date: //time -strip: //h2/a[@class="no-link title"] -test_url: http://blog.wells.ee/retina +title: //h2/a[@class="no-link title"] +author: //h2[@id="blog_owner"] +date: //time +strip: //h2/a[@class="no-link title"] +test_url: http://blog.wells.ee/retina test_url: http://blog.wells.ee/skeuomorphism \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/blogs.aljazeera.net.txt b/inc/3rdparty/site_config/standard/blogs.aljazeera.net.txt old mode 100644 new mode 100755 index f630127b..2a66952b --- a/inc/3rdparty/site_config/standard/blogs.aljazeera.net.txt +++ b/inc/3rdparty/site_config/standard/blogs.aljazeera.net.txt @@ -1,8 +1,8 @@ -# 2011-08-23 [carlo@...] Initial version. 
- -author: //div[@id="blogauthordatebox-node"]//a[@title="View user profile."]/text() - -# why yes, I do feel a bit dirty -date: substring-before( substring-after( substring-after( //div[@id="blogauthordatebox-node"]//td[3], "on " ), ", "), " " ) +# 2011-08-23 [carlo@...] Initial version. + +author: //div[@id="blogauthordatebox-node"]//a[@title="View user profile."]/text() + +# why yes, I do feel a bit dirty +date: substring-before( substring-after( substring-after( //div[@id="blogauthordatebox-node"]//td[3], "on " ), ", "), " " ) test_url: http://blogs.aljazeera.net/asia/2011/08/22/peoples-hero \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/blogs.forbes.com.txt b/inc/3rdparty/site_config/standard/blogs.forbes.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/blogs.hbr.org.txt b/inc/3rdparty/site_config/standard/blogs.hbr.org.txt old mode 100644 new mode 100755 index 3664d16c..d47c3520 --- a/inc/3rdparty/site_config/standard/blogs.hbr.org.txt +++ b/inc/3rdparty/site_config/standard/blogs.hbr.org.txt @@ -1,4 +1,4 @@ -title: //div[@id='pageFeature']/h1 -body: //div[@id='articleBody'] -strip: //div[@class='module wide'] +title: //div[@id='pageFeature']/h1 +body: //div[@id='articleBody'] +strip: //div[@class='module wide'] test_url: http://blogs.hbr.org/bregman/2011/04/the-1-killer-of-meetings-and-w.html?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+harvardbusiness+%28HBR.org%29 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/blogs.msdn.com.txt b/inc/3rdparty/site_config/standard/blogs.msdn.com.txt old mode 100644 new mode 100755 index 3d3ec020..b2ff8332 --- a/inc/3rdparty/site_config/standard/blogs.msdn.com.txt +++ b/inc/3rdparty/site_config/standard/blogs.msdn.com.txt @@ -1,6 +1,6 @@ -title: //h3[@class="post-name"] -author: //span[@class="user-name"] -date: //div[@class="post-date"] -body: //div[@class="post-content user-defined-markup"] +title: 
//h3[@class="post-name"] +author: //span[@class="user-name"] +date: //div[@class="post-date"] +body: //div[@class="post-content user-defined-markup"] footnotes: no test_url: http://blogs.msdn.com/b/b8/archive/2011/10/04/designing-the-start-screen.aspx \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/blogs.reuters.com.txt b/inc/3rdparty/site_config/standard/blogs.reuters.com.txt old mode 100644 new mode 100755 index 6907bcb2..d3eb9966 --- a/inc/3rdparty/site_config/standard/blogs.reuters.com.txt +++ b/inc/3rdparty/site_config/standard/blogs.reuters.com.txt @@ -1,3 +1,3 @@ -title: //div[@id='single']/h1 +title: //div[@id='single']/h1 body: //div[@id='postcontent'] test_url: http://blogs.reuters.com/felix-salmon/2010/07/16/the-value-of-a-strong-brand-apple-edition/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/blogs.scientificamerican.com.txt b/inc/3rdparty/site_config/standard/blogs.scientificamerican.com.txt old mode 100644 new mode 100755 index a7d15081..2102015d --- a/inc/3rdparty/site_config/standard/blogs.scientificamerican.com.txt +++ b/inc/3rdparty/site_config/standard/blogs.scientificamerican.com.txt @@ -1,16 +1,16 @@ -# meta data -title://h1[@class = 'postTitle'] -author:substring-before(substring-after(//span[@class = 'byline'],'By '),'|') -date://span[@class = 'datestamp'] - -#body content -body://div[@id = 'singleBlogPost'] - -#reclaim author info -move_into(//div[@id = 'singleBlogPost'])://div[@id = 'aboutAuthorDiv'] -strip://p[@class = 'moreLink mobileHide'] - -#cleanup comments, there might be some open <div> sections -strip://div[@id = 'comments2'] +# meta data +title://h1[@class = 'postTitle'] +author:substring-before(substring-after(//span[@class = 'byline'],'By '),'|') +date://span[@class = 'datestamp'] + +#body content +body://div[@id = 'singleBlogPost'] + +#reclaim author info +move_into(//div[@id = 'singleBlogPost'])://div[@id = 'aboutAuthorDiv'] +strip://p[@class = 'moreLink mobileHide'] + 
+#cleanup comments, there might be some open <div> sections +strip://div[@id = 'comments2'] strip://h3[a[@href = '#add-comment']] test_url: http://blogs.scientificamerican.com/a-blog-around-the-clock/2012/07/10/science-blogs-definition-and-a-history/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/blogs.smithsonianmag.com.txt b/inc/3rdparty/site_config/standard/blogs.smithsonianmag.com.txt old mode 100644 new mode 100755 index ba8bc6e7..1bc65e77 --- a/inc/3rdparty/site_config/standard/blogs.smithsonianmag.com.txt +++ b/inc/3rdparty/site_config/standard/blogs.smithsonianmag.com.txt @@ -1,15 +1,15 @@ -# metadata -author://div[@class = 'post']/div[@class='meta']/a[1] -date://div[@id = 'rap']/h2[1] -body://div[@class = 'post'] - -# wrapping caption and image -wrap_in(fieldset)://div[contains(@class, 'wp-caption')] - - -# clean up -strip://div[@class = 'post']/h3[@class = 'storytitle'] -strip://div[@class = 'post']/div[@class = 'social'] -strip://img[@style = 'display:none;'] +# metadata +author://div[@class = 'post']/div[@class='meta']/a[1] +date://div[@id = 'rap']/h2[1] +body://div[@class = 'post'] + +# wrapping caption and image +wrap_in(fieldset)://div[contains(@class, 'wp-caption')] + + +# clean up +strip://div[@class = 'post']/h3[@class = 'storytitle'] +strip://div[@class = 'post']/div[@class = 'social'] +strip://img[@style = 'display:none;'] strip://img[@height='0' and @width='0'] test_url: http://blogs.smithsonianmag.com/adventure/2011/10/tips-for-women-traveling-in-turkey/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/blogs.technet.com.txt b/inc/3rdparty/site_config/standard/blogs.technet.com.txt old mode 100644 new mode 100755 index a2909fd1..3d0fbadc --- a/inc/3rdparty/site_config/standard/blogs.technet.com.txt +++ b/inc/3rdparty/site_config/standard/blogs.technet.com.txt @@ -1,6 +1,9 @@ -title: //h3[@class="post-name"] -author: //span[@class="user-name"] -date: //div[@class="post-date"] -body: 
//div[@class="post-content user-defined-markup"] +title: //h3[@class="post-name"] +author: //span[@class="user-name"] +date: //div[@class="post-date"] +body: //div[@class="post-content user-defined-markup"] +strip_id_or_class: log-feedback-list +tidy: no footnotes: no -test_url: http://blogs.technet.com/b/dlemson/archive/2004/03/03/83304.aspx \ No newline at end of file +test_url: http://blogs.technet.com/b/dlemson/archive/2004/03/03/83304.aspx +test_url: http://blogs.technet.com/b/isablog/archive/2009/01/07/a-pptp-client-might-fail-to-connect-to-a-vpn-server-on-the-internet-through-an-isa-server-2006.aspx \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/bluetouff.com.txt b/inc/3rdparty/site_config/standard/bluetouff.com.txt old mode 100644 new mode 100755 index fbe7a5c6..543d3920 --- a/inc/3rdparty/site_config/standard/bluetouff.com.txt +++ b/inc/3rdparty/site_config/standard/bluetouff.com.txt @@ -1,4 +1,4 @@ -body://div[@class='entry'] -date://div[@class='meta'] +body://div[@class='entry'] +date://div[@class='meta'] strip://a[@class='FlattrButton'] test_url: http://bluetouff.com/2012/03/02/polemique-google-vie-privee/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/boagworld.com.txt b/inc/3rdparty/site_config/standard/boagworld.com.txt old mode 100644 new mode 100755 index 91e48fdb..3b3da991 --- a/inc/3rdparty/site_config/standard/boagworld.com.txt +++ b/inc/3rdparty/site_config/standard/boagworld.com.txt @@ -1,8 +1,8 @@ -title: //h1[@class="entry-title"][2] -author: string("Paul Boag") -date: substring(//span[@class="meta"], 11) -body: //article -strip: //h2 -strip: //h1 +title: //h1[@class="entry-title"][2] +author: string("Paul Boag") +date: substring(//span[@class="meta"], 11) +body: //article +strip: //h2 +strip: //h1 strip: //div[@id="callsToAction"] test_url: http://boagworld.com/working-in-web-design/dealing-with-the-dickheads/ \ No newline at end of file diff --git 
a/inc/3rdparty/site_config/standard/boingboing.net.txt b/inc/3rdparty/site_config/standard/boingboing.net.txt old mode 100644 new mode 100755 index 9169e8fb..4f39661b --- a/inc/3rdparty/site_config/standard/boingboing.net.txt +++ b/inc/3rdparty/site_config/standard/boingboing.net.txt @@ -1,11 +1,11 @@ -# This is far from perfect, but so is BoingBoing's markup -title: //h2[@class="headline"] -single_page_link: //h2[@class="headline"]/a -#date: //p[@class="byline"] -body: //div[@class="post"] - -strip_id_or_class: shareMe -strip_id_or_class: authorbox -strip_id_or_class: byline +# This is far from perfect, but so is BoingBoing's markup +title: //h2[@class="headline"] +single_page_link: //h2[@class="headline"]/a +#date: //p[@class="byline"] +body: //div[@class="post"] + +strip_id_or_class: shareMe +strip_id_or_class: authorbox +strip_id_or_class: byline test_url: http://boingboing.net/2011/10/23/understanding-the-hyperrich-through-the-lens-of-tomorrows-history.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/boldizsar.palotas.eu.txt b/inc/3rdparty/site_config/standard/boldizsar.palotas.eu.txt old mode 100644 new mode 100755 index 4cc49043..3616bbf2 --- a/inc/3rdparty/site_config/standard/boldizsar.palotas.eu.txt +++ b/inc/3rdparty/site_config/standard/boldizsar.palotas.eu.txt @@ -1,3 +1,3 @@ -title: //h2[@class='entry-title'] +title: //h2[@class='entry-title'] body: //div[@class='entry-content'] test_url: http://boldizsar.palotas.eu/blog/?p=1394 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/book.douban.com.txt b/inc/3rdparty/site_config/standard/book.douban.com.txt old mode 100644 new mode 100755 index 8b958562..fe2d2cbf --- a/inc/3rdparty/site_config/standard/book.douban.com.txt +++ b/inc/3rdparty/site_config/standard/book.douban.com.txt @@ -1,6 +1,6 @@ -body: //span[@property='v:description'] -date: //span[@property='v:dtreviewed'] -author: //span[@property='v:reviewer'] -prune: no +body: 
//span[@property='v:description'] +date: //span[@property='v:dtreviewed'] +author: //span[@property='v:reviewer'] +prune: no test_url: http://book.douban.com/review/2422662/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/bookforum.com.txt b/inc/3rdparty/site_config/standard/bookforum.com.txt old mode 100644 new mode 100755 index 331f415e..03b60039 --- a/inc/3rdparty/site_config/standard/bookforum.com.txt +++ b/inc/3rdparty/site_config/standard/bookforum.com.txt @@ -1,19 +1,19 @@ -#metadata -title://div[@class = 'Topper']/h1 -author://div[@class = 'Topper']/h3 -date://div[@class = 'Topper']/h6 -body://div[@class = 'Core'] - - - -# clean up -strip://div[@class = 'Topper']/h1 -strip://div[@class = 'Topper']/h3 -strip://div[@class = 'Topper']/h4 -strip://div[@class = 'Topper']/h5 -strip://div[@class = 'Topper']/h6 -strip://br[@clear = 'all'] -strip://div[@class = 'adCore'] -strip://div[@class = 'BookR'] +#metadata +title://div[@class = 'Topper']/h1 +author://div[@class = 'Topper']/h3 +date://div[@class = 'Topper']/h6 +body://div[@class = 'Core'] + + + +# clean up +strip://div[@class = 'Topper']/h1 +strip://div[@class = 'Topper']/h3 +strip://div[@class = 'Topper']/h4 +strip://div[@class = 'Topper']/h5 +strip://div[@class = 'Topper']/h6 +strip://br[@clear = 'all'] +strip://div[@class = 'adCore'] +strip://div[@class = 'BookR'] strip://div[@class = 'InfoBox'] test_url: http://bookforum.com/inprint/018_04/8595 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/borderhouseblog.com.txt b/inc/3rdparty/site_config/standard/borderhouseblog.com.txt old mode 100644 new mode 100755 index 190738d5..b4e116fe --- a/inc/3rdparty/site_config/standard/borderhouseblog.com.txt +++ b/inc/3rdparty/site_config/standard/borderhouseblog.com.txt @@ -1,7 +1,7 @@ -title://h1 -author://div[@class="meta"]/span/a -date://div[@class="date"] -body://div[@class="content article"] -strip://div[@class="content article"]/h1 +title://h1 
+author://div[@class="meta"]/span/a +date://div[@class="date"] +body://div[@class="content article"] +strip://div[@class="content article"]/h1 test_url: http://borderhouseblog.com/?p=7832 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/bostonglobe.com.txt b/inc/3rdparty/site_config/standard/bostonglobe.com.txt old mode 100644 new mode 100755 index d3e6f43f..4c74a34e --- a/inc/3rdparty/site_config/standard/bostonglobe.com.txt +++ b/inc/3rdparty/site_config/standard/bostonglobe.com.txt @@ -1,16 +1,16 @@ -# NOTE: If testing this configuration yields bad results, including junk text like "Try BostonGlobe.com today" and "THIS STORY APPEARED IN", please replace the Test URL with a current-day headline link from bostonglobe.com. - -title: //div[@class="header"]/h1 -author: substring-after(//div[@class="byline"]/h2[@class="author"],"By ") -date: //div[@class="byline"]/p[last()] -body: //div[@class="article-body"] - -strip_id_or_class: aside -strip_id_or_class: promo -strip_id_or_class: skip-nav -strip_id_or_class: article-more -strip_id_or_class: article-bar - -# This removes image captions. If the parser starts saving images from bostonglobe.com (currently, it does not), then this directive should be removed. +# NOTE: If testing this configuration yields bad results, including junk text like "Try BostonGlobe.com today" and "THIS STORY APPEARED IN", please replace the Test URL with a current-day headline link from bostonglobe.com. + +title: //div[@class="header"]/h1 +author: substring-after(//div[@class="byline"]/h2[@class="author"],"By ") +date: //div[@class="byline"]/p[last()] +body: //div[@class="article-body"] + +strip_id_or_class: aside +strip_id_or_class: promo +strip_id_or_class: skip-nav +strip_id_or_class: article-more +strip_id_or_class: article-bar + +# This removes image captions. If the parser starts saving images from bostonglobe.com (currently, it does not), then this directive should be removed. 
strip_id_or_class: figure test_url: http://bostonglobe.com/news/nation/2012/03/17/illinois-primary-could-pivotal/PsDzFZqvhEYyXbOcF9FOkO/story.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/bostonreview.net.txt b/inc/3rdparty/site_config/standard/bostonreview.net.txt old mode 100644 new mode 100755 index 68567012..64e04a1c --- a/inc/3rdparty/site_config/standard/bostonreview.net.txt +++ b/inc/3rdparty/site_config/standard/bostonreview.net.txt @@ -1,15 +1,15 @@ -#basics -title://h3[@class = 'article_title'] -date://span[@class = 'article_date'] -body://div[@id = 'center_column_article'] -#correct, but author not being picked up in preview -author://span[@class = 'article_author'] - -#strips basics from article -strip_id_or_class:article_title -strip_id_or_class:article_date -strip_id_or_class:article_author - -#strips pull quotes +#basics +title://h3[@class = 'article_title'] +date://span[@class = 'article_date'] +body://div[@id = 'center_column_article'] +#correct, but author not being picked up in preview +author://span[@class = 'article_author'] + +#strips basics from article +strip_id_or_class:article_title +strip_id_or_class:article_date +strip_id_or_class:article_author + +#strips pull quotes strip_id_or_class:pull_quote test_url: http://www.bostonreview.net/BR36.4/megan_pugh_agnes_de_mille_dance.php \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/boundlessline.org.txt b/inc/3rdparty/site_config/standard/boundlessline.org.txt old mode 100644 new mode 100755 index bfc3f3d1..a836e1e2 --- a/inc/3rdparty/site_config/standard/boundlessline.org.txt +++ b/inc/3rdparty/site_config/standard/boundlessline.org.txt @@ -1,5 +1,5 @@ -title: substring-before(//title, '|') -body: //div[@class="entry"] -# Remove the author's picture +title: substring-before(//title, '|') +body: //div[@class="entry"] +# Remove the author's picture strip: //div[@class="entry"]/a[1] test_url: 
http://www.boundlessline.org/2011/06/the-nyts-on-gender-over-the-weekend.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/bowdoinorient.com.txt b/inc/3rdparty/site_config/standard/bowdoinorient.com.txt new file mode 100755 index 00000000..932143d1 --- /dev/null +++ b/inc/3rdparty/site_config/standard/bowdoinorient.com.txt @@ -0,0 +1,6 @@ +title: //*[@class='articletitle'] +body: //*[(@id='articlebody')] +date: //*[(@class='articledate')] +author: //*[(@class='articleauthor')] +autodetect_next_page: no +test_url: http://bowdoinorient.com/article/8045 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/brainfacts.org.txt b/inc/3rdparty/site_config/standard/brainfacts.org.txt old mode 100644 new mode 100755 index 94b0f56d..9705f621 --- a/inc/3rdparty/site_config/standard/brainfacts.org.txt +++ b/inc/3rdparty/site_config/standard/brainfacts.org.txt @@ -1,10 +1,10 @@ -title: //div[@class="standard"]/h1 -author: string("BrainFacts.org") -date: //div[@class="meta"]/strong - -strip: //p[@class="skip"] -strip: //div[@class="meta"] -strip: //div[@class="standard"]/h1 -strip: //div[@class="modal"] +title: //div[@class="standard"]/h1 +author: string("BrainFacts.org") +date: //div[@class="meta"]/strong + +strip: //p[@class="skip"] +strip: //div[@class="meta"] +strip: //div[@class="standard"]/h1 +strip: //div[@class="modal"] strip: //div[@class="columnRight"] test_url: http://brainfacts.org/diseases-disorders/childhood-disorders/articles/2011/autism-the-pervasive-developmental-disorder/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/brandeins.de.txt b/inc/3rdparty/site_config/standard/brandeins.de.txt old mode 100644 new mode 100755 index 3753ce67..36aa2efa --- a/inc/3rdparty/site_config/standard/brandeins.de.txt +++ b/inc/3rdparty/site_config/standard/brandeins.de.txt @@ -1,7 +1,7 @@ -# set body -body: //div[@id='theContent'] - -# set title -title: //div[@id='theContent']/h3 +# set body +body: 
//div[@id='theContent'] + +# set title +title: //div[@id='theContent']/h3 strip: //div[@id='theContent']/h3 test_url: http://www.brandeins.de/archiv/magazin/gegessen-wird-immer/artikel/hunger.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/brandingstrategyinsider.com.txt b/inc/3rdparty/site_config/standard/brandingstrategyinsider.com.txt old mode 100644 new mode 100755 index 19504844..fc020539 --- a/inc/3rdparty/site_config/standard/brandingstrategyinsider.com.txt +++ b/inc/3rdparty/site_config/standard/brandingstrategyinsider.com.txt @@ -1,3 +1,3 @@ -date://h2[@class="date-header"] +date://h2[@class="date-header"] body://div[@class="entry-content"] test_url: http://www.brandingstrategyinsider.com/2011/12/top-twelve-branding-keys-for-2012.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/brasil.elpais.com.txt b/inc/3rdparty/site_config/standard/brasil.elpais.com.txt new file mode 100755 index 00000000..0b8feb6a --- /dev/null +++ b/inc/3rdparty/site_config/standard/brasil.elpais.com.txt @@ -0,0 +1,23 @@ +title: //meta[@name='DC.title']/@content +title: //div[contains(@class, 'cabecera_noticia')]//h1 +date: //meta[@name='DC.date']/@content +date: //meta[@name='date']/@content +body: //div[@class='columna_texto'] +body: //div[@id='cuerpo_noticia'] +body: //div[@class='estructura_2col_1zq']//div[@class='margen_n'] + +prune: no + +strip_id_or_class: disposicion_vertical +strip_id_or_class: ampliar_foto +strip_id_or_class: utilidades +strip_id_or_class: info_relacionada +strip_id_or_class: m-kiosko +strip_id_or_class: info_complementa + +strip: //p[@class='nota_pie'] +strip: //div[starts-with(@id, 'sumario') and contains(., 'más información')] +strip: //div[@id='coment' or @id='foros_not'] + +test_url: http://elpais.com/elpais/2012/02/06/gente/1328526783_491687.html +test_url: http://www.elpais.com/articulo/cultura/mano/retrato/materia/elpepicul/20120207elpepicul_2/Tes diff --git 
a/inc/3rdparty/site_config/standard/brettterpstra.com.txt b/inc/3rdparty/site_config/standard/brettterpstra.com.txt old mode 100644 new mode 100755 index f6f73778..55da1787 --- a/inc/3rdparty/site_config/standard/brettterpstra.com.txt +++ b/inc/3rdparty/site_config/standard/brettterpstra.com.txt @@ -1,5 +1,5 @@ -body: //div[@class='post full'] -title: //h1 -author: substring-after(//title, '- ') +body: //div[@class='post full'] +title: //h1 +author: substring-after(//title, '- ') date: //span[@class='date'] test_url: http://brettterpstra.com/byword-for-ios/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/brisbanetimes.com.au.txt b/inc/3rdparty/site_config/standard/brisbanetimes.com.au.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/brookings.edu.txt b/inc/3rdparty/site_config/standard/brookings.edu.txt old mode 100644 new mode 100755 index 9f4fc4e3..17a47605 --- a/inc/3rdparty/site_config/standard/brookings.edu.txt +++ b/inc/3rdparty/site_config/standard/brookings.edu.txt @@ -1,13 +1,13 @@ -title: //div[@id='contentheader']/h1 -author: //p[@class='attribution']/span[@class='author']/* -# Is there a way to pull multiple authors? My XPath here is just grabbing the first - -date: /html/head/meta[@name="date"]/@content -body: //div[@class='main-content'] - -strip: //p[@class='byline'] -strip: //div[@class='img-gallery'] -strip: //div[@class='callout'] -strip: //div[@class='add-your-view'] +title: //div[@id='contentheader']/h1 +author: //p[@class='attribution']/span[@class='author']/* +# Is there a way to pull multiple authors? 
My XPath here is just grabbing the first + +date: /html/head/meta[@name="date"]/@content +body: //div[@class='main-content'] + +strip: //p[@class='byline'] +strip: //div[@class='img-gallery'] +strip: //div[@class='callout'] +strip: //div[@class='add-your-view'] convert_double_br_tags: yes test_url: http://www.brookings.edu/opinions/2011/1018_cyberattack_libya_goldsmith.aspx \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/brooksreview.net.txt b/inc/3rdparty/site_config/standard/brooksreview.net.txt old mode 100644 new mode 100755 index 71cafcdb..d33d7d4e --- a/inc/3rdparty/site_config/standard/brooksreview.net.txt +++ b/inc/3rdparty/site_config/standard/brooksreview.net.txt @@ -1,6 +1,6 @@ -title: //h1 -body: //div[@class='article'] -body: //div[@class='post'] -date: //*[@id='single']/span -prune: no +title: //h1 +body: //div[@class='article'] +body: //div[@class='post'] +date: //*[@id='single']/span +prune: no test_url: http://brooksreview.net/2011/11/readability-agency/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/bt.no.txt b/inc/3rdparty/site_config/standard/bt.no.txt new file mode 100755 index 00000000..200c2e4e --- /dev/null +++ b/inc/3rdparty/site_config/standard/bt.no.txt @@ -0,0 +1,12 @@ +title: //h1[contains(@class,'articleTitle')] +author: //span[@itemprop='name'] +date: //time[@class='published'] +body: //div[contains(@class,'bodyText')] + +strip_id_or_class: 'pull1' +strip_id_or_class: 'relationArticle' +strip: //span[@class='quote'] + +# strip h2 if at end of article (typically a request for comments) +strip: //div[contains(@class,'bodyText')]/node()[last()-1]/self::h2 +test_url: http://www.bt.no/meninger/debatt/Typisk-norsk-a-vare-god-nok-2884108.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/buffed.de.txt b/inc/3rdparty/site_config/standard/buffed.de.txt new file mode 100755 index 00000000..3dd36ce6 --- /dev/null +++ 
b/inc/3rdparty/site_config/standard/buffed.de.txt @@ -0,0 +1,14 @@ +date: //meta[@itemProp='datePublished']/@content +body: //div[@class='intro' or contains(@class, 'article_text')] +prune: no +strip_id_or_class: embedcode +strip_id_or_class: EmbedSwitch +strip_id_or_class: EmbedText +strip_id_or_class: bildergalerie +strip_id_or_class: subline_seohour_image +strip_id_or_class: ova-player +strip_id_or_class: jcarouseloutput +strip_id_or_class: cbox_embedded + +test_url: http://www.buffed.de/SWTOR-Star-Wars-The-Old-Republic-PC-218697/News/SWTOR-Ab-Patch-24-Lore-Klamotten-faerben-1090051/ +test_url: http://www.buffed.de/feed.cfm?menu_alias=home \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/buquad.com.txt b/inc/3rdparty/site_config/standard/buquad.com.txt old mode 100644 new mode 100755 index a75fa046..f0fd08db --- a/inc/3rdparty/site_config/standard/buquad.com.txt +++ b/inc/3rdparty/site_config/standard/buquad.com.txt @@ -1,8 +1,8 @@ -title: //h1 -author: //h2/a -date: substring-after(//h2, '|') -strip_id_or_class: 'attachment' -strip: //h3 - +title: //h1 +author: //h2/a +date: substring-after(//h2, '|') +strip_id_or_class: 'attachment' +strip: //h3 + body: //div[@class='entry'] test_url: http://buquad.com/2012/04/09/paul-ryan/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/business2community.com.txt b/inc/3rdparty/site_config/standard/business2community.com.txt new file mode 100755 index 00000000..0dcc7ff8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/business2community.com.txt @@ -0,0 +1,5 @@ +date: substring-after(//p[@class='byline'],'Published') + +strip: //div[@class='article-meta'] + +test_url: http://www.business2community.com/social-media/funky-ways-to-print-instagram-photos-0485340 diff --git a/inc/3rdparty/site_config/standard/businessinsider.com.txt b/inc/3rdparty/site_config/standard/businessinsider.com.txt old mode 100644 new mode 100755 index c773db8b..39eb7426 --- 
a/inc/3rdparty/site_config/standard/businessinsider.com.txt +++ b/inc/3rdparty/site_config/standard/businessinsider.com.txt @@ -1,12 +1,16 @@ -title://div[@class="sl-layout-post"]/h1 -body: //div[contains(@class, 'post-content') or contains(@class, 'KonaBody')] -strip: //div[contains(@class, "post-sidebar")] -strip: //div[@id='related-links'] -author://div[@class="byline"]/a -date://div[@class="byline"]/span[@class="date"] -prune: no - -strip://*[contains(@class,'sponsored-text')] -strip: //div[@id='post_footer'] - -test_url: http://www.businessinsider.com/microsoft-just-put-one-of-its-hardcore-technical-geniuses-on-xbox-2012-1 \ No newline at end of file +title://div[@class="sl-layout-post"]/h1 +body: //div[contains(@class, 'post-content') or contains(@class, 'slide-module') or contains(@class, 'KonaBody')] +strip: //div[contains(@class, "post-sidebar")] +strip: //div[@id='related-links'] +strip: //div[@class='related-links-container'] +strip: //p[@class='source'] +author://div[@class="byline"]/a +date://div[@class="byline"]/span[@class="date"] +prune: no + +single_page_link: //a[contains(text(), 'View as one page')] + +strip://*[contains(@class,'sponsored-text')] +strip: //div[@id='post_footer'] + +test_url: http://www.businessinsider.com/microsoft-just-put-one-of-its-hardcore-technical-geniuses-on-xbox-2012-1 diff --git a/inc/3rdparty/site_config/standard/businessnews.com.tn.txt b/inc/3rdparty/site_config/standard/businessnews.com.tn.txt old mode 100644 new mode 100755 index 714cfc90..6502b8e1 --- a/inc/3rdparty/site_config/standard/businessnews.com.tn.txt +++ b/inc/3rdparty/site_config/standard/businessnews.com.tn.txt @@ -1,12 +1,12 @@ -body: //div[@id='article_detail'] -title: //meta[@property='og:title']/@content -date: //div[@id='date_com_art']//a[@class='date'] -author: //div[@id='article_detail']//font[@class='auteur'] - -strip_id_or_class: porte_titre_theme -strip_id_or_class: cont_param -strip_id_or_class: date_com_art - -prune: no - +body: 
//div[@id='article_detail'] +title: //meta[@property='og:title']/@content +date: //div[@id='date_com_art']//a[@class='date'] +author: //div[@id='article_detail']//font[@class='auteur'] + +strip_id_or_class: porte_titre_theme +strip_id_or_class: cont_param +strip_id_or_class: date_com_art + +prune: no + test_url: http://www.businessnews.com.tn/details_article.php?a=31073&t=522&lang=fr&temp=1 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/businessweek.com.txt b/inc/3rdparty/site_config/standard/businessweek.com.txt old mode 100644 new mode 100755 index 7b3d063b..03085593 --- a/inc/3rdparty/site_config/standard/businessweek.com.txt +++ b/inc/3rdparty/site_config/standard/businessweek.com.txt @@ -1,30 +1,30 @@ -# story has several pages, should be detected -body: //div[@id='storyBody'] -body: //div[@id='article_body'] -body: //div[@id='story_body'] - -title://h1[@id='article_headline'] - -# article author -author: //p[@class='author']/a -# story author(s) -author: substring-after(//p[@class='byline'], 'By ') - -# article date -date: //span[@class='published_date'] -# story date -date: //span[@class='date'] - -date: substring-after(//div[contains(@class,'attributor')],'on') -strip_id_or_class: inset -strip: //p/span[@class='photoCredit'] -strip: //h1 - -strip_id_or_class: page_count -strip_id_or_class: tools -strip_id_or_class: pagination - -single_page_link: //li[@id='stPrint']/a - -test_url: http://www.businessweek.com/magazine/buyback-insurance-a-good-deal-for-retailers-07282011.html +# story has several pages, should be detected +body: //div[@id='storyBody'] +body: //div[@id='article_body'] +body: //div[@id='story_body'] + +title://h1[@id='article_headline'] + +# article author +author: //p[@class='author']/a +# story author(s) +author: substring-after(//p[@class='byline'], 'By ') + +# article date +date: //span[@class='published_date'] +# story date +date: //span[@class='date'] + +date: 
substring-after(//div[contains(@class,'attributor')],'on') +strip_id_or_class: inset +strip: //p/span[@class='photoCredit'] +strip: //h1 + +strip_id_or_class: page_count +strip_id_or_class: tools +strip_id_or_class: pagination + +single_page_link: //li[@id='stPrint']/a + +test_url: http://www.businessweek.com/magazine/buyback-insurance-a-good-deal-for-retailers-07282011.html test_url: http://www.businessweek.com/articles/2012-06-06/american-pain-the-largest-u-dot-s-dot-pill-mills-rise-and-fall \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/buzzfeed.com.txt b/inc/3rdparty/site_config/standard/buzzfeed.com.txt old mode 100644 new mode 100755 index 6df8bc47..97dddaee --- a/inc/3rdparty/site_config/standard/buzzfeed.com.txt +++ b/inc/3rdparty/site_config/standard/buzzfeed.com.txt @@ -1,15 +1,15 @@ -# Creator: Greg Leuch <greg@...> - -# It can be messy. -tidy:no - -# The basic template. -title: //h1[@data-print='title'] -author: //a[@data-print='author'] -date: //time[@data-print='date'] -body: //div[@data-print='body'] -body: //section[@data-print='body'] - -# For various things... +# Creator: Greg Leuch <greg@...> + +# It can be messy. +tidy:no + +# The basic template. +title: //h1[@data-print='title'] +author: //a[@data-print='author'] +date: //time[@data-print='date'] +body: //div[@data-print='body'] +body: //section[@data-print='body'] + +# For various things... 
strip: *[@data-print="ignore"] test_url: http://www.buzzfeed.com/hgrant/35-reasons-why-dogs-hate-the-holidays \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/bygonebureau.com.txt b/inc/3rdparty/site_config/standard/bygonebureau.com.txt old mode 100644 new mode 100755 index 0abb6436..63c82130 --- a/inc/3rdparty/site_config/standard/bygonebureau.com.txt +++ b/inc/3rdparty/site_config/standard/bygonebureau.com.txt @@ -1,6 +1,6 @@ -title: //h1 -author: //a[contains(@href, '/author/')] -date: //*[@class='post-date'] -strip: //*[@class='post-date'] +title: //h1 +author: //a[contains(@href, '/author/')] +date: //*[@class='post-date'] +strip: //*[@class='post-date'] strip: //h1 test_url: http://bygonebureau.com/2011/06/20/an-existential-psychoanalysis/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/cable.co.uk.txt b/inc/3rdparty/site_config/standard/cable.co.uk.txt new file mode 100755 index 00000000..435bf3b5 --- /dev/null +++ b/inc/3rdparty/site_config/standard/cable.co.uk.txt @@ -0,0 +1,11 @@ +title: //div[@class='page-content']//h1 +body: //div[@class='page-content'] +strip_id_or_class: editorial-bar-top +strip_id_or_class: social-bottom +strip_id_or_class: comment-form +strip_id_or_class: pc-why + +prune: no +tidy: no + +test_url: http://www.cable.co.uk/news/bt-vision-unveils-interactive-guide-application-800734218/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/cardboardconnection.com.txt b/inc/3rdparty/site_config/standard/cardboardconnection.com.txt old mode 100644 new mode 100755 index 3adc7a35..49f34302 --- a/inc/3rdparty/site_config/standard/cardboardconnection.com.txt +++ b/inc/3rdparty/site_config/standard/cardboardconnection.com.txt @@ -1,8 +1,8 @@ -title: //h1[@class='producttabbed-title'] -body: //div[@class='postTabs_divs postTabs_curr_div'] -strip: //div[@class='ratingblock2'] -strip: //p[@id='breadcrumbs'] -strip: //div[@style='display: none'] - +title: 
//h1[@class='producttabbed-title'] +body: //div[@class='postTabs_divs postTabs_curr_div'] +strip: //div[@class='ratingblock2'] +strip: //p[@id='breadcrumbs'] +strip: //div[@style='display: none'] + test_url: http://www.cardboardconnection.com/2012-topps-archives-baseball-cards \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/carpeaqua.com.txt b/inc/3rdparty/site_config/standard/carpeaqua.com.txt old mode 100644 new mode 100755 index 7ba1ed78..5ea302e0 --- a/inc/3rdparty/site_config/standard/carpeaqua.com.txt +++ b/inc/3rdparty/site_config/standard/carpeaqua.com.txt @@ -1,6 +1,6 @@ -title: //h2 -body: //div[@class='entry'] - -prune: no +title: //h2 +body: //div[@class='entry'] + +prune: no # otherwise the footnotes are removed test_url: http://carpeaqua.com/2011/03/27/the-intersection-of-power-and-portability/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/cars.com.txt b/inc/3rdparty/site_config/standard/cars.com.txt new file mode 100755 index 00000000..71c5c050 --- /dev/null +++ b/inc/3rdparty/site_config/standard/cars.com.txt @@ -0,0 +1,7 @@ +title: //div[contains(@class, 'basicInfo')]//h1 + +body: //img[@id='chosenPhotoIMG'] | //div[@id='aboutThisVehicleBox'] + +prune: no + +test_url: http://www.cars.com/go/search/detail.jsp?listingId=115364779 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/catb.org.txt b/inc/3rdparty/site_config/standard/catb.org.txt old mode 100644 new mode 100755 index 8908292c..2cd197fb --- a/inc/3rdparty/site_config/standard/catb.org.txt +++ b/inc/3rdparty/site_config/standard/catb.org.txt @@ -1,7 +1,7 @@ -body: //div[@class='article'] -strip: //div[@class='revhistory'] -strip: //div[@class='toc'] -tidy: no -prune: no +body: //div[@class='article'] +strip: //div[@class='revhistory'] +strip: //div[@class='toc'] +tidy: no +prune: no test_url: http://catb.org/~esr/faqs/smart-questions.html \ No newline at end of file diff --git 
a/inc/3rdparty/site_config/standard/cbc.ca.txt b/inc/3rdparty/site_config/standard/cbc.ca.txt old mode 100644 new mode 100755 index 25305109..ba5faf3f --- a/inc/3rdparty/site_config/standard/cbc.ca.txt +++ b/inc/3rdparty/site_config/standard/cbc.ca.txt @@ -1,5 +1,5 @@ -title: //div[contains(@class, 'headline')]/h1 -author: //h5[contains(@class, 'byline')] -date: substring-after(//h4[contains(@class, 'posted')], 'Posted: ') +title: //div[contains(@class, 'headline')]/h1 +author: //h5[contains(@class, 'byline')] +date: substring-after(//h4[contains(@class, 'posted')], 'Posted: ') body: //div[@id="storyboard"] test_url: http://www.cbc.ca/news/world/story/2012/01/16/cruise-ship-monday.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/cbn.com.txt b/inc/3rdparty/site_config/standard/cbn.com.txt new file mode 100755 index 00000000..de8d8839 --- /dev/null +++ b/inc/3rdparty/site_config/standard/cbn.com.txt @@ -0,0 +1,8 @@ +body: //div[contains(@class, 'articleText')] +date: //div[contains(@class, 'articleDate')] +author: //a[contains(@id, 'articleDetails_lnkByLine')] +prune: no + +test_url: http://www.cbn.com/cbnnews/world/2013/June/Chilly-G-8-Obama-Putin-Agree-to-Disagree-on-Syria/ +test_url: http://www.cbn.com/cbnnews/world/2013/June/UK-Agency-Accused-of-Hacking-Foreign-Diplomats/ +test_url: http://www.cbn.com/cbnnews/feed/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/cbsnews.com.txt b/inc/3rdparty/site_config/standard/cbsnews.com.txt old mode 100644 new mode 100755 index 4ba3da19..04d20230 --- a/inc/3rdparty/site_config/standard/cbsnews.com.txt +++ b/inc/3rdparty/site_config/standard/cbsnews.com.txt @@ -1,14 +1,15 @@ -date: //meta[@name="published"]/@content -date: //div[@class="timeLine"] -title: //div[@id='contentBody']//h1 -author: //dl[@class="storyBlogByline"]/dd/a -body: //div[@id='storyMediaBox'] | //div[contains(@class, 'storyText')] - -# Content Pruning -strip: //div[@class="scrollingArrows"] -strip: 
//div[@class="timeLine"] -strip: //dl[@class="storyBlogByline"] - -prune: no - -test_url: http://www.cbsnews.com/8301-201_162-57366361/rescued-americans-dad-proud-of-the-u.s/ \ No newline at end of file +date: //meta[@name="published"]/@content +date: //div[@class="timeLine"] +title: //div[@id='contentBody']//h1 +author: //dl[@class="storyBlogByline"]/dd/a +body: //div[@id='storyMediaBox'] | //div[contains(@class, 'storyText')] + +# Content Pruning +strip: //div[@class="scrollingArrows"] +strip: //div[@class="timeLine"] +strip: //dl[@class="storyBlogByline"] +strip: //span[@class='image-credit'] + +prune: no + +test_url: http://www.cbsnews.com/8301-201_162-57366361/rescued-americans-dad-proud-of-the-u.s/ diff --git a/inc/3rdparty/site_config/standard/cedarrepublican.com.txt b/inc/3rdparty/site_config/standard/cedarrepublican.com.txt new file mode 100755 index 00000000..42faa521 --- /dev/null +++ b/inc/3rdparty/site_config/standard/cedarrepublican.com.txt @@ -0,0 +1,2 @@ +body: //div[@class='frame']//img[@class='horizontal'] | //div[@class='content'] +test_url: http://cedarrepublican.com/online_features/gift_ideas/sending-mother-s-day-flowers-how-to-be-sure-they/article_b69af9b8-1f05-5352-8621-16ce007e5623.html diff --git a/inc/3rdparty/site_config/standard/chareidi.org.txt b/inc/3rdparty/site_config/standard/chareidi.org.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/chinamining.org.txt b/inc/3rdparty/site_config/standard/chinamining.org.txt old mode 100644 new mode 100755 index ea0df2a3..d00d65de --- a/inc/3rdparty/site_config/standard/chinamining.org.txt +++ b/inc/3rdparty/site_config/standard/chinamining.org.txt @@ -1,10 +1,10 @@ -title: //*[@id='Content']/span[1] -author: substring-after(substring-before(//*[@id='Content']/span[2], ')'), '(') -date: substring-before(substring-after(//*[@id='Content']/span[2], 'Updated: '), 'Counter') - -strip: //*[@id='Content']/span[1] -strip: //*[@id='Content']/span[2] - -body: 
//*[@id='Content'] +title: //*[@id='Content']/span[1] +author: substring-after(substring-before(//*[@id='Content']/span[2], ')'), '(') +date: substring-before(substring-after(//*[@id='Content']/span[2], 'Updated: '), 'Counter') + +strip: //*[@id='Content']/span[1] +strip: //*[@id='Content']/span[2] + +body: //*[@id='Content'] test_url: http://www.chinamining.org/News/2011-07-22/1311319069d48087.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/chomsky.info.txt b/inc/3rdparty/site_config/standard/chomsky.info.txt old mode 100644 new mode 100755 index 1d294109..31440538 --- a/inc/3rdparty/site_config/standard/chomsky.info.txt +++ b/inc/3rdparty/site_config/standard/chomsky.info.txt @@ -1,5 +1,5 @@ -title: //div[@class='title'] -author: //div[@class='author'] -prune: no - +title: //div[@class='title'] +author: //div[@class='author'] +prune: no + test_url: http://www.chomsky.info/onchomsky/2002----.htm \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/chrisltd.com.txt b/inc/3rdparty/site_config/standard/chrisltd.com.txt new file mode 100755 index 00000000..86d0f5db --- /dev/null +++ b/inc/3rdparty/site_config/standard/chrisltd.com.txt @@ -0,0 +1,6 @@ +title: //header/h1/b[contains(@class, 'title')] +author: substring-after(//article/header/div, 'By ') +date: //header/h1/span[contains(@class, 'date')] +body: //div[@id='main']/article +strip: //header +test_url: http://chrisltd.com/blog/2012/03/fix-widows-indesign/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/christianitytoday.com.txt b/inc/3rdparty/site_config/standard/christianitytoday.com.txt old mode 100644 new mode 100755 index 44288a46..86be14ce --- a/inc/3rdparty/site_config/standard/christianitytoday.com.txt +++ b/inc/3rdparty/site_config/standard/christianitytoday.com.txt @@ -1,13 +1,13 @@ -title://div[@class='title'] -author://div[@class='byline']/b -date:substring-after(//div[@class='byline'], 'posted') -body://div[@id='body'] 
-wrap_in(h2)://span[@class='subhead'] -wrap_in(i)://p[@class='bio'] -wrap_in(i)://p[@class='copyright'] -strip://div[@class='title'] -strip://div[@class='deck'] -strip://div[@class='byline'] -strip://div[@class='copyright'] +title://div[@class='title'] +author://div[@class='byline']/b +date:substring-after(//div[@class='byline'], 'posted') +body://div[@id='body'] +wrap_in(h2)://span[@class='subhead'] +wrap_in(i)://p[@class='bio'] +wrap_in(i)://p[@class='copyright'] +strip://div[@class='title'] +strip://div[@class='deck'] +strip://div[@class='byline'] +strip://div[@class='copyright'] strip://br test_url: http://www.christianitytoday.com/ct/2012/aprilweb-only/my-god-forsaken-me.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/christianpf.com.txt b/inc/3rdparty/site_config/standard/christianpf.com.txt old mode 100644 new mode 100755 index 7f089c55..fb5f342d --- a/inc/3rdparty/site_config/standard/christianpf.com.txt +++ b/inc/3rdparty/site_config/standard/christianpf.com.txt @@ -1,5 +1,5 @@ -title: //h1[@class="entry-title"] -author: //*[@class="author vcard fn"] -date: //*[@class="published"] +title: //h1[@class="entry-title"] +author: //*[@class="author vcard fn"] +date: //*[@class="published"] body: //div[(@class = "dd_content_wrap")] test_url: http://christianpf.com/do-ibuys-lead-to-more-buying/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/christies.com.txt b/inc/3rdparty/site_config/standard/christies.com.txt old mode 100644 new mode 100755 index 5c5889a2..b3c76519 --- a/inc/3rdparty/site_config/standard/christies.com.txt +++ b/inc/3rdparty/site_config/standard/christies.com.txt @@ -1,6 +1,6 @@ -tidy: no -prune: no -date: //article//time[@pubdate] -title: //article/header/h2 +tidy: no +prune: no +date: //article//time[@pubdate] +title: //article/header/h2 body: //article test_url: 
http://www.christies.com/LotFinder/custom/lot_details_MultiLanguage.aspx?from=salesummary&intObjectID=5556662&sid=e536ed1a-b763-41c4-afcf-c94815ec6eee&LID=3 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/chrome.google.com.txt b/inc/3rdparty/site_config/standard/chrome.google.com.txt old mode 100644 new mode 100755 index d4cc8581..5a1d043d --- a/inc/3rdparty/site_config/standard/chrome.google.com.txt +++ b/inc/3rdparty/site_config/standard/chrome.google.com.txt @@ -1,9 +1,9 @@ -body: //pre[@id='cx-desc-text'] -body: //div[contains(@class, 'overview-tab-right-bar-info')] -title: //h1[contains(@class, 'detail-dialog-title')] -tidy: no -prune: no -replace_string(<noscript>): <div> -replace_string(</noscript>): </div> +body: //pre[@id='cx-desc-text'] +body: //div[contains(@class, 'overview-tab-right-bar-info')] +title: //h1[contains(@class, 'detail-dialog-title')] +tidy: no +prune: no +replace_string(<noscript>): <div> +replace_string(</noscript>): </div> test_url: https://chrome.google.com/webstore/detail/pnaiinchjaonopoejhknmgjingcnaloc \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/chronicle.com.txt b/inc/3rdparty/site_config/standard/chronicle.com.txt old mode 100644 new mode 100755 index 0c6c11ed..e86d3eca --- a/inc/3rdparty/site_config/standard/chronicle.com.txt +++ b/inc/3rdparty/site_config/standard/chronicle.com.txt @@ -1,17 +1,17 @@ -title: //h1[contains(@class, "entry-title")] -author: //p[contains(@class, "byline")] - -# blog articles (chronicle.com/blogs/*) -body: //div[contains(@class, "abstract")] -date: //p[contains(@class, "time")] - -# all (?) 
other articles -body: //div[@id="article-body"] -date: //p[contains(@class, "dateline")] - -# remove sidebars containing images (I assume this is desired for Instapaper) -strip: //div[@id="related"] -strip: //div[contains(@class, "image")] - +title: //h1[contains(@class, "entry-title")] +author: //p[contains(@class, "byline")] + +# blog articles (chronicle.com/blogs/*) +body: //div[contains(@class, "abstract")] +date: //p[contains(@class, "time")] + +# all (?) other articles +body: //div[@id="article-body"] +date: //p[contains(@class, "dateline")] + +# remove sidebars containing images (I assume this is desired for Instapaper) +strip: //div[@id="related"] +strip: //div[contains(@class, "image")] + # note that if you're not a Chronicle subscriber (personally or institutionally), you'll only see the first couple of paragraphs of the article, and Instapaper will display that with some crap above and below. thank goodness for that bookmarklet test_url: http://chronicle.com/article/In-a-Land-of-Second-Chances/128375/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/ciaosamin.com.txt b/inc/3rdparty/site_config/standard/ciaosamin.com.txt new file mode 100755 index 00000000..02fd3434 --- /dev/null +++ b/inc/3rdparty/site_config/standard/ciaosamin.com.txt @@ -0,0 +1,4 @@ +body://div[contains(@class, 'entry-content')] +date://h2[contains(@class, 'date-header')] +title://h3[contains(@class, 'post-title')] +test_url: http://www.ciaosamin.com/2013/04/how-this-happened.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/cicero.de.txt b/inc/3rdparty/site_config/standard/cicero.de.txt old mode 100644 new mode 100755 index b9f9a12b..b8913639 --- a/inc/3rdparty/site_config/standard/cicero.de.txt +++ b/inc/3rdparty/site_config/standard/cicero.de.txt @@ -1,33 +1,33 @@ -# fforst@... 
- -# Use link to print article for single page view -single_page_link: //a[@class="print"] - -# set body -tidy: no -body: //div[@class='artikel-content'] - -# strip title and subtitle since we got it already -strip: //div[@class='issue'] -strip: //div[@class='artikel-content']/h2 - -# some authors are known and have a link, others don't -author: //a[contains(@href, 'autor?')] - -#date -date: //span[@class='article-date'] - -# Strip author since we got him -strip_id_or_class: author - -#strip captions -strip_id_or_class: field-name-field-image-credit -strip_id_or_class: field-name-field-article-image-subtitle - -# remove community functions -strip: //div[@class='meta'] -strip: //div[@id='comments'] - -# remove "continue on the next page" text +# fforst@... + +# Use link to print article for single page view +single_page_link: //a[@class="print"] + +# set body +tidy: no +body: //div[@class='artikel-content'] + +# strip title and subtitle since we got it already +strip: //div[@class='issue'] +strip: //div[@class='artikel-content']/h2 + +# some authors are known and have a link, others don't +author: //a[contains(@href, 'autor?')] + +#date +date: //span[@class='article-date'] + +# Strip author since we got him +strip_id_or_class: author + +#strip captions +strip_id_or_class: field-name-field-image-credit +strip_id_or_class: field-name-field-article-image-subtitle + +# remove community functions +strip: //div[@class='meta'] +strip: //div[@id='comments'] + +# remove "continue on the next page" text strip: //p[text()="[SEITE]"] test_url: http://www.cicero.de/weltbuehne/ihre-wut-ist-global-krise-jugend-revolten-aufstaende-zelte/43049 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/ciperchile.cl.txt b/inc/3rdparty/site_config/standard/ciperchile.cl.txt old mode 100644 new mode 100755 index 4d3ac804..d7e9b762 --- a/inc/3rdparty/site_config/standard/ciperchile.cl.txt +++ b/inc/3rdparty/site_config/standard/ciperchile.cl.txt @@ -1,4 +1,4 @@ -body: 
//*[(@id = "articlebody")] -strip_id_or_class: rotulo +body: //*[(@id = "articlebody")] +strip_id_or_class: rotulo test_url: http://ciperchile.cl/2011/04/18/las-operaciones-secretas-que-ordenaba-karadima-para-aniquilar-a-su-competencia/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/cjr.org.txt b/inc/3rdparty/site_config/standard/cjr.org.txt old mode 100644 new mode 100755 index a0c3ea5d..df4c7cc4 --- a/inc/3rdparty/site_config/standard/cjr.org.txt +++ b/inc/3rdparty/site_config/standard/cjr.org.txt @@ -1,6 +1,6 @@ -body: //p[@class='subhead' or @class='attribution'] | //div[@class='article-body'] -prune: no - -single_page_link: //li[@class='print']/a - +body: //p[@class='subhead' or @class='attribution'] | //div[@class='article-body'] +prune: no + +single_page_link: //li[@class='print']/a + test_url: http://www.cjr.org/behind_the_news/from_breaking_news_to_baseless.php \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/classyllama.com.txt b/inc/3rdparty/site_config/standard/classyllama.com.txt new file mode 100755 index 00000000..1864eee8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/classyllama.com.txt @@ -0,0 +1,6 @@ +date: //div[@id='content']//p[contains(@class, 'date')]/span +author: substring-after(//div[@id='content']//div[contains(@class, 'over-under-bars')]/p[last()]/text(), 'Posted by ') +body: //div[@id='content']//div[@class='pane-content'] +strip_id_or_class: trackback-url +strip_id_or_class: over-under-bars +test_url: http://www.classyllama.com/content/layout-caching \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/clientk.com.txt b/inc/3rdparty/site_config/standard/clientk.com.txt old mode 100644 new mode 100755 index 369e88ad..d5a22ccb --- a/inc/3rdparty/site_config/standard/clientk.com.txt +++ b/inc/3rdparty/site_config/standard/clientk.com.txt @@ -1,6 +1,6 @@ -title://div[@class="entrytitle"]/a -author:substring-after(substring-before(//div[@class="entrytime"], 
"|"), "By ") -date:substring-before(substring-after(//div[@class="entrytime"], "|"), "- Posted") -body://div[@class="entrybody"] +title://div[@class="entrytitle"]/a +author:substring-after(substring-before(//div[@class="entrytime"], "|"), "By ") +date:substring-before(substring-after(//div[@class="entrytime"], "|"), "- Posted") +body://div[@class="entrybody"] strip://div[@class="entrybody"]//p[@class="singleinfo"] test_url: http://clientk.com/2011/12/19/the-impact-of-more/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/clubic.com.txt b/inc/3rdparty/site_config/standard/clubic.com.txt old mode 100644 new mode 100755 index b356bbdf..0148e54c --- a/inc/3rdparty/site_config/standard/clubic.com.txt +++ b/inc/3rdparty/site_config/standard/clubic.com.txt @@ -1,11 +1,11 @@ -title: //h1 -author: //a[@class='auteur'] -body: //div[@class='editorial'] -next_page_link: //a[contains(text(),'Page suivante')] -strip: //a[contains(text(),'Page suivante')] -strip: //a[contains(text(),'Page précédente')] -strip_id_or_class: slideshow - -prune: no - +title: //h1 +author: //a[@class='auteur'] +body: //div[@class='editorial'] +next_page_link: //a[contains(text(),'Page suivante')] +strip: //a[contains(text(),'Page suivante')] +strip: //a[contains(text(),'Page précédente')] +strip_id_or_class: slideshow + +prune: no + test_url: http://www.clubic.com/carte-graphique/carte-graphique-amd/radeon-hd-7770/article-478936-1-radeon-hd-7750-7770.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/cmswire.com.txt b/inc/3rdparty/site_config/standard/cmswire.com.txt old mode 100644 new mode 100755 index 2bc96d2e..0b76377a --- a/inc/3rdparty/site_config/standard/cmswire.com.txt +++ b/inc/3rdparty/site_config/standard/cmswire.com.txt @@ -1,6 +1,6 @@ -body: //div[contains(@id,'article-body')] -strip://div[contains(@id,'disqus_count_block')] -strip://div[contains(@id,'col-left')] -strip://div[contains(@id,'col-right')] +body: 
//div[contains(@id,'article-body')] +strip://div[contains(@id,'disqus_count_block')] +strip://div[contains(@id,'col-left')] +strip://div[contains(@id,'col-right')] test_url: http://www.cmswire.com/cms/customer-experience/for-apps-and-appstores-the-singularity-is-approaching-014888.php \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/cn.engadget.com.txt b/inc/3rdparty/site_config/standard/cn.engadget.com.txt new file mode 100755 index 00000000..63f6f7ea --- /dev/null +++ b/inc/3rdparty/site_config/standard/cn.engadget.com.txt @@ -0,0 +1,5 @@ +title: //h2[@class="posttitle"] +body: //div[@class="postbody"] +prune: no + +test_url: http://cn.engadget.com/2013/06/29/google-play-music-all-access/ diff --git a/inc/3rdparty/site_config/standard/cn.reuters.com.txt b/inc/3rdparty/site_config/standard/cn.reuters.com.txt new file mode 100755 index 00000000..b3878662 --- /dev/null +++ b/inc/3rdparty/site_config/standard/cn.reuters.com.txt @@ -0,0 +1,5 @@ +title: //div[@id='maincontent']//h1 +body: //div[@id='resizeableText'] + +test_url: http://cn.reuters.com/article/CNAnalysesNews/idCNKBS0FF0NM20140710 +test_url: http://cn.reuters.feedsportal.com/CNAnalysesNews \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/cnet.com.txt b/inc/3rdparty/site_config/standard/cnet.com.txt old mode 100644 new mode 100755 index 74f46ba9..eac08aaa --- a/inc/3rdparty/site_config/standard/cnet.com.txt +++ b/inc/3rdparty/site_config/standard/cnet.com.txt @@ -1,16 +1,16 @@ -title: //meta[@property="og:title"]/@content -body: //div[contains(@class, 'postBody')] -date: //div[@id='nameAndTime']/time -author: //div[@id='nameAndTime']/span[@class='author'] - -strip_id_or_class: image-credit -strip_id_or_class: noAutolink -strip_id_or_class: related - -prune: no -tidy: no - -# early end -replace_string(Download today's podcast</a>): Download today's podcast</a></div></body></html> - +title: //meta[@property="og:title"]/@content +body: //div[contains(@class, 
'postBody')] +date: //div[@id='nameAndTime']/time +author: //div[@id='nameAndTime']/span[@class='author'] + +strip_id_or_class: image-credit +strip_id_or_class: noAutolink +strip_id_or_class: related + +prune: no +tidy: no + +# early end +replace_string(Download today's podcast</a>): Download today's podcast</a></div></body></html> + test_url: http://www.cnet.com/8301-13952_1-57367607-81/the-404-981-where-the-world-is-a-vampire-podcast/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/cnn.com.txt b/inc/3rdparty/site_config/standard/cnn.com.txt old mode 100644 new mode 100755 index 995e2c79..6f69e4e8 --- a/inc/3rdparty/site_config/standard/cnn.com.txt +++ b/inc/3rdparty/site_config/standard/cnn.com.txt @@ -1,19 +1,23 @@ -title: //div[@class="cnn_storyarea"]/h1 -author: //div[@class="cnnByline"]/strong -date: substring-after(//div[@class="cnn_strytmstmp"], 'Sun') -date: substring-after(//div[@class="cnn_strytmstmp"], 'Mon') -date: substring-after(//div[@class="cnn_strytmstmp"], 'Tue') -date: substring-after(//div[@class="cnn_strytmstmp"], 'Wed') -date: substring-after(//div[@class="cnn_strytmstmp"], 'Thu') -date: substring-after(//div[@class="cnn_strytmstmp"], 'Fri') -date: substring-after(//div[@class="cnn_strytmstmp"], 'Sat') -strip: //div[@class="cnn_storyarea"]/h1 -strip_id_or_class: cnnByline -strip_id_or_class: cnn_strytmstmp -strip_id_or_class: cnn_strycaptiontxt -strip_id_or_class: cnn_strybtntoolsbttm -strip_id_or_class: cnn_strybtntools -strip_id_or_class: cnn_strybtmcntnt -strip_id_or_class: cnn_containerwht -strip_id_or_class: cnn_stryathrtmp -test_url: http://www.cnn.com/2012/05/13/us/new-york-police-policy/index.html?eref=rss_topstories \ No newline at end of file +body: //div[@id='cnnContentContainer']//div[contains(@class, 'cnn_strycntntlft')] +title: //div[@class="cnn_storyarea"]/h1 +author: //div[@class="cnnByline"]/strong +date: substring-after(//div[@class="cnn_strytmstmp"], 'Sun') +date: 
substring-after(//div[@class="cnn_strytmstmp"], 'Mon') +date: substring-after(//div[@class="cnn_strytmstmp"], 'Tue') +date: substring-after(//div[@class="cnn_strytmstmp"], 'Wed') +date: substring-after(//div[@class="cnn_strytmstmp"], 'Thu') +date: substring-after(//div[@class="cnn_strytmstmp"], 'Fri') +date: substring-after(//div[@class="cnn_strytmstmp"], 'Sat') +strip: //div[@class="cnn_storyarea"]/h1 +strip_id_or_class: cnnByline +strip_id_or_class: cnn_strytmstmp +strip_id_or_class: cnn_strycaptiontxt +strip_id_or_class: cnn_strybtntoolsbttm +strip_id_or_class: cnn_strybtntools +strip_id_or_class: cnn_strybtmcntnt +strip_id_or_class: sharebar +#strip_id_or_class: cnn_containerwht +strip_id_or_class: cnn_stryathrtmp +replace_string(<a name="em0"></a>): <!-- a name --> +test_url: http://www.cnn.com/2012/05/13/us/new-york-police-policy/index.html?eref=rss_topstories +test_url: http://rss.cnn.com/rss/edition.rss \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/cnnsi.com.txt b/inc/3rdparty/site_config/standard/cnnsi.com.txt old mode 100644 new mode 100755 index 6a2c2b80..ac49aef9 --- a/inc/3rdparty/site_config/standard/cnnsi.com.txt +++ b/inc/3rdparty/site_config/standard/cnnsi.com.txt @@ -1,26 +1,26 @@ -# main sportsillustrated.com articles - -body: //div[@id="cnnStoryContent"] -title: //div[@id="cnnStoryHeadline"]//h1 -author: //div[@id="cnnSubBanner"]//strong -date: substring-after(//div[@id="cnnTimeStamp"], "Updated: ") -date: substring-after(//div[@id="cnnTimeStamp"], "Posted: ") - -# kill ugly font buttons -strip: //div[@id="cnnSCFontButtons"] - -# kill misc filler videos & etc -strip: //div[@class="cnnDivideContent"] -strip: //*[@class="cnnTMbox"] - -# si vault articles -# ------------- -body: //div[@class="siv_artPara"] -title: //div[@class="siv_artHeader"]//h1 -author: //div[@class="byline"] -date: //div[@class="date"] - -next_page_link: //div[@id='cnnStoryContinue']/a -strip_id_or_class: cnnstorypagination - +# main 
sportsillustrated.com articles + +body: //div[@id="cnnStoryContent"] +title: //div[@id="cnnStoryHeadline"]//h1 +author: //div[@id="cnnSubBanner"]//strong +date: substring-after(//div[@id="cnnTimeStamp"], "Updated: ") +date: substring-after(//div[@id="cnnTimeStamp"], "Posted: ") + +# kill ugly font buttons +strip: //div[@id="cnnSCFontButtons"] + +# kill misc filler videos & etc +strip: //div[@class="cnnDivideContent"] +strip: //*[@class="cnnTMbox"] + +# si vault articles +# ------------- +body: //div[@class="siv_artPara"] +title: //div[@class="siv_artHeader"]//h1 +author: //div[@class="byline"] +date: //div[@class="date"] + +next_page_link: //div[@id='cnnStoryContinue']/a +strip_id_or_class: cnnstorypagination + test_url: http://cnnsi.com/2012/writers/peter_king/01/08/wild.card.round/index.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/code.activestate.com.txt b/inc/3rdparty/site_config/standard/code.activestate.com.txt old mode 100644 new mode 100755 index 6cf72e23..83a21e19 --- a/inc/3rdparty/site_config/standard/code.activestate.com.txt +++ b/inc/3rdparty/site_config/standard/code.activestate.com.txt @@ -1,10 +1,10 @@ -body: //div[@id='content'] -title: //div[@id='page_header']/h1 - -strip_id_or_class: 'lineno' -strip_id_or_class: 'block-toolbar-button' -strip_id_or_class: 'recipe_score' -strip: //div[@id='recipe_tools'] -strip: //div[@id='addcomment'] - +body: //div[@id='content'] +title: //div[@id='page_header']/h1 + +strip_id_or_class: 'lineno' +strip_id_or_class: 'block-toolbar-button' +strip_id_or_class: 'recipe_score' +strip: //div[@id='recipe_tools'] +strip: //div[@id='addcomment'] + test_url: http://code.activestate.com/recipes/500261-named-tuples/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/code.fivefilters.org.txt b/inc/3rdparty/site_config/standard/code.fivefilters.org.txt new file mode 100755 index 00000000..269fb547 --- /dev/null +++ 
b/inc/3rdparty/site_config/standard/code.fivefilters.org.txt @@ -0,0 +1 @@ +body: //div[@id='content'] diff --git a/inc/3rdparty/site_config/standard/code.google.com.txt b/inc/3rdparty/site_config/standard/code.google.com.txt old mode 100644 new mode 100755 index 40a16209..6e9c00a7 --- a/inc/3rdparty/site_config/standard/code.google.com.txt +++ b/inc/3rdparty/site_config/standard/code.google.com.txt @@ -1,5 +1,5 @@ -body: //div[@id="gc-pagecontent"] -strip: //a[@class="backtotop"] -prune: no - +body: //div[@id="gc-pagecontent"] +strip: //a[@class="backtotop"] +prune: no + test_url: http://code.google.com/apis/analytics/docs/tracking/gaTrackingEcommerce.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/codeproject.com.txt b/inc/3rdparty/site_config/standard/codeproject.com.txt new file mode 100755 index 00000000..d1191acc --- /dev/null +++ b/inc/3rdparty/site_config/standard/codeproject.com.txt @@ -0,0 +1,3 @@ +body: //div[@id="contentdiv"] +date: //span[@class="date"] +test_url: http://www.codeproject.com/Articles/499902/Profiling-Entity-Framework-5-in-code \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/codinghorror.com.txt b/inc/3rdparty/site_config/standard/codinghorror.com.txt old mode 100644 new mode 100755 index 9c95f107..adf6e5a0 --- a/inc/3rdparty/site_config/standard/codinghorror.com.txt +++ b/inc/3rdparty/site_config/standard/codinghorror.com.txt @@ -1,15 +1,15 @@ -body: //div[@class='blogbody'] -strip: //h3[@class='title'] -date: //h2[@class='date'] -#Should Atwood just be a literal? -author: substring-before( substring-after(//div[@class='posted'], 'y'), 'V') - -# tim.kingman@... 2011-07-26 -# Prune:no to retain all-link ULs that are part of the body content like -# http://www.codinghorror.com/blog/2011/07/building-a-pc-part-vii-rebooting.html -# Then explicitly strip the "Posted By" and prev/next links that Prune:yes would have removed. 
- -prune: no -strip: //div[@class='posted']/following-sibling::* +body: //div[@class='blogbody'] +strip: //h3[@class='title'] +date: //h2[@class='date'] +#Should Atwood just be a literal? +author: substring-before( substring-after(//div[@class='posted'], 'y'), 'V') + +# tim.kingman@... 2011-07-26 +# Prune:no to retain all-link ULs that are part of the body content like +# http://www.codinghorror.com/blog/2011/07/building-a-pc-part-vii-rebooting.html +# Then explicitly strip the "Posted By" and prev/next links that Prune:yes would have removed. + +prune: no +strip: //div[@class='posted']/following-sibling::* strip: //div[@class='posted'] test_url: http://www.codinghorror.com/blog/2011/07/building-a-pc-part-vii-rebooting.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/collegehumor.com.txt b/inc/3rdparty/site_config/standard/collegehumor.com.txt old mode 100644 new mode 100755 index 9d75d641..318e6ff4 --- a/inc/3rdparty/site_config/standard/collegehumor.com.txt +++ b/inc/3rdparty/site_config/standard/collegehumor.com.txt @@ -1,14 +1,14 @@ -title: //h1[@class='title'] -author: //p[@class='byline']/a[1] -date: //*[@class='date'] - -body: //div[@class='article_body'] -strip: //p[@class='ca_intro'] -strip: //div[@id='action_bar'] -strip: //div[@class='below_content'] -strip: //div[@id='announcement'] -strip: //div[@id='leftovers'] -strip: //div[@class='form'] -strip: //div[@id='email_overlay'] +title: //h1[@class='title'] +author: //p[@class='byline']/a[1] +date: //*[@class='date'] + +body: //div[@class='article_body'] +strip: //p[@class='ca_intro'] +strip: //div[@id='action_bar'] +strip: //div[@class='below_content'] +strip: //div[@id='announcement'] +strip: //div[@id='leftovers'] +strip: //div[@class='form'] +strip: //div[@id='email_overlay'] strip: //a[@class='close'] test_url: http://www.collegehumor.com/article/6599562/how-it-happened-the-necktie \ No newline at end of file diff --git 
a/inc/3rdparty/site_config/standard/communities-dominate.blogs.com.txt b/inc/3rdparty/site_config/standard/communities-dominate.blogs.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/community.service-now.com.txt b/inc/3rdparty/site_config/standard/community.service-now.com.txt old mode 100644 new mode 100755 index 10fd2516..c9854b43 --- a/inc/3rdparty/site_config/standard/community.service-now.com.txt +++ b/inc/3rdparty/site_config/standard/community.service-now.com.txt @@ -1,8 +1,8 @@ -body: //div[@id="center"]//div[@class="node"] -title: //div[@id="center"]//h2 -author: substring-after(//div[@id="center"]//div[@class="node"]//span[@class="submitted"], "—") -date: substring-before(//div[@id="center"]//div[@class="node"]//span[@class="submitted"], "—") -strip: //div[@id="center"]//h2[1] -strip: //span[@class="submitted"][1] +body: //div[@id="center"]//div[@class="node"] +title: //div[@id="center"]//h2 +author: substring-after(//div[@id="center"]//div[@class="node"]//span[@class="submitted"], "—") +date: substring-before(//div[@id="center"]//div[@class="node"]//span[@class="submitted"], "—") +strip: //div[@id="center"]//h2[1] +strip: //span[@class="submitted"][1] move_into(//div[@class="node"])://div[@class="breadcrumb"] test_url: http://community.service-now.com/blog/lawrenceeng/seasons-greetings-servicenow-team \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/computer.org.txt b/inc/3rdparty/site_config/standard/computer.org.txt old mode 100644 new mode 100755 index 00e6fddf..8345cf50 --- a/inc/3rdparty/site_config/standard/computer.org.txt +++ b/inc/3rdparty/site_config/standard/computer.org.txt @@ -1,5 +1,5 @@ -strip_id_or_class:column-3 -strip_id_or_class:portlet-boundary -strip_id_or_class:banner +strip_id_or_class:column-3 +strip_id_or_class:portlet-boundary +strip_id_or_class:banner test_url: http://www.computer.org/portal/web/buildyourcareer/careerwatch/jt19 \ No newline at end of file diff 
--git a/inc/3rdparty/site_config/standard/computerbase.de.txt b/inc/3rdparty/site_config/standard/computerbase.de.txt old mode 100644 new mode 100755 index 29199242..5973c50b --- a/inc/3rdparty/site_config/standard/computerbase.de.txt +++ b/inc/3rdparty/site_config/standard/computerbase.de.txt @@ -1,18 +1,18 @@ -title://h1 - -author://div[@id="news-meta"]/a - -body://*[@id="main"]/div[1] - -strip://*[@id="main"]/div[2] -strip://*[@id="main"]/div[3] -strip://*[@id="page"]//footer - -#date: didn't manage to parse it - -#Images have to be stripped because the page does it with overlay -strip://img - -#figures are not displayed in instapaper... -strip://figure | //figcaption +title://h1 + +author://div[@id="news-meta"]/a + +body://*[@id="main"]/div[1] + +strip://*[@id="main"]/div[2] +strip://*[@id="main"]/div[3] +strip://*[@id="page"]//footer + +#date: didn't manage to parse it + +#Images have to be stripped because the page does it with overlay +strip://img + +#figures are not displayed in instapaper... 
+strip://figure | //figcaption test_url: http://www.computerbase.de/news/2012-06/verbraucherzentrale-mahnt-blizzard-fuer-diablo-3-ab/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/computerworld.com.txt b/inc/3rdparty/site_config/standard/computerworld.com.txt old mode 100644 new mode 100755 index 8e1f3e11..7f20a4da --- a/inc/3rdparty/site_config/standard/computerworld.com.txt +++ b/inc/3rdparty/site_config/standard/computerworld.com.txt @@ -1,22 +1,22 @@ -title: //meta[@name='headline']/@content -date: //meta[@name='date']/@content -author: //meta[@name='author']/@content -body: //div[contains(@class, 'article')] -body://div[@id="article_body"] - -strip_id_or_class: banner -strip: //noscript -strip: //div[@style='width:1px;height:130px;float:right;'] -strip: //div[@class='storyby'] -strip_image_src: twitter_icon -strip_image_src: rss_bug - -tidy: no -prune: no - -next_page_link://div[@id="next_page"]/a - -single_page_link: concat('http://www.computerworld.com/s/article/print/', substring-after(//link[@rel='canonical']/@href, '/s/article/')) - -test_url: http://www.computerworld.com/s/article/9224348/Apple_s_new_OS_X_tightens_screws_on_some_malware +title: //meta[@name='headline']/@content +date: //meta[@name='date']/@content +author: //meta[@name='author']/@content +body: //div[contains(@class, 'article')] +body://div[@id="article_body"] + +strip_id_or_class: banner +strip: //noscript +strip: //div[@style='width:1px;height:130px;float:right;'] +strip: //div[@class='storyby'] +strip_image_src: twitter_icon +strip_image_src: rss_bug + +tidy: no +prune: no + +next_page_link://div[@id="next_page"]/a + +single_page_link: concat('http://www.computerworld.com/s/article/print/', substring-after(//link[@rel='canonical']/@href, '/s/article/')) + +test_url: http://www.computerworld.com/s/article/9224348/Apple_s_new_OS_X_tightens_screws_on_some_malware test_url: 
http://www.computerworld.com/s/article/9227679/Windows_8_Release_Preview_Updated_but_still_uneasy \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/computerworld.dk.txt b/inc/3rdparty/site_config/standard/computerworld.dk.txt old mode 100644 new mode 100755 index a83f366f..d819109c --- a/inc/3rdparty/site_config/standard/computerworld.dk.txt +++ b/inc/3rdparty/site_config/standard/computerworld.dk.txt @@ -1,5 +1,5 @@ -strip: //div[contains(@class, 'articleAdtechAd')] -title: //div[@id='article']/h1 -title: //div[contains(@class, 'article')]/h1 -body: //div[@id='articleText'] +strip: //div[contains(@class, 'articleAdtechAd')] +title: //div[@id='article']/h1 +title: //div[contains(@class, 'article')]/h1 +body: //div[@id='articleText'] test_url: http://www.computerworld.dk/art/56748/test-din-viden-med-computerworlds-store-sommerquiz?a=fp_1&i=0 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/contemporist.com.txt b/inc/3rdparty/site_config/standard/contemporist.com.txt old mode 100644 new mode 100755 index d2b289a3..c3120fe8 --- a/inc/3rdparty/site_config/standard/contemporist.com.txt +++ b/inc/3rdparty/site_config/standard/contemporist.com.txt @@ -1,9 +1,9 @@ -# get author from string like "Posted by <author> on <date>" -author: substring-before(substring-after(//div[@class='post']/p[@class='post-meta'], 'by'), 'on') - -# get date from string like "Posted by <author> on <date>" -date: substring-after(//div[@class='post']/p[@class='post-meta'], 'on') - -# this keeps thumbnail images +# get author from string like "Posted by <author> on <date>" +author: substring-before(substring-after(//div[@class='post']/p[@class='post-meta'], 'by'), 'on') + +# get date from string like "Posted by <author> on <date>" +date: substring-after(//div[@class='post']/p[@class='post-meta'], 'on') + +# this keeps thumbnail images prune: no test_url: http://www.contemporist.com/2011/11/02/landing-200-lamp-by-kim-hyunjoo \ No newline at end of 
file diff --git a/inc/3rdparty/site_config/standard/conversaciones.nokia.com.txt b/inc/3rdparty/site_config/standard/conversaciones.nokia.com.txt old mode 100644 new mode 100755 index 9bad2c84..966cc861 --- a/inc/3rdparty/site_config/standard/conversaciones.nokia.com.txt +++ b/inc/3rdparty/site_config/standard/conversaciones.nokia.com.txt @@ -1,7 +1,7 @@ -title: //div[@class='article_header']/h1 -body: //div[@class='article_header']/p | //div[@class='article_body'] -strip_id_or_class: share_this -strip_id_or_class: sociable -prune: no - +title: //div[@class='article_header']/h1 +body: //div[@class='article_header']/p | //div[@class='article_body'] +strip_id_or_class: share_this +strip_id_or_class: sociable +prune: no + test_url: http://conversaciones.nokia.com/2011/10/07/cinco-atajos-en-el-nokia-n8/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/cooper.com.txt b/inc/3rdparty/site_config/standard/cooper.com.txt new file mode 100755 index 00000000..a4244097 --- /dev/null +++ b/inc/3rdparty/site_config/standard/cooper.com.txt @@ -0,0 +1,4 @@ +body: //*[contains(@class,'body')] +date: //abbr[@class='published'] + +test_url: http://www.cooper.com/journal/2012/08/2-weeks-left-to-win-your-way-to-the-woodstock-of-ux-coopers-ux-boot-camp.html/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/core77.com.txt b/inc/3rdparty/site_config/standard/core77.com.txt old mode 100644 new mode 100755 index a24374d8..cf1fa93c --- a/inc/3rdparty/site_config/standard/core77.com.txt +++ b/inc/3rdparty/site_config/standard/core77.com.txt @@ -1,7 +1,7 @@ -body: //div[@id="permalink"]/div[@class="post"] - -strip: //div[@id='backArrow'] -strip: //div[@id='fwdArrow'] -strip: //div[@class="post-title"] +body: //div[@id="permalink"]/div[@class="post"] + +strip: //div[@id='backArrow'] +strip: //div[@id='fwdArrow'] +strip: //div[@class="post-title"] strip: //div[@class="sharing"] test_url: 
http://www.core77.com/blog/columns/why_design_education_must_change_17993.asp \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/counterpunch.org.txt b/inc/3rdparty/site_config/standard/counterpunch.org.txt old mode 100644 new mode 100755 index c9e92287..b6bd8be5 --- a/inc/3rdparty/site_config/standard/counterpunch.org.txt +++ b/inc/3rdparty/site_config/standard/counterpunch.org.txt @@ -1,6 +1,6 @@ -title: //div[@class='main']//h1[contains(@class, 'article-title')] -author: //div[@class='mainauthorstyle'] -body: //div[@class='main']//div[@class='main-text'] -strip: //td[@width='140'] - +title: //div[@class='main']//h1[contains(@class, 'article-title')] +author: //div[@class='mainauthorstyle'] +body: //div[@class='main']//div[@class='main-text'] +strip: //td[@width='140'] + test_url: http://www.counterpunch.org/johnstone05172011.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/crazybutable.com.txt b/inc/3rdparty/site_config/standard/crazybutable.com.txt old mode 100644 new mode 100755 index d25cd05d..037cd177 --- a/inc/3rdparty/site_config/standard/crazybutable.com.txt +++ b/inc/3rdparty/site_config/standard/crazybutable.com.txt @@ -1,3 +1,3 @@ -title://h2 +title://h2 body://div[contains(@class, 'entrytext')] test_url: http://www.crazybutable.com/weblog/archives/2010/07/01/house-ideas-that-worked/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/crimemagazine.com.txt b/inc/3rdparty/site_config/standard/crimemagazine.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/crimethinc.com.txt b/inc/3rdparty/site_config/standard/crimethinc.com.txt old mode 100644 new mode 100755 index 74bc6db9..b5a8018a --- a/inc/3rdparty/site_config/standard/crimethinc.com.txt +++ b/inc/3rdparty/site_config/standard/crimethinc.com.txt @@ -1,3 +1,3 @@ -body: //div[@class="readingtext"] +body: //div[@class="readingtext"] title: substring-after(substring-after(//title, ':'), 
':') test_url: http://www.crimethinc.com/texts/recentfeatures/nightmares.php \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/crn.de.txt b/inc/3rdparty/site_config/standard/crn.de.txt old mode 100644 new mode 100755 index 7fa950af..61d5d6a7 --- a/inc/3rdparty/site_config/standard/crn.de.txt +++ b/inc/3rdparty/site_config/standard/crn.de.txt @@ -1,3 +1,3 @@ -author: //p[contains(@class,'author')]/a +author: //p[contains(@class,'author')]/a date: //div[contains(@class,'date')] test_url: http://www.crn.de/netzwerke-tk/artikel-93103.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/csmonitor.com.txt b/inc/3rdparty/site_config/standard/csmonitor.com.txt old mode 100644 new mode 100755 index d4dbc5c8..b482e34e --- a/inc/3rdparty/site_config/standard/csmonitor.com.txt +++ b/inc/3rdparty/site_config/standard/csmonitor.com.txt @@ -1,18 +1,18 @@ -title: //h1[contains(@class, 'head')] - -# standard page -body: //div[@id='mainColumn']//div[contains(@class, 'list-article-full')] -# print page -body: //div[@id='mainColumn'] - -author: //a[contains(@class, 'ui-author')] - -single_page_link: //div[@class='storyToolbar']//a[contains(@href, '/print/')] - -strip_id_or_class: storyToolbar -strip_id_or_class: promotion-tag - -tidy: no -prune: no +title: //h1[contains(@class, 'head')] + +# standard page +body: //div[@id='mainColumn']//div[contains(@class, 'list-article-full')] +# print page +body: //div[@id='mainColumn'] + +author: //a[contains(@class, 'ui-author')] + +single_page_link: //div[@class='storyToolbar']//a[contains(@href, '/print/')] + +strip_id_or_class: storyToolbar +strip_id_or_class: promotion-tag + +tidy: no +prune: no test_url: www.csmonitor.com/World/Middle-East/2011/1108/Imminent-Iran-nuclear-threat-A-timeline-of-warnings-since-1979/Earliest-warnings-1979-84 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/csnbayarea.com.txt b/inc/3rdparty/site_config/standard/csnbayarea.com.txt old 
mode 100644 new mode 100755 index 131a923b..1da60b4e --- a/inc/3rdparty/site_config/standard/csnbayarea.com.txt +++ b/inc/3rdparty/site_config/standard/csnbayarea.com.txt @@ -1,7 +1,7 @@ -title: //div[@id='csn_blogST_headline']/h1 - -body: //div[@id='csn_blogST_main'] -strip_id_or_class: ipfootnotes -strip: //div[@id='csn_blogST_main']/p[1]/img +title: //div[@id='csn_blogST_headline']/h1 + +body: //div[@id='csn_blogST_main'] +strip_id_or_class: ipfootnotes +strip: //div[@id='csn_blogST_main']/p[1]/img strip: //div[@id='csn_blogST_sidebar'] test_url: http://www.csnbayarea.com/blog/giants-talk/post/-?blog%2Fgiants-talk%2Fpost%2F-=&blockID=578902&feedID=5987 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/csnphilly.com.txt b/inc/3rdparty/site_config/standard/csnphilly.com.txt old mode 100644 new mode 100755 index 0df72c32..c14a934a --- a/inc/3rdparty/site_config/standard/csnphilly.com.txt +++ b/inc/3rdparty/site_config/standard/csnphilly.com.txt @@ -1,22 +1,22 @@ -# author's name is not isolated as a tag.... ugh -convert_double_br_tags: yes -body: //csn_blogST_main - -#junk above and around the article -strip: /html/body/div[4]/div[3]/div/div/div/section/div/div/div/div/div/div -strip: /html/body/div[4]/header -strip_id_or_class: article-right-sidebar -strip_id_or_class: rsn-gigya-sharebar-container -strip_id_or_class: article-bottom -strip_id_or_class: hider -strip_id_or_class: footer -strip_id_or_class: masthead -strip_id_or_class: block-menu-menu-rsn-login-or-register -strip_id_or_class: block-menu-menu-header-links -strip_id_or_class: block-rsn-follow-bar-follow-bar -strip_id_or_class: block-rsn-weather-rsn-weather-scoreboard -strip_id_or_class: logo -strip_id_or_class: element-invisible -strip_id_or_class: site-name -strip: //div[contains(@style, 'none')] +# author's name is not isolated as a tag.... 
ugh +convert_double_br_tags: yes +body: //csn_blogST_main + +#junk above and around the article +strip: /html/body/div[4]/div[3]/div/div/div/section/div/div/div/div/div/div +strip: /html/body/div[4]/header +strip_id_or_class: article-right-sidebar +strip_id_or_class: rsn-gigya-sharebar-container +strip_id_or_class: article-bottom +strip_id_or_class: hider +strip_id_or_class: footer +strip_id_or_class: masthead +strip_id_or_class: block-menu-menu-rsn-login-or-register +strip_id_or_class: block-menu-menu-header-links +strip_id_or_class: block-rsn-follow-bar-follow-bar +strip_id_or_class: block-rsn-weather-rsn-weather-scoreboard +strip_id_or_class: logo +strip_id_or_class: element-invisible +strip_id_or_class: site-name +strip: //div[contains(@style, 'none')] test_url: http://www.csnphilly.com/eagles/can-stoutland-save-danny-watkins-career \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/css-tricks.com.txt b/inc/3rdparty/site_config/standard/css-tricks.com.txt new file mode 100755 index 00000000..3d8174aa --- /dev/null +++ b/inc/3rdparty/site_config/standard/css-tricks.com.txt @@ -0,0 +1,6 @@ +title://article[contains(@id, "post-")]/h1 +date://article[contains(@id, "post-")]/p[@class="time"]/time +body://article[contains(@id, "post-")] +strip://article[contains(@id, "post-")]/p[@class="time"]/time +prune:yes +test_url: http://css-tricks.com/off-canvas-menu-with-css-target/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/cucharasonica.com.txt b/inc/3rdparty/site_config/standard/cucharasonica.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/cw.com.tw.txt b/inc/3rdparty/site_config/standard/cw.com.tw.txt new file mode 100755 index 00000000..6e3a91ee --- /dev/null +++ b/inc/3rdparty/site_config/standard/cw.com.tw.txt @@ -0,0 +1,14 @@ +author://span[contains(@class,'reporter')] + +date://span[contains(@class,'date')] + +body://div[contains(@class,'mainContaner')] + 
+strip://div[contains(@class,'mainHeaer')] +strip://div[contains(@class,'keyW')] +strip://div[contains(@class,'wonderful')] +strip://div[contains(@class,'pages')] +strip://div[contains(@class,'Topics TopicsW3')] + +next_page_link://li[@class='pageNext']/a[contains(.,'下一頁')] +test_url: http://www.cw.com.tw/article/article.action?id=5032848 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/da.feedsportal.com.txt b/inc/3rdparty/site_config/standard/da.feedsportal.com.txt old mode 100644 new mode 100755 index 4a00ef44..381446e5 --- a/inc/3rdparty/site_config/standard/da.feedsportal.com.txt +++ b/inc/3rdparty/site_config/standard/da.feedsportal.com.txt @@ -1,5 +1,5 @@ -single_page_link: //a -tidy: no -prune: no +single_page_link: //a +tidy: no +prune: no test_url: da.feedsportal.com/c/585/f/413794/s/17037b5a/l/0L0Stelegraaf0Bnl0Cbinnenland0C10A2757860C0I0IKlacht0Itegen0Idr0B0IFrank0Iniet0I0Eontvankelijk0I0I0Bhtml0Dcid0Frss/ia1.htm \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/dagogtid.no.txt b/inc/3rdparty/site_config/standard/dagogtid.no.txt new file mode 100755 index 00000000..1531472c --- /dev/null +++ b/inc/3rdparty/site_config/standard/dagogtid.no.txt @@ -0,0 +1,4 @@ +title: //span[@class = 'overskriftEkstrastor'] +author: //em/a + +test_url: http://dagogtid.no/nyhet.cfm?nyhetid=2414 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/dailydot.com.txt b/inc/3rdparty/site_config/standard/dailydot.com.txt old mode 100644 new mode 100755 index 61013993..978ed1ce --- a/inc/3rdparty/site_config/standard/dailydot.com.txt +++ b/inc/3rdparty/site_config/standard/dailydot.com.txt @@ -1,4 +1,4 @@ -tidy: no -body: //article +tidy: no +body: //article test_url: http://www.dailydot.com/entertainment/tumblr-christopher-price-topherchris/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/dailykos.com.txt b/inc/3rdparty/site_config/standard/dailykos.com.txt old mode 100644 new 
mode 100755 index 124675cb..6d4cb82a --- a/inc/3rdparty/site_config/standard/dailykos.com.txt +++ b/inc/3rdparty/site_config/standard/dailykos.com.txt @@ -1,10 +1,10 @@ -body: //div[@id='article-1']//div[contains(@class, 'article-body')] -title: //div[@class='meta']//a[@id='titleHref'] -date: //div[@class='meta']//p[@class='date'] - -strip_id_or_class: invisible -strip_id_or_class: divider-doodle - -prune: no - -test_url: http://www.dailykos.com/story/2012/01/26/1058790/-Newt-Gingrichs-campaign-admits-he-lied-during-debate-about-ABC-News-interview-with-his�ex-wife \ No newline at end of file +body: //div[@id='article-1']//div[contains(@class, 'article-body')] +title: //div[@class='meta']//a[@id='titleHref'] +date: //div[@class='meta']//p[@class='date'] + +strip_id_or_class: invisible +strip_id_or_class: divider-doodle + +prune: no + +test_url: http://www.dailykos.com/story/2012/01/26/1058790/-Newt-Gingrich-s-campaign-admits-he-lied-during-debate-about-ABC-News-interview-with-his-ex-wife diff --git a/inc/3rdparty/site_config/standard/dailymail.co.uk.txt b/inc/3rdparty/site_config/standard/dailymail.co.uk.txt old mode 100644 new mode 100755 index c83dbdb0..cd29a4d4 --- a/inc/3rdparty/site_config/standard/dailymail.co.uk.txt +++ b/inc/3rdparty/site_config/standard/dailymail.co.uk.txt @@ -1,12 +1,12 @@ -body: //div[@id='js-article-text'] -strip: //div[@class='explore-links'] -strip: //div[@id='js-article-text']/br[position()=1] -strip_id_or_class: print-or-mail-links -strip_id_or_class: shareArticles -strip_id_or_class: googleAds -strip_id_or_class: digg-button -strip_id_or_class: article-icon-links-container -strip_id_or_class: clickToEnlarge -tidy: no - +body: //div[@id='js-article-text'] +strip: //div[@class='explore-links'] +strip: //div[@id='js-article-text']/br[position()=1] +strip_id_or_class: print-or-mail-links +strip_id_or_class: shareArticles +strip_id_or_class: googleAds +strip_id_or_class: digg-button +strip_id_or_class: article-icon-links-container 
+strip_id_or_class: clickToEnlarge +tidy: no + test_url: http://www.dailymail.co.uk/news/article-1375423/Royal-wedding-Texan-billionaire-Joe-Albritton-invited-Prince-Charles.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/dailystar.com.lb.txt b/inc/3rdparty/site_config/standard/dailystar.com.lb.txt new file mode 100755 index 00000000..3b153042 --- /dev/null +++ b/inc/3rdparty/site_config/standard/dailystar.com.lb.txt @@ -0,0 +1,6 @@ +title: //div[@class='ec-blog-headline'] +body: //*[@id="divDetails"] +date: //*[@id="ctl00_ContentPlaceHolder1_tdDate"] +author: //*[@id="ctl00_ContentPlaceHolder1_anchorAuthor"]/a +autodetect_next_page: no +test_url: http://dailystar.com.lb/Opinion/Columnist/2012/Oct-10/190803-americas-new-modesty-in-the-mideast.ashx#axzz2928JP5xE \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/danleech.com.txt b/inc/3rdparty/site_config/standard/danleech.com.txt new file mode 100755 index 00000000..1d4cec77 --- /dev/null +++ b/inc/3rdparty/site_config/standard/danleech.com.txt @@ -0,0 +1,6 @@ +tidy: no +prune: no +date: //article//time[@pubdate] +title: //article/h1//span[contains(@class, 'entry-title')] +body: //article/div[contains(@class, 'entry-content')] +test_url: http://danleech.com/post/36822126876/simple-icons \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/dansdata.com.txt b/inc/3rdparty/site_config/standard/dansdata.com.txt old mode 100644 new mode 100755 index 96a2bc41..60669480 --- a/inc/3rdparty/site_config/standard/dansdata.com.txt +++ b/inc/3rdparty/site_config/standard/dansdata.com.txt @@ -1,5 +1,5 @@ -autodetect_next_page: no -tidy: no -prune: no +autodetect_next_page: no +tidy: no +prune: no body: //div[@class='NoOverflow'] test_url: http://www.dansdata.com/gz129.htm \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/dantri.com.vn.txt b/inc/3rdparty/site_config/standard/dantri.com.vn.txt new file mode 100755 index 
00000000..f19fee7c --- /dev/null +++ b/inc/3rdparty/site_config/standard/dantri.com.vn.txt @@ -0,0 +1,7 @@ +title: //h1[contains(@class, 'fon31 mt2')] +body: //h2[contains(@class, 'fon33 mt1')] | //div[contains(@class, 'fon34 mt3')] + +prune: no + +test_url: http://dantri.com.vn/su-kien/chang-trai-mot-minh-dap-xe-vuot-450km-de-vieng-mo-dai-tuong-869763.htm +test_url: http://dantri.com.vn/trangchu.rss \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/daringfireball.net.txt b/inc/3rdparty/site_config/standard/daringfireball.net.txt old mode 100644 new mode 100755 index dca8ade7..251cc670 --- a/inc/3rdparty/site_config/standard/daringfireball.net.txt +++ b/inc/3rdparty/site_config/standard/daringfireball.net.txt @@ -1,7 +1,7 @@ -title: //div[@class="article"]/h1 -author: //div[@id="Sidebar"]/p/strong -date: //h6[@class="dateline"] -body: //div[@class="article"] -strip: //h6[@class="dateline"] -strip: //div[@class="article"]/h1 +title: //div[@class="article"]/h1 +author: //div[@id="Sidebar"]/p/strong +date: //h6[@class="dateline"] +body: //div[@class="article"] +strip: //h6[@class="dateline"] +strip: //div[@class="article"]/h1 test_url: http://daringfireball.net/2011/10/apps_are_the_new_channels \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/datanami.com.txt b/inc/3rdparty/site_config/standard/datanami.com.txt old mode 100644 new mode 100755 index 3534002a..e9111a48 --- a/inc/3rdparty/site_config/standard/datanami.com.txt +++ b/inc/3rdparty/site_config/standard/datanami.com.txt @@ -1,4 +1,4 @@ -body: //div[@id="article"] -date: //p[@class="date"] +body: //div[@id="article"] +date: //p[@class="date"] author: //p[@class="byline"] test_url: http://www.datanami.com/datanami/2011-12-07/new_path_for_sap:_in_memory_computing,_predictive_analysis_converge.html?featured=top \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/dcurt.is.txt b/inc/3rdparty/site_config/standard/dcurt.is.txt old mode 100644 
new mode 100755 index 7d11c6e1..524c4bf1 --- a/inc/3rdparty/site_config/standard/dcurt.is.txt +++ b/inc/3rdparty/site_config/standard/dcurt.is.txt @@ -1,8 +1,8 @@ -title: (//article//h2)[1] -body: //article[contains(@class, 'post')] -date: //time[@id='top_time']/@datetime - -prune: no -tidy: no - +title: (//article//h2)[1] +body: //article[contains(@class, 'post')] +date: //time[@id='top_time']/@datetime + +prune: no +tidy: no + test_url: http://dcurt.is/predictions-txt \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/defomicron.net.txt b/inc/3rdparty/site_config/standard/defomicron.net.txt new file mode 100755 index 00000000..9f11258c --- /dev/null +++ b/inc/3rdparty/site_config/standard/defomicron.net.txt @@ -0,0 +1,9 @@ +title: //article/h1 +author: //hgroup/h3/a +date: //time +body: //article +strip: //aside +footnotes: yes +prune: no +tidy: no +test_url: https://defomicron.net/2012/09/ios-6/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/delong.typepad.com.txt b/inc/3rdparty/site_config/standard/delong.typepad.com.txt old mode 100644 new mode 100755 index 84fd4f79..c4b922e4 --- a/inc/3rdparty/site_config/standard/delong.typepad.com.txt +++ b/inc/3rdparty/site_config/standard/delong.typepad.com.txt @@ -1,4 +1,4 @@ -strip_id_or_class: banner -strip_id_or_class: gamma +strip_id_or_class: banner +strip_id_or_class: gamma strip_id_or_class: module-list test_url: http://delong.typepad.com/sdj/2011/02/in-which-suresh-naidu-visits-the-new-jerusalem.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/democracynow.org.txt b/inc/3rdparty/site_config/standard/democracynow.org.txt new file mode 100755 index 00000000..b0050b4f --- /dev/null +++ b/inc/3rdparty/site_config/standard/democracynow.org.txt @@ -0,0 +1,5 @@ +body: //div[contains(@class, 'blog_body')] + +prune: no + +test_url: http://www.democracynow.org/blog/2014/1/9/the_fbi_the_nsa_and_a \ No newline at end of file diff --git 
a/inc/3rdparty/site_config/standard/derstandard.at.txt b/inc/3rdparty/site_config/standard/derstandard.at.txt old mode 100644 new mode 100755 index 48722ebd..07db3521 --- a/inc/3rdparty/site_config/standard/derstandard.at.txt +++ b/inc/3rdparty/site_config/standard/derstandard.at.txt @@ -1,13 +1,13 @@ -title: //div[@id='artikelHeader']/h1 -author: //span[@class='author'] -date: //span[@class='date'] -body: //div[@class='copytext'] -strip: //ul[@class='lookupLinksArtikel'] - -strip: //div[@id='pageTop'] -strip: //div[@id='toolbar'] -strip: //div[@id='articleTools'] -strip: //div[@id='weiterlesen'] -strip: //div[@id='communityCanvas'] +title: //div[@id='artikelHeader']/h1 +author: //span[@class='author'] +date: //span[@class='date'] +body: //div[@class='copytext'] +strip: //ul[@class='lookupLinksArtikel'] + +strip: //div[@id='pageTop'] +strip: //div[@id='toolbar'] +strip: //div[@id='articleTools'] +strip: //div[@id='weiterlesen'] +strip: //div[@id='communityCanvas'] test_url: http://derstandard.at/1318726018343/Breitband-LTE-Was-bringt-die-neue-Mobilfunk-Generation \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/designtagebuch.de.txt b/inc/3rdparty/site_config/standard/designtagebuch.de.txt old mode 100644 new mode 100755 index 6096db0b..9020847f --- a/inc/3rdparty/site_config/standard/designtagebuch.de.txt +++ b/inc/3rdparty/site_config/standard/designtagebuch.de.txt @@ -1,11 +1,11 @@ -tidy: no -body: //div[@class='main'] - -author: substring-before(substring-after(//div[@class='meta-single'], 'erstellt von '), ' am') -date: substring-before(substring-after(//div[@class='meta-single'], ' am '), ' | ') - -strip_id_or_class: pagelink -strip_id_or_class: wp-polls - +tidy: no +body: //div[@class='main'] + +author: substring-before(substring-after(//div[@class='meta-single'], 'erstellt von '), ' am') +date: substring-before(substring-after(//div[@class='meta-single'], ' am '), ' | ') + +strip_id_or_class: pagelink +strip_id_or_class: wp-polls + 
next_page_link: //div[@class='post-page-next']/a test_url: http://www.designtagebuch.de/die-gefuehlte-lesbarkeit/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/desitvforum.net.txt b/inc/3rdparty/site_config/standard/desitvforum.net.txt old mode 100644 new mode 100755 index a6dac5fd..efa85f76 --- a/inc/3rdparty/site_config/standard/desitvforum.net.txt +++ b/inc/3rdparty/site_config/standard/desitvforum.net.txt @@ -1,5 +1,5 @@ -body: (//blockquote[contains(@class, 'postcontent')])[1] -body: (//div[starts-with(@id, 'post_message')])[1] - -prune: no +body: (//blockquote[contains(@class, 'postcontent')])[1] +body: (//div[starts-with(@id, 'post_message')])[1] + +prune: no tidy: no \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/details.com.txt b/inc/3rdparty/site_config/standard/details.com.txt old mode 100644 new mode 100755 index 548cabad..d1d8a29a --- a/inc/3rdparty/site_config/standard/details.com.txt +++ b/inc/3rdparty/site_config/standard/details.com.txt @@ -1,8 +1,8 @@ -title: //h1[@class="content-headline"] -body: //div[@class="headers-container"] | //div[@class="content-container"] -prune: no -tidy: no - -single_page_link: //li[@class='utility-print']/a - +title: //h1[@class="content-headline"] +body: //div[@class="headers-container"] | //div[@class="content-container"] +prune: no +tidy: no + +single_page_link: //li[@class='utility-print']/a + test_url: http://www.details.com/culture-trends/critical-eye/201108/best-new-designers-innovations \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/developers.facebook.com.txt b/inc/3rdparty/site_config/standard/developers.facebook.com.txt old mode 100644 new mode 100755 index 43a8f0a0..7609b72f --- a/inc/3rdparty/site_config/standard/developers.facebook.com.txt +++ b/inc/3rdparty/site_config/standard/developers.facebook.com.txt @@ -1,3 +1,3 @@ -title: //div[@class="bodyText"]/h1 +title: //div[@class="bodyText"]/h1 author: 
//div[@class="picture"]/a/img/@alt test_url: https://developers.facebook.com/blog/post/2012/03/22/developer-spotlight--foodspotting/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/devlinsangle.blogspot.co.at.txt b/inc/3rdparty/site_config/standard/devlinsangle.blogspot.co.at.txt old mode 100644 new mode 100755 index b960b37e..6f1d4e27 --- a/inc/3rdparty/site_config/standard/devlinsangle.blogspot.co.at.txt +++ b/inc/3rdparty/site_config/standard/devlinsangle.blogspot.co.at.txt @@ -1,6 +1,6 @@ -date: //h2[@class='date-header'] -body: //div[@class='post hentry'] -title: //h3 -strip: //div[@class='post-footer'] +date: //h2[@class='date-header'] +body: //div[@class='post hentry'] +title: //h3 +strip: //div[@class='post-footer'] test_url: http://devlinsangle.blogspot.co.at/2012/03/difference-between-teaching-and_01.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/dictionary.reference.com.txt b/inc/3rdparty/site_config/standard/dictionary.reference.com.txt old mode 100644 new mode 100755 index a1172024..f8b79c80 --- a/inc/3rdparty/site_config/standard/dictionary.reference.com.txt +++ b/inc/3rdparty/site_config/standard/dictionary.reference.com.txt @@ -1,8 +1,8 @@ -title: //h1[@id='query_h1'] -body: //div[contains(@class, 'lunatext results_content')] -strip_id_or_class: spl_unshd -#replace_string(<div class="dicTl">): <div class="dicTl">------------------<br /> - -prune: no +title: //h1[@id='query_h1'] +body: //div[contains(@class, 'lunatext results_content')] +strip_id_or_class: spl_unshd +#replace_string(<div class="dicTl">): <div class="dicTl">------------------<br /> + +prune: no test_url: http://www.wired.com/cloudline/2011/10/meet-arms-cortex-a15-the-future-of-the-ipad-and-possibly-the-macbook-air/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/diepresse.com.txt b/inc/3rdparty/site_config/standard/diepresse.com.txt old mode 100644 new mode 100755 index 7e825a91..ced189cc --- 
a/inc/3rdparty/site_config/standard/diepresse.com.txt +++ b/inc/3rdparty/site_config/standard/diepresse.com.txt @@ -1,6 +1,6 @@ -title: //div[@class='article']/h1 -date: substring-before(//p[@class='articletime'],'|') -body: //div[@id='articletext'] -strip: //div[@class='inlineDiashow'] +title: //div[@class='article']/h1 +date: substring-before(//p[@class='articletime'],'|') +body: //div[@id='articletext'] +strip: //div[@class='inlineDiashow'] test_url: http://diepresse.com/home/politik/aussenpolitik/701905/TibeterProteste_Nonne-verbrennt-sich-selbst?_vl_backlink=/home/politik/index.do \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/digiphoto.techbang.com.txt b/inc/3rdparty/site_config/standard/digiphoto.techbang.com.txt old mode 100644 new mode 100755 index 2d2ae2c2..80ce5ff3 --- a/inc/3rdparty/site_config/standard/digiphoto.techbang.com.txt +++ b/inc/3rdparty/site_config/standard/digiphoto.techbang.com.txt @@ -1,8 +1,8 @@ -# default parser works great -# only add "author" and "next page link" reference -# 2012-04-13 - -next_page_link: //div[@class = 'pagination']/a[@class = 'next_page'] - +# default parser works great +# only add "author" and "next page link" reference +# 2012-04-13 + +next_page_link: //div[@class = 'pagination']/a[@class = 'next_page'] + author: //*[@class = 'author metadata']/a test_url: http://digiphoto.techbang.com/posts/2433--commercial-photography-communication-is-the-key-to-a-good-work \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/digital-photography-school.com.txt b/inc/3rdparty/site_config/standard/digital-photography-school.com.txt old mode 100644 new mode 100755 index 37192ac0..18ce370e --- a/inc/3rdparty/site_config/standard/digital-photography-school.com.txt +++ b/inc/3rdparty/site_config/standard/digital-photography-school.com.txt @@ -1,6 +1,6 @@ -title: //div[@class='post-title']/h1 -author: //a[@href='#author'] -body: //div[@class='post-content'] -strip: 
//div[@class='post-meta'] - +title: //div[@class='post-title']/h1 +author: //a[@href='#author'] +body: //div[@class='post-content'] +strip: //div[@class='post-meta'] + test_url: http://www.digital-photography-school.com/10-ways-to-develop-yourself-photographically \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/digitalspy.co.uk.txt b/inc/3rdparty/site_config/standard/digitalspy.co.uk.txt old mode 100644 new mode 100755 index b21431d7..f48bdfdb --- a/inc/3rdparty/site_config/standard/digitalspy.co.uk.txt +++ b/inc/3rdparty/site_config/standard/digitalspy.co.uk.txt @@ -1,5 +1,5 @@ -title: //div[@class="article_header"]/h1 -date: //div[@class="article_pub"]/span[@class="time"] -author: //div[@class="article_pub"]/span[@class="editors"]/a/text() +title: //div[@class="article_header"]/h1 +date: //div[@class="article_pub"]/span[@class="time"] +author: //div[@class="article_pub"]/span[@class="editors"]/a/text() body: //div[@class="article_body clear_left"] test_url: http://www.digitalspy.co.uk/movies/at-the-movies/a364066/top-5-super-bowl-movie-trailers-the-avengers-battleship-more.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/dilbert.com.txt b/inc/3rdparty/site_config/standard/dilbert.com.txt old mode 100644 new mode 100755 index 413e5506..85cc78e5 --- a/inc/3rdparty/site_config/standard/dilbert.com.txt +++ b/inc/3rdparty/site_config/standard/dilbert.com.txt @@ -1,8 +1,11 @@ -convert_double_br_tags: yes - -title: substring(substring-after(//title, ':'), 1, string-length(substring-after(//title, ':')) - 10) -body: //*[contains(@class, 'SB_Content')] -author: string('Scott Adams') -date: //*[contains(@class, 'SB_Detail')]/text()[1] +#title: substring(substring-after(//title, ':'), 1, string-length(substring-after(//title, ':')) - 10) +title: //div[contains(@class, 'SB_Title')]//a +body: //div[contains(@class, 'STR_Image')] +body: //*[contains(@class, 'SB_Content')] +author: string('Scott Adams') +date: 
//*[contains(@class, 'SB_Detail')]/text()[1] -test_url: http://dilbert.com/blog/entry/death_by_hypnosis_or_not/ \ No newline at end of file + +test_url: http://dilbert.com/blog/entry/death_by_hypnosis_or_not/ +test_url: http://dilbert.com/strips/comic/2013-10-22 +test_url: http://feed.dilbert.com/dilbert/daily_strip \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/dinamalar.com.txt b/inc/3rdparty/site_config/standard/dinamalar.com.txt old mode 100644 new mode 100755 index 9ef198c9..bc315cf1 --- a/inc/3rdparty/site_config/standard/dinamalar.com.txt +++ b/inc/3rdparty/site_config/standard/dinamalar.com.txt @@ -1,19 +1,19 @@ -title: //div[@class='newsdetbd'] -body: //div[@id='innerleft'] -#//p[@class = 'plnht'] -strip_image_src: /albums/ -strip: //div[@class='mrrt'] -prune: yes -strip_id_or_class: 'fdpd' -strip_id_or_class: 'epapt' -strip_id_or_class: 'newsrtwd' -strip_id_or_class: 'padtp' -strip_id_or_class: 'newdt' -strip_id_or_class: 'newdlt' -strip: //div[@id='selNotes'] -strip_id_or_class: 'clsNotes' -strip_id_or_class: 'clear' -strip_id_or_class: 'cmtwrap' -strip_id_or_class: 'sess' +title: //div[@class='newsdetbd'] +body: //div[@id='innerleft'] +#//p[@class = 'plnht'] +strip_image_src: /albums/ +strip: //div[@class='mrrt'] +prune: yes +strip_id_or_class: 'fdpd' +strip_id_or_class: 'epapt' +strip_id_or_class: 'newsrtwd' +strip_id_or_class: 'padtp' +strip_id_or_class: 'newdt' +strip_id_or_class: 'newdlt' +strip: //div[@id='selNotes'] +strip_id_or_class: 'clsNotes' +strip_id_or_class: 'clear' +strip_id_or_class: 'cmtwrap' +strip_id_or_class: 'sess' strip_id_or_class: 'parents' test_url: http://www.dinamalar.com/News_Detail.asp?Id=295725 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/dn.se.txt b/inc/3rdparty/site_config/standard/dn.se.txt old mode 100644 new mode 100755 index 86bb3b8d..5283a0cd --- a/inc/3rdparty/site_config/standard/dn.se.txt +++ b/inc/3rdparty/site_config/standard/dn.se.txt @@ -1,26 +1,28 @@ 
-# Since this element has class="clear", the Instapaper stylesheets (at least this text parser preview), will render it unreadable, with a 1px font size and line height. - -body: //div[@id="article-content"] - - -# Ads -strip_id_or_class: advert-space - -# Read more, recommend, comments etc -strip_id_or_class: fbc-recommend -strip_id_or_class: recommend -strip_id_or_class: article-readers -strip_id_or_class: article-addons -strip_id_or_class: hook -strip_id_or_class: right -strip_id_or_class: footer - -# Other news -strip: //div[@id="mirrors"] - -# Author -author: //div[@id="byline"]/div/p/strong - -# Date -date: substring(substring-after(//p[@class="published"], 'Publicerad '), 0, 11) -test_url: http://www.dn.se/nyheter/varlden/landade-flygplan-mitt-i-villaomrade \ No newline at end of file +# Since this element has class="clear", the Instapaper stylesheets (at least this text parser preview), will render it unreadable, with a 1px font size and line height. + +body: //div[@id="article-content"] + + +# Ads +strip_id_or_class: advert-space + +# Read more, recommend, comments etc +strip_id_or_class: fbc-recommend +strip_id_or_class: recommend +strip_id_or_class: article-readers +strip_id_or_class: article-addons +strip_id_or_class: hook +strip_id_or_class: right +strip_id_or_class: footer + +# Other news +strip: //div[@id="mirrors"] + +# Author +author: //div[@id="byline"]/div/p/strong + +# Date +date: substring(substring-after(//p[@class="published"], 'Publicerad '), 0, 11) + +test_url: http://www.dn.se/nyheter/varlden/landade-flygplan-mitt-i-villaomrade +test_url: http://www.dn.se/m/rss/senaste-nytt \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/dobreprogramy.pl.txt b/inc/3rdparty/site_config/standard/dobreprogramy.pl.txt new file mode 100755 index 00000000..972293bc --- /dev/null +++ b/inc/3rdparty/site_config/standard/dobreprogramy.pl.txt @@ -0,0 +1,6 @@ +title: //*[@class="news"]//h1[@class="title"] +author: 
//*[@class="news"]//*[@class="newsInfo"]/a +date: substring-before(//*[@class="news"]//*[@class="newsInfo"]/text(), ',') +body: //*[@class="news"]//*[@class="newsContent"] +footnotes: no +test_url: http://www.dobreprogramy.pl/Sony-konczy-z-Foldinghome-na-PS3,Aktualnosc,36899.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/doctac.com.txt b/inc/3rdparty/site_config/standard/doctac.com.txt old mode 100644 new mode 100755 index 9f65ea9b..1c518a9b --- a/inc/3rdparty/site_config/standard/doctac.com.txt +++ b/inc/3rdparty/site_config/standard/doctac.com.txt @@ -1,8 +1,8 @@ -strip: //*[(@id = "featured")] - -author:substring-after( //div[@class='posttitle']/h2[@class='author'],'by ') - -date: concat(//div[@class='month'],' ',//div[@class='day']) - +strip: //*[(@id = "featured")] + +author:substring-after( //div[@class='posttitle']/h2[@class='author'],'by ') + +date: concat(//div[@class='month'],' ',//div[@class='day']) + #doctac doesn't provide a year, but month/day is better than nothing test_url: http://www.doctac.com/mac/iphone/instapaper-update-app/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/domusweb.it.txt b/inc/3rdparty/site_config/standard/domusweb.it.txt old mode 100644 new mode 100755 index 81683f02..20566ee3 --- a/inc/3rdparty/site_config/standard/domusweb.it.txt +++ b/inc/3rdparty/site_config/standard/domusweb.it.txt @@ -1,21 +1,21 @@ -# TODO: clean up the extra junk at the end of articles - -# general text formatting -prune: no -convert_double_br_tags:yes - -# where to find the basic metadata -author://a[@class='articleauthor'] -date://a[starts-with(@href,'/en/search/published/')] -title:substring-before(//h2[@class='title'],'—') -body://div[@id='maincontainer'] - -dissolve://div[starts-with(@id,'commentableblock')] - -# clean up the crap -strip://div[contains(@class,'domusnetwork')] -strip://div[contains(@class,'relative_wrapper')] - 
-strip://div[contains(@class,'captionsubimage')]/img[contains(@class,'arrow')] +# TODO: clean up the extra junk at the end of articles + +# general text formatting +prune: no +convert_double_br_tags:yes + +# where to find the basic metadata +author://a[@class='articleauthor'] +date://a[starts-with(@href,'/en/search/published/')] +title:substring-before(//h2[@class='title'],'—') +body://div[@id='maincontainer'] + +dissolve://div[starts-with(@id,'commentableblock')] + +# clean up the crap +strip://div[contains(@class,'domusnetwork')] +strip://div[contains(@class,'relative_wrapper')] + +strip://div[contains(@class,'captionsubimage')]/img[contains(@class,'arrow')] wrap_in(em): //div[contains(@class,'captionsubimage')]/span test_url: http://www.domusweb.it/en/design/in-praise-of-lost-time/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/dou.ua.txt b/inc/3rdparty/site_config/standard/dou.ua.txt old mode 100644 new mode 100755 index 22907c22..0f983112 --- a/inc/3rdparty/site_config/standard/dou.ua.txt +++ b/inc/3rdparty/site_config/standard/dou.ua.txt @@ -1,8 +1,8 @@ -title: //h1[@itemprop="name"] - -author: //div[contains(@class, 'author')]//div[contains(@class, 'name')]/a - -date: //div[contains(@class, 'b-info')]//span[contains(@class, 'date')] - +title: //h1[@itemprop="name"] + +author: //div[contains(@class, 'author')]//div[contains(@class, 'name')]/a + +date: //div[contains(@class, 'b-info')]//span[contains(@class, 'date')] + body: //div[contains(@class, 'b-typo')] test_url: http://dou.ua/lenta/interviews/andrej-havryuchenko/?from=sb_mostcomm \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/douban.com.txt b/inc/3rdparty/site_config/standard/douban.com.txt old mode 100644 new mode 100755 index 99d7e5dc..d72a2223 --- a/inc/3rdparty/site_config/standard/douban.com.txt +++ b/inc/3rdparty/site_config/standard/douban.com.txt @@ -1,21 +1,21 @@ -# This filter is tested on: -# http://www.douban.com/note/215003067/ -# 
http://www.douban.com/note/213540049/ -# http://www.douban.com/group/topic/31140104/ - -title: //div[@class='note-header']/h1 -title: //div[@id='content']/h1 - -author: //div[@class='info']/ul/li/a -author: //h3/span/a - -date://div[@class='note-header']/div/span -date://h3/span[contains(@class, 'color-green')] - -body://div[contains(@class, 'note')] -body://div[contains(@class, 'topic-content')] - -strip://h3 - -convert_double_br_tags: yes +# This filter is tested on: +# http://www.douban.com/note/215003067/ +# http://www.douban.com/note/213540049/ +# http://www.douban.com/group/topic/31140104/ + +title: //div[@class='note-header']/h1 +title: //div[@id='content']/h1 + +author: //div[@class='info']/ul/li/a +author: //h3/span/a + +date://div[@class='note-header']/div/span +date://h3/span[contains(@class, 'color-green')] + +body://div[contains(@class, 'note')] +body://div[contains(@class, 'topic-content')] + +strip://h3 + +convert_double_br_tags: yes test_url: http://www.douban.com/group/topic/31140104/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/dpreview.com.txt b/inc/3rdparty/site_config/standard/dpreview.com.txt old mode 100644 new mode 100755 index 30179a3b..001c810f --- a/inc/3rdparty/site_config/standard/dpreview.com.txt +++ b/inc/3rdparty/site_config/standard/dpreview.com.txt @@ -1,9 +1,9 @@ -# next_page_link for product review -# example: http://www.dpreview.com/reviews/lytro/ -next_page_link: //img[@alt = 'Next page']/../@href - -# next_page_link for other articles -# example: http://www.dpreview.com/articles/6126592906/first-impressions-using-the-fujifilm-x-pro1 -next_page_link: //*[@class = 'pages']/*/td[@class = 'next enabled']/a +# next_page_link for product review +# example: http://www.dpreview.com/reviews/lytro/ +next_page_link: //img[@alt = 'Next page']/../@href + +# next_page_link for other articles +# example: http://www.dpreview.com/articles/6126592906/first-impressions-using-the-fujifilm-x-pro1 +next_page_link: 
//*[@class = 'pages']/*/td[@class = 'next enabled']/a single_page_link: //a[contains(.,'Print view')] test_url: http://www.dpreview.com/articles/6126592906/first-impressions-using-the-fujifilm-x-pro1 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/dr.dk.txt b/inc/3rdparty/site_config/standard/dr.dk.txt old mode 100644 new mode 100755 index 7e46b0d6..d8ec1acf --- a/inc/3rdparty/site_config/standard/dr.dk.txt +++ b/inc/3rdparty/site_config/standard/dr.dk.txt @@ -1,9 +1,9 @@ -title: //meta[@property='og:title']/@content -author: //div[@class='articleFunctions']//a -date: //meta[@name='pubdate']/@content - -# Can you strip elements from the body only? It is required here (`//div[@class='articleContent']/p` breaks for some reason) -body: //div[@class='articleContent'] - +title: //meta[@property='og:title']/@content +author: //div[@class='articleFunctions']//a +date: //meta[@name='pubdate']/@content + +# Can you strip elements from the body only? It is required here (`//div[@class='articleContent']/p` breaks for some reason) +body: //div[@class='articleContent'] + tidy: no test_url: http://www.dr.dk/Nyheder/Udland/2011/10/24/150115.htm \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/dramasonline.com.txt b/inc/3rdparty/site_config/standard/dramasonline.com.txt old mode 100644 new mode 100755 index 659d0443..4898353b --- a/inc/3rdparty/site_config/standard/dramasonline.com.txt +++ b/inc/3rdparty/site_config/standard/dramasonline.com.txt @@ -1,10 +1,10 @@ -body: //div[@class='postext'] - -strip_id_or_class: ratingblock -strip_id_or_class: hreview-aggregate -strip: //div[contains(@style, 'display: none;')] - -tidy: no -prune: no - +body: //div[@class='postext'] + +strip_id_or_class: ratingblock +strip_id_or_class: hreview-aggregate +strip: //div[contains(@style, 'display: none;')] + +tidy: no +prune: no + test_url: http://www.dramasonline.com/jago-pakistan-jago-7th-december-2012-ali-gul-pir/ \ No newline at end of file 
diff --git a/inc/3rdparty/site_config/standard/drdobbs.com.txt b/inc/3rdparty/site_config/standard/drdobbs.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/drive2.ru.txt b/inc/3rdparty/site_config/standard/drive2.ru.txt old mode 100644 new mode 100755 index 6125ce79..d500cb81 --- a/inc/3rdparty/site_config/standard/drive2.ru.txt +++ b/inc/3rdparty/site_config/standard/drive2.ru.txt @@ -1,12 +1,12 @@ -body: //div[@class = "description"] -body: //div[@id = "post"] - -strip_id_or_class: vcard -strip_id_or_class: journallist -strip_id_or_class: infobox -strip_id_or_class: terms -strip_id_or_class: replieslist -strip_id_or_class: communityside - +body: //div[@class = "description"] +body: //div[@id = "post"] + +strip_id_or_class: vcard +strip_id_or_class: journallist +strip_id_or_class: infobox +strip_id_or_class: terms +strip_id_or_class: replieslist +strip_id_or_class: communityside + test_url: http://www.drive2.ru/cars/audi/a6/a6_c5/elysey/journal/288230376151836654/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/dropbox.com.txt b/inc/3rdparty/site_config/standard/dropbox.com.txt new file mode 100755 index 00000000..92ae31b2 --- /dev/null +++ b/inc/3rdparty/site_config/standard/dropbox.com.txt @@ -0,0 +1 @@ +single_page_link: //a[@id='download_button_link'] \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/drupal.org.txt b/inc/3rdparty/site_config/standard/drupal.org.txt old mode 100644 new mode 100755 index ffb77e4d..2da3eb1c --- a/inc/3rdparty/site_config/standard/drupal.org.txt +++ b/inc/3rdparty/site_config/standard/drupal.org.txt @@ -1,8 +1,8 @@ -title://h1 -author://div[@class="submitted"]/a -date:substring-after(//div[@class="meta"],'modified: ') -date:substring-after(//div[@class="submitted"],'on ') -body://div[@class="node-content"] -strip://div[@class="meta"] +title://h1 +author://div[@class="submitted"]/a +date:substring-after(//div[@class="meta"],'modified: ') 
+date:substring-after(//div[@class="submitted"],'on ') +body://div[@class="node-content"] +strip://div[@class="meta"] strip_id_or_class:book-navigation test_url: http://drupal.org/node/1327354 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/dukebasketballreport.com.txt b/inc/3rdparty/site_config/standard/dukebasketballreport.com.txt old mode 100644 new mode 100755 index 418c9f62..2978797e --- a/inc/3rdparty/site_config/standard/dukebasketballreport.com.txt +++ b/inc/3rdparty/site_config/standard/dukebasketballreport.com.txt @@ -1,11 +1,11 @@ -title: //h2/a -author: substring-before(substring-after(//span[@class='byline'], 'by'), ',') -date: substring-before(substring-after(//span[@class='byline'], ','), '|') -body: //div[@class='entry'] - - -# strip out auction stuff at the end of posts -# tidy kills the center tag, so disable it -tidy: no +title: //h2/a +author: substring-before(substring-after(//span[@class='byline'], 'by'), ',') +date: substring-before(substring-after(//span[@class='byline'], ','), '|') +body: //div[@class='entry'] + + +# strip out auction stuff at the end of posts +# tidy kills the center tag, so disable it +tidy: no strip: //center//table test_url: http://www.dukebasketballreport.com/articles/?p=42660 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/dushumashang.com.txt b/inc/3rdparty/site_config/standard/dushumashang.com.txt new file mode 100755 index 00000000..6a50a77e --- /dev/null +++ b/inc/3rdparty/site_config/standard/dushumashang.com.txt @@ -0,0 +1,17 @@ +# This filter is tested on: +# http://www.dushumashang.com/2389 +# http://www.dushumashang.com/2415 +# http://www.dushumashang.com/2355 + +body://div[@class='main_content'] +#body://section[@class='entry_content fl'] +title://h2 +author://span[@class='article_author']/a +date://span[@class='pub_date']/time + +strip://span[@class='article_author'] +strip://span[@class='pub_date'] +strip://div[@class='page_turn'] 
+strip://span[@class='source_link']/em +wrap_in(strong)://span[@class='source_link']/a +test_url: http://www.dushumashang.com/2355 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/dvice.com.txt b/inc/3rdparty/site_config/standard/dvice.com.txt old mode 100644 new mode 100755 index c8163680..1a1990ee --- a/inc/3rdparty/site_config/standard/dvice.com.txt +++ b/inc/3rdparty/site_config/standard/dvice.com.txt @@ -1,9 +1,9 @@ -strip://*[@id = 'blog_top_stories'] -strip://*[@id = 'takeover_off'] -strip://*[@id = 'right_gray_box'] -strip://*[@class = 'blog_topics'] -strip://*[@class = 'section_titles'] - -author://div[@class = 'post_author_info']/a +strip://*[@id = 'blog_top_stories'] +strip://*[@id = 'takeover_off'] +strip://*[@id = 'right_gray_box'] +strip://*[@class = 'blog_topics'] +strip://*[@class = 'section_titles'] + +author://div[@class = 'post_author_info']/a date://div[@class = 'post_date_info'] test_url: http://dvice.com/archives/2012/05/is-nfc-and-smar.php \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/eamesinerudition.com.txt b/inc/3rdparty/site_config/standard/eamesinerudition.com.txt old mode 100644 new mode 100755 index 908a1b51..89a68bcd --- a/inc/3rdparty/site_config/standard/eamesinerudition.com.txt +++ b/inc/3rdparty/site_config/standard/eamesinerudition.com.txt @@ -1,8 +1,8 @@ -title: //div [@class="post contain"]/h1 -strip: //div [@class="post contain"]/h1 -body: //div [@class="post contain"] -author: substring-before(//title, ':') -author: substring-before(//title, ' ') - +title: //div [@class="post contain"]/h1 +strip: //div [@class="post contain"]/h1 +body: //div [@class="post contain"] +author: substring-before(//title, ':') +author: substring-before(//title, ' ') + test_url: http://eamesinerudition.com/2012/03/hospital-numbers-are-bad-for-you \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/eandt.theiet.org.txt 
b/inc/3rdparty/site_config/standard/eandt.theiet.org.txt old mode 100644 new mode 100755 index c4c38f25..ba9d312d --- a/inc/3rdparty/site_config/standard/eandt.theiet.org.txt +++ b/inc/3rdparty/site_config/standard/eandt.theiet.org.txt @@ -1,8 +1,8 @@ -title: //h1 -date: //div[@class="et_dateUnderTitle"] -author: substring-after(//div[@class="et_authorUnderTitle"], 'By ') -body: //div[@id="et_leftCol640split"] - -strip: //div[@id="et_leftCol640splitRight"] +title: //h1 +date: //div[@class="et_dateUnderTitle"] +author: substring-after(//div[@class="et_authorUnderTitle"], 'By ') +body: //div[@id="et_leftCol640split"] + +strip: //div[@id="et_leftCol640splitRight"] strip: //div[@class="et_light_greybgboxlower"] test_url: http://eandt.theiet.org/magazine/2011/12/this-festive-waste.cfm \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/eastoftheweb.com.txt b/inc/3rdparty/site_config/standard/eastoftheweb.com.txt old mode 100644 new mode 100755 index d762091c..36708da3 --- a/inc/3rdparty/site_config/standard/eastoftheweb.com.txt +++ b/inc/3rdparty/site_config/standard/eastoftheweb.com.txt @@ -1,18 +1,18 @@ -title: //div[@class='title_text'] - -author: //div[@class='author_text'] - -body: //div[@class='story_text']/.. - -strip: //b - -strip_id_or_class: back_to_top -strip_id_or_class: author_text -strip_id_or_class: title_text - -wrap_in(center): //a - -dissolve: //a - +title: //div[@class='title_text'] + +author: //div[@class='author_text'] + +body: //div[@class='story_text']/.. 
+ +strip: //b + +strip_id_or_class: back_to_top +strip_id_or_class: author_text +strip_id_or_class: title_text + +wrap_in(center): //a + +dissolve: //a + footnotes: no test_url: http://www.eastoftheweb.com/short-stories/UBooks/Horl.shtml \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/ebay.com.txt b/inc/3rdparty/site_config/standard/ebay.com.txt old mode 100644 new mode 100755 index 5fa18ff3..f17e1f72 --- a/inc/3rdparty/site_config/standard/ebay.com.txt +++ b/inc/3rdparty/site_config/standard/ebay.com.txt @@ -1,5 +1,5 @@ -body: //h1[@class='it-ttl'] | //div[@id='mainImgHldr'] | //span[@id='prcIsum'] - -strip_image_src: imgLoading_30x30.gif - +body: //h1[@class='it-ttl'] | //div[@id='mainImgHldr'] | //span[@id='prcIsum'] + +strip_image_src: imgLoading_30x30.gif + test_url: http://www.ebay.com/itm/BRAND-NEW-FM-Transmitter-Ca-r-Charger-iPhone-4S-4-4G-3GS-3G-2G-iPod-Touch-/190657497204 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/ecetia.com.txt b/inc/3rdparty/site_config/standard/ecetia.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/econlog.econlib.org.txt b/inc/3rdparty/site_config/standard/econlog.econlib.org.txt old mode 100644 new mode 100755 index ebafc197..729affd4 --- a/inc/3rdparty/site_config/standard/econlog.econlib.org.txt +++ b/inc/3rdparty/site_config/standard/econlog.econlib.org.txt @@ -1,6 +1,6 @@ -title: //h1[@class="title"] -author: //div[@class="hosted"]/a -date: substring-after(//div[@class="dateline"]/text(), '|') - +title: //h1[@class="title"] +author: //div[@class="hosted"]/a +date: substring-after(//div[@class="dateline"]/text(), '|') + strip: //a[@class="top" and @href="#"] test_url: http://econlog.econlib.org/archives/2012/04/blinder_on_heal.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/economia.estadao.com.br.txt b/inc/3rdparty/site_config/standard/economia.estadao.com.br.txt old mode 100644 new mode 100755 index 
b59f554e..936a191d --- a/inc/3rdparty/site_config/standard/economia.estadao.com.br.txt +++ b/inc/3rdparty/site_config/standard/economia.estadao.com.br.txt @@ -1,7 +1,7 @@ -date: //div[@class="bb-md-noticia-fecha"] -body: //div[@class="corpo"] -dissolve: //div[@class="bb-md-noticia-extras"] -strip: //strong -strip_id_or_class: bb-md-noticia-foto-autor +date: //div[@class="bb-md-noticia-fecha"] +body: //div[@class="corpo"] +dissolve: //div[@class="bb-md-noticia-extras"] +strip: //strong +strip_id_or_class: bb-md-noticia-foto-autor strip_id_or_class: bb-md-noticia-foto-bajada test_url: http://economia.estadao.com.br/noticias/economia,cmn-aprova-r-67-bi-em-credito-para-20-setores-da-economia,118501,0.htm \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/economist.com.txt b/inc/3rdparty/site_config/standard/economist.com.txt old mode 100644 new mode 100755 index 71dd62f5..16c9ed64 --- a/inc/3rdparty/site_config/standard/economist.com.txt +++ b/inc/3rdparty/site_config/standard/economist.com.txt @@ -1,10 +1,8 @@ -title: //div[@class='ec-blog-headline'] -body: //div[@class='ec-blog-body'] -body: //div[@class='ec-article-content clear'] -strip: //div[@class='related-items'] -date: substring-before(//p[@class='ec-article-info'], '|') -prune: no - -autodetect_next_page: no - +body: //div[@class='main-content'] +date: //time[@class='date-created'] +strip: //aside +prune: no + +autodetect_next_page: no + test_url: http://www.economist.com/node/21528429 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/edge-online.com.txt b/inc/3rdparty/site_config/standard/edge-online.com.txt old mode 100644 new mode 100755 index 461d909c..cf585815 --- a/inc/3rdparty/site_config/standard/edge-online.com.txt +++ b/inc/3rdparty/site_config/standard/edge-online.com.txt @@ -1,13 +1,13 @@ -title: //meta[@property="og:title"]/@content -body: //h2[@class='strapline'] | //article[contains(@class, 'node-article')] -date: //time[@pubdate]/@datetime 
-author: //span[@class='author-name'] -prune: no -tidy: no -strip: //footer - -replace_string(<p>[ pagebreak ]</p>): <!-- pagebreak --> - -single_page_link: //a[contains(@href, '?page=show')] - +title: //meta[@property="og:title"]/@content +body: //h2[@class='strapline'] | //article[contains(@class, 'node-article')] +date: //time[@pubdate]/@datetime +author: //span[@class='author-name'] +prune: no +tidy: no +strip: //footer + +replace_string(<p>[ pagebreak ]</p>): <!-- pagebreak --> + +single_page_link: //a[contains(@href, '?page=show')] + test_url: http://www.edge-online.com/features/telling-modern-warfares-story \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/edge.org.txt b/inc/3rdparty/site_config/standard/edge.org.txt old mode 100644 new mode 100755 index 9980000d..95805f6e --- a/inc/3rdparty/site_config/standard/edge.org.txt +++ b/inc/3rdparty/site_config/standard/edge.org.txt @@ -1,5 +1,5 @@ -title: //div[@class='HomeLeftPannel IMGCTRL']/h2 -body: //div[@class='HomeLeftPannel IMGCTRL']//div[@class='Brownalink' or @id='shortdesc'] -tidy: no - +title: //div[@class='HomeLeftPannel IMGCTRL']/h2 +body: //div[@class='HomeLeftPannel IMGCTRL']//div[@class='Brownalink' or @id='shortdesc'] +tidy: no + test_url: http://edge.org/print/conversation.php?cid=the-argumentative-theory \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/edition.channel5belize.com.txt b/inc/3rdparty/site_config/standard/edition.channel5belize.com.txt new file mode 100755 index 00000000..6d5f170a --- /dev/null +++ b/inc/3rdparty/site_config/standard/edition.channel5belize.com.txt @@ -0,0 +1,9 @@ +title: //div[@id='singlePage']//h2 +body: //div[@id='singlePage']//div[contains(@class, 'post')] +strip: //a[@title='Email This Story'] +strip_id_or_class: sociable + +prune: no + +test_url: http://edition.channel5belize.com/archives/86016 +test_url: http://edition.channel5belize.com/feed \ No newline at end of file diff --git 
a/inc/3rdparty/site_config/standard/edition.cnn.com.txt b/inc/3rdparty/site_config/standard/edition.cnn.com.txt old mode 100644 new mode 100755 index dc8ebe14..6fc82d24 --- a/inc/3rdparty/site_config/standard/edition.cnn.com.txt +++ b/inc/3rdparty/site_config/standard/edition.cnn.com.txt @@ -1,9 +1,18 @@ -body: //div[@id='cnnContentContainer']//div[contains(@class, 'cnn_strycntntlft')] -strip: //div[@id='cnnCVP2'] -strip_id_or_class: cnn_strylftcexpbx -strip_id_or_class: cnn_strylctcqrelt -strip_id_or_class: cnn_strybtntoolsbttm -strip_id_or_class: cnn_stryftsbttm -strip_id_or_class: cnn_strybtmcntnt +body: //div[@id='cnnContentContainer']//div[contains(@class, 'cnn_strycntntlft')] +strip: //a[starts-with(@name, 'em')] +strip: //div[@id='cnnCVP2'] +strip_id_or_class: cnn_strylftcexpbx +strip_id_or_class: cnn_strylctcqrelt +strip_id_or_class: cnn_strybtntoolsbttm +strip_id_or_class: cnn_stryftsbttm +strip_id_or_class: cnn_strybtmcntnt +strip_id_or_class: cnn_stryshrwdgtbtm +strip_id_or_class: cnnGalleryContainer +strip_id_or_class: cnn_strycrcntr +strip_id_or_class: cnn_html_slideshow prune: no -test_url: http://edition.cnn.com/2011/US/04/29/severe.weather/index.html \ No newline at end of file + +test_url: http://edition.cnn.com/2011/US/04/29/severe.weather/index.html +test_url: http://edition.cnn.com/2013/08/15/world/africa/nigeria-boko-haram-commander-killed/index.html?eref=edition +test_url: http://rss.cnn.com/rss/edition.rss +test_url: http://rss.cnn.com/rss/edition_technology.rss \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/eetimes.com.txt b/inc/3rdparty/site_config/standard/eetimes.com.txt new file mode 100755 index 00000000..300db307 --- /dev/null +++ b/inc/3rdparty/site_config/standard/eetimes.com.txt @@ -0,0 +1,8 @@ +body: //div[contains(@class, 'grayshowlinks')] + +next_page_link: //div[@id='sitecontentcol']//a[.='Next >'] +# Doesn't work (site doesn't always load full content in print view) +#single_page_link: 
//div[@id='sitecontentcol']//a[contains(@href, 'print=yes')] + +test_url: http://www.eetimes.com/document.asp?doc_id=1319966& +test_url: http://www.eetimes.com/rss_simple.asp \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/ekultura.hu.txt b/inc/3rdparty/site_config/standard/ekultura.hu.txt old mode 100644 new mode 100755 index 59f6a711..3756027c --- a/inc/3rdparty/site_config/standard/ekultura.hu.txt +++ b/inc/3rdparty/site_config/standard/ekultura.hu.txt @@ -1,11 +1,11 @@ -title: //h1[@class='style6 nevek'] - -body: //div[@class='bal3'] - - -prune: yes - -tidy: yes -convert_double_br_tags: yes +title: //h1[@class='style6 nevek'] + +body: //div[@class='bal3'] + + +prune: yes + +tidy: yes +convert_double_br_tags: yes test_url: http://ekultura.hu/olvasnivalo/egyeb/cikk/2010-12-15/interju-galvolgyi-judit-2010-december \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/elance.com.txt b/inc/3rdparty/site_config/standard/elance.com.txt old mode 100644 new mode 100755 index 52ffe2d0..d4b0a9b8 --- a/inc/3rdparty/site_config/standard/elance.com.txt +++ b/inc/3rdparty/site_config/standard/elance.com.txt @@ -1,3 +1,3 @@ -body: //div[@id='jobDesc-bd']/p +body: //div[@id='jobDesc-bd']/p test_url: http://www.elance.com/j/xml-technical-intergration/23687172/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/elderscrollsonline.com.txt b/inc/3rdparty/site_config/standard/elderscrollsonline.com.txt new file mode 100755 index 00000000..fa3892c6 --- /dev/null +++ b/inc/3rdparty/site_config/standard/elderscrollsonline.com.txt @@ -0,0 +1,22 @@ +date: //time +title: //h1[contains(@class, "alpha")] +body: //article[contains(@class, "news-post")] + +# fix dates - dates as they are won't work as strtotime doesn't understand format (03.28.2013) +replace_string(<time class="gamma">01.): <time class="gamma">January. +replace_string(<time class="gamma">02.): <time class="gamma">February. 
+replace_string(<time class="gamma">03.): <time class="gamma">March. +replace_string(<time class="gamma">04.): <time class="gamma">April. +replace_string(<time class="gamma">05.): <time class="gamma">May. +replace_string(<time class="gamma">06.): <time class="gamma">June. +replace_string(<time class="gamma">07.): <time class="gamma">July. +replace_string(<time class="gamma">08.): <time class="gamma">August. +replace_string(<time class="gamma">09.): <time class="gamma">September. +replace_string(<time class="gamma">10.): <time class="gamma">October. +replace_string(<time class="gamma">11.): <time class="gamma">November. +replace_string(<time class="gamma">12.): <time class="gamma">December. + +prune: no + +test_url: http://elderscrollsonline.com/en/rss +test_url: http://elderscrollsonline.com/en/news/post/2013/03/27/developer-question-of-the-week-17 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/elektroniknet.de.txt b/inc/3rdparty/site_config/standard/elektroniknet.de.txt old mode 100644 new mode 100755 index 07664719..56fba5ff --- a/inc/3rdparty/site_config/standard/elektroniknet.de.txt +++ b/inc/3rdparty/site_config/standard/elektroniknet.de.txt @@ -1,27 +1,27 @@ -title: //h1 -date: //div[@class='datum'] -single_page_link: //a[contains(@href, '?type=99')] - -# this hack preserves the intro text, because it would be striped otherwise if the title is set to //h1 -dissolve: //div[@class='artikelMeldung'] - - -strip_id_or_class: anzeige -strip_id_or_class: top_page_navigation -strip_id_or_class: cr_image_container -strip_id_or_class: cr_image_reference -strip_id_or_class: cr_image_icon -strip_id_or_class: _close_txt -strip_id_or_class: _close_ico -strip_id_or_class: clearer - -strip://h1 -strip://h6 -strip://div[contains(@id, 'plista')] -strip://img[contains(@id,'tiny')] -strip://img[@class='cr_image'] - -# strip url at the top -strip: //p[@style='font-size: 10px;'] +title: //h1 +date: //div[@class='datum'] +single_page_link: 
//a[contains(@href, '?type=99')] + +# this hack preserves the intro text, because it would be striped otherwise if the title is set to //h1 +dissolve: //div[@class='artikelMeldung'] + + +strip_id_or_class: anzeige +strip_id_or_class: top_page_navigation +strip_id_or_class: cr_image_container +strip_id_or_class: cr_image_reference +strip_id_or_class: cr_image_icon +strip_id_or_class: _close_txt +strip_id_or_class: _close_ico +strip_id_or_class: clearer + +strip://h1 +strip://h6 +strip://div[contains(@id, 'plista')] +strip://img[contains(@id,'tiny')] +strip://img[@class='cr_image'] + +# strip url at the top +strip: //p[@style='font-size: 10px;'] test_url: http://www.elektroniknet.de/automotive/technik-know-how/sicherheitselektronik/article/87717/0/Besser_als_die_Wirklichkeit/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/elmalpensante.com.txt b/inc/3rdparty/site_config/standard/elmalpensante.com.txt old mode 100644 new mode 100755 index 9fecd663..435c6c20 --- a/inc/3rdparty/site_config/standard/elmalpensante.com.txt +++ b/inc/3rdparty/site_config/standard/elmalpensante.com.txt @@ -1,4 +1,4 @@ -single_page_link: //a[contains(@href, 'print_contenido')] -title: //h2 +single_page_link: //a[contains(@href, 'print_contenido')] +title: //h2 author: //div[@class="autor"] test_url: http://www.elmalpensante.com/index.php?doc=display_contenido&id=668 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/elpais.com.txt b/inc/3rdparty/site_config/standard/elpais.com.txt old mode 100644 new mode 100755 index 32f9fc3f..c6f9787b --- a/inc/3rdparty/site_config/standard/elpais.com.txt +++ b/inc/3rdparty/site_config/standard/elpais.com.txt @@ -1,22 +1,22 @@ -title: //meta[@name='DC.title']/@content -title: //div[contains(@class, 'cabecera_noticia')]//h1 -date: //meta[@name='DC.date']/@content -date: //meta[@name='date']/@content -body: //div[@class='columna_texto'] -body: //div[@id='cuerpo_noticia'] -body: 
//div[@class='estructura_2col_1zq']//div[@class='margen_n'] - -prune: no - -strip_id_or_class: disposicion_vertical -strip_id_or_class: ampliar_foto -strip_id_or_class: utilidades -strip_id_or_class: info_relacionada -strip_id_or_class: m-kiosko -strip_id_or_class: info_complementa - -strip: //div[starts-with(@id, 'sumario') and contains(., 'más información')] -strip: //div[@id='coment' or @id='foros_not'] +title: //meta[@name='DC.title']/@content +title: //div[contains(@class, 'cabecera_noticia')]//h1 +date: //meta[@name='DC.date']/@content +date: //meta[@name='date']/@content +body: //div[@class='columna_texto'] +body: //div[@id='cuerpo_noticia'] +body: //div[@class='estructura_2col_1zq']//div[@class='margen_n'] -test_url: http://elpais.com/elpais/2012/02/06/gente/1328526783_491687.html +prune: no + +strip_id_or_class: disposicion_vertical +strip_id_or_class: ampliar_foto +strip_id_or_class: utilidades +strip_id_or_class: info_relacionada +strip_id_or_class: m-kiosko +strip_id_or_class: info_complementa + +strip: //div[starts-with(@id, 'sumario') and contains(., 'más información')] +strip: //div[@id='coment' or @id='foros_not'] + +test_url: http://elpais.com/elpais/2012/02/06/gente/1328526783_491687.html test_url: http://www.elpais.com/articulo/cultura/mano/retrato/materia/elpepicul/20120207elpepicul_2/Tes \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/emaratalyoum.com.txt b/inc/3rdparty/site_config/standard/emaratalyoum.com.txt new file mode 100755 index 00000000..3d1313e2 --- /dev/null +++ b/inc/3rdparty/site_config/standard/emaratalyoum.com.txt @@ -0,0 +1,7 @@ +body: //div[@id='main-column']//div[@class='content'] + +prune: no + +test_url: http://www.emaratalyoum.com/sports/arab-and-international/2013-08-29-1.601844 +test_url: http://www.emaratalyoum.com/sports/arab-and-international/2013-08-29-1.601842 +test_url: http://www.emaratalyoum.com/public-sports-1.533088?ot=ot.AjaxPageLayout \ No newline at end of file diff --git 
a/inc/3rdparty/site_config/standard/en.espnf1.com.txt b/inc/3rdparty/site_config/standard/en.espnf1.com.txt old mode 100644 new mode 100755 index c1a91063..2ca0216b --- a/inc/3rdparty/site_config/standard/en.espnf1.com.txt +++ b/inc/3rdparty/site_config/standard/en.espnf1.com.txt @@ -1,10 +1,10 @@ -body: //div[@id='content'] -strip: //div[@class='rl'] -strip: //p[@class='authdesc'] -strip: //p[@class='strybtm'] -strip: //div[@id='stryFtrLft'] -strip: //div[@id='f1Conversation'] -strip: //div[@id='cmtSpncrRuler'] -strip: //div[@id='stryComments'] +body: //div[@id='content'] +strip: //div[@class='rl'] +strip: //p[@class='authdesc'] +strip: //p[@class='strybtm'] +strip: //div[@id='stryFtrLft'] +strip: //div[@id='f1Conversation'] +strip: //div[@id='cmtSpncrRuler'] +strip: //div[@id='stryComments'] strip: //div[@id='athrData'] test_url: http://en.espnf1.com/monaco/motorsport/story/50529.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/engadget.com.txt b/inc/3rdparty/site_config/standard/engadget.com.txt old mode 100644 new mode 100755 index 6cc6b14e..52acddb0 --- a/inc/3rdparty/site_config/standard/engadget.com.txt +++ b/inc/3rdparty/site_config/standard/engadget.com.txt @@ -1,7 +1,7 @@ -title: //meta[@property="og:title"]/@content -body: //div[@class='post_body'] -date: //*[@class='post_time'] - -prune: no - +title: //meta[@property="og:title"]/@content +body: //div[@class='post_body'] +date: //*[@class='post_time'] + +prune: no + test_url: http://www.engadget.com/2011/05/20/screen-grabs-the-mentalist-takes-the-ipad-to-new-heights/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/engineering.tumblr.com.txt b/inc/3rdparty/site_config/standard/engineering.tumblr.com.txt old mode 100644 new mode 100755 index 35ace467..48f301fe --- a/inc/3rdparty/site_config/standard/engineering.tumblr.com.txt +++ b/inc/3rdparty/site_config/standard/engineering.tumblr.com.txt @@ -1,7 +1,7 @@ -title: //h2 -body: 
//div[@class="post_content"] -author: //p[@class="author"]/a -date: //p[@class="date"] -strip: //h2 +title: //h2 +body: //div[@class="post_content"] +author: //p[@class="author"]/a +date: //p[@class="date"] +strip: //h2 strip: //header test_url: http://engineering.tumblr.com/post/21276808338/tumblr-firehose \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/english.aljazeera.net.txt b/inc/3rdparty/site_config/standard/english.aljazeera.net.txt old mode 100644 new mode 100755 index aed3a5f9..97365994 --- a/inc/3rdparty/site_config/standard/english.aljazeera.net.txt +++ b/inc/3rdparty/site_config/standard/english.aljazeera.net.txt @@ -1,7 +1,7 @@ -title: //span[@id='DetailedTitle'] -body: //div[@id='ctl00_cphBody_dvArticleInfoBlock'] | //td[@class='DetailedSummary'] -strip_id_or_class: sidebar -strip_id_or_class: Skyscrapper_Body -strip: //td[@class='DetailedSummary']/table[position() != 1] -prune: no +title: //span[@id='DetailedTitle'] +body: //div[@id='ctl00_cphBody_dvArticleInfoBlock'] | //td[@class='DetailedSummary'] +strip_id_or_class: sidebar +strip_id_or_class: Skyscrapper_Body +strip: //td[@class='DetailedSummary']/table[position() != 1] +prune: no test_url: http://english.aljazeera.net//news/middleeast/2011/04/20114681444376835.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/enikos.gr.txt b/inc/3rdparty/site_config/standard/enikos.gr.txt old mode 100644 new mode 100755 index e2b99bfc..ddd51c4b --- a/inc/3rdparty/site_config/standard/enikos.gr.txt +++ b/inc/3rdparty/site_config/standard/enikos.gr.txt @@ -1,9 +1,9 @@ -body: //div[@id='article']//div[contains(@class, 'inside')] - -strip_id_or_class: tags -strip_id_or_class: actions -strip_id_or_class: google-ads - -prune: no - +body: //div[@id='article']//div[contains(@class, 'inside')] + +strip_id_or_class: tags +strip_id_or_class: actions +strip_id_or_class: google-ads + +prune: no + test_url: 
http://www.enikos.gr/politics/98606,To_oxi_toy_Agorastoy_stoys_Germanoys.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/entertainment.timesonline.co.uk.txt b/inc/3rdparty/site_config/standard/entertainment.timesonline.co.uk.txt old mode 100644 new mode 100755 index 3e7fba09..a756c457 --- a/inc/3rdparty/site_config/standard/entertainment.timesonline.co.uk.txt +++ b/inc/3rdparty/site_config/standard/entertainment.timesonline.co.uk.txt @@ -1,10 +1,10 @@ -author://div[@class = 'article-author']/span[@class = 'byline'] -title://h1[@class = 'heading'] -body://div[@id = 'related-article-links'] -strip://div[@id = 'comment-sort-order'] -strip://div[@id = 'my-profile'] -strip://div[@class = 'article-author'] -strip://div[@class = 'bg-f8f1d8 width-385 text-left'] -strip://div[@id = 'login-status'] +author://div[@class = 'article-author']/span[@class = 'byline'] +title://h1[@class = 'heading'] +body://div[@id = 'related-article-links'] +strip://div[@id = 'comment-sort-order'] +strip://div[@id = 'my-profile'] +strip://div[@class = 'article-author'] +strip://div[@class = 'bg-f8f1d8 width-385 text-left'] +strip://div[@id = 'login-status'] strip://div[@class = 'puff-padding'] test_url: http://entertainment.timesonline.co.uk/tol/arts_and_entertainment/the_tls/article7177738.ece \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/ericsuh.com.txt b/inc/3rdparty/site_config/standard/ericsuh.com.txt new file mode 100755 index 00000000..d25140c5 --- /dev/null +++ b/inc/3rdparty/site_config/standard/ericsuh.com.txt @@ -0,0 +1,4 @@ +date: //h6[@class='datetime']/child::text() +author: string("Eric J. 
Suh") +footnotes: yes +test_url: http://www.ericsuh.com/blog/posts/2012/8/strange-numbers.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/es.hu.txt b/inc/3rdparty/site_config/standard/es.hu.txt old mode 100644 new mode 100755 index 19a1e9dd..21691a56 --- a/inc/3rdparty/site_config/standard/es.hu.txt +++ b/inc/3rdparty/site_config/standard/es.hu.txt @@ -1,11 +1,11 @@ -title: concat(//div[@class='doc_author'], ' - ', upper-case(//div[@class='doc_title'])) - -body: //div[@class='doc'] - -prune: yes - -tidy: yes -convert_double_br_tags: yes - +title: concat(//div[@class='doc_author'], ' - ', upper-case(//div[@class='doc_title'])) + +body: //div[@class='doc'] + +prune: yes + +tidy: yes +convert_double_br_tags: yes + strip: //a[contains(@href, 'www.facebook.com/pages/Elet-es-Irodalom/')] test_url: http://www.es.hu/2010-12-08_vissza-a-partpenzt \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/escapistmagazine.com.txt b/inc/3rdparty/site_config/standard/escapistmagazine.com.txt old mode 100644 new mode 100755 index 7e17a04d..fd453a19 --- a/inc/3rdparty/site_config/standard/escapistmagazine.com.txt +++ b/inc/3rdparty/site_config/standard/escapistmagazine.com.txt @@ -1,2 +1,8 @@ +title: //h1[@class='headline']/div[@class='name'] + +strip_image_src: 'http://cdn.themis-media.com/media/global/images/library/deriv/115/115825.png' + +next_page_link: //a[@class='next_page'] + strip_comments: no -test_url: http://www.escapistmagazine.com/articles/view/columns/extraconsideration/8717-Extra-Consideration-The-Story \ No newline at end of file +test_url: http://www.escapistmagazine.com/articles/view/columns/criticalintel/10302-I-Hate-Magic \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/espn.go.com.txt b/inc/3rdparty/site_config/standard/espn.go.com.txt old mode 100644 new mode 100755 index 319d352b..06476296 --- a/inc/3rdparty/site_config/standard/espn.go.com.txt +++ 
b/inc/3rdparty/site_config/standard/espn.go.com.txt @@ -1,12 +1,12 @@ -title: //div[@class='headline'] | //div[@class='mod-header']/h3 -body: //div[contains(@class, 'article')] -strip: //div[contains(@class, 'mod-inline')] -strip: //*/span[@class='page-actions'] -strip: //div[@class='page-actions']/* -strip: //div[@class='headline'] | //div[@class='mod-header']/h3 -strip: //div[@class='mod-blog-navigation'] -strip: //div[@class='monthday'] -strip: //div[@class='time'] -strip: //div[@class='timeofday'] +title: //div[@class='headline'] | //div[@class='mod-header']/h3 +body: //div[contains(@class, 'article')] +strip: //div[contains(@class, 'mod-inline')] +strip: //*/span[@class='page-actions'] +strip: //div[@class='page-actions']/* +strip: //div[@class='headline'] | //div[@class='mod-header']/h3 +strip: //div[@class='mod-blog-navigation'] +strip: //div[@class='monthday'] +strip: //div[@class='time'] +strip: //div[@class='timeofday'] strip: //div[contains(@class, 'mod-conversations')] test_url: http://espn.go.com/boston/mlb/story/_/id/7092528/terry-francona-victim-latest-red-sox-smear-campaign \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/esquire.com.txt b/inc/3rdparty/site_config/standard/esquire.com.txt old mode 100644 new mode 100755 index 7566e8cc..b9cb1e55 --- a/inc/3rdparty/site_config/standard/esquire.com.txt +++ b/inc/3rdparty/site_config/standard/esquire.com.txt @@ -1,10 +1,11 @@ -title: //h1 -author: //div[@id='byline'] - -body: //div[@id='printBody'] - -single_page_link: concat('http://www.esquire.com/print-this/', substring-after(//link[@rel='canonical']/@href, 'esquire.com/')) - -prune: no - -test_url: http://www.esquire.com/features/impossible/price-is-right-perfect-bid-0810 \ No newline at end of file +title: //h1 +author: //div[@id='byline'] + +body: //div[@id='printBody'] + +single_page_link: concat('http://www.esquire.com/print-this/', substring-after(//link[@rel='canonical']/@href, 'esquire.com/')) + +prune: no + 
+test_url: http://www.esquire.com/features/impossible/price-is-right-perfect-bid-0810 +test_url: http://www.esquire.com/blogs/politics/police-getting-leftover-armoured-iraq-trucks-112513 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/essentialpublicradio.org.txt b/inc/3rdparty/site_config/standard/essentialpublicradio.org.txt old mode 100644 new mode 100755 index 88c8c560..9a922392 --- a/inc/3rdparty/site_config/standard/essentialpublicradio.org.txt +++ b/inc/3rdparty/site_config/standard/essentialpublicradio.org.txt @@ -1,6 +1,6 @@ -title: //*[@itemprop='headline'] -author: //*[@itemprop='author'] -date: //*[@itemprop='datePublished'] -body: //*[@itemprop='articleBody'] +title: //*[@itemprop='headline'] +author: //*[@itemprop='author'] +date: //*[@itemprop='datePublished'] +body: //*[@itemprop='articleBody'] strip: //*[contains(@class, 'instapaper_ignore')] test_url: http://www.essentialpublicradio.org/story/2011-11-14/volunteers-sought-federal-tax-assistance-program-pennsylvania-9421 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/etc.se.txt b/inc/3rdparty/site_config/standard/etc.se.txt old mode 100644 new mode 100755 index 58da5ef7..95f8cf78 --- a/inc/3rdparty/site_config/standard/etc.se.txt +++ b/inc/3rdparty/site_config/standard/etc.se.txt @@ -1,6 +1,6 @@ -strip_id_or_class: 'left' -strip_id_or_class: 'right' -strip_id_or_class: 'block-belowcontent' -author: //span[@class = 'name']/a -date: //div[@class= 'datum'] +strip_id_or_class: 'left' +strip_id_or_class: 'right' +strip_id_or_class: 'block-belowcontent' +author: //span[@class = 'name']/a +date: //div[@class= 'datum'] test_url: http://www.etc.se/intervju/lonsamt-att-radda-jorden \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/eternabuenosaires.com.txt b/inc/3rdparty/site_config/standard/eternabuenosaires.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/eurogamer.net.txt 
b/inc/3rdparty/site_config/standard/eurogamer.net.txt old mode 100644 new mode 100755 index 6ecdf6bd..8a351667 --- a/inc/3rdparty/site_config/standard/eurogamer.net.txt +++ b/inc/3rdparty/site_config/standard/eurogamer.net.txt @@ -1,8 +1,8 @@ -body: //div[ @class='content' ] | //div[ @class='blog-entry' ] - -strip: //h2/abbr | //div[ @class='lowleader' ] | //*[ @class='discussion' ] | //img[ @class='play-button' ] | //div[ @class='boxout' ] | //h2/a | //h2 | //h2/div | //p[ @class='timestamp' ] | //a[ @class='eurogamer-author' ] | //p[ @class='aPager' ] | //h1 | //div[ @id='lowleader' ] | //a[ @class='next' ] | //div[contains(concat(' ', normalize-space(@class), ' '), ' pullquote ')] - -date://p[ @class='timestamp' ] - -author://a[ @class='eurogamer-author' ] +body: //div[ @class='content' ] | //div[ @class='blog-entry' ] + +strip: //h2/abbr | //div[ @class='lowleader' ] | //*[ @class='discussion' ] | //img[ @class='play-button' ] | //div[ @class='boxout' ] | //h2/a | //h2 | //h2/div | //p[ @class='timestamp' ] | //a[ @class='eurogamer-author' ] | //p[ @class='aPager' ] | //h1 | //div[ @id='lowleader' ] | //a[ @class='next' ] | //div[contains(concat(' ', normalize-space(@class), ' '), ' pullquote ')] + +date://p[ @class='timestamp' ] + +author://a[ @class='eurogamer-author' ] test_url: http://www.eurogamer.net/articles/digitalfoundry-vs-unreal-engine-4 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/evo.co.uk.txt b/inc/3rdparty/site_config/standard/evo.co.uk.txt old mode 100644 new mode 100755 index 07162513..ccb4f879 --- a/inc/3rdparty/site_config/standard/evo.co.uk.txt +++ b/inc/3rdparty/site_config/standard/evo.co.uk.txt @@ -1,11 +1,11 @@ -author: substring-after(//div[@class='articleauthor'],'By ') - -# Blog posts -date: //div[@class='articledate'] -# News -date: //div[@class='articledate_b'] - -body: //div[@class='articletext'] - +author: substring-after(//div[@class='articleauthor'],'By ') + +# Blog posts +date: 
//div[@class='articledate'] +# News +date: //div[@class='articledate_b'] + +body: //div[@class='articletext'] + convert_double_br_tags: yes test_url: http://www.evo.co.uk/carreviews/evolongtermtests/280072/bmw_330d_sport_touring.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/expressen.se.txt b/inc/3rdparty/site_config/standard/expressen.se.txt old mode 100644 new mode 100755 index d0cb283e..d81d3251 --- a/inc/3rdparty/site_config/standard/expressen.se.txt +++ b/inc/3rdparty/site_config/standard/expressen.se.txt @@ -1,9 +1,10 @@ -title: //div[@id='article']/div[contains(@class, 'content')]/h1 -body: //div[@id='article']/div[contains(@class, 'content')] -date: //div[contains(@class, 'article-slot')]/descendant::div[contains(@id, 'articledates')] - -strip: //img[contains(@src, 'img/px.gif')] -prune: no -# remove Facebook banner and obtrusive ad -strip: //div[@id='article']/div[contains(@class, 'content')]/div[contains(@class, 'art-right')] -test_url: http://www.expressen.se/kultur/1.2683904/medan-natet-dras-at \ No newline at end of file +title: //h1[contains(@class, 'b-headline_article')] +body: //div[contains(@class, 'b-article_print')] + +single_page_link: //div[contains(@class, 'b-page__footer__actions')]//a[contains(@href, 'print=true')] + +prune: no + +test_url: http://www.expressen.se/kultur/1.2683904/medan-natet-dras-at +test_url: http://www.expressen.se/gt/polis-om-styckmordet-extremt-markligt-fall/ +test_url: http://www.expressen.se/Pages/OutboundFeedsPage.aspx?id=3642159&viewstyle=rss \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/extracine.com.txt b/inc/3rdparty/site_config/standard/extracine.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/f1actual.com.txt b/inc/3rdparty/site_config/standard/f1actual.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/facebook.com.txt b/inc/3rdparty/site_config/standard/facebook.com.txt new 
file mode 100755 index 00000000..6a492767 --- /dev/null +++ b/inc/3rdparty/site_config/standard/facebook.com.txt @@ -0,0 +1,5 @@ +body: //div[@id='imagestage'] +prune: no +tidy: no + +test_url: https://www.facebook.com/feeds/page.php?id=338077742912613&format=rss20 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/facta.co.jp.txt b/inc/3rdparty/site_config/standard/facta.co.jp.txt old mode 100644 new mode 100755 index c17e0b8c..4c96a1a4 --- a/inc/3rdparty/site_config/standard/facta.co.jp.txt +++ b/inc/3rdparty/site_config/standard/facta.co.jp.txt @@ -1,3 +1,3 @@ -bosdy: //div[@class='content'] +bosdy: //div[@class='content'] test_url: http://facta.co.jp/blog/archives/20111026001026.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/falter.at.txt b/inc/3rdparty/site_config/standard/falter.at.txt old mode 100644 new mode 100755 index b941b740..2bfcc9b4 --- a/inc/3rdparty/site_config/standard/falter.at.txt +++ b/inc/3rdparty/site_config/standard/falter.at.txt @@ -1,18 +1,14 @@ -title: //h2[@class='related relatedTitle'] -author: //a[contains(@href, 'liste.php?author_id')] - -# can't think of a better way unfortunately, really bad markup on this site -date: substring-after(//td[@style='width:85%;'], 'vom') - -# not sure why, but instapaper seems to suck up the teaser paragraph -# not solved! 
-body: //div[contains(@class, 'teaser')] -body: //div[@id='content'] - -# cleanup -strip: //img[@src='http://www.falter.at/web/_pics/falterlogo_dblau.gif'] -strip: //div[@class='servicebox'] -strip: //h1 -strip: //br -strip: //td[@id='adcol'] -test_url: http://www.falter.at/web/print/detail.php?id=1634 \ No newline at end of file +title: //h1 +author: //a[contains(@href, '/kategorie/autoren')] +date: //a[contains(@href, '/falter/ausgabe')] +body: //article[@class='spanMain'] + +# cleanup +strip_id_or_class: 'respond' +strip: //img[@src='http://www.falter.at/web/_pics/falterlogo_dblau.gif'] +strip_id_or_class: 'meta' +strip_id_or_class: 'servicebox' +strip_id_or_class: 'related' +strip_id_or_class: 'twitter-share-button' +strip: //br +test_url: http://www.falter.at/falter/2013/03/26/der-dandy-auf-der-sinkenden-galeere/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/fanfiction.net.txt b/inc/3rdparty/site_config/standard/fanfiction.net.txt old mode 100644 new mode 100755 index 8d0c4daf..e7cab4d4 --- a/inc/3rdparty/site_config/standard/fanfiction.net.txt +++ b/inc/3rdparty/site_config/standard/fanfiction.net.txt @@ -1,6 +1,6 @@ -body: //*[@id = 'story text'] -author: //a[starts-with(@href, '/u/')] -next_page_link: substring-after(//input[contains(@value, 'Next')]/@onclick, "self.location='") -autodetect_next_page:yes +body: //*[@id = 'story text'] +author: //a[starts-with(@href, '/u/')] +next_page_link: substring-after(//input[contains(@value, 'Next')]/@onclick, "self.location='") +autodetect_next_page:yes strip_id_or_class: 'a2a_kit' test_url: http://www.fanfiction.net/s/6497403/1/Spartan_Love \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/fastcompany.com.txt b/inc/3rdparty/site_config/standard/fastcompany.com.txt old mode 100644 new mode 100755 index 5547a76c..a6417237 --- a/inc/3rdparty/site_config/standard/fastcompany.com.txt +++ b/inc/3rdparty/site_config/standard/fastcompany.com.txt @@ -1,16 +1,16 @@ -title: 
//h1 -author: //h5[@class='byline']//a -date: //h5[@class='date'] -body: //figure[@class='node-poster'] | //div[contains(@class, "node-content")] -strip_id_or_class: article-top-wrapper -strip_id_or_class: footer-message -strip_id_or_class: print-logo -strip: //cite -strip://*[@class='timestamp'] -strip://div[@id='page_right'] -strip://section[@id='header_region'] -strip://h1[@class='node-title'] -strip://div[@class='node-submitted'] -strip_id_or_class: skipnav -test_url: http://www.fastcompany.com/3000226/link-between-quietness-and-productivity +title: //h1 +author: //h5[@class='byline']//a +date: //h5[@class='date'] +body: //figure[@class='node-poster'] | //div[contains(@class, "node-content")] +strip_id_or_class: article-top-wrapper +strip_id_or_class: footer-message +strip_id_or_class: print-logo +strip: //cite +strip://*[@class='timestamp'] +strip://div[@id='page_right'] +strip://section[@id='header_region'] +strip://h1[@class='node-title'] +strip://div[@class='node-submitted'] +strip_id_or_class: skipnav +test_url: http://www.fastcompany.com/3000226/link-between-quietness-and-productivity test_url: http://www.fastcompany.com/3003586/6-simple-rituals-reach-your-potential-every-day \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/faz.net.txt b/inc/3rdparty/site_config/standard/faz.net.txt old mode 100644 new mode 100755 index 4fe5968b..d087d2aa --- a/inc/3rdparty/site_config/standard/faz.net.txt +++ b/inc/3rdparty/site_config/standard/faz.net.txt @@ -1,30 +1,36 @@ -# Title -title: //p[@class='Content HeadlineShort'] - -# Authors -# some are known and have a link, others don't -author: substring-after(//span[@class='Autor'], 'Von') - -# Date -date: //span[@class='Datum'] - -# Body -body: //div[@class='Artikel'] - -# Removements before body text -strip: //div[@class='Breadcrumbs'] -strip: //div[@class='QuickSearchBox'] -strip: //div[@class='FAZArtikelEinleitung'] -strip: //div[@class='FAZArtikelReiter'] -strip: //div[@class='clear'] - -# 
General removements -strip: //span[@class='Bildnachweis'] - -# Removements after body text -strip: //div[@class='ArtikelAbbinder'] -strip: //div[@class='ArtikelKommentieren Artikelfuss GETS;tk;boxen.top-lesermeinungen;tp;content'] -strip: //div[@class='FAZArtikelKommentare FAZArtikelContent'] -strip: //div[@class='FAZArtikelFunktionen'] -strip: //div[@id='FAZContentRight'] -test_url: http://www.faz.net/aktuell/gesellschaft/ehe-haltbarkeitsformel-verliebe-dich-oft-verlobe-dich-selten-heirate-vielleicht-11685306.html \ No newline at end of file +# Title +title: //p[@class='Content HeadlineShort'] + +# Authors +# some are known and have a link, others don't +author: substring-after(//span[@class='Autor'], 'Von') + +# Date +date: //span[@class='Datum'] + +# Body +body: //div[@class='Artikel'] + +# Removements before body text +strip: //div[@class='Breadcrumbs'] +strip: //div[@class='QuickSearchBox'] +strip: //div[@class='FAZArtikelEinleitung'] +strip: //div[@class='FAZArtikelReiter'] +strip: //div[@class='clear'] + +# General removements +strip: //span[@class='Bildnachweis'] +strip: //img[@class='MediaIcon'] +strip: //div[@class='ArtikelMediaLink'] +dissolve: //a[img] + +# Removements after body text +strip: //div[@class='ArtikelAbbinder'] +strip: //div[@class='ArtikelKommentieren Artikelfuss GETS;tk;boxen.top-lesermeinungen;tp;content'] +strip: //div[@class='FAZArtikelKommentare FAZArtikelContent'] +strip: //div[@class='FAZArtikelFunktionen'] +strip: //div[@id='FAZContentRight'] + +# Fix picture captions +wrap_in(small): //span[@class='Bildunterschrift']/text() +test_url: http://www.faz.net/aktuell/feuilleton/zum-tod-von-margaret-thatcher-die-reizfigur-12141919.html#Drucken \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/fertigung.de.txt b/inc/3rdparty/site_config/standard/fertigung.de.txt new file mode 100755 index 00000000..90145e58 --- /dev/null +++ b/inc/3rdparty/site_config/standard/fertigung.de.txt @@ -0,0 +1,23 @@ +title: //title + 
+body: //div[@id='content'] + +strip: (//div[@id='content']/h2)[1] + +strip: //h2[contains(., 'mehr News')]/following::* +strip: //h2[contains(., 'mehr News')] + +strip: //div[contains(@class, 'indizar')]/following::* +strip: //div[contains(@class, 'indizar')] + +strip: //h1[contains(@class, 'single')]/preceding::* +strip: //h1[contains(@class, 'single')] + +strip_id_or_class: plista_widget + +prune: no + +next_page_link: //a[contains(., 'Weiter')] + +test_url: http://www.fertigung.de/2013/04/igus-neuer-energiekettenkatalog/ +test_url: http://www.fertigung.de/2013/04/dynamisch-und-hochpraezise/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/fictionpress.com.txt b/inc/3rdparty/site_config/standard/fictionpress.com.txt old mode 100644 new mode 100755 index 4a04e832..19ec16b0 --- a/inc/3rdparty/site_config/standard/fictionpress.com.txt +++ b/inc/3rdparty/site_config/standard/fictionpress.com.txt @@ -1,5 +1,5 @@ -body: id('storytext') -author: //a[starts-with(@href, '/u/')] -#next_page_link: substring-after(//input[contains(@value, 'Next')]/@onclick, "self.location='") +body: id('storytext') +author: //a[starts-with(@href, '/u/')] +#next_page_link: substring-after(//input[contains(@value, 'Next')]/@onclick, "self.location='") strip_id_or_class: 'a2a_kit' test_url: http://www.fictionpress.com/s/2897964/1/All_We_Knew \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/ficwad.com.txt b/inc/3rdparty/site_config/standard/ficwad.com.txt old mode 100644 new mode 100755 index 3dbfe76f..081f0bb0 --- a/inc/3rdparty/site_config/standard/ficwad.com.txt +++ b/inc/3rdparty/site_config/standard/ficwad.com.txt @@ -1,12 +1,12 @@ -title: //h4 -author: //span[@class="author"] -body: //div[@id="story"] -strip_id_or_class: summary -strip_id_or_class: meta -strip_id_or_class: storyfoot -convert_double_br_tags: yes -prune: no - -# Note: this site still has trouble because single <br> tags are stripped, but I don't see a way to fix that with 
this interface. +title: //h4 +author: //span[@class="author"] +body: //div[@id="story"] +strip_id_or_class: summary +strip_id_or_class: meta +strip_id_or_class: storyfoot +convert_double_br_tags: yes +prune: no + +# Note: this site still has trouble because single <br> tags are stripped, but I don't see a way to fix that with this interface. test_url: http://www.ficwad.com/story/158977 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/finance.yahoo.com.txt b/inc/3rdparty/site_config/standard/finance.yahoo.com.txt old mode 100644 new mode 100755 index 81c18fd3..248522cb --- a/inc/3rdparty/site_config/standard/finance.yahoo.com.txt +++ b/inc/3rdparty/site_config/standard/finance.yahoo.com.txt @@ -1,12 +1,12 @@ -title: //meta[@property='og:title']/@content -body: //div[@id='y-article-bd'] -body: //div[contains(@class, 'yom-art-content')] -strip: //div[contains(@class, 'related-companies')] -strip: //div[@id='y-article-related'] -strip: //div[@id='ypf-article-related'] -prune: no - -single_page_link: //div[@class='ft']//a[contains(@href, 'page=all')] - -test_url: http://sg.finance.yahoo.com/news/Motorola-takes-wraps-249-rsg-3508842732.html?x=0&.v=1 +title: //meta[@property='og:title']/@content +body: //div[@id='y-article-bd'] +body: //div[contains(@class, 'yom-art-content')] +strip: //div[contains(@class, 'related-companies')] +strip: //div[@id='y-article-related'] +strip: //div[@id='ypf-article-related'] +prune: no + +single_page_link: //div[@class='ft']//a[contains(@href, 'page=all')] + +test_url: http://sg.finance.yahoo.com/news/Motorola-takes-wraps-249-rsg-3508842732.html?x=0&.v=1 test_url: http://finance.yahoo.com/news/super-young-retirement-savers.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/findtheswagger.tumblr.com.txt b/inc/3rdparty/site_config/standard/findtheswagger.tumblr.com.txt old mode 100644 new mode 100755 index 1a5cd2e1..43aef750 --- 
a/inc/3rdparty/site_config/standard/findtheswagger.tumblr.com.txt +++ b/inc/3rdparty/site_config/standard/findtheswagger.tumblr.com.txt @@ -1,10 +1,10 @@ -date: //div[@class='notes']/a -body: //div[@id='content'] - -strip_id_or_class: tags -strip_id_or_class: permalink -strip_id_or_class: notes -strip_id_or_class: post_nav -strip: //div[@id='content']//h2 +date: //div[@class='notes']/a +body: //div[@id='content'] + +strip_id_or_class: tags +strip_id_or_class: permalink +strip_id_or_class: notes +strip_id_or_class: post_nav +strip: //div[@id='content']//h2 strip_id_or_class: right_column test_url: http://findtheswagger.tumblr.com/post/11589145141/moe-resners-end-of-an-era-1957-giants-final \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/firstthings.com.txt b/inc/3rdparty/site_config/standard/firstthings.com.txt old mode 100644 new mode 100755 index dd56da22..ce972bac --- a/inc/3rdparty/site_config/standard/firstthings.com.txt +++ b/inc/3rdparty/site_config/standard/firstthings.com.txt @@ -1,7 +1,7 @@ -title: //div[@class='articleTitle'] -author: //div[@class='articleAuthor'] -body: //div[@class='articleContent'] -prune: no -convert_double_br_tags: yes - +title: //div[@class='articleTitle'] +author: //div[@class='articleAuthor'] +body: //div[@class='articleContent'] +prune: no +convert_double_br_tags: yes + test_url: http://www.firstthings.com/article/2011/05/the-trouble-with-ayn-rand \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/fivechapters.com.txt b/inc/3rdparty/site_config/standard/fivechapters.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/fivefilters.org.txt b/inc/3rdparty/site_config/standard/fivefilters.org.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/fivethirtyeight.com.txt b/inc/3rdparty/site_config/standard/fivethirtyeight.com.txt old mode 100644 new mode 100755 index 3d7b45a8..d0a0a772 --- 
a/inc/3rdparty/site_config/standard/fivethirtyeight.com.txt +++ b/inc/3rdparty/site_config/standard/fivethirtyeight.com.txt @@ -1,7 +1,7 @@ -title: substring-after(//title, 'Right:') -body: //div[@class = 'post-body'] -author: substring-after(//*[@class='post-author'], 'by') -date: concat(//*[@class='date-header'], ' ', //*[@class='post-timestamp']/a) -convert_double_br_tags: yes +title: substring-after(//title, 'Right:') +body: //div[@class = 'post-body'] +author: substring-after(//*[@class='post-author'], 'by') +date: concat(//*[@class='date-header'], ' ', //*[@class='post-timestamp']/a) +convert_double_br_tags: yes test_url: http://www.fivethirtyeight.com/2010/07/does-rnc-have-structural-problems.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/flyingmachinestudios.com.txt b/inc/3rdparty/site_config/standard/flyingmachinestudios.com.txt new file mode 100755 index 00000000..2053f801 --- /dev/null +++ b/inc/3rdparty/site_config/standard/flyingmachinestudios.com.txt @@ -0,0 +1,2 @@ +strip_id_or_class: linenos +test_url: http://www.flyingmachinestudios.com/programming/whoops-dci-refactoring/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/fm4.orf.at.txt b/inc/3rdparty/site_config/standard/fm4.orf.at.txt old mode 100644 new mode 100755 index 32d44c87..5db3e58c --- a/inc/3rdparty/site_config/standard/fm4.orf.at.txt +++ b/inc/3rdparty/site_config/standard/fm4.orf.at.txt @@ -1,7 +1,7 @@ -author: //div[@class='authorDescription']/h2 -body: //div[@id='story'] -date: substring-before(substring-after(//p[@class='date'],'Erstellt am:'), '-') -title: //h1[@class='detail'] -strip: //div[@class='fact'] +author: //div[@class='authorDescription']/h2 +body: //div[@id='story'] +date: substring-before(substring-after(//p[@class='date'],'Erstellt am:'), '-') +title: //h1[@class='detail'] +strip: //div[@class='fact'] test_url: http://fm4.orf.at/stories/1689156/ \ No newline at end of file diff --git 
a/inc/3rdparty/site_config/standard/fnal.gov.txt b/inc/3rdparty/site_config/standard/fnal.gov.txt old mode 100644 new mode 100755 index 7faa6bfc..e404ccb8 --- a/inc/3rdparty/site_config/standard/fnal.gov.txt +++ b/inc/3rdparty/site_config/standard/fnal.gov.txt @@ -1,15 +1,15 @@ -title: normalize(//h1) - -author: //td/p[position()=last()]/em - -# I swear, this is really the best way to do this -date: normalize(//td[contains(@style, "color: #ffffff")]) - -# my god, it's full of tables -body: /table/tbody/tr[5]//table/tbody//table/tbody/tr/td -strip: //h1 - -# the following two lines strip the byline at the end of the article (the byline is a <p> that consists of an em dash and then some text in an <em>). I have no idea why I can't just strip //p[position()=last()], but trying to do so includes a bunch of other crap in the output. -strip: //p[position()=last()]/em +title: normalize(//h1) + +author: //td/p[position()=last()]/em + +# I swear, this is really the best way to do this +date: normalize(//td[contains(@style, "color: #ffffff")]) + +# my god, it's full of tables +body: /table/tbody/tr[5]//table/tbody//table/tbody/tr/td +strip: //h1 + +# the following two lines strip the byline at the end of the article (the byline is a <p> that consists of an em dash and then some text in an <em>). I have no idea why I can't just strip //p[position()=last()], but trying to do so includes a bunch of other crap in the output. 
+strip: //p[position()=last()]/em strip: //p[position()=last()]/child::text() test_url: http://www.fnal.gov/pub/today/archive_2011/today11-11-09_MuonDepartmentReadMore.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/focus.de.txt b/inc/3rdparty/site_config/standard/focus.de.txt old mode 100644 new mode 100755 index 3ad5cabf..6da3687e --- a/inc/3rdparty/site_config/standard/focus.de.txt +++ b/inc/3rdparty/site_config/standard/focus.de.txt @@ -1,19 +1,19 @@ -title: //h1 - -author: //div[@class='articleContent small']/div[@class='textBlock']//span[@class='created'] - -date: //div[@class='articleHead']/span[@class='created'] - -body: //div[@id='article'] - -strip: //span[@class='markerText'] -strip: //div[@class='articleContent small']/div[@class='textBlock']//span[@class='created'] -strip: //div[@class='sidebar'] -strip: //div[@class='starbar'] -strip: //div[@class='actions clearfix'] -strip: //div[@id='commentForm'] -strip: //div[@id='commentSent'] -strip: //div[@id='comments'] -strip: //div[@class='similarityBlock'] +title: //h1 + +author: //div[@class='articleContent small']/div[@class='textBlock']//span[@class='created'] + +date: //div[@class='articleHead']/span[@class='created'] + +body: //div[@id='article'] + +strip: //span[@class='markerText'] +strip: //div[@class='articleContent small']/div[@class='textBlock']//span[@class='created'] +strip: //div[@class='sidebar'] +strip: //div[@class='starbar'] +strip: //div[@class='actions clearfix'] +strip: //div[@id='commentForm'] +strip: //div[@id='commentSent'] +strip: //div[@id='comments'] +strip: //div[@class='similarityBlock'] test_url: http://www.focus.de/politik/ausland/ein-jahr-nach-bombenanschlag-u-bahn-attentaeter-von-minsk-hingerichtet_aid_724958.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/folklore.org.txt b/inc/3rdparty/site_config/standard/folklore.org.txt new file mode 100755 index 00000000..ed23a0b6 --- /dev/null +++ 
b/inc/3rdparty/site_config/standard/folklore.org.txt @@ -0,0 +1,4 @@ +author: /html/body/table[3]/tbody/tr/td[1]/table[2]/tbody/tr[1]/td[2] +date: /html/body/table[3]/tbody/tr/td[1]/table[2]/tbody/tr[2]/td[2] +body: //div[@class='main'] +test_url: http://www.folklore.org/StoryView.py?story=Calculator_Construction_Set.txt \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/food.com.txt b/inc/3rdparty/site_config/standard/food.com.txt new file mode 100755 index 00000000..a70da766 --- /dev/null +++ b/inc/3rdparty/site_config/standard/food.com.txt @@ -0,0 +1,11 @@ +body: //div[@id='print-area'] +title: //h1[contains(@class, 'section-title')] +single_page_link: //a[@id='prntrec'] +strip_image_src: food-logo-small +strip_id_or_class: timer +strip_id_or_class: photo-sm +strip_id_or_class: page-header + +prune: no + +test_url: http://www.food.com/recipe/couldnt-be-easier-bbq-pork-tenderloin-crock-pot-317152 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/fool.com.txt b/inc/3rdparty/site_config/standard/fool.com.txt old mode 100644 new mode 100755 index 69867ccb..89cb8b9a --- a/inc/3rdparty/site_config/standard/fool.com.txt +++ b/inc/3rdparty/site_config/standard/fool.com.txt @@ -1,11 +1,11 @@ -body: //div[@class='entry-content'] -date: //meta[@name="date"]/@content -author: //meta[@name="author"]/@content - -strip_id_or_class: ecapShell -strip_id_or_class: noindent -strip_id_or_class: targetedPromotion - -prune: no - +body: //div[@class='entry-content'] +date: //meta[@name="date"]/@content +author: //meta[@name="author"]/@content + +strip_id_or_class: ecapShell +strip_id_or_class: noindent +strip_id_or_class: targetedPromotion + +prune: no + test_url: http://www.fool.com/investing/general/2012/01/27/dfc-global-beats-up-on-analysts-yet-again.aspx \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/forbes.com.txt b/inc/3rdparty/site_config/standard/forbes.com.txt old mode 100644 new mode 100755 index 
2381b56a..9e1d04c1 --- a/inc/3rdparty/site_config/standard/forbes.com.txt +++ b/inc/3rdparty/site_config/standard/forbes.com.txt @@ -1,16 +1,27 @@ -title: //hgroup//h1 -title: //span[@class='mainarttitle'] - -body: //div[@id='leftRail']//div[contains(@class, 'body')] - -author: //meta[@name="author"]/@content -author: //span[@class='mainartauthor'] - -date: substring-before(//hgroup//h6, '@') -date: //span[@class='mainartdate'] - -prune: no - -single_page_link: //a[contains(@href, '/print/')] - -test_url: http://www.forbes.com/forbes/2011/0509/technology-frog-design-jan-chipchase-ethnographer-birth-cool_print.html \ No newline at end of file +title: //hgroup//h1 +title: //span[@class='mainarttitle'] + +body: //div[@id='leftRail']//div[contains(@class, 'body')] + +author: //meta[@name="author"]/@content +author: //span[@class='mainartauthor'] + +date: substring-before(//hgroup//h6, '@') +date: //span[@class='mainartdate'] + +prune: no +strip: //aside +strip_id_or_class: sticky_sharing +strip_id_or_class: pagination +strip_id_or_class: controlsbox +strip_id_or_class: storyboxes +strip_id_or_class: sponsoredlinks +strip_id_or_class: nextpage +strip_id_or_class: contextuallinks +strip_id_or_class: article_actions +strip_id_or_class: engagement_block + +single_page_link: //a[contains(@href, '/print/')] + +test_url: http://www.forbes.com/forbes/2011/0509/technology-frog-design-jan-chipchase-ethnographer-birth-cool_print.html +test_url: http://www.forbes.com/sites/bruceupbin/2012/09/11/the-iphone-5-winners-and-losers/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/foreignaffairs.com.txt b/inc/3rdparty/site_config/standard/foreignaffairs.com.txt new file mode 100755 index 00000000..cf8b742f --- /dev/null +++ b/inc/3rdparty/site_config/standard/foreignaffairs.com.txt @@ -0,0 +1,34 @@ +# TIDY +#tidy: no +# PRUNE +#prune: no + +# SINGLE PAGE +single_page_link: //div[@class='showlinks']/a + +# TITLE +title: //h1[@class="title"] + +# AUTHOR +author: 
//div[contains(@class,"field-field-article-display-authors")]/div/div/a/text() + +# DATE +date: //div[contains(@class,"field-field-article-issue")]/div/div/a/text() | //span[@class="date-display-single"] + +# BODY +body: //div[contains(@class,"content-resize")] + +# Remove clutter +strip: //div[@class="article-sidebar"] +strip: //div[@class="showlinks"] +strip: //div[contains(@class,"premium-box")] +strip: //div[contains(@class,"premium-box")] +strip: //table[contains(@border,"2")] + +# Fix picture captions +wrap_in(small): //p/img/following-sibling::em +wrap_in(small): //p[img]/text() + +# Fix sub-headlines +wrap_in(h3): //div[contains(@class,"field-field-article-subtitle")]/div/div/text() +test_url: http://www.foreignaffairs.com/articles/138810/pierre-n-leval/the-long-arm-of-international-law \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/foreignpolicy.com.txt b/inc/3rdparty/site_config/standard/foreignpolicy.com.txt old mode 100644 new mode 100755 index 6ab7a091..4e84b989 --- a/inc/3rdparty/site_config/standard/foreignpolicy.com.txt +++ b/inc/3rdparty/site_config/standard/foreignpolicy.com.txt @@ -1,11 +1,15 @@ -title: //div[@id='art-mast']//h1 -author: substring-after(//span[@id='by-line'], 'BY ') -date: //span[@id='pub-date'] -body: //div[@id='art-mast']//h2 | //div[@id='art-mast']/h3 | //div[@id='art-body']//div[@class='translateBody'] -strip: //div[@id='share-box'] -prune: no - -single_page_link: //span[@id='controls']/a[contains(@href, 'print=yes')] - -test_url: http://www.foreignpolicy.com/articles/2011/08/01/a_murderers_manifesto_and_me -test_url: test_url: http://www.foreignpolicy.com/articles/2012/02/29/five_years_in_damascus \ No newline at end of file +title: //div[@class='translateHead']//h1 | //div[@id='art-mast']//h1 +author: substring-after(//span[@id='by-line'], 'BY ') +date: //span[@id='pub-date'] +body: //div[@id='art-mast']/h2 | //div[@class='translateBody'] | //div[@id='art-body'] +#Strip inside article content 
+strip: //div[@id='share-box'] +strip: //div[@id='special-box'] + +prune: no + +single_page_link: //span[@id='controls']/a[contains(@href, 'print=yes')] +single_page_link: //a[text()='SINGLE PAGE'] + +test_url: http://www.foreignpolicy.com/articles/2011/08/01/a_murderers_manifesto_and_me +test_url: http://www.foreignpolicy.com/articles/2012/02/29/five_years_in_damascus \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/forsvaret.no.txt b/inc/3rdparty/site_config/standard/forsvaret.no.txt old mode 100644 new mode 100755 index 3085c8f2..c1bd2bac --- a/inc/3rdparty/site_config/standard/forsvaret.no.txt +++ b/inc/3rdparty/site_config/standard/forsvaret.no.txt @@ -1,9 +1,9 @@ -title: //div[@class="articleHeader"]/h1 -author: //p[@class="byline"] -date: //p[contains(@class,"publishedDate")]/span -# remove the right menu -strip: //div[contains(@class,"aside")] -# remove some SharePoint webpart label junk -strip: //div[@id="ctl00_PlaceHolderMain_ArticleLeadField_label"] +title: //div[@class="articleHeader"]/h1 +author: //p[@class="byline"] +date: //p[contains(@class,"publishedDate")]/span +# remove the right menu +strip: //div[contains(@class,"aside")] +# remove some SharePoint webpart label junk +strip: //div[@id="ctl00_PlaceHolderMain_ArticleLeadField_label"] strip: //div[@id="ctl00_PlaceHolderMain_PublishingPageContentField_label"] test_url: http://forsvaret.no/aktuelt/publisert/nyheter/Sider/F5-fly-til-Skedsmo.aspx \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/foxnews.com.txt b/inc/3rdparty/site_config/standard/foxnews.com.txt old mode 100644 new mode 100755 index f1ee4851..e19c77db --- a/inc/3rdparty/site_config/standard/foxnews.com.txt +++ b/inc/3rdparty/site_config/standard/foxnews.com.txt @@ -1,9 +1,9 @@ -prune: no - -author: //meta[@name="dc.publisher"]/@content -date: //meta[@name="dc.date"]/@content -strip: //p[contains(@class, 'contributor vcard')] -replace_string(<ul><li><div class="photo">): <div 
class="photo"> -strip: //p[a[contains(., 'Click here to read more on this story ')]] - +prune: no + +author: //meta[@name="dc.publisher"]/@content +date: //meta[@name="dc.date"]/@content +strip: //p[contains(@class, 'contributor vcard')] +replace_string(<ul><li><div class="photo">): <div class="photo"> +strip: //p[a[contains(., 'Click here to read more on this story ')]] + test_url: http://www.foxnews.com/entertainment/2011/05/04/dwayne-johnson-guys-grow-pair-driving-hybrid/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/freelancer.com.txt b/inc/3rdparty/site_config/standard/freelancer.com.txt old mode 100644 new mode 100755 index f3d5425c..78d37729 --- a/inc/3rdparty/site_config/standard/freelancer.com.txt +++ b/inc/3rdparty/site_config/standard/freelancer.com.txt @@ -1,3 +1,3 @@ -body: //div[@id="projectDetailsContent"]//td +body: //div[@id="projectDetailsContent"]//td test_url: http://www.freelancer.com/projects/PHP-Website-Design/debug-Forum-website-code.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/freytag-film.com.txt b/inc/3rdparty/site_config/standard/freytag-film.com.txt old mode 100644 new mode 100755 index 8dc0dabc..c83f8303 --- a/inc/3rdparty/site_config/standard/freytag-film.com.txt +++ b/inc/3rdparty/site_config/standard/freytag-film.com.txt @@ -1,5 +1,5 @@ -body: //div[@class = 'instapaperbody'] -convert_double_br_tags: no -date: //div[@class='instadate'] +body: //div[@class = 'instapaperbody'] +convert_double_br_tags: no +date: //div[@class='instadate'] title: //h2[@class = 'instatitle'] test_url: http://freytag-film.com/blog/artikel/shooting_a_feature_film_in_10_days \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/fria.nu.txt b/inc/3rdparty/site_config/standard/fria.nu.txt new file mode 100755 index 00000000..9d8eff97 --- /dev/null +++ b/inc/3rdparty/site_config/standard/fria.nu.txt @@ -0,0 +1,8 @@ +body: //div[contains(@class, 
'layout__inner')]//div[contains(@class, 'file-image') or contains(@class, 'node__content')] +author: //article//div[contains(@class, 'field-byline')] +strip_id_or_class: rekommenderade +strip_id_or_class: disqus +strip_id_or_class: annonser + +test_url: http://www.fria.nu/artikel/112079 +test_url: http://www.fria.nu/taxonomy/term/1928/all/feed \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/friatidningen.se.txt b/inc/3rdparty/site_config/standard/friatidningen.se.txt new file mode 100755 index 00000000..1e4abc5a --- /dev/null +++ b/inc/3rdparty/site_config/standard/friatidningen.se.txt @@ -0,0 +1,7 @@ +body: //div[contains(@class, 'layout__inner')]//div[contains(@class, 'file-image') or contains(@class, 'node__content')] +author: //article//div[contains(@class, 'field-byline')] +strip_id_or_class: rekommenderade +strip_id_or_class: disqus +strip_id_or_class: annonser + +test_url: http://www.friatidningen.se/artikel/112074 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/friendskorner.com.txt b/inc/3rdparty/site_config/standard/friendskorner.com.txt old mode 100644 new mode 100755 index 39a9973f..b067d88a --- a/inc/3rdparty/site_config/standard/friendskorner.com.txt +++ b/inc/3rdparty/site_config/standard/friendskorner.com.txt @@ -1,11 +1,11 @@ -#body: (//div[@class='ftr-yt-vid'])[1] -body: (//blockquote[contains(@class, 'postcontent')])[1] -body: (//div[starts-with(@id, 'post_message')])[1] - -prune: no -tidy: no - -#replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player" -#replace_string(</iframe>): </iframe> </div> - +#body: (//div[@class='ftr-yt-vid'])[1] +body: (//blockquote[contains(@class, 'postcontent')])[1] +body: (//div[starts-with(@id, 'post_message')])[1] + +prune: no +tidy: no + +#replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player" +#replace_string(</iframe>): </iframe> </div> + 
test_url: http://www.friendskorner.com/forum/f137/debate-personal-lives-leaders-west-vs-pakistan-must-read-297989/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/ft.com.txt b/inc/3rdparty/site_config/standard/ft.com.txt old mode 100644 new mode 100755 index 38d9d326..e66b9603 --- a/inc/3rdparty/site_config/standard/ft.com.txt +++ b/inc/3rdparty/site_config/standard/ft.com.txt @@ -1,5 +1,5 @@ -body: //div[contains(@class, 'ft-story-body')] - -author: substring-after(//div[contains(@class, 'ft-story-header')]/p[1], 'By ') +body: //div[contains(@class, 'ft-story-body')] + +author: substring-after(//div[contains(@class, 'ft-story-header')]/p[1], 'By ') date: substring-before(substring-after(//div[contains(@class, 'ft-story-header')]/p[2], 'Published:'), '|') test_url: http://www.ft.com/cms/s/2/e1be4b5a-620c-11e0-8ee4-00144feab49a.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/ftchinese.com.txt b/inc/3rdparty/site_config/standard/ftchinese.com.txt new file mode 100755 index 00000000..5c94d9b0 --- /dev/null +++ b/inc/3rdparty/site_config/standard/ftchinese.com.txt @@ -0,0 +1,18 @@ +# Modified to define the single_page_link +# This filter is tested on: +# http://www.ftchinese.com/story/001047373 +# http://www.ftchinese.com/story/001047631 +# http://www.ftchinese.com/story/001047622/?print=y +# http://www.ftchinese.com/story/001049052 +# http://www.ftchinese.com/story/001049088 + +title:substring-before(//title, '-') +author: //div[@class='byline']/a +date: //a[@class='storytime'] +#Set date in print view +#date: //div[@class='byline']/a/following-sibling::a +body: //div[@id="bodytext"] +strip://div[@class='pagination'] +single_page_link://div[@class='pagination']/a[.='全文'] +#next_page_link: //div[@class='pagination']//a[.='下一页'] +test_url: http://www.ftchinese.com/story/001049088 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/ftd.de.txt b/inc/3rdparty/site_config/standard/ftd.de.txt 
old mode 100644 new mode 100755 index a58765b0..7d76af00 --- a/inc/3rdparty/site_config/standard/ftd.de.txt +++ b/inc/3rdparty/site_config/standard/ftd.de.txt @@ -1,5 +1,5 @@ -body: //div[@class='boxIntroHead']/span[@class='h3'] | //div[@class='section']/div[@class='paragraph' or @class='embObjLeft'] -single_page_link: //a[@class='icon print'] +body: //div[@class='boxIntroHead']/span[@class='h3'] | //div[@class='section']/div[@class='paragraph' or @class='embObjLeft'] +single_page_link: //a[@class='icon print'] -test_url: http://www.ftd.de/it-medien/it-telekommunikation/:mobilfunk-vivendi-und-vodafone-trennen-sich-in-frankreich/60034691.html +test_url: http://www.ftd.de/it-medien/it-telekommunikation/:mobilfunk-vivendi-und-vodafone-trennen-sich-in-frankreich/60034691.html test_url: http://www.ftd.de/it-medien/medien-internet/:verkauf-von-warner-music-musikbranche-auf-dem-sprung/60048185.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/fubiz.net.txt b/inc/3rdparty/site_config/standard/fubiz.net.txt old mode 100644 new mode 100755 index 8e6356bf..0dc30475 --- a/inc/3rdparty/site_config/standard/fubiz.net.txt +++ b/inc/3rdparty/site_config/standard/fubiz.net.txt @@ -1,3 +1,3 @@ -body: //div[@class = 'entry'] +body: //div[@class = 'entry'] test_url: http://www.fubiz.net/2011/05/31/world-press-photo-2011/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/futurezone.at.txt b/inc/3rdparty/site_config/standard/futurezone.at.txt old mode 100644 new mode 100755 index 50fc144a..808c1f1b --- a/inc/3rdparty/site_config/standard/futurezone.at.txt +++ b/inc/3rdparty/site_config/standard/futurezone.at.txt @@ -1,11 +1,11 @@ -date: //span[@class='date'] -strip: //div[@class='postsidebar'] -body: //div[@class='singlepost'] -title: //div[@class='singlepost']/h1 -move_into(//div[@class='singlepost']): //div[@class='info'] -strip: //div[@class='gallery'] -strip: //div[@class='biggallery'] -strip: //ul[@class='social'] -strip: 
//ul[@class='social_mail'] +date: //span[@class='date'] +strip: //div[@class='postsidebar'] +body: //div[@class='singlepost'] +title: //div[@class='singlepost']/h1 +move_into(//div[@class='singlepost']): //div[@class='info'] +strip: //div[@class='gallery'] +strip: //div[@class='biggallery'] +strip: //ul[@class='social'] +strip: //ul[@class='social_mail'] test_url: http://futurezone.at/future/5502-erste-galileo-satelliten-starten-ins-all.php \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/gamasutra.com.txt b/inc/3rdparty/site_config/standard/gamasutra.com.txt old mode 100644 new mode 100755 index 35a8762a..7c808cfd --- a/inc/3rdparty/site_config/standard/gamasutra.com.txt +++ b/inc/3rdparty/site_config/standard/gamasutra.com.txt @@ -1,20 +1,20 @@ -# default view title -title: //span[@class='newsTitle'] -# print view title -title: //h3[@class='title'] - -# default view author -author: //span[@class='newsAuth']/a -author: substring-after(//span[@class='newsAuth'], 'by ') - -# default view date -date: //td[@class='newsDate'] - -# default view body -body: //td[@class='featureText'] -body: //td[@class='newsText'] - -strip: //h3[@class='title'] - +# default view title +title: //span[@class='newsTitle'] +# print view title +title: //h3[@class='title'] + +# default view author +author: //span[@class='newsAuth']/a +author: substring-after(//span[@class='newsAuth'], 'by ') + +# default view date +date: //td[@class='newsDate'] + +# default view body +body: //td[@class='featureText'] +body: //td[@class='newsText'] + +strip: //h3[@class='title'] + single_page_link: //a[contains(@href, '?print=1')] test_url: http://www.gamasutra.com/view/feature/132559/staying_power_rethinking_feedback_.php \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/gameblog.fr.txt b/inc/3rdparty/site_config/standard/gameblog.fr.txt old mode 100644 new mode 100755 index 2cc4b378..73f8342f --- a/inc/3rdparty/site_config/standard/gameblog.fr.txt +++ 
b/inc/3rdparty/site_config/standard/gameblog.fr.txt @@ -1,10 +1,10 @@ -title: //meta[@property="og:title"]/@content -body: //div[@id='GBTVPlayer'] | //div[contains(@class, 'col490')] - -prune: no - -strip_id_or_class: noprint -strip: //div[@id='gbNewsTextContent']/following-sibling::* - -test_url: http://www.gameblog.fr/news/26330-les-sims-3-showtime-s-annonce-en-video +title: //meta[@property="og:title"]/@content +body: //div[@id='GBTVPlayer'] | //div[contains(@class, 'col490')] + +prune: no + +strip_id_or_class: noprint +strip: //div[@id='gbNewsTextContent']/following-sibling::* + +test_url: http://www.gameblog.fr/news/26330-les-sims-3-showtime-s-annonce-en-video test_url: http://www.gameblog.fr/news/26306-mise-a-jour-du-dashboard-de-la-xbox-360-disponible \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/gamechurch.com.txt b/inc/3rdparty/site_config/standard/gamechurch.com.txt new file mode 100755 index 00000000..c9eea5f8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/gamechurch.com.txt @@ -0,0 +1,10 @@ +title: //h1[@class='title'] + +date: substring-before(substring-after(//div[@class='comment-bubble']/.., 'Posted'), 'by') + +body: //div[@class='the-content'] + +strip: //div[@class='article-image responsive'] + +strip_id_or_class: 'pullquote' +test_url: http://gamechurch.com/virtual-gun-control-the-best-amendment/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/gamer.no.txt b/inc/3rdparty/site_config/standard/gamer.no.txt new file mode 100755 index 00000000..e76a59d9 --- /dev/null +++ b/inc/3rdparty/site_config/standard/gamer.no.txt @@ -0,0 +1,11 @@ +body: //div[@class='pageContent description'] +date: //div[@class='authorsAndDateTime']/span[@title] +single_page_link: //div[@class='pages']/a[last()-1] + +# fix images and captions +wrap_in(figure): //div[contains(concat(' ', @class, ' '), ' image')] +wrap_in(figcaption): //div[contains(concat(' ', @class, ' '), ' image')]/div[@class='text']/text() + +# 
get rid of videos +strip_id_or_class: 'video full' +test_url: http://www.gamer.no/artikler/142455/slik-blei-ambisiose-dragons-dogma-skapt/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/gamereactor.no.txt b/inc/3rdparty/site_config/standard/gamereactor.no.txt new file mode 100755 index 00000000..6f7c1b9b --- /dev/null +++ b/inc/3rdparty/site_config/standard/gamereactor.no.txt @@ -0,0 +1,11 @@ +title: //div[@id='content']/div/h1 + +author: //a[@itemprop='reviewer'] + +date: //time[@itemprop='dtreviewed']/@datetime + +body: //div[@id='breadtext'] + +# fix for NOT magically removing anchors with text identical to title +dissolve: //a[text()=//div[@id='content']/div/h1/text()] +test_url: http://www.gamereactor.no/previews/177481/The+Evil+Within/?sid=38b5bd30f56f1b7214de4ff5bed4b76f \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/garythink.com.txt b/inc/3rdparty/site_config/standard/garythink.com.txt old mode 100644 new mode 100755 index 1791e816..327ac55b --- a/inc/3rdparty/site_config/standard/garythink.com.txt +++ b/inc/3rdparty/site_config/standard/garythink.com.txt @@ -1,3 +1,3 @@ -tidy: no - +tidy: no + test_url: http://www.garythink.com/eft/testing.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/gasteroprod.com.txt b/inc/3rdparty/site_config/standard/gasteroprod.com.txt old mode 100644 new mode 100755 index ef68082a..8eda0c36 --- a/inc/3rdparty/site_config/standard/gasteroprod.com.txt +++ b/inc/3rdparty/site_config/standard/gasteroprod.com.txt @@ -1,4 +1,4 @@ -# These should work, but don't. They were given by Firefox XPather extension -title: //article//header//a//h1 +# These should work, but don't. 
They were given by Firefox XPather extension +title: //article//header//a//h1 body: //article//section test_url: http://gasteroprod.com/blog/faut-il-continuer-a-supporter-internet-explorer-6.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/gatopardo.com.txt b/inc/3rdparty/site_config/standard/gatopardo.com.txt old mode 100644 new mode 100755 index 74346328..2ab144f5 --- a/inc/3rdparty/site_config/standard/gatopardo.com.txt +++ b/inc/3rdparty/site_config/standard/gatopardo.com.txt @@ -1,8 +1,8 @@ -body: //div[@class='panel'] -strip: //div[@style='float:right'] -strip: //span[@class='titulosHomePublicidad'] -strip: //div[@id='TitTop5Der'] -strip: //img[@src='/ImagesGatoPardo/LogoGatopardo.png'] - +body: //div[@class='panel'] +strip: //div[@style='float:right'] +strip: //span[@class='titulosHomePublicidad'] +strip: //div[@id='TitTop5Der'] +strip: //img[@src='/ImagesGatoPardo/LogoGatopardo.png'] + prune: yes test_url: http://www.gatopardo.com/ReportajesGP.php?R=95 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/gawker.com.txt b/inc/3rdparty/site_config/standard/gawker.com.txt old mode 100644 new mode 100755 index 6531d81a..9bc5613a --- a/inc/3rdparty/site_config/standard/gawker.com.txt +++ b/inc/3rdparty/site_config/standard/gawker.com.txt @@ -1,6 +1,6 @@ -body: //div[@class="post-body"] - -# Remove 'content is restricted' -strip: //div[@id='agegate_IDHERE'] - +body: //div[@class="post-body"] + +# Remove 'content is restricted' +strip: //div[@id='agegate_IDHERE'] + test_url: http://gawker.com/#!5782070/russian-bomb-squad-successfully-defuses-sex-toy \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/geeksofdoom.com.txt b/inc/3rdparty/site_config/standard/geeksofdoom.com.txt old mode 100644 new mode 100755 index 55586e1c..89eb402f --- a/inc/3rdparty/site_config/standard/geeksofdoom.com.txt +++ b/inc/3rdparty/site_config/standard/geeksofdoom.com.txt @@ -1,3 +1,3 @@ -author: 
substring-after(//span[@class='storyauthor'],'Posted by') +author: substring-after(//span[@class='storyauthor'],'Posted by') date: //span[@class='storydate'] test_url: http://www.geeksofdoom.com/2012/03/14/robert-rodriguez-says-machete-kills-and-sin-city-2-will-film-this-year/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/geenstijl.nl.txt b/inc/3rdparty/site_config/standard/geenstijl.nl.txt old mode 100644 new mode 100755 index f6dccf48..a664b4d9 --- a/inc/3rdparty/site_config/standard/geenstijl.nl.txt +++ b/inc/3rdparty/site_config/standard/geenstijl.nl.txt @@ -1,3 +1,3 @@ -body: //div[@id = 'article'] +body: //div[@id = 'article'] strip: //div[@id = 'klasbox'] test_url: http://www.geenstijl.nl/mt/archieven/2010/10/vrouw_lange_frans_wou_baas_b_d.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/getnews.jp.txt b/inc/3rdparty/site_config/standard/getnews.jp.txt old mode 100644 new mode 100755 index 537b4c2e..e28d4b8b --- a/inc/3rdparty/site_config/standard/getnews.jp.txt +++ b/inc/3rdparty/site_config/standard/getnews.jp.txt @@ -1,3 +1,3 @@ -body: //div[@class='post'] +body: //div[@class='post'] strip: //ul[@id='bookmark_single'] test_url: http://getnews.jp/archives/117312 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/giantbomb.com.txt b/inc/3rdparty/site_config/standard/giantbomb.com.txt old mode 100644 new mode 100755 index 8a54bc07..61de51b2 --- a/inc/3rdparty/site_config/standard/giantbomb.com.txt +++ b/inc/3rdparty/site_config/standard/giantbomb.com.txt @@ -1,11 +1,11 @@ -# 2011-11-19 - carlo@... - Initial setup. - -strip_id_or_class: user-review-detail -strip: //h1 - -body: //div[@class="wiki-content"] | //div[@class="section-bd"] | //div[@class="news-story"] - -author: //span[@class="reviewer"] | //p[@class="byline"]/a/text() -date: //span[@class="dtreviewed"] +# 2011-11-19 - carlo@... - Initial setup. 
+ +strip_id_or_class: user-review-detail +strip: //h1 + +body: //div[@class="wiki-content"] | //div[@class="section-bd"] | //div[@class="news-story"] + +author: //span[@class="reviewer"] | //p[@class="byline"]/a/text() +date: //span[@class="dtreviewed"] test_url: http://www.giantbomb.com/the-elder-scrolls-v-skyrim/61-33394/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/giga.de.txt b/inc/3rdparty/site_config/standard/giga.de.txt old mode 100644 new mode 100755 index f60199ad..e2689eae --- a/inc/3rdparty/site_config/standard/giga.de.txt +++ b/inc/3rdparty/site_config/standard/giga.de.txt @@ -1,20 +1,20 @@ -tidy:no -title://h2[@class="title"] -# author:"Ben Miller" -date://div[@id="stats"]/span -strip_id_or_class:stats -strip_id_or_class:breadcrumbs -strip_id_or_class:gn-why-content -strip_id_or_class:single-social -strip_id_or_class:sidebar-ads -strip_id_or_class:sidebar-top -strip_id_or_class:footer -strip_id_or_class:post_meta -# strip_id_or_class: -# strip_id_or_class: -# strip_id_or_class: -# strip_id_or_class: -# strip_id_or_class: -# strip_id_or_class: +tidy:no +title://h2[@class="title"] +# author:"Ben Miller" +date://div[@id="stats"]/span +strip_id_or_class:stats +strip_id_or_class:breadcrumbs +strip_id_or_class:gn-why-content +strip_id_or_class:single-social +strip_id_or_class:sidebar-ads +strip_id_or_class:sidebar-top +strip_id_or_class:footer +strip_id_or_class:post_meta +# strip_id_or_class: +# strip_id_or_class: +# strip_id_or_class: +# strip_id_or_class: +# strip_id_or_class: +# strip_id_or_class: test_url: http://www.giga.de/benm/2011/10/17/probleme-mit-ios-5-wenn-die-daten-weg-sind/#more-58033 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/gigaom.com.txt b/inc/3rdparty/site_config/standard/gigaom.com.txt old mode 100644 new mode 100755 index 348bdf23..cc8fdfa0 --- a/inc/3rdparty/site_config/standard/gigaom.com.txt +++ b/inc/3rdparty/site_config/standard/gigaom.com.txt @@ -1,17 +1,12 @@ -date: 
//meta[@name='DC.date.issued']/@content -date: //span[@class='post-meta the-date'] - -title: //meta[@property='og:title']/@content - -author: //meta[@name='DC.creator']/@content - -body: //div[contains(@class, 'post-sub-head') or starts-with(@id, 'post-content-')] - -find_string: id="content" -replace_string: id="content-ignore" - -strip_id_or_class: sharedaddy - -prune: no - -test_url: http://gigaom.com/2011/10/24/groupon-google-lawsuit/ \ No newline at end of file +date: //meta[@name='dcterms.created']/@content +title: //meta[@property='og:title']/@content +author: //section[@class="post-meta"]//a[@rel="author"] + +body: //div[starts-with(@id, 'post-content-')] + +strip_id_or_class: sharedaddy + +prune: no + +test_url: http://gigaom.com/2011/10/24/groupon-google-lawsuit/ +test_url: http://gigaom.com/2012/12/26/snapchat-rises-why-pokes-decline-shows-facebooks-inability-to-invent/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/gihyo.jp.txt b/inc/3rdparty/site_config/standard/gihyo.jp.txt old mode 100644 new mode 100755 index 478b23a3..d3534b29 --- a/inc/3rdparty/site_config/standard/gihyo.jp.txt +++ b/inc/3rdparty/site_config/standard/gihyo.jp.txt @@ -1,3 +1,3 @@ -single_page_link: //p[@id='skip']//a[contains(@href, 'skip')] +single_page_link: //p[@id='skip']//a[contains(@href, 'skip')] test_url: http://gihyo.jp/dev/serial/01/machine-learning/0010 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/gist.github.com.txt b/inc/3rdparty/site_config/standard/gist.github.com.txt old mode 100644 new mode 100755 index 53095b34..90207862 --- a/inc/3rdparty/site_config/standard/gist.github.com.txt +++ b/inc/3rdparty/site_config/standard/gist.github.com.txt @@ -1,6 +1,6 @@ -body: //div[@class="highlight"]/pre - -prune: no -tidy: no - +body: //div[@class="highlight"]/pre + +prune: no +tidy: no + test_url: https://gist.github.com/1258908 \ No newline at end of file diff --git 
a/inc/3rdparty/site_config/standard/givemesomethingtoread.com.txt b/inc/3rdparty/site_config/standard/givemesomethingtoread.com.txt old mode 100644 new mode 100755 index 144ce045..0de0750b --- a/inc/3rdparty/site_config/standard/givemesomethingtoread.com.txt +++ b/inc/3rdparty/site_config/standard/givemesomethingtoread.com.txt @@ -1,3 +1,3 @@ -single_page_link: //div[@id="content"]//h2/a +single_page_link: //div[@id="content"]//h2/a test_url: http://givemesomethingtoread.com/post/6285838917/the-baddest-lawyer-in-the-history-of-jersey \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/gizmodo.co.uk.txt b/inc/3rdparty/site_config/standard/gizmodo.co.uk.txt old mode 100644 new mode 100755 index 285e76c0..2eb82a6d --- a/inc/3rdparty/site_config/standard/gizmodo.co.uk.txt +++ b/inc/3rdparty/site_config/standard/gizmodo.co.uk.txt @@ -1,7 +1,7 @@ -body: //div[@id="leadimage" or @class="postcontent"] -author: //div[@class="contentauthor"] -date: //div[@class="timestamp"] - -prune: no - +body: //div[@id="leadimage" or @class="postcontent"] +author: //div[@class="contentauthor"] +date: //div[@class="timestamp"] + +prune: no + test_url: http://www.gizmodo.co.uk/2013/02/bbc-forcing-poor-old-sir-david-attenborough-to-go-on-twitter/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/gizmodo.com.txt b/inc/3rdparty/site_config/standard/gizmodo.com.txt old mode 100644 new mode 100755 index c9536255..e73ec9d2 --- a/inc/3rdparty/site_config/standard/gizmodo.com.txt +++ b/inc/3rdparty/site_config/standard/gizmodo.com.txt @@ -1,7 +1,11 @@ -body: //div[@class="post-body" or contains(@class, 'illustration top')] -author: (//cite//span[@class="plus-icon"])[1] -date: //span[@class="date"] - -prune: no - -test_url: http://gizmodo.com/5880147/kuhn-rikon-improves-their-spice-grinder-with-grade-school-science \ No newline at end of file +#body: //div[@class="post-body" or contains(@class, 'illustration top')] +body: //div[contains(@class, 
'image-annotation-box') or contains(@class, 'post-content')] +#author: (//cite//span[@class="plus-icon"])[1] +author: //span[contains(@class, 'display-name')] +date: //span[@class="date"] + +prune: no + +test_url: http://gizmodo.com/5880147/kuhn-rikon-improves-their-spice-grinder-with-grade-school-science +test_url: http://gizmodo.com/what-van-goghs-paintings-would-look-like-if-they-came-874035680 +test_url: http://gizmodo.com/vip.xml \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/gizmodo.uol.com.br.txt b/inc/3rdparty/site_config/standard/gizmodo.uol.com.br.txt new file mode 100755 index 00000000..d963d684 --- /dev/null +++ b/inc/3rdparty/site_config/standard/gizmodo.uol.com.br.txt @@ -0,0 +1,6 @@ +title: //h1 + +body: //div[@id='destaques']//div[contains(@class, 'img')] | //div[@id='maincontent']//p + +test_url: http://gizmodo.uol.com.br/nvidia-gtx-titan-z/ +test_url: http://gizmodo.uol.com.br/perfil-mark-zuckerberg-hackeado/ diff --git a/inc/3rdparty/site_config/standard/gizmologia.com.txt b/inc/3rdparty/site_config/standard/gizmologia.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/gizmovil.com.txt b/inc/3rdparty/site_config/standard/gizmovil.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/global.txt b/inc/3rdparty/site_config/standard/global.txt old mode 100644 new mode 100755 index 135ed500..71fbc934 --- a/inc/3rdparty/site_config/standard/global.txt +++ b/inc/3rdparty/site_config/standard/global.txt @@ -1,4 +1,18 @@ -# Look for Open Graph data - http://ogp.me -title: //meta[@property="og:title"]/@content -date: //meta[@property="article:published_time"]/@content -# article:author is someties URL, e.g. on guardian.co.uk \ No newline at end of file +# Look for Open Graph data - http://ogp.me +title: //meta[@property="og:title"]/@content +date: //meta[@property="article:published_time"]/@content +# article:author is someties URL, e.g. 
on guardian.co.uk + +# Remove Google Publisher Tags: https://support.google.com/dfp_sb/answer/1649768?hl=en +#strip_id_or_class: div-gpt-ad + +# Strip doubleclick image ads +strip_image_src: doubleclick.net + +# If you get chunks of Javascript code appearing in the extracted output, try uncommenting the lines below. +# This tries to convert script tags to hidden div elements (which Full-Text RSS removes). +# If you notice issues with this approach, please let us know. +#find_string: <script +#replace_string: <div style="display:none" +#find_string: </script> +#replace_string: </div> \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/globalissues.org.txt b/inc/3rdparty/site_config/standard/globalissues.org.txt old mode 100644 new mode 100755 index 95d4becf..ee50f68f --- a/inc/3rdparty/site_config/standard/globalissues.org.txt +++ b/inc/3rdparty/site_config/standard/globalissues.org.txt @@ -1,15 +1,15 @@ -body: //div[@id='content'] - -strip: //p[@class='top'] -strip: //h2[.='Where next?'] -strip_id_or_class: where-next -strip_id_or_class: social-bookmarks -strip_id_or_class: link-to-here -strip_id_or_class: options-heading -strip_id_or_class: page-options-content -strip_id_or_class: page-info-bottom - -tidy: no -prune: no - +body: //div[@id='content'] + +strip: //p[@class='top'] +strip: //h2[.='Where next?'] +strip_id_or_class: where-next +strip_id_or_class: social-bookmarks +strip_id_or_class: link-to-here +strip_id_or_class: options-heading +strip_id_or_class: page-options-content +strip_id_or_class: page-info-bottom + +tidy: no +prune: no + test_url: http://www.globalissues.org/article/39/a-primer-on-neoliberalism \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/globoesporte.globo.com.txt b/inc/3rdparty/site_config/standard/globoesporte.globo.com.txt new file mode 100755 index 00000000..fd8e70ff --- /dev/null +++ b/inc/3rdparty/site_config/standard/globoesporte.globo.com.txt @@ -0,0 +1,25 @@ +title: 
//h1[@class="entry-title"] + +body: //div[@class='materia-titulo']/h2 | //*[@id="materia-letra"] + +date: //abbr[@class="published"] +date: //abbr[@class="updated"] + +author: //*[@class="author"]/strong + +strip: //div[contains(@class,'foto')]/strong +strip: //div[contains(@class,'frase-materia')]/div[@class='autor'] +strip: //div[contains(@class,'saibamais')] +strip: //*[contains(text(),'Clique aqui e veja mais')]/ancestor::p +strip: //ul[@class="toolbar"] + +# quotes +wrap_in(blockquote): //div[@id='materia-letra']//div[contains(@class,'frase-materia')]/div[@class='frase'] + +prune: no + +replace_string([Clique aqui e veja mais vídeos do Fluminense]): [] + +test_url: http://globoesporte.globo.com/atletismo/noticia/2013/08/michael-johnson-diz-que-bolt-e-melhor-da-historia-nao-ha-duvidas.html +test_url: http://globoesporte.globo.com/futebol/futebol-internacional/futebol-espanhol/noticia/2013/08/barca-atropela-levante-e-neymar-passa-em-branco-em-estreia-oficial.html +test_url: http://globoesporte.globo.com/futebol/times/fluminense/noticia/2013/08/poupado-no-sabado-felipe-se-diz-pronto-para-ser-titular-contra-o-goias.html diff --git a/inc/3rdparty/site_config/standard/gloswielkopolski.pl.txt b/inc/3rdparty/site_config/standard/gloswielkopolski.pl.txt new file mode 100755 index 00000000..16487955 --- /dev/null +++ b/inc/3rdparty/site_config/standard/gloswielkopolski.pl.txt @@ -0,0 +1,8 @@ +title: //article[@id='material']/header/h1 +author: //article[@id='material']/header/div[2]/p +date: //article[@id='material']/header/p/time[1] +body: //section[@id='tresc'] +next_page_link: .//section[@id='tresc']/div[@class='stronicowanie']/a[@rel='next'] +strip://div[@class='podobneSonda'] + +test_url: http://www.gloswielkopolski.pl/artykul/803547,abc-telemarketingu-praca-ktora-zwalnia-z-myslenia,id,t.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/goal.com.txt b/inc/3rdparty/site_config/standard/goal.com.txt old mode 100644 new mode 100755 index 
075c4d2b..e25e9a00 --- a/inc/3rdparty/site_config/standard/goal.com.txt +++ b/inc/3rdparty/site_config/standard/goal.com.txt @@ -1,16 +1,16 @@ -title: //div[@id='article_headline']//h1 -date: //div[contains(@class, 'articleDate')]//h4 -body: //div[@id='article_headline']/h2 | //div[@id='large_article_image' or @id='article_content'] - -strip_id_or_class: relatedLinksBox -strip_id_or_class: betting-widget -strip_image_src: install_flash.gif - -strip: //table[contains(@style, 'float: right; width: 285px;')] -strip: //div[@class='caption'] - -tidy: no -prune: no - -test_url: http://www.goal.com/en-gb/news/3284/euro-2012/2012/05/31/3139032/video-profile-back-to-his-very-best-for-bayern-frances-flair-and- +title: //div[@id='article_headline']//h1 +date: //div[contains(@class, 'articleDate')]//h4 +body: //div[@id='article_headline']/h2 | //div[@id='large_article_image' or @id='article_content'] + +strip_id_or_class: relatedLinksBox +strip_id_or_class: betting-widget +strip_image_src: install_flash.gif + +strip: //table[contains(@style, 'float: right; width: 285px;')] +strip: //div[@class='caption'] + +tidy: no +prune: no + +test_url: http://www.goal.com/en-gb/news/3284/euro-2012/2012/05/31/3139032/video-profile-back-to-his-very-best-for-bayern-frances-flair-and- test_url: http://www.goal.com/en-gb/news/3284/euro-2012/2012/05/31/3139869/lampard-injury-a-bitter-blow-for-england-and-sorry-way-to# \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/golem.de.txt b/inc/3rdparty/site_config/standard/golem.de.txt old mode 100644 new mode 100755 index 6c5d1c4f..6afdebe8 --- a/inc/3rdparty/site_config/standard/golem.de.txt +++ b/inc/3rdparty/site_config/standard/golem.de.txt @@ -1,25 +1,25 @@ -# Jens Kohl, jens.kohl@... 
-# - Added publication date -# - Striped pagination block -# - Added single page link -# - Added xpath-querys for the printer friendly version - -title: //h1 -body: //div[@class='formatted'] -prune: no - -date: substring-after(//li[2][@class="text1"], 'Datum:') -strip: //ol[@class="list-chapters"] -strip_comments: yes - -# next: commands for printer friendly pages -single_page_link: //a[contains(@href, 'print.php?a=')]/@href -title: //body/h3 -strip_image_src: staticrl/images/logo.jpg -strip_image_src: http://cpx.golem.de/cpx.php?class=7 -strip: //body/h3 -strip: //body/b[1] -strip: //body/b[2] -strip: //body/b[3] -strip: //div[1] +# Jens Kohl, jens.kohl@... +# - Added publication date +# - Striped pagination block +# - Added single page link +# - Added xpath-querys for the printer friendly version + +title: //h1 +body: //div[@class='formatted'] +prune: no + +date: substring-after(//li[2][@class="text1"], 'Datum:') +strip: //ol[@class="list-chapters"] +strip_comments: yes + +# next: commands for printer friendly pages +single_page_link: //a[contains(@href, 'print.php?a=')]/@href +title: //body/h3 +strip_image_src: staticrl/images/logo.jpg +strip_image_src: http://cpx.golem.de/cpx.php?class=7 +strip: //body/h3 +strip: //body/b[1] +strip: //body/b[2] +strip: //body/b[3] +strip: //div[1] test_url: http://www.golem.de/1112/88696.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/good.is.txt b/inc/3rdparty/site_config/standard/good.is.txt old mode 100644 new mode 100755 index 5cf67011..94159fbf --- a/inc/3rdparty/site_config/standard/good.is.txt +++ b/inc/3rdparty/site_config/standard/good.is.txt @@ -1,4 +1,4 @@ -title: //div[@class="title"]/div/h1 -body: //div[@class="body"] -date: //li[@class="date-time"] +title: //div[@class="title"]/div/h1 +body: //div[@class="body"] +date: //li[@class="date-time"] test_url: http://www.good.is/post/why-amazon-is-the-next-top-tech-company/ \ No newline at end of file diff --git 
a/inc/3rdparty/site_config/standard/goodfil.ms.txt b/inc/3rdparty/site_config/standard/goodfil.ms.txt new file mode 100755 index 00000000..f8bbbc6a --- /dev/null +++ b/inc/3rdparty/site_config/standard/goodfil.ms.txt @@ -0,0 +1,2 @@ +strip_id_or_class: gutter +test_url: http://goodfil.ms/blog/posts/2012/08/13/angularjs-and-the-goodfilms-mobile-site-part-1/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/gossip-tv.gr.txt b/inc/3rdparty/site_config/standard/gossip-tv.gr.txt old mode 100644 new mode 100755 index c2fe4e40..e2d2d0b2 --- a/inc/3rdparty/site_config/standard/gossip-tv.gr.txt +++ b/inc/3rdparty/site_config/standard/gossip-tv.gr.txt @@ -1,14 +1,14 @@ -date: //meta[@name='og:article:published_time']/@value - -body: //div[@class='itemIntroText' or @class='itemImageBlock' or @class='itemFullText'] - -strip_id_or_class: itemImageGallery - -# remove extras at end of post content -find_string: <div style="margin:5px 0 10px;"> -replace_string: </div></body></html><!-- - -prune: no - -test_url: http://www.gossip-tv.gr/story/158902/aggelike-daliane-semera-duskoleuontai-oloi-sta-epaggelmatika-tous +date: //meta[@name='og:article:published_time']/@value + +body: //div[@class='itemIntroText' or @class='itemImageBlock' or @class='itemFullText'] + +strip_id_or_class: itemImageGallery + +# remove extras at end of post content +find_string: <div style="margin:5px 0 10px;"> +replace_string: </div></body></html><!-- + +prune: no + +test_url: http://www.gossip-tv.gr/story/158902/aggelike-daliane-semera-duskoleuontai-oloi-sta-epaggelmatika-tous test_url: http://www.gossip-tv.gr/lifestyle/Taste/story/230266/lahtaristo-kai-ygieino-tost-sokolatas \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/goteborgsfria.se.txt b/inc/3rdparty/site_config/standard/goteborgsfria.se.txt new file mode 100755 index 00000000..c90aed0b --- /dev/null +++ b/inc/3rdparty/site_config/standard/goteborgsfria.se.txt @@ -0,0 +1,7 @@ +body: 
//div[contains(@class, 'layout__inner')]//div[contains(@class, 'file-image') or contains(@class, 'node__content')] +author: //article//div[contains(@class, 'field-byline')] +strip_id_or_class: rekommenderade +strip_id_or_class: disqus +strip_id_or_class: annonser + +test_url: http://www.goteborgsfria.se/artikel/112079 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/gothamist.com.txt b/inc/3rdparty/site_config/standard/gothamist.com.txt old mode 100644 new mode 100755 index 5179fc12..36453878 --- a/inc/3rdparty/site_config/standard/gothamist.com.txt +++ b/inc/3rdparty/site_config/standard/gothamist.com.txt @@ -1,7 +1,7 @@ -title: //div[@class='entry-header'] -author: //span[@class='vcard author'] -date: //abbr[@class='published'] -#move_into(//div[@class='entry-body']): //img[@id='photo_1'] -body: //div[@class='entry-body'] +title: //div[@class='entry-header'] +author: //span[@class='vcard author'] +date: //abbr[@class='published'] +#move_into(//div[@class='entry-body']): //img[@id='photo_1'] +body: //div[@class='entry-body'] strip: //div[@class='galleryEaseThumbs'] test_url: http://gothamist.com/2012/03/15/fancy_cocktail_lounge_the_randolph.php \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/gotomanager.com.txt b/inc/3rdparty/site_config/standard/gotomanager.com.txt old mode 100644 new mode 100755 index 7fb0ee03..f8af7324 --- a/inc/3rdparty/site_config/standard/gotomanager.com.txt +++ b/inc/3rdparty/site_config/standard/gotomanager.com.txt @@ -1,21 +1,21 @@ -title: //span[@id="showTitle"] -author: //span[@id="showAuthor"] -date: //span[@id="showRefDate"] - -strip: //span[@class="black_bold"] -strip: //div[@id="sectionName"] -strip: //div[@id="storyHeader"] - -body: //div[@id="newsBodyText"] - -strip_image_src: "http://www.gotomanager.com/img/mgrm/space.gif" -strip_image_src: "http://www.gotomanager.com/images/separator.gif" -strip_image_src: "http://www.gotomanager.com/images/spaces.gif" - 
-convert_double_br_tags: yes -tidy: yes - -strip: //div[@id="smallLeadImage"] -strip: //div[@id="truehitsSurvey"] +title: //span[@id="showTitle"] +author: //span[@id="showAuthor"] +date: //span[@id="showRefDate"] + +strip: //span[@class="black_bold"] +strip: //div[@id="sectionName"] +strip: //div[@id="storyHeader"] + +body: //div[@id="newsBodyText"] + +strip_image_src: "http://www.gotomanager.com/img/mgrm/space.gif" +strip_image_src: "http://www.gotomanager.com/images/separator.gif" +strip_image_src: "http://www.gotomanager.com/images/spaces.gif" + +convert_double_br_tags: yes +tidy: yes + +strip: //div[@id="smallLeadImage"] +strip: //div[@id="truehitsSurvey"] strip: //table[@id="relatedInfoTable"] test_url: http://www.gotomanager.com/news/details.aspx?id=86759 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/gov.ky.txt b/inc/3rdparty/site_config/standard/gov.ky.txt new file mode 100755 index 00000000..294ece3a --- /dev/null +++ b/inc/3rdparty/site_config/standard/gov.ky.txt @@ -0,0 +1,4 @@ +strip: //body//title + +test_url: http://www.gov.ky/pls/portal/PORTAL.wwv_media.show?p_id=7593947&p_settingssetid=1&p_settingssiteid=0&p_siteid=2425&p_type=basetext&p_textid=7593948 +test_url: http://www.rcips.ky/pls/portal/wlacomp.wlafeed.show_cignewsfeed_agency?p_sitecode=POL&p_agency=Police \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/gp.se.txt b/inc/3rdparty/site_config/standard/gp.se.txt new file mode 100755 index 00000000..158ae4ed --- /dev/null +++ b/inc/3rdparty/site_config/standard/gp.se.txt @@ -0,0 +1,11 @@ +body: //div[@id='articleContainer'] +author: //div[@id='articleContent']//div[contains(@class, 'byline')]//span[contains(@class, 'name fn')] +strip_id_or_class: toolbar +strip_id_or_class: ADad +strip_id_or_class: articleSerieWrapper +strip_id_or_class: articleFloatContainer +strip: //div[contains(@class, 'byline')]//img +prune: no + +test_url: 
http://www.gp.se/nyheter/bohuslan/1.2045564-styckade-mannen-hade-mordat-hustrun +test_url: http://www.gp.se/1.16560 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/gq.com.txt b/inc/3rdparty/site_config/standard/gq.com.txt old mode 100644 new mode 100755 index 233c4a7f..8ad8a14e --- a/inc/3rdparty/site_config/standard/gq.com.txt +++ b/inc/3rdparty/site_config/standard/gq.com.txt @@ -1,9 +1,9 @@ -next_page_link: //div[@class='pagination']//span[@class='paginationNext']/a -strip_id_or_class: utility -strip_id_or_class: keywords -strip_id_or_class: pagination -strip_id_or_class: position2_content -body: //div[@class='article'] -title: //h1[@class='content-headline'] +next_page_link: //div[@class='pagination']//span[@class='paginationNext']/a +strip_id_or_class: utility +strip_id_or_class: keywords +strip_id_or_class: pagination +strip_id_or_class: position2_content +body: //div[@class='article'] +title: //h1[@class='content-headline'] author: //span[@class='contributor']//a test_url: http://www.gq.com/news-politics/newsmakers/201203/terry-thompson-ohio-zoo-massacre-chris-heath-gq-february-2012 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/grantland.com.txt b/inc/3rdparty/site_config/standard/grantland.com.txt old mode 100644 new mode 100755 index 3269e086..b8d419f4 --- a/inc/3rdparty/site_config/standard/grantland.com.txt +++ b/inc/3rdparty/site_config/standard/grantland.com.txt @@ -1,20 +1,20 @@ -# this is fragile with footnotes -- leave it for now - -#tidy: no -#prune: no -#move_into(//article): //aside[@id='footnotes'] -author: //cite/a -date: //time - -strip: //a[text()='Grantland'] -strip_id_or_class: ad-wrapper -strip_id_or_class: fb-connect-link -strip_id_or_class: fb-status -strip: //li[@class='print'] -strip: //cite -strip: //a[contains(text(), '[+]')] -strip: //a[@id='jump-nav-link'] -strip: //h1[text()='Share This'] -strip: //h1[text()='Top Stories'] -strip: //div[@id="update-text-size"] +# this is 
fragile with footnotes -- leave it for now + +#tidy: no +#prune: no +#move_into(//article): //aside[@id='footnotes'] +author: //cite/a +date: //time + +strip: //a[text()='Grantland'] +strip_id_or_class: ad-wrapper +strip_id_or_class: fb-connect-link +strip_id_or_class: fb-status +strip: //li[@class='print'] +strip: //cite +strip: //a[contains(text(), '[+]')] +strip: //a[@id='jump-nav-link'] +strip: //h1[text()='Share This'] +strip: //h1[text()='Top Stories'] +strip: //div[@id="update-text-size"] test_url: http://www.grantland.com/story/_/id/8421241/examining-new-albums-rock-veterans-no-doubt-green-day \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/greatergreaterwashington.org.txt b/inc/3rdparty/site_config/standard/greatergreaterwashington.org.txt old mode 100644 new mode 100755 index a5258030..31a41075 --- a/inc/3rdparty/site_config/standard/greatergreaterwashington.org.txt +++ b/inc/3rdparty/site_config/standard/greatergreaterwashington.org.txt @@ -1,11 +1,11 @@ -title: //div[@class="blogpost"]/h2 -author: //div[@class="blogpost"]/p[@class="byline"]/a -date: //div[@class="blogpost"]/p[@class="byline"]/span[@class="time_posted"] -body: //div[@class="blogpost"] -strip_id_or_class: flag -strip_id_or_class: byline -strip_id_or_class: post_footer -strip_id_or_class: related_posts -strip_id_or_class: post_author_bios +title: //div[@class="blogpost"]/h2 +author: //div[@class="blogpost"]/p[@class="byline"]/a +date: //div[@class="blogpost"]/p[@class="byline"]/span[@class="time_posted"] +body: //div[@class="blogpost"] +strip_id_or_class: flag +strip_id_or_class: byline +strip_id_or_class: post_footer +strip_id_or_class: related_posts +strip_id_or_class: post_author_bios strip: //h2 test_url: http://greatergreaterwashington.org/post/12457/ask-ggw-what-will-happen-to-the-1000-series-railcars/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/groups.drupal.org.txt b/inc/3rdparty/site_config/standard/groups.drupal.org.txt 
old mode 100644 new mode 100755 index 7e15a5c1..0fe30ef5 --- a/inc/3rdparty/site_config/standard/groups.drupal.org.txt +++ b/inc/3rdparty/site_config/standard/groups.drupal.org.txt @@ -1,5 +1,5 @@ -title://h1 -author://span[@class="submitted"]/a -date:substring-after(//span[@class="submitted"],'on ') +title://h1 +author://span[@class="submitted"]/a +date:substring-after(//span[@class="submitted"],'on ') body://div[@class="content"] test_url: http://groups.drupal.org/node/36816 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/gulfnews.com.txt b/inc/3rdparty/site_config/standard/gulfnews.com.txt old mode 100644 new mode 100755 index e69044b3..97b620de --- a/inc/3rdparty/site_config/standard/gulfnews.com.txt +++ b/inc/3rdparty/site_config/standard/gulfnews.com.txt @@ -1,5 +1,5 @@ -body: //div[@class='wrapper_half']//ul[@class='details'] | //div[@class='wrapper_half']//p[@class='synopsis'] | //div[@class='wrapper_half']//div[@class='image'] | //div[@class='wrapper_half']//div[@class='article'] -strip: //div[@class='wrapper_half']//ul[@class='details']/li[position()>1] -prune: no -tidy: no +body: //div[@class='wrapper_half']//ul[@class='details'] | //div[@class='wrapper_half']//p[@class='synopsis'] | //div[@class='wrapper_half']//div[@class='image'] | //div[@class='wrapper_half']//div[@class='article'] +strip: //div[@class='wrapper_half']//ul[@class='details']/li[position()>1] +prune: no +tidy: no test_url: http://gulfnews.com/news/gulf/uae/government/abu-dhabi-centre-offers-useful-information-1.811084 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/guokr.com.txt b/inc/3rdparty/site_config/standard/guokr.com.txt old mode 100644 new mode 100755 index 00255eb8..f8327bea --- a/inc/3rdparty/site_config/standard/guokr.com.txt +++ b/inc/3rdparty/site_config/standard/guokr.com.txt @@ -1,22 +1,22 @@ -# To administrator: -# Please change the hostname to "www.guokr.com/article/*" -# Not working for "www.guokr.com/post/" pages 
configured by carlosliu913@gmail.com - -# This filter is tested on: -# http://www.guokr.com/article/274325/ -# http://www.guokr.com/article/275013/ - -title://h1 -author://div[contains(@class, 'content-th-info')]/a -date://div[contains(@class, 'content-th-info')]/span -body://div[contains(@class, 'Content')] - -strip://div[contains(@class, 'bottom-i')] -strip://div[contains(@class, 'copyright')] -strip://div[contains(@class, 'fr')] -strip://div[contains(@class, 'content-th-info')] -strip://h1[contains(@id, 'articleTitle')] -strip://div[contains(@class, 'side')] -strip://div[contains(@class, 'top-wp')] -test_url: http://www.guokr.com/article/275013/ +# To administrator: +# Please change the hostname to "www.guokr.com/article/*" +# Not working for "www.guokr.com/post/" pages configured by carlosliu913@gmail.com + +# This filter is tested on: +# http://www.guokr.com/article/274325/ +# http://www.guokr.com/article/275013/ + +title://h1 +author://div[contains(@class, 'content-th-info')]/a +date://div[contains(@class, 'content-th-info')]/span +body://div[contains(@class, 'Content')] + +strip://div[contains(@class, 'bottom-i')] +strip://div[contains(@class, 'copyright')] +strip://div[contains(@class, 'fr')] +strip://div[contains(@class, 'content-th-info')] +strip://h1[contains(@id, 'articleTitle')] +strip://div[contains(@class, 'side')] +strip://div[contains(@class, 'top-wp')] +test_url: http://www.guokr.com/article/275013/ test_url: http://www.guokr.com/article/338387/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/haberler.com.txt b/inc/3rdparty/site_config/standard/haberler.com.txt old mode 100644 new mode 100755 index bc1ce689..1bb2bc7d --- a/inc/3rdparty/site_config/standard/haberler.com.txt +++ b/inc/3rdparty/site_config/standard/haberler.com.txt @@ -1,5 +1,5 @@ -title: //div[@id="habermetni"]/h1[@id="haber_baslik"] -body: //div[@id="habermetni"]/p -strip: //img[@class='newsDetailLeft'] +title: 
//div[@id="habermetni"]/h1[@id="haber_baslik"] +body: //div[@id="habermetni"]/p +strip: //img[@class='newsDetailLeft'] strip_image_src: /haber-resimleri/ test_url: http://www.haberler.com/emniyete-atacakti-elinde-patladi-3198733-haberi/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/hackmake.org.txt b/inc/3rdparty/site_config/standard/hackmake.org.txt new file mode 100755 index 00000000..98140117 --- /dev/null +++ b/inc/3rdparty/site_config/standard/hackmake.org.txt @@ -0,0 +1,7 @@ +date: //article//time[@pubdate] +body: //article/div[@id="post-wide"] +title: //article/header/h2 +strip: /div[@id="comment"] +strip: //footer +author: substring-after(//footer/p[@class='byline'] , 'By') +test_url: http://hackmake.org/2012/12/21/mindfulness-of-concentration \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/halo.bungie.org.txt b/inc/3rdparty/site_config/standard/halo.bungie.org.txt old mode 100644 new mode 100755 index 7989d09f..1802efea --- a/inc/3rdparty/site_config/standard/halo.bungie.org.txt +++ b/inc/3rdparty/site_config/standard/halo.bungie.org.txt @@ -1,5 +1,5 @@ -title:substring-before(id("maincontent")/table, 'Posted') -body:id("maincontent")/p -# eventually convert linebreaks better +title:substring-before(id("maincontent")/table, 'Posted') +body:id("maincontent")/p +# eventually convert linebreaks better test_url: http://halo.bungie.org/fanfic/?story=Delahunt0312112316071.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/hammers.theoffside.com.txt b/inc/3rdparty/site_config/standard/hammers.theoffside.com.txt old mode 100644 new mode 100755 index 747f90a1..33f7e726 --- a/inc/3rdparty/site_config/standard/hammers.theoffside.com.txt +++ b/inc/3rdparty/site_config/standard/hammers.theoffside.com.txt @@ -1,7 +1,7 @@ -# Remove right column -strip: //*[(@class = 'right_col')] - -# Remove comments etc. 
-strip: //*[(@class = 'category')] +# Remove right column +strip: //*[(@class = 'right_col')] + +# Remove comments etc. +strip: //*[(@class = 'category')] strip: /html/body/div[1][@class='absolute_content_high']/div[1][@class='wrapper']/div[1][@class='main_col']/div[@class='main_content']/h3 test_url: http://hammers.theoffside.com/carling-cup/a-funny-thing-happened-on-the-way-to-4-nil.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/handelsblatt.com.txt b/inc/3rdparty/site_config/standard/handelsblatt.com.txt new file mode 100755 index 00000000..7d067aa6 --- /dev/null +++ b/inc/3rdparty/site_config/standard/handelsblatt.com.txt @@ -0,0 +1,31 @@ +#Single Page +single_page_link: //li[contains(@class,"hcf-print")]/a + +# Title hcf-headline +title: //span[@class='hcf-headline'] + +# Authors +author: //div[@class="hcf-author"]/a/text() +author: substring-after(//div[@class='hcf-author'], 'von ') + +# Date +date: //div[@class='hcf-article-date'] + +# Body +body: //div[@class='article'] + +# General removements +strip: //div[contains(@class,"hcf-smartbox")] +strip: //div[contains(@class,"hcf-stopper")] +strip: //div[contains(@class,"hcf-img-controls")] +strip: //span[@class='hcf-location-mark'] +strip: //span[@class='hcf-copyright'] +strip: //div[@class='hcf-copyright'] +strip: //div[@class='hcf-origin'] + + + + +# Fix picture captions +wrap_in(small): //div[@class="hcf-caption"] +test_url: http://www.handelsblatt.com/meinung/gastbeitraege/gastkommentar-zum-emissionshandel-kurskorrekturen-fuehren-zum-kentern/8044326.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/hanselman.com.txt b/inc/3rdparty/site_config/standard/hanselman.com.txt old mode 100644 new mode 100755 index d3ffeab1..1dca632f --- a/inc/3rdparty/site_config/standard/hanselman.com.txt +++ b/inc/3rdparty/site_config/standard/hanselman.com.txt @@ -1,4 +1,4 @@ -date: //span[@class="item-date"] -body: //div[@class="item-content"] +date: 
//span[@class="item-date"] +body: //div[@class="item-content"] strip_comments: no test_url: http://www.hanselman.com/blog/BrainBytesBackBunsTheProgrammersPriorities.aspx \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/hardware.fr.txt b/inc/3rdparty/site_config/standard/hardware.fr.txt old mode 100644 new mode 100755 index 318885c8..e4f1f6bc --- a/inc/3rdparty/site_config/standard/hardware.fr.txt +++ b/inc/3rdparty/site_config/standard/hardware.fr.txt @@ -1,6 +1,6 @@ -title: //h1 -author: //a[@class='a_aut'] -body: //div[@class='content_dossier'] -strip: //div[@id='pagination'] +title: //h1 +author: //a[@class='a_aut'] +body: //div[@class='content_dossier'] +strip: //div[@id='pagination'] next_page_link: //div[@class='sommaire_colonne']//span[@class='page_actuelle']/following::span[@class='autres_page']//a/@href test_url: http://www.hardware.fr/articles/850-1/pci-express-3-0-impact-performances.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/hardware.no.txt b/inc/3rdparty/site_config/standard/hardware.no.txt new file mode 100755 index 00000000..cbbcf84e --- /dev/null +++ b/inc/3rdparty/site_config/standard/hardware.no.txt @@ -0,0 +1,16 @@ +title: //h1[@class='headline'] +title: //h2[@itemprop='alternativeHeadline'] +title: //h1[@itemprop='headline'] +author: //span[@itemprop='name'] +date: //time[@itemprop='datePublished'] +body: //div[@itemprop='reviewBody'] + +wrap_in(blockquote): //div[@class='factBox'] + +next_page_link: //a[@rel='next'] + +strip_id_or_class: 'product-box' +strip: //a[@rel='next'] +strip: //a[text()='Del på Facebook'] +strip: //a[text()='Del på Twitter'] +test_url: http://www.hardware.no/artikler/asus-vg248qe/132792 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/hbr.org.txt b/inc/3rdparty/site_config/standard/hbr.org.txt old mode 100644 new mode 100755 index fd6145e7..c2f292e1 --- a/inc/3rdparty/site_config/standard/hbr.org.txt +++ 
b/inc/3rdparty/site_config/standard/hbr.org.txt @@ -1,6 +1,7 @@ -title: //div[@id='article-title'] -author: //div[@id='articleAuthors'] -body: //div[@id='article'] -strip: //div[@class='module wide'] -next_page_link: //a[@title='Next Page'] -test_url: http://hbr.org/2012/04/the-real-leadership-lessons-of-steve-jobs/ar/ \ No newline at end of file +title: //div[@id='article-title'] +author: //div[@id='articleAuthors'] +body: //div[@id='article'] +strip: //div[@class='module wide'] +#single_page_link: //a[@class='social-print'] +test_url: http://hbr.org/2012/04/the-real-leadership-lessons-of-steve-jobs/ar/ +test_url: http://hbr.org/2013/03/big-bang-disruption/ar/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/headrush.typepad.com.txt b/inc/3rdparty/site_config/standard/headrush.typepad.com.txt new file mode 100755 index 00000000..a3146771 --- /dev/null +++ b/inc/3rdparty/site_config/standard/headrush.typepad.com.txt @@ -0,0 +1,14 @@ +title://div[@class='content']/h3[1] +body://div[@class='content'] + +# Article nav +strip://div[@class='content']/p[1] + +# Comments and trackbacks +strip://h2/following-sibling::p +strip://h2 + +# Posted on +strip://b/p +strip://div[@class='content']/p[@class='posted'] +test_url: http://headrush.typepad.com/creating_passionate_users/2005/05/the_case_for_ea.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/heise-online.mobi.txt b/inc/3rdparty/site_config/standard/heise-online.mobi.txt old mode 100644 new mode 100755 index 1da82ac7..daff6143 --- a/inc/3rdparty/site_config/standard/heise-online.mobi.txt +++ b/inc/3rdparty/site_config/standard/heise-online.mobi.txt @@ -1,3 +1,3 @@ -body: //div[@id='content']/div +body: //div[@id='content']/div date: //p[@class='author_date']/span[@class='date'] test_url: http://heise-online.mobi/newsticker/meldung/Amazons-Appstore-in-der-Kritik-Ein-Desaster-fuer-Kunden-und-Entwickler-1273936.html \ No newline at end of file diff --git 
a/inc/3rdparty/site_config/standard/heise.de.txt b/inc/3rdparty/site_config/standard/heise.de.txt old mode 100644 new mode 100755 index 5f19d3f8..c51af561 --- a/inc/3rdparty/site_config/standard/heise.de.txt +++ b/inc/3rdparty/site_config/standard/heise.de.txt @@ -1,7 +1,7 @@ -single_page_link: //p[@class='news_option']/a - -date: //p[@class='news_datum'] -title: //h1 -body: //div[@class='meldung_wrapper'] - +single_page_link: //p[@class='news_option']/a + +date: //p[@class='news_datum'] +title: //h1 +body: //div[@class='meldung_wrapper'] + test_url: http://www.heise.de/newsticker/meldung/Europa-soll-Grundrechteschutz-im-Netz-staerken-1392664.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/hemmings.com.txt b/inc/3rdparty/site_config/standard/hemmings.com.txt new file mode 100755 index 00000000..a02b4a62 --- /dev/null +++ b/inc/3rdparty/site_config/standard/hemmings.com.txt @@ -0,0 +1,9 @@ +title: //h2 +body: //div[@id='leftdetail'] +single_page_link: //a[contains(@href, 'printable=1')] +strip: //a[contains(., 'Full Version')] + +prune: no + +test_url: http://www.hemmings.com/classifieds/dealer/ferrari/330gtc/1601235.html +test_url: http://www.hemmings.com/rss/keyword.xml?adtype=carsforsale&make=ferrari \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/heroturko.me.txt b/inc/3rdparty/site_config/standard/heroturko.me.txt new file mode 100755 index 00000000..07b6adf1 --- /dev/null +++ b/inc/3rdparty/site_config/standard/heroturko.me.txt @@ -0,0 +1,6 @@ +title: //div[contains(@class, 'title')]//h1 +body: //div[contains(@class, 'story')] + +prune: no + +test_url: http://www.heroturko.me/5223034-ds-catia-p3-v5-6r2014-gasp0-x86x64-multilanguage-english-docs.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/hespress.com.txt b/inc/3rdparty/site_config/standard/hespress.com.txt old mode 100644 new mode 100755 index d866f629..4ed0b8b5 --- 
a/inc/3rdparty/site_config/standard/hespress.com.txt +++ b/inc/3rdparty/site_config/standard/hespress.com.txt @@ -1,7 +1,7 @@ -body: //div[@id='article_holder']//div[@class='image'] | //div[@id='article_body'] - -prune: no -tidy: no - -test_url: http://hespress.com/videos/73684.html +body: //div[@id='article_holder']//div[@class='image'] | //div[@id='article_body'] + +prune: no +tidy: no + +test_url: http://hespress.com/videos/73684.html test_url: http://hespress.com/permalink/73678.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/hiamag.com.txt b/inc/3rdparty/site_config/standard/hiamag.com.txt new file mode 100755 index 00000000..3c7ba5ac --- /dev/null +++ b/inc/3rdparty/site_config/standard/hiamag.com.txt @@ -0,0 +1,3 @@ +body: (//div[contains(@class, 'gallery-slides')]//img)[1] | //div[contains(@class, 'node_body_inner')] + +test_url: http://www.hiamag.com/rss.xml \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/highscalability.com.txt b/inc/3rdparty/site_config/standard/highscalability.com.txt old mode 100644 new mode 100755 index fd50b6ad..5a808fa4 --- a/inc/3rdparty/site_config/standard/highscalability.com.txt +++ b/inc/3rdparty/site_config/standard/highscalability.com.txt @@ -1,3 +1,3 @@ -body: //div[@class='journal-entry-text'] +body: //div[@class='journal-entry-text'] test_url: http://highscalability.com/blog/2011/3/14/6-lessons-from-dropbox-one-million-files-saved-every-15-minu.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/hiperpop.com.txt b/inc/3rdparty/site_config/standard/hiperpop.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/hiphopleeft.nl.txt b/inc/3rdparty/site_config/standard/hiphopleeft.nl.txt old mode 100644 new mode 100755 index c57c1aa9..d869a866 --- a/inc/3rdparty/site_config/standard/hiphopleeft.nl.txt +++ b/inc/3rdparty/site_config/standard/hiphopleeft.nl.txt @@ -1,4 +1,4 @@ -body: //div[@class = 'pd'] -strip: 
//div[@id = 'overzicht-albumrecensies'] +body: //div[@class = 'pd'] +strip: //div[@id = 'overzicht-albumrecensies'] strip: //div[@id = 'jc'] test_url: http://hiphopleeft.nl/index.php?option=com_content&view=article&id=2767:mark-ronson-record-collection&catid=66:m&Itemid=142 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/historytoday.com.txt b/inc/3rdparty/site_config/standard/historytoday.com.txt old mode 100644 new mode 100755 index dc687f3f..78fb60a6 --- a/inc/3rdparty/site_config/standard/historytoday.com.txt +++ b/inc/3rdparty/site_config/standard/historytoday.com.txt @@ -1,10 +1,10 @@ -body://div[@id = 'content'] -author://span[@class = 'authors'] -author://span[@class = 'ht-vtag'][1] -date:substring-before(//meta[@name = 'dc.date']/@content,'T') -strip://div[contains(@class, 'region-ubercontent')] -strip://h1 -strip://div[@id = 'ht-author'] -strip://ul[@class = 'links inline'] -strip://div[@id = 'ht-tools'] +body://div[@id = 'content'] +author://span[@class = 'authors'] +author://span[@class = 'ht-vtag'][1] +date:substring-before(//meta[@name = 'dc.date']/@content,'T') +strip://div[contains(@class, 'region-ubercontent')] +strip://h1 +strip://div[@id = 'ht-author'] +strip://ul[@class = 'links inline'] +strip://div[@id = 'ht-tools'] test_url: http://www.historytoday.com/carol-dyhouse/skin-deep-fall-fur \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/hmercer.com.txt b/inc/3rdparty/site_config/standard/hmercer.com.txt old mode 100644 new mode 100755 index eeee1594..2da13a8e --- a/inc/3rdparty/site_config/standard/hmercer.com.txt +++ b/inc/3rdparty/site_config/standard/hmercer.com.txt @@ -1,5 +1,5 @@ -title: //*[@class='ptitle'] -date: //span[@class='date'] -body: //div[@class='body'] +title: //*[@class='ptitle'] +date: //span[@class='date'] +body: //div[@class='body'] prune: no test_url: http://hmercer.com/2011/07/why-i-switched-to-jekyll/ \ No newline at end of file diff --git 
a/inc/3rdparty/site_config/standard/hollywoodlife.com.txt b/inc/3rdparty/site_config/standard/hollywoodlife.com.txt new file mode 100755 index 00000000..975ffa26 --- /dev/null +++ b/inc/3rdparty/site_config/standard/hollywoodlife.com.txt @@ -0,0 +1,22 @@ +date: //meta[@name='sailthru.date']/@content +body: //article[contains(@class, 'entry-content')] + +strip_image_src: subscribe.png + +strip_id_or_class: wpcom-iframe-form +strip_id_or_class: gallery-thumbs +strip_id_or_class: twitter +strip_id_or_class: fb-link +strip_id_or_class: pinterest + +strip: //div[@class='data'] +strip: //iframe[contains(@name, 'wpcom')] + +find_string: <a href="http://www.youtube.com/subscription_center?add_user_id=2rJLq19N0dGrxfib80M +replace_string: </p></div></body></html><!-- + +find_string: <h3>More +replace_string: </div></body></html><!-- + +test_url: http://hollywoodlife.com/2013/10/04/miriam-carey-dead-capitol-hill-car-chase-shooting-postpartum-depression/ +test_url: http://hollywoodlife.com/feed/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/hometheaterreview.com.txt b/inc/3rdparty/site_config/standard/hometheaterreview.com.txt old mode 100644 new mode 100755 index d43e6448..8ed26ff5 --- a/inc/3rdparty/site_config/standard/hometheaterreview.com.txt +++ b/inc/3rdparty/site_config/standard/hometheaterreview.com.txt @@ -1,4 +1,4 @@ -body: //div[@id='entry-body'] -strip_id_or_class: paginate +body: //div[@id='entry-body'] +strip_id_or_class: paginate strip: //p[contains(., 'Additional Resources')] test_url: http://hometheaterreview.com/dreamvision-starlight-3-three-chip-d-ila-projector-reviewed/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/hosted.ap.org.txt b/inc/3rdparty/site_config/standard/hosted.ap.org.txt old mode 100644 new mode 100755 index e19dd526..dfd81937 --- a/inc/3rdparty/site_config/standard/hosted.ap.org.txt +++ b/inc/3rdparty/site_config/standard/hosted.ap.org.txt @@ -1,5 +1,5 @@ -body: 
//table[@class='ap-smallphoto-table'] | //div[@class='body']//*[@class='entry-content'] -tidy: no -strip_image_src: analytics.apnewsregistry - +body: //table[@class='ap-smallphoto-table'] | //div[@class='body']//*[@class='entry-content'] +tidy: no +strip_image_src: analytics.apnewsregistry + test_url: http://hosted.ap.org/dynamic/stories/U/US_SPENDING_SHOWDOWN?SITE=FLPET&SECTION=HOME&TEMPLATE=DEFAULT&CTIME=2011-04-06-07-46-50 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/howtogeek.com.txt b/inc/3rdparty/site_config/standard/howtogeek.com.txt new file mode 100755 index 00000000..baa2ed4a --- /dev/null +++ b/inc/3rdparty/site_config/standard/howtogeek.com.txt @@ -0,0 +1,11 @@ +body: //div[contains(@class, 'thecontent')] + +strip_image_src: loading.gif +find_string:src="http://cdn.howtogeek.com/public/images/blank.gif" +replace_string:- +find_string:data-href= +replace_string:src= + +strip_id_or_class: relatedside + +test_url: http://www.howtogeek.com/school/microsoft-excel-formulas-and-functions/lesson1/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/hs.fi.txt b/inc/3rdparty/site_config/standard/hs.fi.txt old mode 100644 new mode 100755 index 67125fb5..360dc725 --- a/inc/3rdparty/site_config/standard/hs.fi.txt +++ b/inc/3rdparty/site_config/standard/hs.fi.txt @@ -1,3 +1,3 @@ -prune: yes +prune: yes tidy: yes test_url: http://www.hs.fi/kotimaa/Teollisuushallin%20palo%20levitt%C3%A4%C3%A4%20vaarallista%20savua%20Tuusulassa/a1305571582405 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/ht.ly.txt b/inc/3rdparty/site_config/standard/ht.ly.txt old mode 100644 new mode 100755 index a8412d2a..46535088 --- a/inc/3rdparty/site_config/standard/ht.ly.txt +++ b/inc/3rdparty/site_config/standard/ht.ly.txt @@ -1,3 +1,3 @@ -single_page_link: //iframe[@id='hootFrame']/@src - +single_page_link: //iframe[@id='hootFrame']/@src + test_url: http://ht.ly/bOiZV \ No newline at end of file diff --git 
a/inc/3rdparty/site_config/standard/huffingtonpost.com.txt b/inc/3rdparty/site_config/standard/huffingtonpost.com.txt old mode 100644 new mode 100755 index d40513b2..d4618c14 --- a/inc/3rdparty/site_config/standard/huffingtonpost.com.txt +++ b/inc/3rdparty/site_config/standard/huffingtonpost.com.txt @@ -1,16 +1,21 @@ -title: //meta[@property="og:title"]/@content -body: //div[img[starts-with(@id, 'img_caption')]] | //div[@class="big_photo"] | //div[contains(@class, 'entry_body_text')] -date: //meta[@name="publish_date"]/@content -author: //a[@rel="author"] -author: //meta[@name="author"]/@content -prune: no -tidy: no -strip: //footer -strip_id_or_class: ps-slideshow -strip_id_or_class: fs-slideshow -strip: //p[contains(., 'Related on HuffPost:')] -# end early -replace_string(<div class="sbm-main): </body></html><div class="not-interested - -test_url: http://www.huffingtonpost.com/mitch-moxley/tracking-beijings-boom-th_b_1209828.html -test_url: http://www.huffingtonpost.com/2012/09/11/president-obama-iphone-throwdown_n_1873826.html \ No newline at end of file +title: //meta[@property="og:title"]/@content +body: //div[img[starts-with(@id, 'img_caption')]] | //div[@class="big_photo"] | //div[contains(@class, 'entry_body_text')] +date: //meta[@name="publish_date"]/@content +author: //a[@rel="author"] +author: //meta[@name="author"]/@content + +prune: no +tidy: no + +strip: //footer +strip_id_or_class: ps-slideshow +strip_id_or_class: fs-slideshow +strip: //p[contains(., 'Related on HuffPost:')] +strip_id_or_class: contribute-story +strip_id_or_class: promo_holder + +# end early +replace_string(<div class="sbm-main): </body></html><div class="not-interested + +test_url: http://www.huffingtonpost.com/mitch-moxley/tracking-beijings-boom-th_b_1209828.html +test_url: http://www.huffingtonpost.com/2012/09/11/president-obama-iphone-throwdown_n_1873826.html diff --git a/inc/3rdparty/site_config/standard/humantransit.org.txt 
b/inc/3rdparty/site_config/standard/humantransit.org.txt old mode 100644 new mode 100755 index ec7d3c06..92d3c678 --- a/inc/3rdparty/site_config/standard/humantransit.org.txt +++ b/inc/3rdparty/site_config/standard/humantransit.org.txt @@ -1,5 +1,5 @@ -title: //h3[@class="entry-header"] -date: //h2[@class="date-header"] -body: //div[contains(@class, 'entry')] +title: //h3[@class="entry-header"] +date: //h2[@class="date-header"] +body: //div[contains(@class, 'entry')] test_url: http://www.humantransit.org/2012/06/can-network-primers-reduce-grief-about-network-design.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/hurriyet.com.tr.txt b/inc/3rdparty/site_config/standard/hurriyet.com.tr.txt old mode 100644 new mode 100755 index ccf09dcc..68fd220a --- a/inc/3rdparty/site_config/standard/hurriyet.com.tr.txt +++ b/inc/3rdparty/site_config/standard/hurriyet.com.tr.txt @@ -1,7 +1,7 @@ -title: //div[@class='HaberDetayTitleHold Title']/h1 -body: //div[@id='YazarDetayText'] -author: //div[@class='HaberDetayTitleHold Title']/h1 -prune: no - -test_url: http://www.hurriyet.com.tr/ekonomi/19490260.asp +title: //div[@class='HaberDetayTitleHold Title']/h1 +body: //div[@id='YazarDetayText'] +author: //div[@class='HaberDetayTitleHold Title']/h1 +prune: no + +test_url: http://www.hurriyet.com.tr/ekonomi/19490260.asp test_url: http://www.hurriyet.com.tr/yazarlar/22078439.asp \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/hvg.hu.txt b/inc/3rdparty/site_config/standard/hvg.hu.txt old mode 100644 new mode 100755 index 06fa98d8..05e7b5f1 --- a/inc/3rdparty/site_config/standard/hvg.hu.txt +++ b/inc/3rdparty/site_config/standard/hvg.hu.txt @@ -1,9 +1,9 @@ -title: //div[@id='pg-content']//h1 -body: //div[@id='articleBody0'] -replace_string(</table>): </table><br /><br /> - -single_page_link: //div[@class="up-header"]/a - -prune: no +title: //div[@id='pg-content']//h1 +body: //div[@id='articleBody0'] +replace_string(</table>): 
</table><br /><br /> + +single_page_link: //div[@class="up-header"]/a + +prune: no test_url: http://hvg.hu/w/20111125_sparta \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/hypebeast.com.txt b/inc/3rdparty/site_config/standard/hypebeast.com.txt old mode 100644 new mode 100755 index 49b46da5..23e47545 --- a/inc/3rdparty/site_config/standard/hypebeast.com.txt +++ b/inc/3rdparty/site_config/standard/hypebeast.com.txt @@ -1,10 +1,10 @@ -body: //div[@id='content']//div[contains(@class, 'wp-image-') or contains(@class, 'entry')][1] -author: //span[@class='author']/a - -strip_id_or_class: disqus -strip_id_or_class: paginator -strip_id_or_class: photo-number - -prune: no - +body: //div[@id='content']//div[contains(@class, 'wp-image-') or contains(@class, 'entry')][1] +author: //span[@class='author']/a + +strip_id_or_class: disqus +strip_id_or_class: paginator +strip_id_or_class: photo-number + +prune: no + test_url: http://hypebeast.com/2012/11/stussy-2012-fall-winter-november-releases/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/icannabis.tumblr.com.txt b/inc/3rdparty/site_config/standard/icannabis.tumblr.com.txt new file mode 100755 index 00000000..3bda753c --- /dev/null +++ b/inc/3rdparty/site_config/standard/icannabis.tumblr.com.txt @@ -0,0 +1,9 @@ +tidy:no +prune:no + +body://div[contains(@id,'content')] + +strip_id_or_class:meta +strip_id_or_class:notes +strip_id_or_class:pagination +test_url: http://icannabis.tumblr.com/post/28660592471/reviewmswireless3000 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/idealog.co.nz.txt b/inc/3rdparty/site_config/standard/idealog.co.nz.txt new file mode 100755 index 00000000..ca88f606 --- /dev/null +++ b/inc/3rdparty/site_config/standard/idealog.co.nz.txt @@ -0,0 +1,12 @@ +body: //div[@class='content'] + +strip: //p[@class='dateline'] +strip: //hr +strip_id_or_class: share +strip_id_or_class: comments +strip_id_or_class: tags + +title: 
substring-before(//title,' ::') +author: substring-before(//p[@class='dateline'],',') +date: //p[@class='dateline']/time +test_url: http://www.idealog.co.nz/blog/2012/12/geeks-plane-help-kiwis-take-san-francisco \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/idlewords.com.txt b/inc/3rdparty/site_config/standard/idlewords.com.txt old mode 100644 new mode 100755 index e1badef7..f3b33796 --- a/inc/3rdparty/site_config/standard/idlewords.com.txt +++ b/inc/3rdparty/site_config/standard/idlewords.com.txt @@ -1,7 +1,7 @@ -title: //a[@class='post_title'] -body: //div[@class='entrybox'] -strip_id_or_class: post_title -date: //div[@class='entrybox']/b[1] -strip: //div[@class='entrybox']/b[1] +title: //a[@class='post_title'] +body: //div[@class='entrybox'] +strip_id_or_class: post_title +date: //div[@class='entrybox']/b[1] +strip: //div[@class='entrybox']/b[1] author: string('Maciej Cegłowski') test_url: http://idlewords.com/2011/08/why_arabic_is_terrific.htm \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/igeneration.fr.txt b/inc/3rdparty/site_config/standard/igeneration.fr.txt old mode 100644 new mode 100755 index d7ec2da1..45dd5f25 --- a/inc/3rdparty/site_config/standard/igeneration.fr.txt +++ b/inc/3rdparty/site_config/standard/igeneration.fr.txt @@ -1,5 +1,5 @@ -author: substring-after(substring-after(//span[@class='submitted'],'- '),'- ') -date: substring-before(//span[@class='submitted'], concat('- ',substring-after(substring-after(//span[@class='submitted'],'- '),'- '))) -body: //div[@class='content clear-block zoneApple'] +author: substring-after(substring-after(//span[@class='submitted'],'- '),'- ') +date: substring-before(//span[@class='submitted'], concat('- ',substring-after(substring-after(//span[@class='submitted'],'- '),'- '))) +body: //div[@class='content clear-block zoneApple'] test_url: http://www.igeneration.fr/iphone/l-iphone-et-l-ipad-chouchous-des-tpe-et-pme-55112 \ No newline at end of file diff 
--git a/inc/3rdparty/site_config/standard/ignoredbydinosaurs.com.txt b/inc/3rdparty/site_config/standard/ignoredbydinosaurs.com.txt old mode 100644 new mode 100755 index f74178a9..60635301 --- a/inc/3rdparty/site_config/standard/ignoredbydinosaurs.com.txt +++ b/inc/3rdparty/site_config/standard/ignoredbydinosaurs.com.txt @@ -1,7 +1,7 @@ -title://h1[@class='page-title'] -body://*[@id='content']//div[contains(@class,'node-content')] - -author://*[@id='content']//div[contains(@class,'node-submitted')]/a - +title://h1[@class='page-title'] +body://*[@id='content']//div[contains(@class,'node-content')] + +author://*[@id='content']//div[contains(@class,'node-submitted')]/a + date:substring-after(//div[contains(@class,'node-submitted')],' on ') test_url: http://ignoredbydinosaurs.com/2011/09/great-lie-lorem-ipsum \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/ilounge.com.txt b/inc/3rdparty/site_config/standard/ilounge.com.txt old mode 100644 new mode 100755 index ca1e54a8..9880b51f --- a/inc/3rdparty/site_config/standard/ilounge.com.txt +++ b/inc/3rdparty/site_config/standard/ilounge.com.txt @@ -1,13 +1,13 @@ -# Get proper Title, Author and Date info -title: substring-before(//title, '|') -author: substring-after(//h4/a[@href='http://www.ilounge.com/index.php/ilounge/aboutus/'], 'By') -date: //span[@class='instapaper_date'] - -# For Reviews & First Looks, get the intro paragraph and put it in front of the main body. -move_into(//div[@id='instapaper_para1']): //div[@id='instapaper_body'] -body: //div[@id='instapaper_para1'] -strip: //div[@class='reviewinfo'] - -# We don't use footnotes, so why bother checking for them? +# Get proper Title, Author and Date info +title: substring-before(//title, '|') +author: substring-after(//h4/a[@href='http://www.ilounge.com/index.php/ilounge/aboutus/'], 'By') +date: //span[@class='instapaper_date'] + +# For Reviews & First Looks, get the intro paragraph and put it in front of the main body. 
+move_into(//div[@id='instapaper_para1']): //div[@id='instapaper_body'] +body: //div[@id='instapaper_para1'] +strip: //div[@class='reviewinfo'] + +# We don't use footnotes, so why bother checking for them? footnotes: no test_url: http://www.ilounge.com/index.php/reviews/entry/luxa2-alum-x-for-iphone-4-4s/?utm_source=twitterfeed&utm_medium=twitter \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/ilyabirman.ru.txt b/inc/3rdparty/site_config/standard/ilyabirman.ru.txt old mode 100644 new mode 100755 index da6a60f6..51a7eb9c --- a/inc/3rdparty/site_config/standard/ilyabirman.ru.txt +++ b/inc/3rdparty/site_config/standard/ilyabirman.ru.txt @@ -1,5 +1,5 @@ -title: //div[@class='published visible e2-smart-title']//span -author: //span[@id='e2-blog-title'] -date: //p[@class='super-h'] +title: //div[@class='published visible e2-smart-title']//span +author: //span[@id='e2-blog-title'] +date: //p[@class='super-h'] body: //div[@class='text published visible'] test_url: http://ilyabirman.ru/meanwhile/2011/11/15/2/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/inc.com.txt b/inc/3rdparty/site_config/standard/inc.com.txt old mode 100644 new mode 100755 index 0589aaae..5410e64e --- a/inc/3rdparty/site_config/standard/inc.com.txt +++ b/inc/3rdparty/site_config/standard/inc.com.txt @@ -1,21 +1,21 @@ -author: substring-after(substring-before(//div[@id='byline'],'|'),'By') -author: //div[@class='byline']/a -date: //span[@class='pubdate'] -# print friendly page -body: //div[@id='text'] -# regular page -body: //div[@id= 'articlecontent'] - -strip: //div[@id= 'articlecontent']/h1 -strip: //div[@id='articlecontent']/p[@class='deck'] -strip: //div[@id='articlecontent']/div[@class='byline'] -strip: //div[@id='articlespacer'] -strip: //div[@id='incsharebox'] -strip: //div[@id='articlesidebar'] - -prune: no - -single_page_link: //a[contains(@href, 'Printer_Friendly.html')] -strip: //a[contains(., 'Dig Deeper')] -test_url: 
http://www.inc.com/guides/2010/11/seven-tips-for-lobbying-politicians.html +author: substring-after(substring-before(//div[@id='byline'],'|'),'By') +author: //div[@class='byline']/a +date: //span[@class='pubdate'] +# print friendly page +body: //div[@id='text'] +# regular page +body: //div[@id= 'articlecontent'] + +strip: //div[@id= 'articlecontent']/h1 +strip: //div[@id='articlecontent']/p[@class='deck'] +strip: //div[@id='articlecontent']/div[@class='byline'] +strip: //div[@id='articlespacer'] +strip: //div[@id='incsharebox'] +strip: //div[@id='articlesidebar'] + +prune: no + +single_page_link: //a[contains(@href, 'Printer_Friendly.html')] +strip: //a[contains(., 'Dig Deeper')] +test_url: http://www.inc.com/guides/2010/11/seven-tips-for-lobbying-politicians.html test_url: http://www.inc.com/eric-schurenberg/startups-are-we-geting-irrationally-exuberant.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/independent.co.uk.txt b/inc/3rdparty/site_config/standard/independent.co.uk.txt old mode 100644 new mode 100755 index 47baf36b..af742209 --- a/inc/3rdparty/site_config/standard/independent.co.uk.txt +++ b/inc/3rdparty/site_config/standard/independent.co.uk.txt @@ -1,9 +1,9 @@ -title: //meta[@property='og:title']/@content -body: //div[contains(@class, 'articleContent')] -date: //meta[@property='article:published_time']/@content -author: //div[@id='main']//div[@class='byline']//span[@class='authorName'] - -strip_id_or_class: RelatedArtTag - +title: //meta[@property='og:title']/@content +body: //div[contains(@class, 'articleContent')] +date: //meta[@property='article:published_time']/@content +author: //div[@id='main']//div[@class='byline']//span[@class='authorName'] + +strip_id_or_class: RelatedArtTag + tidy: no test_url: http://www.independent.co.uk/news/world/middle-east/syria-could-face-human-rights-probe-2274326.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/indiatimes.com.txt 
b/inc/3rdparty/site_config/standard/indiatimes.com.txt old mode 100644 new mode 100755 index e7a35e84..8112105f --- a/inc/3rdparty/site_config/standard/indiatimes.com.txt +++ b/inc/3rdparty/site_config/standard/indiatimes.com.txt @@ -1,6 +1,6 @@ -body: //figure[@class='mainVideo'] -strip: //figcaption - -prune: no - +body: //figure[@class='mainVideo'] +strip: //figcaption + +prune: no + test_url: http://www.indiatimes.com/bollywood/kareena-insecure-about-saif-working-with-bipasha-23386.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/inessential.com.txt b/inc/3rdparty/site_config/standard/inessential.com.txt old mode 100644 new mode 100755 index 312cec4b..52252455 --- a/inc/3rdparty/site_config/standard/inessential.com.txt +++ b/inc/3rdparty/site_config/standard/inessential.com.txt @@ -1,5 +1,5 @@ -title: //div[@class='weblogPost']/h3[1] -author: ("Brent Simmons") -date: //span[@class="weblogPostDisplayDate"] +title: //div[@class='weblogPost']/h3[1] +author: ("Brent Simmons") +date: //span[@class="weblogPostDisplayDate"] body: //div[@class='weblogPostBody'] test_url: http://inessential.com/2011/10/25/why_just_store_the_app_data_on_dropbo \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/info.abril.com.br.txt b/inc/3rdparty/site_config/standard/info.abril.com.br.txt old mode 100644 new mode 100755 index 64cf3c8e..dee69f80 --- a/inc/3rdparty/site_config/standard/info.abril.com.br.txt +++ b/inc/3rdparty/site_config/standard/info.abril.com.br.txt @@ -1,4 +1,4 @@ -title://h1 -body://div[@id='texto_link'] +title://h1 +body://div[@id='texto_link'] test_url: http://info.abril.com.br/noticias/internet/filme-do-youtube-vai-estrear-nos-cinemas-22042011-6.shl \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/infoq.com.txt b/inc/3rdparty/site_config/standard/infoq.com.txt old mode 100644 new mode 100755 index 3a4e402d..f4a328a6 --- a/inc/3rdparty/site_config/standard/infoq.com.txt +++ 
b/inc/3rdparty/site_config/standard/infoq.com.txt @@ -1,14 +1,14 @@ -body: //div[@id="intTranscript"] -body: //div[@class="box-content"] -title: //div[@class="box-content"]//h1[1] -author: //p[@class="info"]/strong -date: substring-before(substring-after(//p[@class="info"], "on"), "Length") -strip: //div[@class="box-content"]//h1[1] -strip: //div[@class="box-content"]//p[@class="info"] -strip_id_or_class: vendor-content-box -strip_id_or_class: tags2 -strip_id_or_class: instructions -strip_id_or_class: comments -strip_id_or_class: forum-list-tree +body: //div[@id="intTranscript"] +body: //div[@class="box-content"] +title: //div[@class="box-content"]//h1[1] +author: //p[@class="info"]/strong +date: substring-before(substring-after(//p[@class="info"], "on"), "Length") +strip: //div[@class="box-content"]//h1[1] +strip: //div[@class="box-content"]//p[@class="info"] +strip_id_or_class: vendor-content-box +strip_id_or_class: tags2 +strip_id_or_class: instructions +strip_id_or_class: comments +strip_id_or_class: forum-list-tree strip: //div[@class="addthis_toolbox addthis_default_style"] test_url: http://www.infoq.com/interviews/oleg-zhurakousky-javaone2011-interview \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/informador.com.mx.txt b/inc/3rdparty/site_config/standard/informador.com.mx.txt old mode 100644 new mode 100755 index eedec24f..77987493 --- a/inc/3rdparty/site_config/standard/informador.com.mx.txt +++ b/inc/3rdparty/site_config/standard/informador.com.mx.txt @@ -1,9 +1,9 @@ -title: //div[@class='tituloInt'] -body: //div[@class='notaPortada'] -strip: //img[@id='imgHorizontalInt imgDetalleImg imagenNota'] -date: //span[@class='publi'] -author: //span[@class='autor'] -tidy: no -prune: no +title: //div[@class='tituloInt'] +body: //div[@class='notaPortada'] +strip: //img[@id='imgHorizontalInt imgDetalleImg imagenNota'] +date: //span[@class='publi'] +author: //span[@class='autor'] +tidy: no +prune: no test_url: 
http://www.informador.com.mx/tecnologia/2011/337606/6/iran-desarrolla-antivirus-tras-afectaciones-por-duqu.htm \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/information.dk.txt b/inc/3rdparty/site_config/standard/information.dk.txt old mode 100644 new mode 100755 index 6e3c3b1a..3ade754d --- a/inc/3rdparty/site_config/standard/information.dk.txt +++ b/inc/3rdparty/site_config/standard/information.dk.txt @@ -1,7 +1,7 @@ -title: //meta[@property='og:title']/@content -author: //*[@property='dc:creator'] -date: //*[@property='dc:date']/@content -body: //div[@id='page-content']//div[contains(@class, 'article-body')] - +title: //meta[@property='og:title']/@content +author: //*[@property='dc:creator'] +date: //*[@property='dc:date']/@content +body: //div[@id='page-content']//div[contains(@class, 'article-body')] + tidy: no test_url: http://www.information.dk/282307 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/informationarchitects.net.txt b/inc/3rdparty/site_config/standard/informationarchitects.net.txt old mode 100644 new mode 100755 index 134306cd..1330a040 --- a/inc/3rdparty/site_config/standard/informationarchitects.net.txt +++ b/inc/3rdparty/site_config/standard/informationarchitects.net.txt @@ -1,10 +1,10 @@ -title://h1[@class="post_title"] -body://article[@class="post"] -date://h1[@class="section_separator"] -author://span[@class="post_author"] -strip://nav[@class="arrow_nav"] -strip://section[@id="contact"] -strip_id_or_class:post_title -strip_id_or_class:post_author +title://h1[@class="post_title"] +body://article[@class="post"] +date://h1[@class="section_separator"] +author://span[@class="post_author"] +strip://nav[@class="arrow_nav"] +strip://section[@id="contact"] +strip_id_or_class:post_title +strip_id_or_class:post_author strip_id_or_class:section_separator test_url: http://informationarchitects.net/blog/nzz-relaunch-a-quick-review/ \ No newline at end of file diff --git 
a/inc/3rdparty/site_config/standard/informationclearinghouse.info.txt b/inc/3rdparty/site_config/standard/informationclearinghouse.info.txt old mode 100644 new mode 100755 index 0879e9e6..60b798e6 --- a/inc/3rdparty/site_config/standard/informationclearinghouse.info.txt +++ b/inc/3rdparty/site_config/standard/informationclearinghouse.info.txt @@ -1,6 +1,6 @@ -title: //head/title -body: //table[@id='table3']//div[@class='postContent'] -prune: no -tidy: no - +title: //head/title +body: //table[@id='table3']//div[@class='postContent'] +prune: no +tidy: no + test_url: http://www.informationclearinghouse.info/article28238.htm \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/informit.com.txt b/inc/3rdparty/site_config/standard/informit.com.txt old mode 100644 new mode 100755 index 84c1fdcf..24bf6242 --- a/inc/3rdparty/site_config/standard/informit.com.txt +++ b/inc/3rdparty/site_config/standard/informit.com.txt @@ -1,7 +1,7 @@ -title: //div[@id='content']/h1 -body: //div[@id="content"] -strip: //img[contains(@src, 'informit_printer.png')] -single_page_link: //div[contains(@class, 'articleTools')]//a[contains(@href, '/printerfriendly.')] -prune: no - +title: //div[@id='content']/h1 +body: //div[@id="content"] +strip: //img[contains(@src, 'informit_printer.png')] +single_page_link: //div[contains(@class, 'articleTools')]//a[contains(@href, '/printerfriendly.')] +prune: no + test_url: http://www.informit.com/articles/article.aspx?p=1729268 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/infoworld.com.txt b/inc/3rdparty/site_config/standard/infoworld.com.txt old mode 100644 new mode 100755 index dd588ed8..d335bc4a --- a/inc/3rdparty/site_config/standard/infoworld.com.txt +++ b/inc/3rdparty/site_config/standard/infoworld.com.txt @@ -1,12 +1,12 @@ -body: //div[@id='main_text'] -title: //div[@id='main_text']/h1 -strip: //div[@id='main_text']/h1 -strip: //div[@id='main_text']/h2 -strip_id_or_class: tools -strip_id_or_class: 
articleTools -strip_id_or_class: pagination -strip_id_or_class: byline -strip_id_or_class: tweet -date: //div[@class='date'] +body: //div[@id='main_text'] +title: //div[@id='main_text']/h1 +strip: //div[@id='main_text']/h1 +strip: //div[@id='main_text']/h2 +strip_id_or_class: tools +strip_id_or_class: articleTools +strip_id_or_class: pagination +strip_id_or_class: byline +strip_id_or_class: tweet +date: //div[@class='date'] strip: //div[@class='date'] test_url: http://www.infoworld.com/d/the-industry-standard/it-jobs-the-rise-both-offshore-and-in-us-187689 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/infzm.com.txt b/inc/3rdparty/site_config/standard/infzm.com.txt old mode 100644 new mode 100755 index 012c873f..489d5aff --- a/inc/3rdparty/site_config/standard/infzm.com.txt +++ b/inc/3rdparty/site_config/standard/infzm.com.txt @@ -1,9 +1,9 @@ -# This filter is tested on: -# http://www.infzm.com/content/71068 -# http://www.infzm.com/content/41577 - -author://em[contains(@class, 'toAuthor')] -date:substring(//em[contains(@class, 'pubTime')],1) -body://section[contains(@id, 'articleContent')] +# This filter is tested on: +# http://www.infzm.com/content/71068 +# http://www.infzm.com/content/41577 + +author://em[contains(@class, 'toAuthor')] +date:substring(//em[contains(@class, 'pubTime')],1) +body://section[contains(@id, 'articleContent')] title://h1[contains(@class ,'articleHeadline clearfix')] test_url: http://www.infzm.com/content/41577 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/inhabitat.com.txt b/inc/3rdparty/site_config/standard/inhabitat.com.txt old mode 100644 new mode 100755 index 6629dafe..c63f53a6 --- a/inc/3rdparty/site_config/standard/inhabitat.com.txt +++ b/inc/3rdparty/site_config/standard/inhabitat.com.txt @@ -1,8 +1,8 @@ -# set body -body: //div[@class='post-listing'] - -# remove clutter -strip: //a/big -strip: //a/em +# set body +body: //div[@class='post-listing'] + +# remove clutter +strip: 
//a/big +strip: //a/em strip: //p/em test_url: http://inhabitat.com/2010/11/18/sliding-walls-transform-this-tokyo-house-into-an-office/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/instagr.am.txt b/inc/3rdparty/site_config/standard/instagr.am.txt old mode 100644 new mode 100755 index ad9e8214..522caebc --- a/inc/3rdparty/site_config/standard/instagr.am.txt +++ b/inc/3rdparty/site_config/standard/instagr.am.txt @@ -1,6 +1,6 @@ -title: //div[@class='caption'] -author: //p[@class='username'] - -strip: //div[@class='contents']/h3 +title: //div[@class='caption'] +author: //p[@class='username'] + +strip: //div[@class='contents']/h3 strip: //div[@class='location'] test_url: http://instagr.am/p/G-s_aciyDJ/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/interest.co.nz.txt b/inc/3rdparty/site_config/standard/interest.co.nz.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/iolanguage.com.txt b/inc/3rdparty/site_config/standard/iolanguage.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/ipadclub.nl.txt b/inc/3rdparty/site_config/standard/ipadclub.nl.txt old mode 100644 new mode 100755 index d196059e..afe058df --- a/inc/3rdparty/site_config/standard/ipadclub.nl.txt +++ b/inc/3rdparty/site_config/standard/ipadclub.nl.txt @@ -1,7 +1,7 @@ -body: //div[@id = 'post'] -strip: //div[@class = 'postinfo'] -strip: //div[@id = 'postmetanew'] -strip: //div[@class = 'paginator'] -strip: //div[@class = 'col-2'] +body: //div[@id = 'post'] +strip: //div[@class = 'postinfo'] +strip: //div[@id = 'postmetanew'] +strip: //div[@class = 'paginator'] +strip: //div[@class = 'col-2'] strip: //div[@id = 'adfactor-label'] test_url: http://www.ipadclub.nl/15808/text-writer-ipad-tekstverwerker-met-functieknoppen/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/ipadplanet.nl.txt b/inc/3rdparty/site_config/standard/ipadplanet.nl.txt old mode 100644 new 
mode 100755 index a2e49005..dedb5572 --- a/inc/3rdparty/site_config/standard/ipadplanet.nl.txt +++ b/inc/3rdparty/site_config/standard/ipadplanet.nl.txt @@ -1,7 +1,7 @@ -body: //div[@id = 'post'] -strip: //div[@class = 'postinfo'] -strip: //div[@id = 'postmetanew'] -strip: //div[@class = 'paginator'] -strip: //div[@class = 'col-2'] +body: //div[@id = 'post'] +strip: //div[@class = 'postinfo'] +strip: //div[@id = 'postmetanew'] +strip: //div[@class = 'paginator'] +strip: //div[@class = 'col-2'] strip: //div[@id = 'adfactor-label'] test_url: http://www.ipadplanet.nl/11723/steve-jobs-bevestigt-verdwijnen-fysieke-rotatieschakelaar-in-ios-4-2/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/iphoneclub.nl.txt b/inc/3rdparty/site_config/standard/iphoneclub.nl.txt old mode 100644 new mode 100755 index f8d4f6a6..850a24e9 --- a/inc/3rdparty/site_config/standard/iphoneclub.nl.txt +++ b/inc/3rdparty/site_config/standard/iphoneclub.nl.txt @@ -1,7 +1,7 @@ -body: //div[@id = 'post'] -strip: //div[@class = 'postinfo'] -strip: //div[@id = 'postmetanew'] -strip: //div[@class = 'paginator'] -strip: //div[@class = 'col-2'] -strip: //div[@id = 'adfactor-label'] +body: //div[@id = 'post'] +strip: //div[@class = 'postinfo'] +strip: //div[@id = 'postmetanew'] +strip: //div[@class = 'paginator'] +strip: //div[@class = 'col-2'] +strip: //div[@id = 'adfactor-label'] test_url: http://www.iphoneclub.nl/105808/t-mobile-mobiel-internet-wordt-duurder-maar-blijft-onbeperkt/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/iphonehacks.com.txt b/inc/3rdparty/site_config/standard/iphonehacks.com.txt old mode 100644 new mode 100755 index c97ff43c..e8ccea06 --- a/inc/3rdparty/site_config/standard/iphonehacks.com.txt +++ b/inc/3rdparty/site_config/standard/iphonehacks.com.txt @@ -1,9 +1,9 @@ -title: //meta[@name='og:title']/@content -body: //small[@class='postmetadata'] | //div[contains(@class, 'entry-content')] - -strip: //span[@vanilla-identifier] - 
-prune: no -tidy: no - +title: //meta[@name='og:title']/@content +body: //small[@class='postmetadata'] | //div[contains(@class, 'entry-content')] + +strip: //span[@vanilla-identifier] + +prune: no +tidy: no + test_url: http://www.iphonehacks.com/2012/07/app-review-process-behind-the-scenes.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/iplaysoft.com.txt b/inc/3rdparty/site_config/standard/iplaysoft.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/isource.com.txt b/inc/3rdparty/site_config/standard/isource.com.txt old mode 100644 new mode 100755 index a1c16a16..215fdf87 --- a/inc/3rdparty/site_config/standard/isource.com.txt +++ b/inc/3rdparty/site_config/standard/isource.com.txt @@ -1,6 +1,6 @@ -# Remove social buttons -strip: //div[@id='temp_Content_Right'] - -# Remove duplicate article title +# Remove social buttons +strip: //div[@id='temp_Content_Right'] + +# Remove duplicate article title strip: //*[(@class='storytitle')] test_url: http://isource.com/2010/10/24/swearch-a-cool-iphone-web-app/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/itavisen.no.txt b/inc/3rdparty/site_config/standard/itavisen.no.txt old mode 100644 new mode 100755 index 8da78cb0..3ba484a7 --- a/inc/3rdparty/site_config/standard/itavisen.no.txt +++ b/inc/3rdparty/site_config/standard/itavisen.no.txt @@ -1,6 +1,6 @@ -author: //p[@class = 'writer'] - -date: //p[@class = 'published-time'] - +author: //p[@class = 'writer'] + +date: //p[@class = 'published-time'] + body: //div[@class = 'text main'] test_url: http://www.itavisen.no/899786/old-republic-blir-gratis \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/itmedia.co.jp.txt b/inc/3rdparty/site_config/standard/itmedia.co.jp.txt new file mode 100755 index 00000000..97f00ce8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/itmedia.co.jp.txt @@ -0,0 +1,8 @@ +body: //div[@id='cmsBody'] + +next_page_link: 
//span[@id='next']/a + +strip_id_or_class: cmsCopyright +strip_id_or_class: masterSocialbuttonBtm + +test_url: http://www.itmedia.co.jp/enterprise/articles/0912/05/news002.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/itstactical.com.txt b/inc/3rdparty/site_config/standard/itstactical.com.txt old mode 100644 new mode 100755 index 550875ec..b8cb461c --- a/inc/3rdparty/site_config/standard/itstactical.com.txt +++ b/inc/3rdparty/site_config/standard/itstactical.com.txt @@ -1,12 +1,12 @@ -title: //h1[@class="entry-title"] -body: //div[@class='format_text entry-content'] -author: //span[@class="author vcard"]/a -date: //abbr[@class="published"] - -strip_id_or_class: related-posts -strip_id_or_class: membershipbox -strip_id_or_class: share_this_compact_bt - - +title: //h1[@class="entry-title"] +body: //div[@class='format_text entry-content'] +author: //span[@class="author vcard"]/a +date: //abbr[@class="published"] + +strip_id_or_class: related-posts +strip_id_or_class: membershipbox +strip_id_or_class: share_this_compact_bt + + footnotes: no test_url: http://www.itstactical.com/warcom/knives/exclusive-triple-aught-design-production-dauntless-knife-video-walkthrough/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/itwire.com.txt b/inc/3rdparty/site_config/standard/itwire.com.txt new file mode 100755 index 00000000..72b41065 --- /dev/null +++ b/inc/3rdparty/site_config/standard/itwire.com.txt @@ -0,0 +1,5 @@ +author: //a[@rel="author"] +date: //li[@class="itemDateCreated"] +strip: //div[contains(@class, 'legend-rounded')] + +test_url: http://www.itwire.com/it-industry-news/market/59661-ibm-looks-to-high-value-solutions-to-meet-changing-demands diff --git a/inc/3rdparty/site_config/standard/itworld.com.txt b/inc/3rdparty/site_config/standard/itworld.com.txt old mode 100644 new mode 100755 index d4fa604e..1ee0ee58 --- a/inc/3rdparty/site_config/standard/itworld.com.txt +++ 
b/inc/3rdparty/site_config/standard/itworld.com.txt @@ -1,5 +1,5 @@ -title: //*[@id="article-title"] -author: //*[@id="article-info"]/strong -date: //*[@class="article-dateline"]/strong +title: //*[@id="article-title"] +author: //*[@id="article-info"]/strong +date: //*[@class="article-dateline"]/strong body: //*[@id="article-content"] test_url: http://www.itworld.com/open-source/140916/android-sued-microsoft-not-linux \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/izismile.com.txt b/inc/3rdparty/site_config/standard/izismile.com.txt old mode 100644 new mode 100755 index af3f299a..b0114d35 --- a/inc/3rdparty/site_config/standard/izismile.com.txt +++ b/inc/3rdparty/site_config/standard/izismile.com.txt @@ -1,4 +1,4 @@ -body: //div[starts-with(@id, 'news-id-')] -prune: no - +body: //div[starts-with(@id, 'news-id-')] +prune: no + test_url: http://izismile.com/2011/06/13/uncanny_factoid_fashion_or_creepy_2_pics.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/jalopnik.com.txt b/inc/3rdparty/site_config/standard/jalopnik.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/jandan.net.txt b/inc/3rdparty/site_config/standard/jandan.net.txt old mode 100644 new mode 100755 index f1dd3d17..343fd6fb --- a/inc/3rdparty/site_config/standard/jandan.net.txt +++ b/inc/3rdparty/site_config/standard/jandan.net.txt @@ -1,6 +1,6 @@ -body: //div[@id='content']//div[@class = 'post f'] -strip_id_or_class: comment-big -strip_id_or_class: avatar -strip: //div[@class='time_s'] +body: //div[@id='content']//div[@class = 'post f'] +strip_id_or_class: comment-big +strip_id_or_class: avatar +strip: //div[@class='time_s'] test_url: http://jandan.net/2011/04/03/iphone-5-sony.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/jetzt.sueddeutsche.de.txt b/inc/3rdparty/site_config/standard/jetzt.sueddeutsche.de.txt old mode 100644 new mode 100755 index 6e8af934..00e4cf63 --- 
a/inc/3rdparty/site_config/standard/jetzt.sueddeutsche.de.txt +++ b/inc/3rdparty/site_config/standard/jetzt.sueddeutsche.de.txt @@ -1,22 +1,22 @@ -title: //h1 -author: //p[contains(@class, 'author')]/a -date: //p[contains(@class, 'time')] -body: //div[@class='content']/div[contains(@class, 'text')] - -# prevent "no text" errors on multi-page articles -tidy: no - -# we use a custom next-link detector instead of the print view because -# it's pretty hard to strip out the unwanted parts in the print view -autodetect_next_page: no -next_page_link: //div[contains(@class, 'text')]/div/div[contains(@class, 'paging')]/a[@class='more '] - -strip: //h1 - -strip_id_or_class: meta -strip_id_or_class: author -strip_id_or_class: paging - -# prevent "Report an Error" from being recognized as footnote +title: //h1 +author: //p[contains(@class, 'author')]/a +date: //p[contains(@class, 'time')] +body: //div[@class='content']/div[contains(@class, 'text')] + +# prevent "no text" errors on multi-page articles +tidy: no + +# we use a custom next-link detector instead of the print view because +# it's pretty hard to strip out the unwanted parts in the print view +autodetect_next_page: no +next_page_link: //div[contains(@class, 'text')]/div/div[contains(@class, 'paging')]/a[@class='more '] + +strip: //h1 + +strip_id_or_class: meta +strip_id_or_class: author +strip_id_or_class: paging + +# prevent "Report an Error" from being recognized as footnote footnotes: no test_url: http://jetzt.sueddeutsche.de/texte/anzeigen/544308/Alles-flicken \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/jjahnke.net.txt b/inc/3rdparty/site_config/standard/jjahnke.net.txt old mode 100644 new mode 100755 index 95c45ee7..d45c8899 --- a/inc/3rdparty/site_config/standard/jjahnke.net.txt +++ b/inc/3rdparty/site_config/standard/jjahnke.net.txt @@ -1,4 +1,4 @@ -body: //div[@class='entry'] -prune: no +body: //div[@class='entry'] +prune: no test_url: http://www.jjahnke.net/rundbr87.html#2514 \ 
No newline at end of file diff --git a/inc/3rdparty/site_config/standard/jobbank.gc.ca.txt b/inc/3rdparty/site_config/standard/jobbank.gc.ca.txt old mode 100644 new mode 100755 index af8d7d17..1dbe2072 --- a/inc/3rdparty/site_config/standard/jobbank.gc.ca.txt +++ b/inc/3rdparty/site_config/standard/jobbank.gc.ca.txt @@ -1,5 +1,5 @@ -body: //div[@id='formatCont_en'] - -prune: no - +body: //div[@id='formatCont_en'] + +prune: no + test_url: http://www.jobbank.gc.ca/detail-eng.aspx?Source=JobPosting&OrderNum=6397922 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/joelonsoftware.com.txt b/inc/3rdparty/site_config/standard/joelonsoftware.com.txt old mode 100644 new mode 100755 index 75fbee5a..241a361f --- a/inc/3rdparty/site_config/standard/joelonsoftware.com.txt +++ b/inc/3rdparty/site_config/standard/joelonsoftware.com.txt @@ -1,21 +1,21 @@ -# Works with old posts too, such as http://www.joelonsoftware.com/articles/fog0000000332.html - -author: substring-after(//div[@class="author"], 'by ') -date: //div[@class="date"] - -## Clean stuff at top ## - -strip: //h1[1] -strip: //h2[1] -strip: //div[@class="date"] -strip: //div[@class="author"] - -## Clean stuff at bottom ## - -strip: //blockquote[@class="textmessage"] -strip: //div[@style="width:500px"]/p[last()] -strip: //div[@style="width:500px"]/p[last()-1] -strip: //div[@style="width:500px"]/h4[last()] -strip: //div[@style="width:500px"]/h4[last()-1] +# Works with old posts too, such as http://www.joelonsoftware.com/articles/fog0000000332.html + +author: substring-after(//div[@class="author"], 'by ') +date: //div[@class="date"] + +## Clean stuff at top ## + +strip: //h1[1] +strip: //h2[1] +strip: //div[@class="date"] +strip: //div[@class="author"] + +## Clean stuff at bottom ## + +strip: //blockquote[@class="textmessage"] +strip: //div[@style="width:500px"]/p[last()] +strip: //div[@style="width:500px"]/p[last()-1] +strip: //div[@style="width:500px"]/h4[last()] +strip: 
//div[@style="width:500px"]/h4[last()-1] strip: //div[@style="width:500px"]/div[last()] test_url: http://www.joelonsoftware.com/items/2011/09/15.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/jouire.com.txt b/inc/3rdparty/site_config/standard/jouire.com.txt old mode 100644 new mode 100755 index 535a501e..3cf60672 --- a/inc/3rdparty/site_config/standard/jouire.com.txt +++ b/inc/3rdparty/site_config/standard/jouire.com.txt @@ -1,3 +1,3 @@ -author: //h1 +author: //h1 date: //p[contains(@class,'date')] test_url: http://jouire.com/2011/01/exquisite-whispers/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/joystiq.com.txt b/inc/3rdparty/site_config/standard/joystiq.com.txt old mode 100644 new mode 100755 index 7fbd467d..7a8e56f8 --- a/inc/3rdparty/site_config/standard/joystiq.com.txt +++ b/inc/3rdparty/site_config/standard/joystiq.com.txt @@ -1,8 +1,8 @@ -author: //a[@class="byline-author"] -title: //h1[@class="headline"] -strip: //div[@id="info-card"] -strip: //div[@id="breaking-news"] -strip: //div[@class="rmod list-post-mod"] -strip: //div[@id="footer"] +author: //a[@class="byline-author"] +title: //h1[@class="headline"] +strip: //div[@id="info-card"] +strip: //div[@id="breaking-news"] +strip: //div[@class="rmod list-post-mod"] +strip: //div[@id="footer"] strip: //div[@id="GH_strip"] test_url: http://www.joystiq.com/2012/06/20/magic-the-gathering-duels-of-the-planeswalkers-2013-review/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/juedische-allgemeine.de.txt b/inc/3rdparty/site_config/standard/juedische-allgemeine.de.txt old mode 100644 new mode 100755 index be844e57..ff5a0244 --- a/inc/3rdparty/site_config/standard/juedische-allgemeine.de.txt +++ b/inc/3rdparty/site_config/standard/juedische-allgemeine.de.txt @@ -1,19 +1,19 @@ -body: //div[@id='article_container'] -author: //h4//a[@class='author'] -title: //h1 - -replace_string(lang="en"): lang="de" 
-replace_string(/>1</a>):/></a> - -strip_id_or_class: share_toolbox -strip_id_or_class: article_header -strip_id_or_class: phototext - -strip_image_src: icon_author.gif - -strip: //img[@src=''] -strip: //h4[@id='author'] - -prune: no - +body: //div[@id='article_container'] +author: //h4//a[@class='author'] +title: //h1 + +replace_string(lang="en"): lang="de" +replace_string(/>1</a>):/></a> + +strip_id_or_class: share_toolbox +strip_id_or_class: article_header +strip_id_or_class: phototext + +strip_image_src: icon_author.gif + +strip: //img[@src=''] +strip: //h4[@id='author'] + +prune: no + test_url: http://www.juedische-allgemeine.de/article/view/id/13366 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/juppy.org.txt b/inc/3rdparty/site_config/standard/juppy.org.txt old mode 100644 new mode 100755 index e2d07f24..fdf7cdc9 --- a/inc/3rdparty/site_config/standard/juppy.org.txt +++ b/inc/3rdparty/site_config/standard/juppy.org.txt @@ -1,8 +1,8 @@ -convert_double_br_tags: yes - -title: //div[@id="storycredits"]/p/span[@class="title"] -author: //div[@id="storycredits"]/p/br[1]/following-sibling::text() - -strip: //div[@id="storycredits"] +convert_double_br_tags: yes + +title: //div[@id="storycredits"]/p/span[@class="title"] +author: //div[@id="storycredits"]/p/br[1]/following-sibling::text() + +strip: //div[@id="storycredits"] test_url: http://www.juppy.org/santa/stories.php?ForAuthorID=35&Year=2005 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/kachestvo.ru.txt b/inc/3rdparty/site_config/standard/kachestvo.ru.txt old mode 100644 new mode 100755 index 34404e96..535693c4 --- a/inc/3rdparty/site_config/standard/kachestvo.ru.txt +++ b/inc/3rdparty/site_config/standard/kachestvo.ru.txt @@ -1,3 +1,3 @@ -body: //div[contains(@class, 'inner_content')] +body: //div[contains(@class, 'inner_content')] test_url: http://kachestvo.ru/promtovar/odezhda/denim.html \ No newline at end of file diff --git 
a/inc/3rdparty/site_config/standard/kachiblog.com.txt b/inc/3rdparty/site_config/standard/kachiblog.com.txt new file mode 100755 index 00000000..35baf8df --- /dev/null +++ b/inc/3rdparty/site_config/standard/kachiblog.com.txt @@ -0,0 +1,7 @@ +title: //h3[contains(@class, 'entry-title')] +date: //abbr[@itemprop='datePublished']/@title +body: //div[@itemprop='articleBody'] +tidy: no + +test_url: http://www.kachiblog.com/2013/05/samsung-galaxy-s4-vs-samsung-galaxy.html +test_url: http://www.kachiblog.com/feeds/posts/default \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/kathimerini.gr.txt b/inc/3rdparty/site_config/standard/kathimerini.gr.txt new file mode 100755 index 00000000..2c7c518c --- /dev/null +++ b/inc/3rdparty/site_config/standard/kathimerini.gr.txt @@ -0,0 +1,4 @@ +title: //td[contains(@class, 'articleTitlos')] +body: //td[contains(@class, 'eelantext')] + +test_url: http://www.kathimerini.gr/4dcgi/_w_articles_kathremote_1_03/12/2013_530490 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/kenrockwell.com.txt b/inc/3rdparty/site_config/standard/kenrockwell.com.txt old mode 100644 new mode 100755 index e6d100ea..90c64cbf --- a/inc/3rdparty/site_config/standard/kenrockwell.com.txt +++ b/inc/3rdparty/site_config/standard/kenrockwell.com.txt @@ -1,7 +1,7 @@ -# Ads -strip: //table[@align="right"][@width="120"] - -# Affiliate link paragraphs -strip: //a[.="Adorama"]/parent::p[contains(., "goodies")] +# Ads +strip: //table[@align="right"][@width="120"] + +# Affiliate link paragraphs +strip: //a[.="Adorama"]/parent::p[contains(., "goodies")] strip: //a[.="Adorama"]/parent::p[contains(., "This free website's biggest source of")] test_url: http://www.kenrockwell.com/tech/composition.htm \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/kicker.de.txt b/inc/3rdparty/site_config/standard/kicker.de.txt old mode 100644 new mode 100755 index 7d5daa4b..db4f63c4 --- 
a/inc/3rdparty/site_config/standard/kicker.de.txt +++ b/inc/3rdparty/site_config/standard/kicker.de.txt @@ -1,21 +1,21 @@ -# set body -body: //div[@id='ovArtikel'] - -# set title -title: //div[@id='ovArtikel']/h1 -# strip main title and leave sub title -strip: //div[@id='ovArtikel']/h1 - -date: //div[@class='publicdate'] - -#remove captions -strip: //*/div[@class='bu'] -strip: //*/div[@class='credit'] - -#remove adds -strip: //*/div[@class='ad-head'] -strip: //*/div[@class='linksebay'] - -# remove video content +# set body +body: //div[@id='ovArtikel'] + +# set title +title: //div[@id='ovArtikel']/h1 +# strip main title and leave sub title +strip: //div[@id='ovArtikel']/h1 + +date: //div[@class='publicdate'] + +#remove captions +strip: //*/div[@class='bu'] +strip: //*/div[@class='credit'] + +#remove adds +strip: //*/div[@class='ad-head'] +strip: //*/div[@class='linksebay'] + +# remove video content strip: //*/div[@class='ovVideo'] test_url: http://www.kicker.de/news/fussball/frauen/wmfr/frauen-weltmeisterschaft/2011/3/1123662/spielbericht_frankreich-frauen_deutschland-frauen.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/kickstarter.com.txt b/inc/3rdparty/site_config/standard/kickstarter.com.txt old mode 100644 new mode 100755 index c055659f..7b3daa58 --- a/inc/3rdparty/site_config/standard/kickstarter.com.txt +++ b/inc/3rdparty/site_config/standard/kickstarter.com.txt @@ -1,7 +1,7 @@ -title: //h1[@id='name'] -body: //*[@id='leftcol'] - -strip_id_or_class: 'share-box' -strip_id_or_class: 'project-faqs' +title: //h1[@id='name'] +body: //*[@id='leftcol'] + +strip_id_or_class: 'share-box' +strip_id_or_class: 'project-faqs' strip_id_or_class: 'report-issue-wrap' test_url: http://www.kickstarter.com/projects/hop/elevation-dock-the-best-dock-for-iphone \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/kingarthurflour.com.txt b/inc/3rdparty/site_config/standard/kingarthurflour.com.txt old mode 100644 new mode 
100755 index 2f6783a3..b27539f5 --- a/inc/3rdparty/site_config/standard/kingarthurflour.com.txt +++ b/inc/3rdparty/site_config/standard/kingarthurflour.com.txt @@ -1,4 +1,4 @@ -title: //div[@class='post']/h2 -body: //div[@class='entry'] +title: //div[@class='post']/h2 +body: //div[@class='entry'] strip: //p[contains(.,'Tags:')] test_url: http://www.kingarthurflour.com/blog/2011/01/28/a-big-sandwich-for-the-big-game/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/kotaku.com.txt b/inc/3rdparty/site_config/standard/kotaku.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/kottke.org.txt b/inc/3rdparty/site_config/standard/kottke.org.txt old mode 100644 new mode 100755 index f93a61e7..582f251c --- a/inc/3rdparty/site_config/standard/kottke.org.txt +++ b/inc/3rdparty/site_config/standard/kottke.org.txt @@ -1,6 +1,6 @@ -title: //h2 -author: //*[@id='main']/div/a[1] -date: substring-before(substring-after(//div[@class='meta'],'•'),'•') -body: //div[@id='main'] -strip: //div[@class='meta'] +title: //h2 +author: //*[@id='main']/div/a[1] +date: substring-before(substring-after(//div[@class='meta'],'•'),'•') +body: //div[@id='main'] +strip: //div[@class='meta'] test_url: http://kottke.org/08/02/king-of-kong-a-fistful-of-quarters \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/kumailplus.com.txt b/inc/3rdparty/site_config/standard/kumailplus.com.txt old mode 100644 new mode 100755 index 9e15cc34..2f604de0 --- a/inc/3rdparty/site_config/standard/kumailplus.com.txt +++ b/inc/3rdparty/site_config/standard/kumailplus.com.txt @@ -1,3 +1,3 @@ -body: //div[@class = "entry-full"] +body: //div[@class = "entry-full"] test_url: http://www.kumailplus.com/2011/12/02/24308 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/kumb.com.txt b/inc/3rdparty/site_config/standard/kumb.com.txt old mode 100644 new mode 100755 index 3f0d2369..fe350622 --- 
a/inc/3rdparty/site_config/standard/kumb.com.txt +++ b/inc/3rdparty/site_config/standard/kumb.com.txt @@ -1,10 +1,10 @@ -title: //div[@id='centrediv']/h1 - -author: substring-after(//div[@id='centrediv']/h3,'By: ') - -date: substring-after(substring-before(//div[@id='centrediv']/h3,'By: '),'Filed: ') - -body: //div[@class='KonaBody'] - +title: //div[@id='centrediv']/h1 + +author: substring-after(//div[@id='centrediv']/h3,'By: ') + +date: substring-after(substring-before(//div[@id='centrediv']/h3,'By: '),'Filed: ') + +body: //div[@class='KonaBody'] + convert_double_br_tags: yes test_url: http://www.kumb.com/story.php?id=126084 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/kwerfeldein.de.txt b/inc/3rdparty/site_config/standard/kwerfeldein.de.txt old mode 100644 new mode 100755 index 879b4d6c..cf4d3b8c --- a/inc/3rdparty/site_config/standard/kwerfeldein.de.txt +++ b/inc/3rdparty/site_config/standard/kwerfeldein.de.txt @@ -1,9 +1,9 @@ -date: //span[@class='datum'] -title: //div[@class='artikel']/h2 -body: //div[@class='entry'] -strip: //p[@class='tags'] -author: substring-after(//div[@class='authorinfo']/em,'Dies ist ein Artikel von ') -strip: //div[@class='authorinfo'] -strip: //div[@class='authorpic'] +date: //span[@class='datum'] +title: //div[@class='artikel']/h2 +body: //div[@class='entry'] +strip: //p[@class='tags'] +author: substring-after(//div[@class='authorinfo']/em,'Dies ist ein Artikel von ') +strip: //div[@class='authorinfo'] +strip: //div[@class='authorpic'] test_url: http://kwerfeldein.de/index.php/2011/10/17/doppelbelichtungen-mit-konzept/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/landetsfria.se.txt b/inc/3rdparty/site_config/standard/landetsfria.se.txt new file mode 100755 index 00000000..e5317a5a --- /dev/null +++ b/inc/3rdparty/site_config/standard/landetsfria.se.txt @@ -0,0 +1,7 @@ +body: //div[contains(@class, 'layout__inner')]//div[contains(@class, 'file-image') or contains(@class, 
'node__content')] +author: //article//div[contains(@class, 'field-byline')] +strip_id_or_class: rekommenderade +strip_id_or_class: disqus +strip_id_or_class: annonser + +test_url: http://www.landetsfria.se/artikel/112070 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/laphamsquarterly.org.txt b/inc/3rdparty/site_config/standard/laphamsquarterly.org.txt old mode 100644 new mode 100755 index a34e39dd..d25999d0 --- a/inc/3rdparty/site_config/standard/laphamsquarterly.org.txt +++ b/inc/3rdparty/site_config/standard/laphamsquarterly.org.txt @@ -1,13 +1,13 @@ -title: //h1[@class='headline'] -body: //div[@class='article'] -strip: //div[@class='article']//h3[contains(@class, 'section')] -strip: //div[@class='article']//ul[contains(@class, 'article-actions')] -strip: //div[@id='syndication-upper'] -strip: //a[@id='syndication'] -strip: //dl[@id='article-tags'] -strip: //div[@id='article-like'] -prune: no - -single_page_link: //li[@class='single-page']/a - +title: //h1[@class='headline'] +body: //div[@class='article'] +strip: //div[@class='article']//h3[contains(@class, 'section')] +strip: //div[@class='article']//ul[contains(@class, 'article-actions')] +strip: //div[@id='syndication-upper'] +strip: //a[@id='syndication'] +strip: //dl[@id='article-tags'] +strip: //div[@id='article-like'] +prune: no + +single_page_link: //li[@class='single-page']/a + test_url: http://www.laphamsquarterly.org/essays/balanced-diets.php \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/laprensagrafica.com.txt b/inc/3rdparty/site_config/standard/laprensagrafica.com.txt old mode 100644 new mode 100755 index e771f81f..82374c0b --- a/inc/3rdparty/site_config/standard/laprensagrafica.com.txt +++ b/inc/3rdparty/site_config/standard/laprensagrafica.com.txt @@ -1,3 +1,3 @@ -tidy: no +tidy: no test_url: http://www.laprensagrafica.com/opinion/editorial/229252-reflexiones-sobre-la-educacion-que-necesitamos.html \ No newline at end of file diff --git 
a/inc/3rdparty/site_config/standard/laquadrature.net.txt b/inc/3rdparty/site_config/standard/laquadrature.net.txt old mode 100644 new mode 100755 index 5bad8e65..746bfca7 --- a/inc/3rdparty/site_config/standard/laquadrature.net.txt +++ b/inc/3rdparty/site_config/standard/laquadrature.net.txt @@ -1,10 +1,10 @@ -body: //div[@id='content-content']//div[@class='content'] -title: //h1[@class='title'] -date: substring-after(//*[@class='submitted'],'Submitted on') -tidy: no -strip: //div[@class='terms terms-inline'] -strip: //div[@class='more'] -strip: //div[@class='share-links'] -strip: //table[@id='attachments'] - +body: //div[@id='content-content']//div[@class='content'] +title: //h1[@class='title'] +date: substring-after(//*[@class='submitted'],'Submitted on') +tidy: no +strip: //div[@class='terms terms-inline'] +strip: //div[@class='more'] +strip: //div[@class='share-links'] +strip: //table[@id='attachments'] + test_url: http://www.laquadrature.net/en/finalization-of-eu-parliaments-weak-net-neutrality-resolution \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/lareviewofbooks.org.txt b/inc/3rdparty/site_config/standard/lareviewofbooks.org.txt old mode 100644 new mode 100755 index 504dbea1..25e36543 --- a/inc/3rdparty/site_config/standard/lareviewofbooks.org.txt +++ b/inc/3rdparty/site_config/standard/lareviewofbooks.org.txt @@ -1,12 +1,12 @@ -#meta data -title:substring-after(title,'|') - -author:substring-before( substring-after(//meta[@name = 'description']/@content, normalize-space(substring-after(//title,'|'))),' respond ') -date://h5[@class = 'postDate'] - -#text -body://div[@class = 'articleBody'] - -#clean up -strip://center -test_url: http://lareviewofbooks.org/post/14066007115/literary-transactions-and-their-vicissitudes \ No newline at end of file +#metadata +title: substring-before(//title,' |') +author: //a[contains(@class,'person') and starts-with(@href, '/contributor')] + +#text +body: //div[contains(@class, 'article_body')] + 
+#clean up +strip_id_or_class: recommended_section + +test_url: http://lareviewofbooks.org/review/american-politics-redeembale-robert-gates-hillary-clinton-two-memoirs-washington-dc +test_url: http://lareviewofbooks.org/interview/souvenirs-future diff --git a/inc/3rdparty/site_config/standard/latimes.com.txt b/inc/3rdparty/site_config/standard/latimes.com.txt old mode 100644 new mode 100755 index 0d6ac851..b2db37bf --- a/inc/3rdparty/site_config/standard/latimes.com.txt +++ b/inc/3rdparty/site_config/standard/latimes.com.txt @@ -1,11 +1,11 @@ -strip: //div[@id="tugs_story_display"] -strip: //div[@id="search_overlay"] -strip: //div[@id="adv_search"] -body: //div[@class='story'] -tidy: no -convert_double_br_tags: yes -single_page_link: //a[contains(@href, ',print.')] -strip: //p[starts-with(., 'latimes.com')] -strip: //h1[starts-with(., 'latimes.com')] +strip: //div[@id="tugs_story_display"] +strip: //div[@id="search_overlay"] +strip: //div[@id="adv_search"] +body: //div[@class='story'] +tidy: no +convert_double_br_tags: yes +single_page_link: //a[contains(@href, ',print.')] +strip: //p[starts-with(., 'latimes.com')] +strip: //h1[starts-with(., 'latimes.com')] strip_id_or_class: cubead test_url: http://www.latimes.com/news/opinion/commentary/la-oe-gartonash-wilders-20110512,0,2876761.story \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/laughingsquid.com.txt b/inc/3rdparty/site_config/standard/laughingsquid.com.txt old mode 100644 new mode 100755 index 1814988a..ab2f834f --- a/inc/3rdparty/site_config/standard/laughingsquid.com.txt +++ b/inc/3rdparty/site_config/standard/laughingsquid.com.txt @@ -1,3 +1,3 @@ -title: //h1[@class='entry-title'] +title: //h1[@class='entry-title'] body: //div[@class='entry-content'] test_url: http://laughingsquid.com/mysterious-tiny-doors-appearing-around-san-francisco/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/leancrew.com.txt 
b/inc/3rdparty/site_config/standard/leancrew.com.txt old mode 100644 new mode 100755 index 0a4c84ba..e78cf7e6 --- a/inc/3rdparty/site_config/standard/leancrew.com.txt +++ b/inc/3rdparty/site_config/standard/leancrew.com.txt @@ -1,9 +1,9 @@ -title: //div[@id="content"]/h1[1] -date: substring-before(//p[@class="postdate"], ' at ') -author: ("Dr. Drang") - -strip: //div[@id="content"]/h1[1] -strip: //p[@class="postdate"] -strip: //h2[@id="respond"] +title: //div[@id="content"]/h1[1] +date: substring-before(//p[@class="postdate"], ' at ') +author: ("Dr. Drang") + +strip: //div[@id="content"]/h1[1] +strip: //p[@class="postdate"] +strip: //h2[@id="respond"] strip: //blockquote[@class="bbpTweet"]/p/span/a/img test_url: http://www.leancrew.com/all-this/2011/12/more-shell-less-egg/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/lefigaro.fr.txt b/inc/3rdparty/site_config/standard/lefigaro.fr.txt old mode 100644 new mode 100755 index f5494b96..e720e377 --- a/inc/3rdparty/site_config/standard/lefigaro.fr.txt +++ b/inc/3rdparty/site_config/standard/lefigaro.fr.txt @@ -1,8 +1,8 @@ -title: //meta[@name='title']/@content -author: //span[@class='sign']//a[@class='journaliste'] -author: //meta[@name='author']/@content -body: //*[@id='article']/div[@class='photo'] | //*[@id='article']/h2 | //*[@id='article']/div[@class='texte'] -date: //time[@pubdate]/@datetime -prune: no -test_url: http://www.lefigaro.fr/environnement/2011/11/10/01029-20111110ARTFIG00801-la-chine-confrontee-a-un-immense-defi-ecologique.php +title: //meta[@name='title']/@content +author: //span[@class='sign']//a[@class='journaliste'] +author: //meta[@name='author']/@content +body: //*[@id='article']/div[@class='photo'] | //*[@id='article']/h2 | //*[@id='article']/div[@class='texte'] +date: //time[@pubdate]/@datetime +prune: no +test_url: http://www.lefigaro.fr/environnement/2011/11/10/01029-20111110ARTFIG00801-la-chine-confrontee-a-un-immense-defi-ecologique.php test_url: 
http://www.lefigaro.fr/conjoncture/2012/11/20/20002-20121120ARTFIG00609-l-usager-devrait-payer-plus-pour-financer-les-transports.php \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/lemonde.fr.txt b/inc/3rdparty/site_config/standard/lemonde.fr.txt old mode 100644 new mode 100755 index eb205275..097999b6 --- a/inc/3rdparty/site_config/standard/lemonde.fr.txt +++ b/inc/3rdparty/site_config/standard/lemonde.fr.txt @@ -1,13 +1,18 @@ -title: //h1 - -# they have a single component containing both author and date -#author: //p[@class='source'] -#date: //p[@class='source'] - -body: //div[@class='contenu_article'] -#Shoot the insane "conjugaison.lemonde.fr" links : -strip: //a[contains(@class, 'listLink')] - -prune: no - -test_url: http://www.lemonde.fr/economie/article/2011/07/05/moody-s-abaisse-la-note-du-portugal-de-quatre-crans_1545237_3234.html \ No newline at end of file +title: //h1 + +# We can have multiple authors +author: //a[@class='auteur'] + +# Last edition date (if any) +date: //time[@itemprop='dateModified']/@datetime +# Publication date +date: //time[@itemprop='datePublished']/@datetime + + +body: //div[@id='articleBody'] +#Shoot the insane "conjugaison.lemonde.fr" links : +#strip: //a[contains(@class, 'conjug')] + +prune: no + +test_url: http://www.lemonde.fr/economie/article/2011/07/05/moody-s-abaisse-la-note-du-portugal-de-quatre-crans_1545237_3234.html diff --git a/inc/3rdparty/site_config/standard/lesnumeriques.com.txt b/inc/3rdparty/site_config/standard/lesnumeriques.com.txt old mode 100644 new mode 100755 index 9b57f726..51e025ae --- a/inc/3rdparty/site_config/standard/lesnumeriques.com.txt +++ b/inc/3rdparty/site_config/standard/lesnumeriques.com.txt @@ -1,9 +1,9 @@ -title: //h1/following::span[@class='fn'] -# Author: should stop parsing until <br> reached, but I don't know how to do this. 
-author: //following::div[@class='PDate2'] -date: //following::div[@class='PDate2']/strong - -body: //div[@class='ArTexte'] -body: //div[@id='prod_txt_b'] -body: //div[@class='ArPhotoP'] +title: //h1/following::span[@class='fn'] +# Author: should stop parsing until <br> reached, but I don't know how to do this. +author: //following::div[@class='PDate2'] +date: //following::div[@class='PDate2']/strong + +body: //div[@class='ArTexte'] +body: //div[@id='prod_txt_b'] +body: //div[@class='ArPhotoP'] test_url: http://www.lesnumeriques.com/disque-dur-multimedia/popcorn-hour-300-p12231/test.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/letemps.ch.txt b/inc/3rdparty/site_config/standard/letemps.ch.txt old mode 100644 new mode 100755 index c4bee7ec..49b019f9 --- a/inc/3rdparty/site_config/standard/letemps.ch.txt +++ b/inc/3rdparty/site_config/standard/letemps.ch.txt @@ -1,3 +1,3 @@ -title: //h2 +title: //h2 strip_image_src: logo.gif test_url: http://www.letemps.ch/Facet/print/Uuid/7c9f912c-07c9-11e0-9b50-4d96c9eca37f \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/libcom.org.txt b/inc/3rdparty/site_config/standard/libcom.org.txt new file mode 100755 index 00000000..d1404d10 --- /dev/null +++ b/inc/3rdparty/site_config/standard/libcom.org.txt @@ -0,0 +1,7 @@ +date: //span[contains(@class, 'page-date')] +body: //div[@id='node-page'] +strip_id_or_class: book-navigation +prune: no + +test_url: http://libcom.org/library/what-was-the-ussr-aufheben-1 +test_url: http://libcom.org/library-latest/feed \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/lifeandculture.fr.txt b/inc/3rdparty/site_config/standard/lifeandculture.fr.txt old mode 100644 new mode 100755 index c3888aa8..0e1dceb1 --- a/inc/3rdparty/site_config/standard/lifeandculture.fr.txt +++ b/inc/3rdparty/site_config/standard/lifeandculture.fr.txt @@ -1,3 +1,3 @@ -title: //h2[@class="entry-title"] +title: //h2[@class="entry-title"] body: 
//div[@class="entry-content"] test_url: http://www.lifeandculture.fr/digital/facebook-and-the-epiphanator-an-end-to-endings/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/lifehacker.com.txt b/inc/3rdparty/site_config/standard/lifehacker.com.txt old mode 100644 new mode 100755 index 32ade14a..ec97f06c --- a/inc/3rdparty/site_config/standard/lifehacker.com.txt +++ b/inc/3rdparty/site_config/standard/lifehacker.com.txt @@ -1,42 +1,47 @@ -# Adds author text: Gawker sites commonly show as "Author: View Profile" -author://a[@class="plus-icon modfont"] - -# Add date and time -date: //span[@class="date"] - -# Remove date and time from article text -strip: //span[@class="date"] - -# Remove login/comment text -strip: //*[(@class="presence_control_external smalltype")] - -strip: //div[@class="nodebyline modfont"] - -# Remove right sidebar -strip: //div[@id="rightwrapper"] - -# Remove print header -strip: //div[@id='printhead']/h1 - -# Remove 'content is restricted' -strip: //div[@id='agegate_IDHERE'] - -# Remove follow text -strip: //*[(@class="permalink_ads")] - -# Remove view/comment count -strip: //div[@id='wrapper']/div[2][@class='postmeta_permalink_wrapper']/div[1][@class='postmeta_permalink']/div[2][@class='pm_line'] - -# Remove contact text -strip: //div[@id='wrapper']/div[1][@class='content permalink']/p[6][@class='contactinfo'] - -# Remove medium duplicates of the article image -strip_image_src: medium.jpg - -# Remove "arrow" class at bottom of page -strip: //p[@class="arrow"] - -# Remove "track" image from article body -strip: //img[@alt="track"] -test_url: http://lifehacker.com/5925801/how-can-i-turn-vague-goals-into-actionable-to+dos -test_url: http://lifehacker.com/5941600/hack-an-old-computer-mouse-into-a-wireless-bluetooth-mouse \ No newline at end of file +# Adds author text: Gawker sites commonly show as "Author: View Profile" +author://a[@class="plus-icon modfont"] + +# Add date and time +date: //span[@class="date"] + +body: 
//div[contains(@class, 'marquee-asset-wrapper') or contains(@class, 'post-content')] + +# Remove date and time from article text +strip: //span[@class="date"] + +# Remove login/comment text +strip: //*[(@class="presence_control_external smalltype")] + +strip: //div[@class="nodebyline modfont"] + +# Remove right sidebar +strip: //div[@id="rightwrapper"] + +# Remove print header +strip: //div[@id='printhead']/h1 + +# Remove 'content is restricted' +strip: //div[@id='agegate_IDHERE'] + +# Remove follow text +strip: //*[(@class="permalink_ads")] + +strip_id_or_class: inset_groups + +# Remove view/comment count +strip: //div[@id='wrapper']/div[2][@class='postmeta_permalink_wrapper']/div[1][@class='postmeta_permalink']/div[2][@class='pm_line'] + +# Remove contact text +strip: //div[@id='wrapper']/div[1][@class='content permalink']/p[6][@class='contactinfo'] + +# Remove medium duplicates of the article image +strip_image_src: medium.jpg + +# Remove "arrow" class at bottom of page +strip: //p[@class="arrow"] + +# Remove "track" image from article body +strip: //img[@alt="track"] +test_url: http://lifehacker.com/5925801/how-can-i-turn-vague-goals-into-actionable-to+dos +test_url: http://lifehacker.com/5941600/hack-an-old-computer-mouse-into-a-wireless-bluetooth-mouse +test_url: http://lifehacker.com/what-happens-to-the-brain-when-you-meditate-and-how-it-1202533314 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/lifestyle.inquirer.net.txt b/inc/3rdparty/site_config/standard/lifestyle.inquirer.net.txt new file mode 100755 index 00000000..25d544ae --- /dev/null +++ b/inc/3rdparty/site_config/standard/lifestyle.inquirer.net.txt @@ -0,0 +1,7 @@ +title: //h1[@class='singlePageTitle'] + +strip: //p[contains(text(), 'Follow Us')] +strip: //p/strong[contains(text(), 'Recent Stories:')] +strip: //div[@id="sharefeature"] + +test_url: http://lifestyle.inquirer.net/100223/dusting-your-ceiling-fan diff --git 
a/inc/3rdparty/site_config/standard/lifeweek.com.cn.txt b/inc/3rdparty/site_config/standard/lifeweek.com.cn.txt new file mode 100755 index 00000000..e09f6692 --- /dev/null +++ b/inc/3rdparty/site_config/standard/lifeweek.com.cn.txt @@ -0,0 +1,23 @@ +# This filter is tested on: +# http://www.lifeweek.com.cn/2012/1211/39439.shtml +# http://www.lifeweek.com.cn/2013/0308/40213.shtml + +title:substring-before(//h1, '(') +title://h1 +date://ul[@class='authorbox']/li +author: substring-after(//ul[@class='authorbox']/li/following-sibling::li, '作者:') + +next_page_link: //div[@class='pageturn_list']/a[@class='pagedown'] +body: //div[@class='original '] + +strip://h1 +strip://ul[@class='authorbox'] +strip://span[@class='app_p'] +strip://div[@style='text-align:right;'] +strip://div[@class='pageturn_list'] +strip://div[@class='lifespeaks'] +strip://div[@class='vright fr'] +strip://div[@class='copyrt mg20'] +strip://div[@class='keyabout mg20'] +strip://ul[@class='readabout mg20'] +test_url: http://www.lifeweek.com.cn/2013/0308/40213.shtml \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/linkedin.com.txt b/inc/3rdparty/site_config/standard/linkedin.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/livescience.com.txt b/inc/3rdparty/site_config/standard/livescience.com.txt new file mode 100755 index 00000000..5275d34a --- /dev/null +++ b/inc/3rdparty/site_config/standard/livescience.com.txt @@ -0,0 +1,20 @@ +title: //div[@class="album_title"]//h1 +author: substring-before(//div[@class='by_line'], ',') +date: substring-after(substring-before(//div[@class="album_time"], ' Time'), 'Date: ') +body: //div[@class="about_text"] + +strip: //div[@class='large_popper'] +strip: //span[contains(@id, 'mag_glass')] +strip: //span[contains(@class, 'img_overlay')] +strip: //td//span +strip: //div[@class="center_adsense"] +strip: //div[@class="article_info"]//div[@class='asset_section'] +strip: //div[@class="article_additional"] 
+strip: //div[contains(@style, 'overflow:hidden')] +strip: //div[@class="aa_text"] +strip: //div[@id='nointelliTXT'] + +prune: no +autodetect_on_failure: no + +test_url: http://www.livescience.com/34569-why-flowers-close-at-night-nyctinasty.html diff --git a/inc/3rdparty/site_config/standard/longform.org.txt b/inc/3rdparty/site_config/standard/longform.org.txt old mode 100644 new mode 100755 index 48d5e1a7..1310ec0d --- a/inc/3rdparty/site_config/standard/longform.org.txt +++ b/inc/3rdparty/site_config/standard/longform.org.txt @@ -1,3 +1,3 @@ -single_page_link: //div[@class="post"]/div[@class="title"]/a +single_page_link: //div[@class="post"]/div[@class="title"]/a test_url: http://longform.org/2011/05/06/disconcerting-new-answers-in-models-suicide/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/loopinsight.com.txt b/inc/3rdparty/site_config/standard/loopinsight.com.txt old mode 100644 new mode 100755 index 08ad90c3..730af947 --- a/inc/3rdparty/site_config/standard/loopinsight.com.txt +++ b/inc/3rdparty/site_config/standard/loopinsight.com.txt @@ -1,9 +1,9 @@ -body: //div[@class='container_16']//div[@class='grid_11'] -strip: //h2[@class='mast'] -strip: //div[@class='container_16']//div[@class='grid_11']/h1 -strip: //div[@class='container_16']//div[@class='grid_11']/p[1] -strip: //div[@class='container_16']//div[@class='grid_11']/div -author: //a[starts-with(@title, 'Posts by')] -date: substring-before(substring-after(//time, 'Posted on '), ' at') -test_url: http://www.loopinsight.com/2012/09/13/forget-iphone-5-naysayers-this-thing-is-big/ +body: //div[@class='container_16']//div[@class='grid_11'] +strip: //h2[@class='mast'] +strip: //div[@class='container_16']//div[@class='grid_11']/h1 +strip: //div[@class='container_16']//div[@class='grid_11']/p[1] +strip: //div[@class='container_16']//div[@class='grid_11']/div +author: //a[starts-with(@title, 'Posts by')] +date: substring-before(substring-after(//time, 'Posted on '), ' at') +test_url: 
http://www.loopinsight.com/2012/09/13/forget-iphone-5-naysayers-this-thing-is-big/ test_url: http://www.loopinsight.com/2011/05/20/playbook-returns-high-misses-sales-targets-by-90/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/lostgarden.com.txt b/inc/3rdparty/site_config/standard/lostgarden.com.txt old mode 100644 new mode 100755 index a823e649..d7eb0fa0 --- a/inc/3rdparty/site_config/standard/lostgarden.com.txt +++ b/inc/3rdparty/site_config/standard/lostgarden.com.txt @@ -1,3 +1,3 @@ -prune: no +prune: no convert_double_br_tags: yes test_url: http://www.lostgarden.com/2012/04/loops-and-arcs.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/lovefm.com.txt b/inc/3rdparty/site_config/standard/lovefm.com.txt new file mode 100755 index 00000000..20d26c56 --- /dev/null +++ b/inc/3rdparty/site_config/standard/lovefm.com.txt @@ -0,0 +1,6 @@ +title: //*[@id='title'] +date: //*[@id='date'] +body: //*[@id='desc'] +tidy: no + +test_url: http://www.lovefm.com/local_news.php?item=2176 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/lovetv.com.bz.txt b/inc/3rdparty/site_config/standard/lovetv.com.bz.txt new file mode 100755 index 00000000..a71fccdd --- /dev/null +++ b/inc/3rdparty/site_config/standard/lovetv.com.bz.txt @@ -0,0 +1,9 @@ +title: //div[contains(@class, 'post')]//h1 +body: //div[contains(@class, 'post')] +strip: //hr +strip_id_or_class: post-meta + +prune: no + +test_url: http://www.lovetv.com.bz/2013/06/28/recently-discovered-ancient-maya-wooden-canoe-paddle-to-be-handed-over-to-archaeology/ +test_url: http://www.lovetv.com.bz/feed/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/lrb.co.uk.txt b/inc/3rdparty/site_config/standard/lrb.co.uk.txt old mode 100644 new mode 100755 index ce5053d4..f1aacb7d --- a/inc/3rdparty/site_config/standard/lrb.co.uk.txt +++ b/inc/3rdparty/site_config/standard/lrb.co.uk.txt @@ -1,8 +1,12 @@ -title: 
substring-before(//title, ' � LRB') - -body: //div[@class="article-body indent"] - -date: substring-after(//p[@class="meta-info"]/a, '� ') - -prune: no -test_url: http://www.lrb.co.uk/v33/n18/james-meek/its-already-happened \ No newline at end of file +title: //div[contains(@class, "article-body")]/hgroup/h1 +body: //div[contains(@class, "article-body")] + +date: substring-after(//p[@class="meta-info"]/a, '· ') + +author: //div[contains(@class, "article-body")]/hgroup/h2 + +strip_id_or_class: print-hide +strip_id_or_class: books + +test_url: http://www.lrb.co.uk/v33/n18/james-meek/its-already-happened +test_url: http://www.lrb.co.uk/v36/n13/benjamin-kunkel/paupers-and-richlings diff --git a/inc/3rdparty/site_config/standard/luminous-landscape.com.txt b/inc/3rdparty/site_config/standard/luminous-landscape.com.txt old mode 100644 new mode 100755 index 92ccf3ba..b445f5eb --- a/inc/3rdparty/site_config/standard/luminous-landscape.com.txt +++ b/inc/3rdparty/site_config/standard/luminous-landscape.com.txt @@ -1,6 +1,6 @@ -title: //h2 - -body: // div[@id='content'] - +title: //h2 + +body: // div[@id='content'] + strip: //div[@class='sidebar_wrapper'] test_url: http://www.luminous-landscape.com/tutorials/optimizing_exposure.shtml \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/luxuo.com.txt b/inc/3rdparty/site_config/standard/luxuo.com.txt new file mode 100755 index 00000000..a3d5cb17 --- /dev/null +++ b/inc/3rdparty/site_config/standard/luxuo.com.txt @@ -0,0 +1,4 @@ +body: //div[@class='post-content'] +prune: no + +test_url: http://www.luxuo.com/watches/feed \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/m.bbc.co.uk.txt b/inc/3rdparty/site_config/standard/m.bbc.co.uk.txt old mode 100644 new mode 100755 index a8af5438..d1ff0b43 --- a/inc/3rdparty/site_config/standard/m.bbc.co.uk.txt +++ b/inc/3rdparty/site_config/standard/m.bbc.co.uk.txt @@ -1,8 +1,8 @@ -title: //div[@class="story-body"]/div[@class="story-inner"]/h1 
-body: //div[@class="story-body"] -date: //p[@class='date']/strong -author: substring-after(//div[@class="story-inner"]/div[@class="byline"]//span[@class='name'], 'By') - -strip: //div[@class="story-inner"]/div[@class="byline"] +title: //div[@class="story-body"]/div[@class="story-inner"]/h1 +body: //div[@class="story-body"] +date: //p[@class='date']/strong +author: substring-after(//div[@class="story-inner"]/div[@class="byline"]//span[@class='name'], 'By') + +strip: //div[@class="story-inner"]/div[@class="byline"] test_url: http://m.bbc.co.uk/news/science-environment-19144464 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/m.douban.com.txt b/inc/3rdparty/site_config/standard/m.douban.com.txt new file mode 100755 index 00000000..ce9a3167 --- /dev/null +++ b/inc/3rdparty/site_config/standard/m.douban.com.txt @@ -0,0 +1,13 @@ +# This filter is tested on: +# http://m.douban.com/note/240776310/?session=6ac86d1e +# http://m.douban.com/note/208270705/?session=e00ec732_3433229 + +title: //h2 +author: //a[@class='founder'] +date: substring-after(//span[@class='info'],' | ') +body: //div[contains(@class,'entry item')] + +strip://span[contains(@class,'info')] + +convert_double_br_tags: yes +test_url: http://m.douban.com/note/240776310/?session=6ac86d1e \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/m.vanityfair.com.txt b/inc/3rdparty/site_config/standard/m.vanityfair.com.txt new file mode 100755 index 00000000..e47ce2ce --- /dev/null +++ b/inc/3rdparty/site_config/standard/m.vanityfair.com.txt @@ -0,0 +1,11 @@ +# Article Metadata +title: //h1 +author: //span[@class="name"]/a +date: //time + +# Content Pruning +strip: //h5 +strip: //time +strip: //div[@class="byline"] +strip: //h2[@class="headline "] +test_url: http://m.vanityfair.com/politics/2012/10/michael-lewis-profile-barack-obama \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/mac4ever.com.txt 
b/inc/3rdparty/site_config/standard/mac4ever.com.txt old mode 100644 new mode 100755 index 892b47f5..9999758b --- a/inc/3rdparty/site_config/standard/mac4ever.com.txt +++ b/inc/3rdparty/site_config/standard/mac4ever.com.txt @@ -1,5 +1,5 @@ -author: substring-after(//div[@class='author'],'Par ') -date: //div[@class='date'] -body: //div[@class='content'] +author: substring-after(//div[@class='author'],'Par ') +date: //div[@class='date'] +body: //div[@class='content'] test_url: http://www.mac4ever.com/news/64182/icloud_les_prix_en_euros_et_en_chf/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/macdrifter.com.txt b/inc/3rdparty/site_config/standard/macdrifter.com.txt old mode 100644 new mode 100755 index fd1ede7d..e57bd640 --- a/inc/3rdparty/site_config/standard/macdrifter.com.txt +++ b/inc/3rdparty/site_config/standard/macdrifter.com.txt @@ -1,2 +1,2 @@ -title: substring-before(//title,' � Macdrifter') +title: substring-before(//title,' « Macdrifter') test_url: http://www.macdrifter.com/2012/03/instacast-on-my-mac/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/macformat.techradar.com.txt b/inc/3rdparty/site_config/standard/macformat.techradar.com.txt old mode 100644 new mode 100755 index 109eae45..522efb49 --- a/inc/3rdparty/site_config/standard/macformat.techradar.com.txt +++ b/inc/3rdparty/site_config/standard/macformat.techradar.com.txt @@ -1,9 +1,9 @@ -# Remove news feed -strip: //div[@id='news_feed_front'] - -# Remove pull quote -strip: //div[@class='field field-type-text field-field-pull-quote'] - -# Remove login +# Remove news feed +strip: //div[@id='news_feed_front'] + +# Remove pull quote +strip: //div[@class='field field-type-text field-field-pull-quote'] + +# Remove login strip: //div[@class='right_bar_login'] test_url: http://macformat.techradar.com/blog/solid-state-storage-bringing-parity-back-mac-29-10-10&article=89189666 \ No newline at end of file diff --git 
a/inc/3rdparty/site_config/standard/macgeneration.com.txt b/inc/3rdparty/site_config/standard/macgeneration.com.txt old mode 100644 new mode 100755 index e6bbe28e..739eff4e --- a/inc/3rdparty/site_config/standard/macgeneration.com.txt +++ b/inc/3rdparty/site_config/standard/macgeneration.com.txt @@ -1,5 +1,5 @@ -author: substring-before(substring-after(//div[@class='dateNews'],'par '),' le') -date: substring-after(//div[@class='dateNews'],' le ') -body: //div[@class='singleNews zoneApple'] +author: substring-before(substring-after(//div[@class='dateNews'],'par '),' le') +date: substring-after(//div[@class='dateNews'],' le ') +body: //div[@class='singleNews zoneApple'] test_url: http://www.macgeneration.com/news/voir/211162/dropbox-encore-un-mac-et-deux-comptes-dropbox \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/macmagazine.com.br.txt b/inc/3rdparty/site_config/standard/macmagazine.com.br.txt old mode 100644 new mode 100755 index 47ebfd79..da7df695 --- a/inc/3rdparty/site_config/standard/macmagazine.com.br.txt +++ b/inc/3rdparty/site_config/standard/macmagazine.com.br.txt @@ -1,21 +1,21 @@ -# Remove sliders -strip: //*[(@class="slides_container")] -strip: //div[(@id="slides_two")] - -# Remove tag cloud -strip: //span[(@class="secao")] - -# Fix date article -# TODO - -# Remove other stuff -strip: //div[(@id="idc-container")] -strip: //div[(@id="idc-noscript")] -strip: //div[(@class="linkwithin_div")] -strip: //div[(@class="navPosts")] -strip: //div[(@id="lateral")] -strip: //div[(@id="autor")] -strip: //div[(@id="rodape")] -strip: //div[(@id="post")]/h1 +# Remove sliders +strip: //*[(@class="slides_container")] +strip: //div[(@id="slides_two")] + +# Remove tag cloud +strip: //span[(@class="secao")] + +# Fix date article +# TODO + +# Remove other stuff +strip: //div[(@id="idc-container")] +strip: //div[(@id="idc-noscript")] +strip: //div[(@class="linkwithin_div")] +strip: //div[(@class="navPosts")] +strip: //div[(@id="lateral")] +strip: 
//div[(@id="autor")] +strip: //div[(@id="rodape")] +strip: //div[(@id="post")]/h1 strip: //div[(@id="post")]/div[(@id="boxInformacoes")] test_url: http://macmagazine.com.br/2011/08/01/skype-para-ipad-esta-finalmente-chegando-a-app-store/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/macrumors.com.txt b/inc/3rdparty/site_config/standard/macrumors.com.txt old mode 100644 new mode 100755 index 76f999d3..83cfb4a6 --- a/inc/3rdparty/site_config/standard/macrumors.com.txt +++ b/inc/3rdparty/site_config/standard/macrumors.com.txt @@ -1,10 +1,12 @@ -author: substring-after(//div[@class='byline'], " by ") -date: substring-before(//div[@class='byline'], " by ") - -# set body -body: //div[@class='content'] - -# set title -title: //h3 +author: substring-after(//div[@class='byline'], " by ") +date: substring-before(//div[@class='byline'], " by ") + +# set body +body: //div[@class='content'] +strip_id_or_class: commentsContainer +strip_id_or_class: linkback + +# set title +title: //h3 #strip: //div[@class='content']/h3 -test_url: http://www.macrumors.com/2010/11/10/apple-debuts-new-apple-tv-and-itunes-movie-content-in-japan/ \ No newline at end of file +test_url: http://www.macrumors.com/2010/11/10/apple-debuts-new-apple-tv-and-itunes-movie-content-in-japan/ diff --git a/inc/3rdparty/site_config/standard/macstories.net.txt b/inc/3rdparty/site_config/standard/macstories.net.txt old mode 100644 new mode 100755 index 6e651ca0..639fdd19 --- a/inc/3rdparty/site_config/standard/macstories.net.txt +++ b/inc/3rdparty/site_config/standard/macstories.net.txt @@ -1,8 +1,8 @@ -strip: //*[(@id = "featured")] - -author:substring-after( //div[@class='posttitle']/h2[@class='author'],'by ') - -date: concat(//div[@class='month'],' ',//div[@class='day']) - -#macstories doesn't provide a year, but month/day is better than nothing +strip: //*[(@id = "featured")] + +author:substring-after( //div[@class='posttitle']/h2[@class='author'],'by ') + +date: 
concat(//div[@class='month'],' ',//div[@class='day']) + +#macstories doesn't provide a year, but month/day is better than nothing test_url: http://www.macstories.net/news/instapaper-4-0-available-completely-redesigned-ipad-ui-new-features-search-subscription/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/mactalk.com.au.txt b/inc/3rdparty/site_config/standard/mactalk.com.au.txt old mode 100644 new mode 100755 index e8d60522..9be865af --- a/inc/3rdparty/site_config/standard/mactalk.com.au.txt +++ b/inc/3rdparty/site_config/standard/mactalk.com.au.txt @@ -1,4 +1,4 @@ -author://div[@class="article_username_container_full"] -date://div[@class="article_username_container"] +author://div[@class="article_username_container_full"] +date://div[@class="article_username_container"] body://div[@class="article cms_clear restore postcontainer"] test_url: http://www.mactalk.com.au/content/chat-basil-shkara-developer-taptax-2452/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/mactechnews.de.txt b/inc/3rdparty/site_config/standard/mactechnews.de.txt old mode 100644 new mode 100755 index c3fc0e44..5c03518a --- a/inc/3rdparty/site_config/standard/mactechnews.de.txt +++ b/inc/3rdparty/site_config/standard/mactechnews.de.txt @@ -1,3 +1,3 @@ -title: substring-after(substring-after(//title, '>'), '>') +title: substring-after(substring-after(//title, '>'), '>') body: //div[@class='NewsArticleContent'] test_url: http://www.mactechnews.de/news/index/Apple-Pressekonferenz-zum-iPhone-4-147316.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/macworld.com.txt b/inc/3rdparty/site_config/standard/macworld.com.txt old mode 100644 new mode 100755 index 96175872..e7d97202 --- a/inc/3rdparty/site_config/standard/macworld.com.txt +++ b/inc/3rdparty/site_config/standard/macworld.com.txt @@ -1,24 +1,24 @@ -title: //article//h1 -date: //meta[@name="date"]/@content -author: //div[@class="author-name" or 
@class="article-byline"]/a[1] - -body: //section[@class="page"] - -# remove 'From the Lab' and 'Recent posts' text -strip: //div[@class='blogLabel'] - -# remove byline and meta info -strip: //div[@class="article-meta"] -strip: //div[@class="author-info"] - -#strip tags and categories -strip: //div[@class="department"] - -#strip product cap links -strip: //div[@class="cap-main"] -strip: //div[@id="compare-lede"] - -prune: no - -# copes less well with Review pages, seems fine for News +title: //article//h1 +date: //meta[@name="date"]/@content +author: //div[@class="author-name" or @class="article-byline"]/a[1] + +body: //section[@class="page"] + +# remove 'From the Lab' and 'Recent posts' text +strip: //div[@class='blogLabel'] + +# remove byline and meta info +strip: //div[@class="article-meta"] +strip: //div[@class="author-info"] + +#strip tags and categories +strip: //div[@class="department"] + +#strip product cap links +strip: //div[@class="cap-main"] +strip: //div[@id="compare-lede"] + +prune: no + +# copes less well with Review pages, seems fine for News test_url: http://www.macworld.com/article/163184/2011/10/the_ipod_as_an_iconic_cultural_force.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/mainichi.jp.txt b/inc/3rdparty/site_config/standard/mainichi.jp.txt old mode 100644 new mode 100755 index e701207f..414a2f53 --- a/inc/3rdparty/site_config/standard/mainichi.jp.txt +++ b/inc/3rdparty/site_config/standard/mainichi.jp.txt @@ -1,3 +1,3 @@ -body: //div[@class='NewsArticle'] +body: //div[@class='NewsArticle'] test_url: http://mainichi.jp/select/weathernews/20110311/news/20110520k0000e040062000c.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/mainpost.de.txt b/inc/3rdparty/site_config/standard/mainpost.de.txt old mode 100644 new mode 100755 index a2d25d56..2136de3f --- a/inc/3rdparty/site_config/standard/mainpost.de.txt +++ b/inc/3rdparty/site_config/standard/mainpost.de.txt @@ -1,28 +1,28 @@ 
-title: substring-before(//title, '|') -body: //*[@id='content-left'] - -# Why is this not working here? -# body: //*[@id='content-left']/div[@class='content-container'][2]/div[@class='content-body']/div[@class='inner-container']/div[@class='detail'] - - -#Header -strip_id_or_class: 'subHead' -strip_id_or_class: 'fl_right' -strip_id_or_class: 'infolink' -strip_id_or_class: 'content-head' -strip_id_or_class: 'tab' -strip_id_or_class: 'tab-active' -strip: //*[contains(@class,'trenner')] - -# Headline -strip: //h1/* -strip_id_or_class: 'font16' - -#Images -strip_id_or_class: 'leftimage' -strip_id_or_class: 'rightimage' - -#Comments -strip: //table +title: substring-before(//title, '|') +body: //*[@id='content-left'] + +# Why is this not working here? +# body: //*[@id='content-left']/div[@class='content-container'][2]/div[@class='content-body']/div[@class='inner-container']/div[@class='detail'] + + +#Header +strip_id_or_class: 'subHead' +strip_id_or_class: 'fl_right' +strip_id_or_class: 'infolink' +strip_id_or_class: 'content-head' +strip_id_or_class: 'tab' +strip_id_or_class: 'tab-active' +strip: //*[contains(@class,'trenner')] + +# Headline +strip: //h1/* +strip_id_or_class: 'font16' + +#Images +strip_id_or_class: 'leftimage' +strip_id_or_class: 'rightimage' + +#Comments +strip: //table strip: //p/following-sibling::*[0] test_url: http://www.mainpost.de/ueberregional/meinung/Dioxin-Skandal-bringt-Agrarministerin-in-Bedraengnis;art9517,5920211 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/makeuseof.com.txt b/inc/3rdparty/site_config/standard/makeuseof.com.txt old mode 100644 new mode 100755 index 6809afed..078e8d08 --- a/inc/3rdparty/site_config/standard/makeuseof.com.txt +++ b/inc/3rdparty/site_config/standard/makeuseof.com.txt @@ -1,3 +1,9 @@ -tidy: no +title: //h1[@class='entry-title'] -test_url: http://www.makeuseof.com/dir/kindle-it-web-pages-kindle-friendly/ \ No newline at end of file +body: //article//header//img | 
//article//section[@class='post'] + +strip: //article//section[@class='post']/aside +strip: //article//section[@class='post']/footer + +test_url: http://www.makeuseof.com/tag/cool-websites-and-tools-advanced-photo-editor-keep-your-kids-stuff-online-identify-60-languages/ +test_url: http://www.makeuseof.com/tag/what-do-you-think-of-our-new-look-makeuseof-poll/ diff --git a/inc/3rdparty/site_config/standard/manager.co.th.txt b/inc/3rdparty/site_config/standard/manager.co.th.txt new file mode 100755 index 00000000..cd6c5c01 --- /dev/null +++ b/inc/3rdparty/site_config/standard/manager.co.th.txt @@ -0,0 +1,26 @@ +title: //td[@class="headline"] +author: //font[@color="#003366"] +date: //td[@class="date"] + +strip: //td[@class="headline"] +strip: //font[@color="#003366"] +strip: //td[@class="date"] + +strip: //img[@src="images/2009/logo_en.gif"] + +body: //tbody[@class="body"] +convert_double_br_tags:yes + +strip: //img[@src="/images/TabOver.gif"] +strip: //td[@width="160"] +strip: //img[@src="/images/TabUnder.gif"] + +strip: //td[@class="small"] +strip: //td[@height="47"] + +strip: //td[@valign="middle"] +strip: //td[@background="/images/menu_bottombg.gif"] +strip: //img[@src="/images/sc_footer_l.gif"] +strip: //img[@src="/images/sc_footer_m.gif"] +strip: //img[@src="/images/sc_footer_r.gif"] +test_url: http://www.manager.co.th/Entertainment/ViewNews.aspx?NewsID=9550000101979 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/marco.org.txt b/inc/3rdparty/site_config/standard/marco.org.txt old mode 100644 new mode 100755 index ef2e03d3..4bb24a62 --- a/inc/3rdparty/site_config/standard/marco.org.txt +++ b/inc/3rdparty/site_config/standard/marco.org.txt @@ -1,8 +1,8 @@ -tidy: no -prune: no -date: //article//time[@pubdate] -title: //article/header/h2 -body: //article -strip: //header -test_url: http://www.marco.org/2012/09/08/businessweek-gruber +tidy: no +prune: no +date: //article//time[@pubdate] +title: //article/header/h2 +body: //article +strip: 
//header +test_url: http://www.marco.org/2012/09/08/businessweek-gruber test_url: http://www.marco.org/2012/04/24/might-upgrade-someday \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/marksdailyapple.com.txt b/inc/3rdparty/site_config/standard/marksdailyapple.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/martinfowler.com.txt b/inc/3rdparty/site_config/standard/martinfowler.com.txt old mode 100644 new mode 100755 index 8e0e349f..4ff4a9c2 --- a/inc/3rdparty/site_config/standard/martinfowler.com.txt +++ b/inc/3rdparty/site_config/standard/martinfowler.com.txt @@ -1,8 +1,8 @@ -date: //div[@id="main"]/p[@class="date"] -author: string("Martin Fowler") -body: //div[@id="main"] -strip_id_or_class: date -strip_id_or_class: tags -strip_id_or_class: tagLabel +date: //div[@id="main"]/p[@class="date"] +author: string("Martin Fowler") +body: //div[@id="main"] +strip_id_or_class: date +strip_id_or_class: tags +strip_id_or_class: tagLabel strip: //div[@id="main"]/h1[1] test_url: http://martinfowler.com/bliki/DatabaseThaw.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/mashable.com.txt b/inc/3rdparty/site_config/standard/mashable.com.txt old mode 100644 new mode 100755 index 2c5a14a6..b6efb6c5 --- a/inc/3rdparty/site_config/standard/mashable.com.txt +++ b/inc/3rdparty/site_config/standard/mashable.com.txt @@ -1,4 +1,11 @@ -title: //header[@class='entry-title']/h1 -body: //div[@class='description'] +title: //h1[@class='title'] +author: substring-after(//span[@class='author_name'], 'By ') +date: //time + +body: //article strip: //div[@class='ytm-gallery-box'] -test_url: http://mashable.com/2011/12/05/india-wants-google-and-facebook-to-censor-user-content/ \ No newline at end of file +strip: //div[contains(@class, 'adsense')] +strip: //aside[contains(@class, 'social')] +strip_id_or_class: article-topics + +test_url: http://mashable.com/2013/05/24/myspace-architects-rebuilding-a-brand/ 
diff --git a/inc/3rdparty/site_config/standard/matt.might.net.txt b/inc/3rdparty/site_config/standard/matt.might.net.txt new file mode 100755 index 00000000..30d585cf --- /dev/null +++ b/inc/3rdparty/site_config/standard/matt.might.net.txt @@ -0,0 +1,5 @@ +title: //h1 +author: string("Matt Might") +strip: //h1/following-sibling::div + +test_url: http://matt.might.net/articles/oo-cesk/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/mattcutts.com.txt b/inc/3rdparty/site_config/standard/mattcutts.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/mbl.is.txt b/inc/3rdparty/site_config/standard/mbl.is.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/medialens.org.txt b/inc/3rdparty/site_config/standard/medialens.org.txt old mode 100644 new mode 100755 index 94f27b71..4c333aa1 --- a/inc/3rdparty/site_config/standard/medialens.org.txt +++ b/inc/3rdparty/site_config/standard/medialens.org.txt @@ -1,2 +1,4 @@ -strip: //div[contains(@class, 'article-tools')] +strip_id_or_class: article-tools +strip_id_or_class: pagenav +prune: no test_url: http://www.medialens.org/index.php/alerts/alert-archive/2012/713-the-illusion-of-democracy.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/medium.com.txt b/inc/3rdparty/site_config/standard/medium.com.txt new file mode 100755 index 00000000..acf7cc90 --- /dev/null +++ b/inc/3rdparty/site_config/standard/medium.com.txt @@ -0,0 +1,7 @@ +body: //div[contains(@class, 'post-content-inner')] +strip_id_or_class: follow-ups +strip_id_or_class: footer + +prune: no + +test_url: https://medium.com/p/6844c0d7893b \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/megamp3.eu.txt b/inc/3rdparty/site_config/standard/megamp3.eu.txt new file mode 100755 index 00000000..1b6a1279 --- /dev/null +++ b/inc/3rdparty/site_config/standard/megamp3.eu.txt @@ -0,0 +1,8 @@ +title: //h3[@class='episode_title'] +body: 
//ul[contains(@class, 'episode_imgdesc')]/li/descendant::* +prune: no +strip://*[contains(@class, 'plugin')] +strip://*[contains(@class, 'episode_keywords')] + +test_url: http://www.megamp3.eu/?p=episode&name=2013-04-19_la_filiere_progressive_431.mp3 +test_url: http://www.megamp3.eu/feed.xml diff --git a/inc/3rdparty/site_config/standard/menshealth.com.txt b/inc/3rdparty/site_config/standard/menshealth.com.txt old mode 100644 new mode 100755 index e7e1e269..a1a46f63 --- a/inc/3rdparty/site_config/standard/menshealth.com.txt +++ b/inc/3rdparty/site_config/standard/menshealth.com.txt @@ -1,16 +1,16 @@ -# need to find a way to eliminate <span> content for "related content" without eliminating important content - -convert_double_br_tags: [yes] -#body: //div[@id='leftside'] -title: //h1 -title: //h2 -Author: substring-after(//h4, 'By ') -Author: substring-after(//h4, 'By: ') -#Strip: //span -strip_id_or_class: morefromcat -strip_id_or_class: mostpopular -strip_id_or_class: articlepagination -strip_id_or_class: toolbar -body: //div[@id='zmodcontent'] -single_page_link: //li[@class='onepage'] //a[contains (@href, 'printer.php')] +# need to find a way to eliminate <span> content for "related content" without eliminating important content + +convert_double_br_tags: [yes] +#body: //div[@id='leftside'] +title: //h1 +title: //h2 +Author: substring-after(//h4, 'By ') +Author: substring-after(//h4, 'By: ') +#Strip: //span +strip_id_or_class: morefromcat +strip_id_or_class: mostpopular +strip_id_or_class: articlepagination +strip_id_or_class: toolbar +body: //div[@id='zmodcontent'] +single_page_link: //li[@class='onepage'] //a[contains (@href, 'printer.php')] test_url: http://www.menshealth.com/mhlists/pursuit_of_happiness/index.php \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/metafilter.com.txt b/inc/3rdparty/site_config/standard/metafilter.com.txt new file mode 100755 index 00000000..a2f3ada9 --- /dev/null +++ 
b/inc/3rdparty/site_config/standard/metafilter.com.txt @@ -0,0 +1,8 @@ +body: //div[contains(@class, 'copy') or contains(@class, 'comments')] +strip_id_or_class: related +strip: //a[. = 'Subscribe'] +strip: //h1/span[@class = 'smallcopy'] +strip: //a[@class = 'skip'] +strip: //div[@id = 'logo'] +strip: //div[contains(@class, 'comments') and contains(., 'You are not currently logged in')] +test_url: http://www.metafilter.com/128101/Probably-more-secure-than-the-Drafts-folder-on-a-shared-Gmail-account \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/mforum.cari.com.my.txt b/inc/3rdparty/site_config/standard/mforum.cari.com.my.txt new file mode 100755 index 00000000..c295d734 --- /dev/null +++ b/inc/3rdparty/site_config/standard/mforum.cari.com.my.txt @@ -0,0 +1,6 @@ +body: (//td[starts-with(@id, 'postmessage_')])[1] + +prune: no + +test_url: http://mforum.cari.com.my/forum.php?mod=viewthread&tid=788033 +test_url: http://mforum.cari.com.my/forum.php?mod=rss&fid=265&auth=0 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/mikeash.com.txt b/inc/3rdparty/site_config/standard/mikeash.com.txt old mode 100644 new mode 100755 index af8a7d30..abaa6a81 --- a/inc/3rdparty/site_config/standard/mikeash.com.txt +++ b/inc/3rdparty/site_config/standard/mikeash.com.txt @@ -1,5 +1,5 @@ -title: //div[@class="blogtitle"] -strip: //div[@class="blogtitle"] - +title: //div[@class="blogtitle"] +strip: //div[@class="blogtitle"] + author: substring-after(//span[@class="blogheader"], 'Author: ') test_url: http://www.mikeash.com/pyblog/friday-qa-2012-01-13-the-mac-toolbox.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/mikeindustries.com.txt b/inc/3rdparty/site_config/standard/mikeindustries.com.txt old mode 100644 new mode 100755 index 3d488e13..fb4636cc --- a/inc/3rdparty/site_config/standard/mikeindustries.com.txt +++ b/inc/3rdparty/site_config/standard/mikeindustries.com.txt @@ -1,9 +1,9 @@ -title: 
//div[@class='post_content']/h2 -date: //div[@class='dateline'] -body: //div[@class='entry'] - -strip: //div[@class='closer'] -strip: //div[@class='navigation'] -strip: //div[@class='aux_pane'] +title: //div[@class='post_content']/h2 +date: //div[@class='dateline'] +body: //div[@class='entry'] + +strip: //div[@class='closer'] +strip: //div[@class='navigation'] +strip: //div[@class='aux_pane'] strip: //div[@class='aux_aux_pane'] test_url: http://www.mikeindustries.com/blog/archive/2011/10/never-be-another \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/minnesota.publicradio.org.txt b/inc/3rdparty/site_config/standard/minnesota.publicradio.org.txt old mode 100644 new mode 100755 index 7e43d63c..773a627c --- a/inc/3rdparty/site_config/standard/minnesota.publicradio.org.txt +++ b/inc/3rdparty/site_config/standard/minnesota.publicradio.org.txt @@ -1,10 +1,10 @@ -title: //*[@class="article"]/h1 -date: //*[@class="article"]/div[@class="date"] - -# strip the title and date from the article text -strip: //*[@class="article"]/h1 -strip: //*[@class="article"]/div[@class="date"] - -# strip annoying <br> between metadata and article +title: //*[@class="article"]/h1 +date: //*[@class="article"]/div[@class="date"] + +# strip the title and date from the article text +strip: //*[@class="article"]/h1 +strip: //*[@class="article"]/div[@class="date"] + +# strip annoying <br> between metadata and article strip: //*[@class="article"]/div[@class="date"]/following-sibling::br test_url: http://minnesota.publicradio.org/display/web/2012/06/19/health/senators-want-health-care-ruling-on-tv/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/minnpost.com.txt b/inc/3rdparty/site_config/standard/minnpost.com.txt old mode 100644 new mode 100755 index 51a0630b..dc926a6f --- a/inc/3rdparty/site_config/standard/minnpost.com.txt +++ b/inc/3rdparty/site_config/standard/minnpost.com.txt @@ -1,5 +1,5 @@ -title: //*[@id="content-header"]/h1 -author: 
//*[contains(@class, 'byline')]/a/text() -date: substring-after(//*[contains(@class, 'byline')]/text()[2], '|') +title: //*[@id="content-header"]/h1 +author: //*[contains(@class, 'byline')]/a/text() +date: substring-after(//*[contains(@class, 'byline')]/text()[2], '|') body: //*[contains(@class, 'node-body')] test_url: http://www.minnpost.com/eric-black-ink/2012/06/overturning-obamacare-would-be-game-changer-supreme-court \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/mirrorfootball.co.uk.txt b/inc/3rdparty/site_config/standard/mirrorfootball.co.uk.txt old mode 100644 new mode 100755 index 4215a051..2033cf33 --- a/inc/3rdparty/site_config/standard/mirrorfootball.co.uk.txt +++ b/inc/3rdparty/site_config/standard/mirrorfootball.co.uk.txt @@ -1,3 +1,3 @@ -# Remove extra links +# Remove extra links strip: //*[@class='appended_html'] test_url: http://www.mirrorfootball.co.uk/news/West-Ham-crisis-Carlton-Cole-slams-diabolical-performance-and-rips-into-Avram-Grant-lack-of-tactical-nous-following-Liverpool-mauling-article636151.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/mises.org.txt b/inc/3rdparty/site_config/standard/mises.org.txt old mode 100644 new mode 100755 index ae542aa6..73c485e6 --- a/inc/3rdparty/site_config/standard/mises.org.txt +++ b/inc/3rdparty/site_config/standard/mises.org.txt @@ -1,5 +1,5 @@ -strip_id_or_class: 'book-ad' -strip_id_or_class: 'bigger pullquote' -strip_id_or_class: 'subscribe' +strip_id_or_class: 'book-ad' +strip_id_or_class: 'bigger pullquote' +strip_id_or_class: 'subscribe' strip_id_or_class: 'blog-link' test_url: http://mises.org/daily/4804 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/mlb.mlb.com.txt b/inc/3rdparty/site_config/standard/mlb.mlb.com.txt old mode 100644 new mode 100755 index 30e8aff2..765fab3f --- a/inc/3rdparty/site_config/standard/mlb.mlb.com.txt +++ b/inc/3rdparty/site_config/standard/mlb.mlb.com.txt @@ -1,14 +1,14 @@ -title: 
//h1[@class='article-headline'] -date: //span[@class='timeStamp'] -author: substring-before(//p[@class='article-byline'], '/') -body: //div[@id='article'] -#strip: //div[@class='inner'] -strip: //div[@id='article_head'] -strip: //p[@class='tagLine'] -strip: //div[@id='article_related_links'] -strip: //div[@id='article_related_mlb'] -strip: //span[@class='more'] -strip: //div[@class='article_component'] -strip: //span[@class='screen_reader'] -strip: //ul[@class='columnists_blurb'] +title: //h1[@class='article-headline'] +date: //span[@class='timeStamp'] +author: substring-before(//p[@class='article-byline'], '/') +body: //div[@id='article'] +#strip: //div[@class='inner'] +strip: //div[@id='article_head'] +strip: //p[@class='tagLine'] +strip: //div[@id='article_related_links'] +strip: //div[@id='article_related_mlb'] +strip: //span[@class='more'] +strip: //div[@class='article_component'] +strip: //span[@class='screen_reader'] +strip: //ul[@class='columnists_blurb'] test_url: http://mlb.mlb.com/news/article.jsp?ymd=20120403&content_id=27880830 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/mlb.sbnation.com.txt b/inc/3rdparty/site_config/standard/mlb.sbnation.com.txt old mode 100644 new mode 100755 index c4e3389e..8480e302 --- a/inc/3rdparty/site_config/standard/mlb.sbnation.com.txt +++ b/inc/3rdparty/site_config/standard/mlb.sbnation.com.txt @@ -1,14 +1,14 @@ -title: //h1[@id = 'stream_title'] -author: //p[@class = 'byline']/a -date: //span[@class = 'datetime'] - -body: //div[@id = 'stream_container'] -strip: //p[@class = 'byline'] -strip_id_or_class: stream_summary -strip_id_or_class: social-spoken -strip_id_or_class: datetime -strip_id_or_class: author-mini-profile -strip_id_or_class: social-tools -strip_id_or_class: entry-tags +title: //h1[@id = 'stream_title'] +author: //p[@class = 'byline']/a +date: //span[@class = 'datetime'] + +body: //div[@id = 'stream_container'] +strip: //p[@class = 'byline'] +strip_id_or_class: stream_summary 
+strip_id_or_class: social-spoken +strip_id_or_class: datetime +strip_id_or_class: author-mini-profile +strip_id_or_class: social-tools +strip_id_or_class: entry-tags strip_id_or_class: fb-like-box test_url: http://mlb.sbnation.com/2011/10/17/2495845/2011-world-series-st-louis-cardinals-texas-rangers-home-field-advantage \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/mlssoccer.com.txt b/inc/3rdparty/site_config/standard/mlssoccer.com.txt old mode 100644 new mode 100755 index 41e15136..5d706f88 --- a/inc/3rdparty/site_config/standard/mlssoccer.com.txt +++ b/inc/3rdparty/site_config/standard/mlssoccer.com.txt @@ -1,6 +1,6 @@ -title: //*[@class="header_title"]/h1 -date: //*[@class="field-date"] -author: //*[@class="field-author"] -body: //div[contains(@class, 'content')] +title: //*[@class="header_title"]/h1 +date: //*[@class="field-date"] +author: //*[@class="field-author"] +body: //div[contains(@class, 'content')] test_url: http://www.mlssoccer.com/news/article/2012/06/19/lack-depth-front-forces-arena-alter-las-formation \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/mmo-champion.com.txt b/inc/3rdparty/site_config/standard/mmo-champion.com.txt old mode 100644 new mode 100755 index 918fae36..50d8a24f --- a/inc/3rdparty/site_config/standard/mmo-champion.com.txt +++ b/inc/3rdparty/site_config/standard/mmo-champion.com.txt @@ -1,5 +1,5 @@ -title: //h1 -body: //div[@id = 'article_content']/div[contains(@class,'article')] -author: //sub[@class = 'article_promoted_text']/a[starts-with(@href, 'member')] +title: //h1 +body: //div[@id = 'article_content']/div[contains(@class,'article')] +author: //sub[@class = 'article_promoted_text']/a[starts-with(@href, 'member')] date: //div[@class = 'article_username_container'] test_url: http://www.mmo-champion.com/content/2688-Other-Press-Tour-Interviews-A-Night-in-Mists-of-Pandaria-Blue-Posts-MoP-Screenshot \ No newline at end of file diff --git 
a/inc/3rdparty/site_config/standard/mnn.com.txt b/inc/3rdparty/site_config/standard/mnn.com.txt old mode 100644 new mode 100755 index ddfe6fa2..d3576df2 --- a/inc/3rdparty/site_config/standard/mnn.com.txt +++ b/inc/3rdparty/site_config/standard/mnn.com.txt @@ -1,11 +1,11 @@ -tidy: no -author: //div[@id="above-content"]//img/@alt | //div[@class="comment-auth"]/span[1]/a/text() -date: //div[@class="comment-auth"]/div | //div[@class="comment-auth"]/span[2] -body: //div[@class="node"] - -strip_id_or_class: vertical-social-bar -strip_id_or_class: blogs_paginator -strip_id_or_class: horizontal-social-links -strip_id_or_class: servicelinksdiv +tidy: no +author: //div[@id="above-content"]//img/@alt | //div[@class="comment-auth"]/span[1]/a/text() +date: //div[@class="comment-auth"]/div | //div[@class="comment-auth"]/span[2] +body: //div[@class="node"] + +strip_id_or_class: vertical-social-bar +strip_id_or_class: blogs_paginator +strip_id_or_class: horizontal-social-links +strip_id_or_class: servicelinksdiv test_url: http://www.mnn.com/green-tech/research-innovations/blogs/5-breakthroughs-that-will-make-solar-power-cheaper-than-coal \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/mno.hu.txt b/inc/3rdparty/site_config/standard/mno.hu.txt old mode 100644 new mode 100755 index ba158953..8a3f9391 --- a/inc/3rdparty/site_config/standard/mno.hu.txt +++ b/inc/3rdparty/site_config/standard/mno.hu.txt @@ -1,14 +1,14 @@ -title: //title - -author: //div[@class="author"] - -strip_id_or_class: 'header' -strip_id_or_class: 'cikk_ajanlo' -strip_id_or_class: 'buttons' -strip_id_or_class: 'related' -strip_id_or_class: 'adbox ad_cikk_kozepre' -strip_id_or_class: 'cikk-cimkek' -strip_id_or_class: 'cikk_ertekeles' - +title: //title + +author: //div[@class="author"] + +strip_id_or_class: 'header' +strip_id_or_class: 'cikk_ajanlo' +strip_id_or_class: 'buttons' +strip_id_or_class: 'related' +strip_id_or_class: 'adbox ad_cikk_kozepre' +strip_id_or_class: 'cikk-cimkek' 
+strip_id_or_class: 'cikk_ertekeles' + strip_comments: yes test_url: http://mno.hu/grund/a-gumibottal-hadonaszo-rendort-joval-konnyebb-utalni-1055351 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/mobile.nytimes.com.txt b/inc/3rdparty/site_config/standard/mobile.nytimes.com.txt new file mode 100755 index 00000000..c60252ef --- /dev/null +++ b/inc/3rdparty/site_config/standard/mobile.nytimes.com.txt @@ -0,0 +1,4 @@ +title: //h1[contains(@class, 'headline')] +body: //article[contains(@class, 'full-art')] +strip_id_or_class: image-credit +test_url: http://mobile.nytimes.com/2014/06/19/opinion/gail-collins-romney-and-the-2016-contenders-huddle.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/mobile.slate.com.txt b/inc/3rdparty/site_config/standard/mobile.slate.com.txt old mode 100644 new mode 100755 index d5d81034..6ffcd18f --- a/inc/3rdparty/site_config/standard/mobile.slate.com.txt +++ b/inc/3rdparty/site_config/standard/mobile.slate.com.txt @@ -1,5 +1,5 @@ -title: //h2[@class="article_title"] -strip: //a[@class="houseAdLink"] -strip: //h1 +title: //h2[@class="article_title"] +strip: //a[@class="houseAdLink"] +strip: //h1 strip: //div[@class="more_articles"] test_url: http://mobile.slate.com/rss.jsp?rssid=411&item=http%3a%2f%2fwww.slate.com%2fdefault.aspx%3fdisplaymode%3d201%26id%3d2293749%26device%3drss \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/mobileopportunity.blogspot.com.txt b/inc/3rdparty/site_config/standard/mobileopportunity.blogspot.com.txt old mode 100644 new mode 100755 index a1cc5317..82da4aec --- a/inc/3rdparty/site_config/standard/mobileopportunity.blogspot.com.txt +++ b/inc/3rdparty/site_config/standard/mobileopportunity.blogspot.com.txt @@ -1,11 +1,11 @@ -body: //div[@class='post uncustomized-post-template'] - -# remove duplicate of post title, which is a link -strip: //h3[@class='post-title'] - -# remove permalink and timestamp, which isn't useful as it's a 
time with no date -strip: //span[@class='post-timestamp'] - -# remove labels (tags) +body: //div[@class='post uncustomized-post-template'] + +# remove duplicate of post title, which is a link +strip: //h3[@class='post-title'] + +# remove permalink and timestamp, which isn't useful as it's a time with no date +strip: //span[@class='post-timestamp'] + +# remove labels (tags) strip: //span[@class='post-labels'] test_url: http://mobileopportunity.blogspot.com/2010/12/rims-q3-financials-tale-of-two.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/modernghana.com.txt b/inc/3rdparty/site_config/standard/modernghana.com.txt old mode 100644 new mode 100755 index 4c93d0cf..306ef8d9 --- a/inc/3rdparty/site_config/standard/modernghana.com.txt +++ b/inc/3rdparty/site_config/standard/modernghana.com.txt @@ -1,8 +1,8 @@ -title: //meta[@property="og:title"]/@content -author: //meta[@name="author"]/@content -date: //span[@class='date1'] -body: //div[@id='newsimage'] | //div[@id='bodytext'] -tidy: no -prune: no - +title: //meta[@property="og:title"]/@content +author: //meta[@name="author"]/@content +date: //span[@class='date1'] +body: //div[@id='newsimage'] | //div[@id='bodytext'] +tidy: no +prune: no + test_url: http://www.modernghana.com/news/323765/1/039ghost039-teachers-removed-salaries-allowances-p.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/money.cnn.com.txt b/inc/3rdparty/site_config/standard/money.cnn.com.txt old mode 100644 new mode 100755 index a0d1628a..d5e03d20 --- a/inc/3rdparty/site_config/standard/money.cnn.com.txt +++ b/inc/3rdparty/site_config/standard/money.cnn.com.txt @@ -1,24 +1,24 @@ -title: //meta[@property="og:title"]/@content -title: //h1[@class='storyheadline'] -author: //meta[@name="AUTHOR"]/@content -date: //span[@class='cnnDateStamp'] -date: //meta[@name="DATE"]/@content -body: //div[@id='storytext' or @class='storytext'] - -strip_id_or_class: ie_column -strip_id_or_class: sharewidgets 
-strip_image_src: bug.gif - -strip: //div[@class="hed_side"] -strip: //span[@class="byline"] -strip: //a[@class="soc-twtname"] -strip: //span[@class="cnnDateStamp"] -strip: //div[@class="storytimestamp"] -strip: //div[@class="cnnCol_side"] - -prune: no -tidy: no - -test_url: http://money.cnn.com/2011/03/15/news/companies/steve_jobs_thought_process.fortune/index.htm?section=money_topstories&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fmoney_topstories+%28Top+Stories%29 -test_url: http://money.cnn.com/2012/01/27/markets/markets_newyork/index.htm +title: //meta[@property="og:title"]/@content +title: //h1[@class='storyheadline'] +author: //meta[@name="AUTHOR"]/@content +date: //span[@class='cnnDateStamp'] +date: //meta[@name="DATE"]/@content +body: //div[@id='storytext' or @class='storytext'] + +strip_id_or_class: ie_column +strip_id_or_class: sharewidgets +strip_image_src: bug.gif + +strip: //div[@class="hed_side"] +strip: //span[@class="byline"] +strip: //a[@class="soc-twtname"] +strip: //span[@class="cnnDateStamp"] +strip: //div[@class="storytimestamp"] +strip: //div[@class="cnnCol_side"] + +prune: no +tidy: no + +test_url: http://money.cnn.com/2011/03/15/news/companies/steve_jobs_thought_process.fortune/index.htm?section=money_topstories&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fmoney_topstories+%28Top+Stories%29 +test_url: http://money.cnn.com/2012/01/27/markets/markets_newyork/index.htm test_url: http://money.cnn.com/2012/05/13/technology/yahoo-ceo-out-rumor/index.htm \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/monkeyzen.com.txt b/inc/3rdparty/site_config/standard/monkeyzen.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/moonsault.de.txt b/inc/3rdparty/site_config/standard/moonsault.de.txt old mode 100644 new mode 100755 index 061a8d5c..55026eeb --- a/inc/3rdparty/site_config/standard/moonsault.de.txt +++ 
b/inc/3rdparty/site_config/standard/moonsault.de.txt @@ -1,13 +1,13 @@ -strip_image_src: menu -strip_image_src: templates -strip: //div/a -strip: //div/b -strip: //div/strong -strip: //td[@width='30%'] -strip: //br[1] -strip: //br[2] -strip: //br[3] -strip: //br[4] -strip: //a[@href='http://www.moonsault.de/newzboard/index.php?act=home'] +strip_image_src: menu +strip_image_src: templates +strip: //div/a +strip: //div/b +strip: //div/strong +strip: //td[@width='30%'] +strip: //br[1] +strip: //br[2] +strip: //br[3] +strip: //br[4] +strip: //a[@href='http://www.moonsault.de/newzboard/index.php?act=home'] strip_id_or_class: cse-branding-right test_url: http://www.moonsault.de/newzboard/index.php?news=22321&act=previous \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/moreintelligentlife.com.txt b/inc/3rdparty/site_config/standard/moreintelligentlife.com.txt old mode 100644 new mode 100755 index a7e59c30..780cca4f --- a/inc/3rdparty/site_config/standard/moreintelligentlife.com.txt +++ b/inc/3rdparty/site_config/standard/moreintelligentlife.com.txt @@ -1,7 +1,7 @@ -title: //h1[@class='print-title'] -body: //div[@class='print-submitted' or @class='print-created' or @class='print-content'] -prune: no - -single_page_link: //li[@class='print']/a - +title: //h1[@class='print-title'] +body: //div[@class='print-submitted' or @class='print-created' or @class='print-content'] +prune: no + +single_page_link: //li[@class='print']/a + test_url: http://moreintelligentlife.com/content/places/paul-markillie/they-trash-cars-dont-they \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/motherboard.vice.com.txt b/inc/3rdparty/site_config/standard/motherboard.vice.com.txt old mode 100644 new mode 100755 index 6faf1c9a..c6312c0e --- a/inc/3rdparty/site_config/standard/motherboard.vice.com.txt +++ b/inc/3rdparty/site_config/standard/motherboard.vice.com.txt @@ -1,5 +1,5 @@ -author: //span[@class="author"]/a -date: //span[@class="date"] -body: 
//div[@class="story-content"] -strip: //aside +author: //span[@class="author"]/a +date: //span[@class="date"] +body: //div[@class="story-content"] +strip: //aside test_url: http://motherboard.vice.com/blog/you-can-carry-a-copy-of-the-pirate-bay-in-your-pocket \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/mothering.com.txt b/inc/3rdparty/site_config/standard/mothering.com.txt old mode 100644 new mode 100755 index a9d9195f..a34adff7 --- a/inc/3rdparty/site_config/standard/mothering.com.txt +++ b/inc/3rdparty/site_config/standard/mothering.com.txt @@ -1,7 +1,7 @@ -title: //h2[contains(@class,'post_headline')] -body: //div[@class='entry'] -convert_double_br_tags: yes -strip_image_src: _selected.gif -strip_id_or_class: addthis_ +title: //h2[contains(@class,'post_headline')] +body: //div[@class='entry'] +convert_double_br_tags: yes +strip_image_src: _selected.gif +strip_id_or_class: addthis_ strip: //a[contains(@href,'feedburner.com')] test_url: http://mothering.com/all-things-mothering/inspiration/motherhood-brings-me-down \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/motherjones.com.txt b/inc/3rdparty/site_config/standard/motherjones.com.txt old mode 100644 new mode 100755 index d58c7d2c..851feb7e --- a/inc/3rdparty/site_config/standard/motherjones.com.txt +++ b/inc/3rdparty/site_config/standard/motherjones.com.txt @@ -1,15 +1,15 @@ -title: //h1 -body: //div[@id = 'content-area'] -next_page_link: //div[@class='node-pager']/a[contains(@class, 'next')] -tidy: no -author: //p[contains(@class, 'byline')]/a - -strip_id_or_class: node-header -strip_id_or_class: hdr-tools -strip_id_or_class: node-body-break -strip_id_or_class: pullquote -strip_id_or_class: node-pager -strip_id_or_class: author-bio -strip_id_or_class: node-footer +title: //h1 +body: //div[@id = 'content-area'] +next_page_link: //div[@class='node-pager']/a[contains(@class, 'next')] +tidy: no +author: //p[contains(@class, 'byline')]/a + 
+strip_id_or_class: node-header +strip_id_or_class: hdr-tools +strip_id_or_class: node-body-break +strip_id_or_class: pullquote +strip_id_or_class: node-pager +strip_id_or_class: author-bio +strip_id_or_class: node-footer test_url: http://motherjones.com/politics/2012/02/mac-mcclelland-free-online-shipping-warehouses-labor \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/motorfull.com.txt b/inc/3rdparty/site_config/standard/motorfull.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/movie.douban.com.txt b/inc/3rdparty/site_config/standard/movie.douban.com.txt new file mode 100755 index 00000000..eae211ed --- /dev/null +++ b/inc/3rdparty/site_config/standard/movie.douban.com.txt @@ -0,0 +1,12 @@ +# This filter is tested on: +# http://movie.douban.com/review/1062013/ + +title: //span[contains(@property, 'v:summary')] +author: //span[contains(@property, 'v:reviewer')] +date://span[contains(@property, 'v:dtreviewed')] +body://div[contains(@class, 'main-bd')] + +strip://img[contains(@class,'rating')]|//img[contains(@class,'review-stat')] +convert_double_br_tags: yes +test_url: http://movie.douban.com/review/1062013/ +test_url: http://movie.douban.com/review/1021870/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/msdn.microsoft.com.txt b/inc/3rdparty/site_config/standard/msdn.microsoft.com.txt old mode 100644 new mode 100755 index f4f20450..7a284275 --- a/inc/3rdparty/site_config/standard/msdn.microsoft.com.txt +++ b/inc/3rdparty/site_config/standard/msdn.microsoft.com.txt @@ -1,3 +1,3 @@ -body: //div[class="mainBody"] +body: //div[class="mainBody"] footnotes: no test_url: http://msdn.microsoft.com/en-us/library/hh542796(VS.103).aspx \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/msnbc.msn.com.txt b/inc/3rdparty/site_config/standard/msnbc.msn.com.txt old mode 100644 new mode 100755 index ad89cda8..f008d2d1 --- 
a/inc/3rdparty/site_config/standard/msnbc.msn.com.txt +++ b/inc/3rdparty/site_config/standard/msnbc.msn.com.txt @@ -1,21 +1,21 @@ -title: //title -author: //div[@id='byline'] - -date: //div[contains(@class,'timestamp')]/abbr/text() - -body: //div[@id='intellitTXT'] - -strip: //div[@id='byline'] -strip: //div[contains(@class,'timestamp')] -strip: //div[contains(@class, 'ad-label')] -strip: //div[contains(@class, 'ad-break')] -strip: //span[contains(@class, 'x-video')] -strip: //span[contains(@class, 'inline')] -strip: //div[contains(@class, 'video')] -strip: //div[contains(@class, 'discuss')] -strip: //div[@id='most-popular'] -strip: //div[contains(@class,'drawer')] -strip: //*[contains(@class, 'hide')] - +title: //title +author: //div[@id='byline'] + +date: //div[contains(@class,'timestamp')]/abbr/text() + +body: //div[@id='intellitTXT'] + +strip: //div[@id='byline'] +strip: //div[contains(@class,'timestamp')] +strip: //div[contains(@class, 'ad-label')] +strip: //div[contains(@class, 'ad-break')] +strip: //span[contains(@class, 'x-video')] +strip: //span[contains(@class, 'inline')] +strip: //div[contains(@class, 'video')] +strip: //div[contains(@class, 'discuss')] +strip: //div[@id='most-popular'] +strip: //div[contains(@class,'drawer')] +strip: //*[contains(@class, 'hide')] + footnotes: no test_url: http://www.msnbc.msn.com/id/44748412/ns/business-world_business/#.TolUv-vfDbE \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/myfoxatlanta.com.txt b/inc/3rdparty/site_config/standard/myfoxatlanta.com.txt new file mode 100755 index 00000000..8a7590ab --- /dev/null +++ b/inc/3rdparty/site_config/standard/myfoxatlanta.com.txt @@ -0,0 +1,5 @@ +body: //div[@id='WNStoryBody'] +author: //div[@id='WNStoryByline'] +prune: no + +test_url: http://www.myfoxatlanta.com/category/233685/local-news?clienttype=rss \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/myfoxboston.com.txt 
b/inc/3rdparty/site_config/standard/myfoxboston.com.txt old mode 100644 new mode 100755 index 1a35b4fc..9ad8ce05 --- a/inc/3rdparty/site_config/standard/myfoxboston.com.txt +++ b/inc/3rdparty/site_config/standard/myfoxboston.com.txt @@ -1,4 +1,4 @@ -body: //div[@class="col1"]//div[@class="photo"] | //div[@class="detail"]/p[@class="fontStyle21"] | //div[@class="story last"] -tidy: no - +body: //div[@class="col1"]//div[@class="photo"] | //div[@class="detail"]/p[@class="fontStyle21"] | //div[@class="story last"] +tidy: no + test_url: http://www.myfoxboston.com/dpp/news/local/transit-police-say-woman-spat-on-mbta-bus-driver-2010611 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/myrecipes.com.txt b/inc/3rdparty/site_config/standard/myrecipes.com.txt old mode 100644 new mode 100755 index 8b99d22d..956be1e6 --- a/inc/3rdparty/site_config/standard/myrecipes.com.txt +++ b/inc/3rdparty/site_config/standard/myrecipes.com.txt @@ -1,12 +1,12 @@ -title: //h2[contains(@class, 'name')] -body: //div[@class='printFullPageContentContainer']//div[contains(@class, 'recipe')] - -strip_id_or_class: photoBy -strip_id_or_class: link - -single_page_link: //li[@class='print']/a[contains(@href, '/print/')] - -prune: no -tidy: no - +title: //h2[contains(@class, 'name')] +body: //div[@class='printFullPageContentContainer']//div[contains(@class, 'recipe')] + +strip_id_or_class: photoBy +strip_id_or_class: link + +single_page_link: //li[@class='print']/a[contains(@href, '/print/')] + +prune: no +tidy: no + test_url: http://www.myrecipes.com/recipe/hummingbird-cake-10000000387218/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/narenji.ir.txt b/inc/3rdparty/site_config/standard/narenji.ir.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/nasa.gov.txt b/inc/3rdparty/site_config/standard/nasa.gov.txt old mode 100644 new mode 100755 index d95530f3..7df1112b --- a/inc/3rdparty/site_config/standard/nasa.gov.txt +++ 
b/inc/3rdparty/site_config/standard/nasa.gov.txt @@ -1,8 +1,8 @@ -title: //div[@class='address']/span -author: substring-before(//span[@class='credits'],',') -date: //div[@class='promodatepress']/span -body: //div[@class='default_style_wrap'] -strip: //div[@class='text_adjust'] -strip: //div[@class='skiplink'] +title: //div[@class='address']/span +author: substring-before(//span[@class='credits'],',') +date: //div[@class='promodatepress']/span +body: //div[@class='default_style_wrap'] +strip: //div[@class='text_adjust'] +strip: //div[@class='skiplink'] strip: //h2 test_url: http://www.nasa.gov/mission_pages/kepler/news/kepler-21b.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/nbweekly.com.txt b/inc/3rdparty/site_config/standard/nbweekly.com.txt old mode 100644 new mode 100755 index 0b722d33..2645d406 --- a/inc/3rdparty/site_config/standard/nbweekly.com.txt +++ b/inc/3rdparty/site_config/standard/nbweekly.com.txt @@ -1,10 +1,10 @@ -date://span[contains(@class,'date')] - -body://div[contains(@class,'contWarp')] - -strip://div[contains(@class,'keyWord')] -strip://div[contains(@class,'submitComt')] -strip://div[contains(@class,'cmts')] -strip://div[contains(@class,'notice')] +date://span[contains(@class,'date')] + +body://div[contains(@class,'contWarp')] + +strip://div[contains(@class,'keyWord')] +strip://div[contains(@class,'submitComt')] +strip://div[contains(@class,'cmts')] +strip://div[contains(@class,'notice')] strip://div[contains(@class,'part pt-second')] test_url: http://www.nbweekly.com/news/china/201203/29316.aspx \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/neh.gov.txt b/inc/3rdparty/site_config/standard/neh.gov.txt old mode 100644 new mode 100755 index 45136a2b..e7cc4313 --- a/inc/3rdparty/site_config/standard/neh.gov.txt +++ b/inc/3rdparty/site_config/standard/neh.gov.txt @@ -1,17 +1,17 @@ -#host configuration should be http://www.neh.gov/news/humanities/ - - -#meta data 
-title:substring-after(substring-after(//title,':'),':') -author:substring-after(//h2[@class = 'subHead'],'By') -date:substring-before(substring-after(//title,':'),':') - -#img and caption handling -wrap_in(small)://div[@id = 'mainContent']/table/descendant::p/descendant::text() -wrap_in(fieldset)://div[@id = 'mainContent']/table - -# clean up -strip: //table[@class = 'marginpaddingTop'] -strip: //h2[@class = 'subHead'] +#host configuration should be http://www.neh.gov/news/humanities/ + + +#meta data +title:substring-after(substring-after(//title,':'),':') +author:substring-after(//h2[@class = 'subHead'],'By') +date:substring-before(substring-after(//title,':'),':') + +#img and caption handling +wrap_in(small)://div[@id = 'mainContent']/table/descendant::p/descendant::text() +wrap_in(fieldset)://div[@id = 'mainContent']/table + +# clean up +strip: //table[@class = 'marginpaddingTop'] +strip: //h2[@class = 'subHead'] test_url: http://www.neh.gov/news/humanities/2011-11/IslamicScholar.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/neomoney.co.txt b/inc/3rdparty/site_config/standard/neomoney.co.txt old mode 100644 new mode 100755 index 564d5492..2089fc39 --- a/inc/3rdparty/site_config/standard/neomoney.co.txt +++ b/inc/3rdparty/site_config/standard/neomoney.co.txt @@ -1,3 +1,3 @@ -title: //*[@class="header_title"]/h1 +title: //*[@class="header_title"]/h1 body: //div[contains(@class, 'content')] test_url: http://neomoney.co/personal/expatriate-and-migrant-loans/expatriate-loans/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/net-security.org.txt b/inc/3rdparty/site_config/standard/net-security.org.txt old mode 100644 new mode 100755 index 4e6d66d4..b7fedbf3 --- a/inc/3rdparty/site_config/standard/net-security.org.txt +++ b/inc/3rdparty/site_config/standard/net-security.org.txt @@ -1,7 +1,7 @@ -title: //div[@class='content-title'] -#date: substring-after(//div[@class='dernek-text-under'],'Posted on') -body: 
//div[@class='content-item'] -next_page_link: //li[@class='next']/a -convert_double_br_tags: yes +title: //div[@class='content-title'] +#date: substring-after(//div[@class='dernek-text-under'],'Posted on') +body: //div[@class='content-item'] +next_page_link: //li[@class='next']/a +convert_double_br_tags: yes test_url: http://www.net-security.org/article.php?id=1732 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/netmagazine.com.txt b/inc/3rdparty/site_config/standard/netmagazine.com.txt old mode 100644 new mode 100755 index 86885445..dcea047c --- a/inc/3rdparty/site_config/standard/netmagazine.com.txt +++ b/inc/3rdparty/site_config/standard/netmagazine.com.txt @@ -1,16 +1,16 @@ -title: //h1 -author: //div[@class="submitted"]/span - -# seems like this should work, but nothing is returned. Issue with xpath parser? -date: //div[@class="submitted"]/time - -body: //div[@id="main-content"] - -strip_comments: no - -strip: //h1 -strip: //div[@class="submitted"] -strip: //dd[@class="profile-avatar"] -strip: //div[@class="author-profile"]/dl/dt[1] +title: //h1 +author: //div[@class="submitted"]/span + +# seems like this should work, but nothing is returned. Issue with xpath parser? 
+date: //div[@class="submitted"]/time + +body: //div[@id="main-content"] + +strip_comments: no + +strip: //h1 +strip: //div[@class="submitted"] +strip: //dd[@class="profile-avatar"] +strip: //div[@class="author-profile"]/dl/dt[1] strip: //div[@id="right-col"] test_url: http://www.netmagazine.com/opinions/nielsen-wrong-mobile \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/netzpolitik.org.txt b/inc/3rdparty/site_config/standard/netzpolitik.org.txt old mode 100644 new mode 100755 index 87dc3cdf..7fa43fd7 --- a/inc/3rdparty/site_config/standard/netzpolitik.org.txt +++ b/inc/3rdparty/site_config/standard/netzpolitik.org.txt @@ -1,6 +1,6 @@ -title: //h1[@class='entry-title'] -author: //a[@ref='author'] -date: //span[@class='entry-date'] -body: //div[@class='entry-content'] +title: //h1[@class='entry-title'] +author: //a[@ref='author'] +date: //span[@class='entry-date'] +body: //div[@class='entry-content'] test_url: http://netzpolitik.org/2011/buch-generation-facebook/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/newleftproject.org.txt b/inc/3rdparty/site_config/standard/newleftproject.org.txt new file mode 100755 index 00000000..d9af99d8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/newleftproject.org.txt @@ -0,0 +1,3 @@ +title: //div[contains(@class, 'article_header')]//h3 + +test_url: http://www.newleftproject.org/index.php/site/article_comments/do_we_need_a_facebook_of_the_left \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/newmatilda.com.txt b/inc/3rdparty/site_config/standard/newmatilda.com.txt old mode 100644 new mode 100755 index ab766847..f17ecdc6 --- a/inc/3rdparty/site_config/standard/newmatilda.com.txt +++ b/inc/3rdparty/site_config/standard/newmatilda.com.txt @@ -1,9 +1,9 @@ -title: //div[@id="maincontent"]/h1 -body: //div[@id="maincontent"] -date: //div[@id="maincontent"]/p[2] -author: //ul[@id="contributors"]/li/p/b - -strip: //p[@*] -strip: //h1 +title: 
//div[@id="maincontent"]/h1 +body: //div[@id="maincontent"] +date: //div[@id="maincontent"]/p[2] +author: //ul[@id="contributors"]/li/p/b + +strip: //p[@*] +strip: //h1 strip: //div[@id="maincontent"]/div test_url: http://newmatilda.com/2011/07/22/turnbull-makes-sense-climate \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/newrepublic.com.txt b/inc/3rdparty/site_config/standard/newrepublic.com.txt new file mode 100755 index 00000000..039f0385 --- /dev/null +++ b/inc/3rdparty/site_config/standard/newrepublic.com.txt @@ -0,0 +1,8 @@ +author: //span[@class="authors"] +date: //span[@class="date"] +body: //div[@class="primary"] + +strip: //div[@id="controls"] +strip: //div[@id="read-next"] + +test_url: http://www.newrepublic.com/article/112731/moocs-will-online-education-ruin-university-experience \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/news-gazette.com.txt b/inc/3rdparty/site_config/standard/news-gazette.com.txt old mode 100644 new mode 100755 index 1f1e5d3a..2b352707 --- a/inc/3rdparty/site_config/standard/news-gazette.com.txt +++ b/inc/3rdparty/site_config/standard/news-gazette.com.txt @@ -1,8 +1,8 @@ -title: //div[@id="main-content"]//h2 - -author: //div[@id="main-content"]//span[@class="authors"] - -date: //div[@id="main-content"]//span[@class="timestamp"] - +title: //div[@id="main-content"]//h2 + +author: //div[@id="main-content"]//span[@class="authors"] + +date: //div[@id="main-content"]//span[@class="timestamp"] + body: //div[@id="main-content"]//div[@class="content"] test_url: http://www.news-gazette.com/news/business/economy/2011-08-08/ibm-drops-out-blue-waters-project.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/news.cnet.com.txt b/inc/3rdparty/site_config/standard/news.cnet.com.txt old mode 100644 new mode 100755 index b7ab224a..78af70f4 --- a/inc/3rdparty/site_config/standard/news.cnet.com.txt +++ b/inc/3rdparty/site_config/standard/news.cnet.com.txt @@ -1,12 
+1,12 @@ -#This should apply to *.cnet.com. Not just news.cnet.com. -title: //h1 -author: //img[@class="mugshot"]/@alt -strip: //h1 -strip_id_or_class: breadcrumb -strip: //p[@id="introP"] -strip: //div[@class="postByline"] -strip: //div[@class="editorBio"] -strip: //div[@class="inline-slideshow"] -strip: //div[@class="related"] +#This should apply to *.cnet.com. Not just news.cnet.com. +title: //h1 +author: //img[@class="mugshot"]/@alt +strip: //h1 +strip_id_or_class: breadcrumb +strip: //p[@id="introP"] +strip: //div[@class="postByline"] +strip: //div[@class="editorBio"] +strip: //div[@class="inline-slideshow"] +strip: //div[@class="related"] body: //div[@class="postBody txtWrap"] test_url: http://news.cnet.com/8301-27076_3-57405303-248/apple-ipad-charging-fine-keep-it-plugged-in/?tag=mncol;posts \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/news.detik.com.txt b/inc/3rdparty/site_config/standard/news.detik.com.txt old mode 100644 new mode 100755 index 3ed1dc85..629bc917 --- a/inc/3rdparty/site_config/standard/news.detik.com.txt +++ b/inc/3rdparty/site_config/standard/news.detik.com.txt @@ -1,8 +1,8 @@ -title://div[@class="content_detail"]/h1 - -author://div[@class="author"]/strong - -date:substring-before(substring-after(//div[@class="content_detail"]/span[@class="date"], ','), ' WIB') - +title://div[@class="content_detail"]/h1 + +author://div[@class="author"]/strong + +date:substring-before(substring-after(//div[@class="content_detail"]/span[@class="date"], ','), ' WIB') + body://div[@class="text_detail"] test_url: http://news.detik.com/read/2012/05/22/225531/1922307/10/menkeu-cek-soal-lolosnya-315-kg-sabu-dari-bea-cukai \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/news.kanaloco.jp.txt b/inc/3rdparty/site_config/standard/news.kanaloco.jp.txt old mode 100644 new mode 100755 index 6fc86137..5754d47a --- a/inc/3rdparty/site_config/standard/news.kanaloco.jp.txt +++ 
b/inc/3rdparty/site_config/standard/news.kanaloco.jp.txt @@ -1,9 +1,9 @@ -body: //div[@id='main'] -strip: //div[@id='sbs'] -strip: //div[@id='fsizeSwitch'] -strip: //div[@id='googleAd'] -strip: //div[@id='detailFoot'] -strip_image_src: counter?key -convert_double_br_tags: yes +body: //div[@id='main'] +strip: //div[@id='sbs'] +strip: //div[@id='fsizeSwitch'] +strip: //div[@id='googleAd'] +strip: //div[@id='detailFoot'] +strip_image_src: counter?key +convert_double_br_tags: yes test_url: http://news.kanaloco.jp/localnews/article/1105200018/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/news.mynavi.jp.txt b/inc/3rdparty/site_config/standard/news.mynavi.jp.txt old mode 100644 new mode 100755 index ded680f1..1df47314 --- a/inc/3rdparty/site_config/standard/news.mynavi.jp.txt +++ b/inc/3rdparty/site_config/standard/news.mynavi.jp.txt @@ -1,11 +1,11 @@ -title: //h2[@class="lyt-hdg-02-04"] - -author: //div[@class="lyt-namearea"]/a - -date: //div[@class="lyt-namearea"]/text() - -body: //div[@class="articleContent"] - -strip: //div[@id="tab-aside"] +title: //h2[@class="lyt-hdg-02-04"] + +author: //div[@class="lyt-namearea"]/a + +date: //div[@class="lyt-namearea"]/text() + +body: //div[@class="articleContent"] + +strip: //div[@id="tab-aside"] test_url: http://news.mynavi.jp/articles/2011/12/07/nico/index.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/news.orf.at.txt b/inc/3rdparty/site_config/standard/news.orf.at.txt old mode 100644 new mode 100755 index b60deea4..3b1d3ccb --- a/inc/3rdparty/site_config/standard/news.orf.at.txt +++ b/inc/3rdparty/site_config/standard/news.orf.at.txt @@ -1,11 +1,11 @@ -single_page_link: //div[@id='content']//p[@class='readMore']/a - -title: //div[@class='hidden offscreen']/h2 -body: //div[@id="storyText"] -move_into(//div[@id='storyText']): //div[@class='fact'] -strip: //small[@class='credit'] -strip: //small[@class='caption'] -date: 
substring-after(//div[@id='storyMeta']//p[@class='date'],'Publiziert am') -strip: //p[@class='toplink'] +single_page_link: //div[@id='content']//p[@class='readMore']/a + +title: //div[@class='hidden offscreen']/h2 +body: //div[@id="storyText"] +move_into(//div[@id='storyText']): //div[@class='fact'] +strip: //small[@class='credit'] +strip: //small[@class='caption'] +date: substring-after(//div[@id='storyMeta']//p[@class='date'],'Publiziert am') +strip: //p[@class='toplink'] test_url: http://news.orf.at/stories/2084731/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/news.rambler.ru.txt b/inc/3rdparty/site_config/standard/news.rambler.ru.txt old mode 100644 new mode 100755 index 743245f8..1d547334 --- a/inc/3rdparty/site_config/standard/news.rambler.ru.txt +++ b/inc/3rdparty/site_config/standard/news.rambler.ru.txt @@ -1,9 +1,9 @@ -body: //article -title: //h1 -author: //span[@class='b-article-source-dropdown'] -strip: //span[@class='b-article-photo-incut__source'] -strip: //a[@class='b-read-more b-read-more_bottom'] - - +body: //article +title: //h1 +author: //span[@class='b-article-source-dropdown'] +strip: //span[@class='b-article-photo-incut__source'] +strip: //a[@class='b-read-more b-read-more_bottom'] + + tidy:no test_url: http://news.rambler.ru/12972208/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/news.techmeme.com.txt b/inc/3rdparty/site_config/standard/news.techmeme.com.txt old mode 100644 new mode 100755 index c80c3327..ba4db828 --- a/inc/3rdparty/site_config/standard/news.techmeme.com.txt +++ b/inc/3rdparty/site_config/standard/news.techmeme.com.txt @@ -1,4 +1,4 @@ -body: //div[@class='main']/div[@class='item'] -strip: //div[@class='right'] - +body: //div[@class='main']/div[@class='item'] +strip: //div[@class='right'] + test_url: http://news.techmeme.com/110516/fh-rip \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/news.yahoo.com.txt 
b/inc/3rdparty/site_config/standard/news.yahoo.com.txt old mode 100644 new mode 100755 index 5ee04049..fc1739c8 --- a/inc/3rdparty/site_config/standard/news.yahoo.com.txt +++ b/inc/3rdparty/site_config/standard/news.yahoo.com.txt @@ -1,12 +1,12 @@ -title: //meta[@property='og:title']/@content -title: //h1[@class='headline'] -author: //cite[contains(@class, 'byline') and contains(@class, 'vcard')]//span[@class='fn'] -date: //cite[contains(@class, 'byline') and contains(@class, 'vcard')]//abbr/@title -body: //div[@id='mediaarticlelead']//a[@class='media'] | //div[contains(@class,'yom-art-content')] -#strip: //cite/abbr -strip_id_or_class: action -strip_id_or_class: prefetch -tidy: no -prune: no +title: //meta[@property='og:title']/@content +title: //h1[@class='headline'] +author: //cite[contains(@class, 'byline') and contains(@class, 'vcard')]//span[@class='fn'] +date: //cite[contains(@class, 'byline') and contains(@class, 'vcard')]//abbr/@title +body: //div[@id='mediaarticlelead']//a[@class='media'] | //div[contains(@class,'yom-art-content')] +#strip: //cite/abbr +strip_id_or_class: action +strip_id_or_class: prefetch +tidy: no +prune: no test_url: http://news.yahoo.com/cold-la-nina-winter-forecast-west-coast-183535067.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/news.ycombinator.com.txt b/inc/3rdparty/site_config/standard/news.ycombinator.com.txt old mode 100644 new mode 100755 index 0b01f8a1..f7441d17 --- a/inc/3rdparty/site_config/standard/news.ycombinator.com.txt +++ b/inc/3rdparty/site_config/standard/news.ycombinator.com.txt @@ -1,3 +1,3 @@ -strip_comments: no +strip_comments: no strip: //a[. 
= 'reply'] test_url: http://news.ycombinator.com/item?id=1516461 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/news.zing.vn.txt b/inc/3rdparty/site_config/standard/news.zing.vn.txt new file mode 100755 index 00000000..af81e90e --- /dev/null +++ b/inc/3rdparty/site_config/standard/news.zing.vn.txt @@ -0,0 +1,3 @@ +body://div[@class="newsdetail_wrapper"] +strip://div[@class="more_news"] +test_url: http://news.zing.vn/xa-hoi/s-phat-nang-xe-may-di-duong-tren-cao-ha-noi/a280838.html#home_noibat1 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/news247.gr.txt b/inc/3rdparty/site_config/standard/news247.gr.txt new file mode 100755 index 00000000..87637bed --- /dev/null +++ b/inc/3rdparty/site_config/standard/news247.gr.txt @@ -0,0 +1,6 @@ +title: //h1[@class='title'] + +body: //img[@id='relPicsMainPic'] | //div[contains(@class, 'storyContent')] + +test_url: http://news247.gr/eidiseis/katatheseis_fwtia_htan_apofasismenoi_akomh_kai_na_afairesoyn_zwes_an_thewrousan_oti_to_thuma_htan_antipalos_toys.2433351.html +test_url: http://news247.gr/?widget=rssfeed&view=feed&contentId=38291 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/newsbomb.gr.txt b/inc/3rdparty/site_config/standard/newsbomb.gr.txt old mode 100644 new mode 100755 index 0500890f..5eb0ea46 --- a/inc/3rdparty/site_config/standard/newsbomb.gr.txt +++ b/inc/3rdparty/site_config/standard/newsbomb.gr.txt @@ -1,9 +1,9 @@ -date: //meta[@name='og:article:published_time']/@value - -body: //div[@class='itemIntroText' or @class='itemImageBlock' or @class='itemFullText'] - -strip_id_or_class: itemImageGallery - -prune: no - +date: //meta[@name='og:article:published_time']/@value + +body: //div[@class='itemIntroText' or @class='itemImageBlock' or @class='itemFullText'] + +strip_id_or_class: itemImageGallery + +prune: no + test_url: http://www.newsbomb.gr/gossip/story/257234/i-proin-moy-protimoyse-na-serfarei-apo-to-na-kanoyme-sex \ No newline at 
end of file diff --git a/inc/3rdparty/site_config/standard/newsle.com.txt b/inc/3rdparty/site_config/standard/newsle.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/newsmill.se.txt b/inc/3rdparty/site_config/standard/newsmill.se.txt old mode 100644 new mode 100755 index eb7d3350..1a990319 --- a/inc/3rdparty/site_config/standard/newsmill.se.txt +++ b/inc/3rdparty/site_config/standard/newsmill.se.txt @@ -1,12 +1,12 @@ -title: //h1 -body: (//div[@class='articleImg']//img)[1] | //p[contains(@class, 'commentTextArticle') or contains(@class, 'articlePublished')] | //div[@id='articleLeftContent'] -author: //div[@class='byline']//a[contains(@href, '/user/')] - -strip_id_or_class: facts -strip_id_or_class: articleBlogsHolder -strip_id_or_class: byline - -prune: no -tidy: no - +title: //h1 +body: (//div[@class='articleImg']//img)[1] | //p[contains(@class, 'commentTextArticle') or contains(@class, 'articlePublished')] | //div[@id='articleLeftContent'] +author: //div[@class='byline']//a[contains(@href, '/user/')] + +strip_id_or_class: facts +strip_id_or_class: articleBlogsHolder +strip_id_or_class: byline + +prune: no +tidy: no + test_url: http://www.newsmill.se/artikel/2012/05/06/medielogiken-v-ger-tyngre-n-reportrarnas-sikter \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/newsunspun.org.txt b/inc/3rdparty/site_config/standard/newsunspun.org.txt old mode 100644 new mode 100755 index 860ad66b..247bbebb --- a/inc/3rdparty/site_config/standard/newsunspun.org.txt +++ b/inc/3rdparty/site_config/standard/newsunspun.org.txt @@ -1,10 +1,10 @@ -body: //div[@class='right']//div[@class='articles'] -author: //div[@id='artinfo']//a[contains(@href, '/author/')] -strip: //div[@id='artinfo'] -strip: //table[//a[contains(@href, 'twitter.com')]] -strip_id_or_class: twitter - -prune: no -tidy: no - +body: //div[@class='right']//div[@class='articles'] +author: //div[@id='artinfo']//a[contains(@href, '/author/')] +strip: 
//div[@id='artinfo'] +strip: //table[//a[contains(@href, 'twitter.com')]] +strip_id_or_class: twitter + +prune: no +tidy: no + test_url: http://www.newsunspun.org/eotn/bbc-headline-change-iran-goes-from-not-building-to-undecided-on-nuclear-bomb \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/newsweek.com.txt b/inc/3rdparty/site_config/standard/newsweek.com.txt new file mode 100755 index 00000000..565648ba --- /dev/null +++ b/inc/3rdparty/site_config/standard/newsweek.com.txt @@ -0,0 +1,6 @@ +body: //div[@class = 'article-body'] +title: //h1[@class = 'article-title'] +strip: //aside + +test_url: http://www.newsweek.com/day-steve-mcqueen-met-his-new-nazi-neighbor-keith-moon-229741 +test_url: http://www.newsweek.com/2014/06/13/how-greylock-partners-finds-next-facebook-253329.html diff --git a/inc/3rdparty/site_config/standard/newswise.com.txt b/inc/3rdparty/site_config/standard/newswise.com.txt new file mode 100755 index 00000000..10120ea1 --- /dev/null +++ b/inc/3rdparty/site_config/standard/newswise.com.txt @@ -0,0 +1,17 @@ +prune: no +tidy: no + +title: //h1/a[2] +body: //div[@id="main"] +author: //span[@id="articlesource"] +date: //span[contains(@class, 'releasedate')] + +strip: //div[@class="inst-logo"] +strip: //h1[1] + +strip_id_or_class: addthis +strip_id_or_class: released +strip_id_or_class: skiptranslate +strip_id_or_class: flash + +test_url: http://www.newswise.com/articles/first-heat-wave-of-season-puts-elderly-at-risk diff --git a/inc/3rdparty/site_config/standard/newyorker.com.txt b/inc/3rdparty/site_config/standard/newyorker.com.txt old mode 100644 new mode 100755 index 5624aa8c..950324a3 --- a/inc/3rdparty/site_config/standard/newyorker.com.txt +++ b/inc/3rdparty/site_config/standard/newyorker.com.txt @@ -1,10 +1,11 @@ -title: //h1[@id='articlehed'] | //h2[@id="articleintro"] -body: //div[@id='articletext'] - -strip: //ul[@id="bc"] | //div[@id="yrail"] | //div[@class="entry-keywords"] | //div[@class="entry-categories"] | 
//div[@class="socialUtils"] | //div[@id="footer"] - -date: //h4[@id='articleauthor']/span[@class='dd dds'] -date: //div[@id="pagebody"]/div[@class='hentry entry']/div[@class='published'] - -single_page_link: //div[@class='paginationViewSinglePage']/a -test_url: http://www.newyorker.com/online/blogs/culture/2012/06/mug-shot-web-sites.html \ No newline at end of file +title: //h1[@id='articlehed'] | //h2[@id="articleintro"] +body: //div[@id='articletext'] + +strip: //ul[@id="bc"] | //div[@id="yrail"] | //div[@class="entry-keywords"] | //div[@class="entry-categories"] | //div[@class="socialUtils"] | //div[@id="footer"] | //div[@class="cartoon"] + +date: //h4[@id='articleauthor']/span[@class='dd dds'] +date: //div[@id="pagebody"]/div[@class='hentry entry']/div[@class='published'] + +single_page_link: //div[@class='paginationViewSinglePage']/a +test_url: http://www.newyorker.com/online/blogs/culture/2012/06/mug-shot-web-sites.html +test_url: http://www.newyorker.com/reporting/2013/04/22/130422fa_fact_bilger?currentPage=all&mobify=0 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/next-gen.biz.txt b/inc/3rdparty/site_config/standard/next-gen.biz.txt old mode 100644 new mode 100755 index 806a3dfd..b8d235db --- a/inc/3rdparty/site_config/standard/next-gen.biz.txt +++ b/inc/3rdparty/site_config/standard/next-gen.biz.txt @@ -1,16 +1,16 @@ -# 2011-08-22 [carlo@...] initial version -# 2011-08-22 [carlo@...] removed comments & social links - -tidy: no - -single_page_link: //a[@class="single active"] - -body: //div[@id="main"]//div[@class="content-region"]/article -author: //span[@class="author-name"] -date: //time/text() - -strip_id_or_class: //aside[@id="related"] -strip: //footer - +# 2011-08-22 [carlo@...] initial version +# 2011-08-22 [carlo@...] 
removed comments & social links + +tidy: no + +single_page_link: //a[@class="single active"] + +body: //div[@id="main"]//div[@class="content-region"]/article +author: //span[@class="author-name"] +date: //time/text() + +strip_id_or_class: //aside[@id="related"] +strip: //footer + title: //h1 test_url: http://www.next-gen.biz/reviews/deus-ex-human-revolution-review \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/nfl.com.txt b/inc/3rdparty/site_config/standard/nfl.com.txt old mode 100644 new mode 100755 index 70f92473..956b288f --- a/inc/3rdparty/site_config/standard/nfl.com.txt +++ b/inc/3rdparty/site_config/standard/nfl.com.txt @@ -1,11 +1,11 @@ -# doesn't look like selecting an attribute value works? -# author: //meta[@id="authorName"]@value - -author: substring-after(//li[@id="article-hdr-meta-author"]/text(), "By ") -date: //abbr[@id="article-time"] -title: //div[@id="article-hdr"]/h1 -body: //div[@class="articleText"] - -# strip miscellaneous teasers & etc +# doesn't look like selecting an attribute value works? 
+# author: //meta[@id="authorName"]@value + +author: substring-after(//li[@id="article-hdr-meta-author"]/text(), "By ") +date: //abbr[@id="article-time"] +title: //div[@id="article-hdr"]/h1 +body: //div[@class="articleText"] + +# strip miscellaneous teasers & etc strip: //div[@class="removeformobile"] test_url: http://www.nfl.com/news/story/09000d5d82388707/article/close-shave-chiefs-haley-perseveres-through-rough-start?module=HP11_content_stream \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/ngm.nationalgeographic.com.txt b/inc/3rdparty/site_config/standard/ngm.nationalgeographic.com.txt old mode 100644 new mode 100755 index 60834862..44a82a95 --- a/inc/3rdparty/site_config/standard/ngm.nationalgeographic.com.txt +++ b/inc/3rdparty/site_config/standard/ngm.nationalgeographic.com.txt @@ -1,7 +1,7 @@ -next_page_link: //div[@class='nextpage_continue']/a -strip: //div[@class='nextpage_continue'] -strip_id_or_class: nextpage -title: //div[@class='article_title']//h1 -body: //div[@class='article_title']/.. +next_page_link: //div[@class='nextpage_continue']/a +strip: //div[@class='nextpage_continue'] +strip_id_or_class: nextpage +title: //div[@class='article_title']//h1 +body: //div[@class='article_title']/.. body: //div[@class='content'] test_url: http://ngm.nationalgeographic.com/2012/02/tsunami/folger-text \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/nhk.or.jp.txt b/inc/3rdparty/site_config/standard/nhk.or.jp.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/nintendoworldreport.com.txt b/inc/3rdparty/site_config/standard/nintendoworldreport.com.txt old mode 100644 new mode 100755 index 409a8977..f0e28afb --- a/inc/3rdparty/site_config/standard/nintendoworldreport.com.txt +++ b/inc/3rdparty/site_config/standard/nintendoworldreport.com.txt @@ -1,13 +1,13 @@ -body: //div[@id="main"] -title: //div[@id="main"]/h3 - -# Remove ‘Review’ and ‘Wii’. 
-strip: //div[@class="badge"] - -# Remove duplicate title and country flag. -strip: //h3 - -# Commented out below are attempts to extract the author and date, which did not work. -# author: //p[@class="extra "]/a +body: //div[@id="main"] +title: //div[@id="main"]/h3 + +# Remove ‘Review’ and ‘Wii’. +strip: //div[@class="badge"] + +# Remove duplicate title and country flag. +strip: //h3 + +# Commented out below are attempts to extract the author and date, which did not work. +# author: //p[@class="extra "]/a # date: //p[@class="extra "]/span[@class="when"] test_url: http://www.nintendoworldreport.com/review/28400 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/nojesguiden.se.txt b/inc/3rdparty/site_config/standard/nojesguiden.se.txt old mode 100644 new mode 100755 index ae2d7e41..b15f0612 --- a/inc/3rdparty/site_config/standard/nojesguiden.se.txt +++ b/inc/3rdparty/site_config/standard/nojesguiden.se.txt @@ -1,5 +1,5 @@ -author: //span[@class='meta']/span[@class='username'] -body: //div[@class='article-content'] - +author: //span[@class='meta']/span[@class='username'] +body: //div[@class='article-content'] + strip_id_or_class: 'article-actions' test_url: http://nojesguiden.se/blogg/maja-bredberg/maja-laser-tidningen-en-helt-vanlig-lordag-i \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/northumberlandview.ca.txt b/inc/3rdparty/site_config/standard/northumberlandview.ca.txt old mode 100644 new mode 100755 index 04a0a34d..88429a78 --- a/inc/3rdparty/site_config/standard/northumberlandview.ca.txt +++ b/inc/3rdparty/site_config/standard/northumberlandview.ca.txt @@ -1,11 +1,11 @@ -title: //h1 -body: //div[@id='pn-maincontent'] -strip_id_or_class: z-menu -strip_id_or_class: news_category -strip_id_or_class: news_title -strip_id_or_class: news_modify -strip_id_or_class: news_morearticlesincat -strip_id_or_class: ezc_comments -strip_comments: yes - +title: //h1 +body: //div[@id='pn-maincontent'] +strip_id_or_class: 
z-menu +strip_id_or_class: news_category +strip_id_or_class: news_title +strip_id_or_class: news_modify +strip_id_or_class: news_morearticlesincat +strip_id_or_class: ezc_comments +strip_comments: yes + test_url: http://www.northumberlandview.ca/index.php?module=news&func=display&sid=5972 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/nosalty.hu.txt b/inc/3rdparty/site_config/standard/nosalty.hu.txt new file mode 100755 index 00000000..7e20cadf --- /dev/null +++ b/inc/3rdparty/site_config/standard/nosalty.hu.txt @@ -0,0 +1,6 @@ +title: //div[@id='tab-recept']//h1 +body: //div[@id='tab-recept']//div[contains(@class, 'column-container')] +strip_id_or_class: ajanlo-box +prune: no + +test_url: http://www.nosalty.hu/recept/szupergyors-fank \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/nplusonemag.com.txt b/inc/3rdparty/site_config/standard/nplusonemag.com.txt old mode 100644 new mode 100755 index 205b1af4..1b817c04 --- a/inc/3rdparty/site_config/standard/nplusonemag.com.txt +++ b/inc/3rdparty/site_config/standard/nplusonemag.com.txt @@ -1,6 +1,6 @@ -title: /html/body/div[3]/div/div/h1 - -body: //*[@id="article-body"] - +title: /html/body/div[3]/div/div/h1 + +body: //*[@id="article-body"] + test_url: http://nplusonemag.com/the-outskirts-of-progress \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/npr.org.txt b/inc/3rdparty/site_config/standard/npr.org.txt old mode 100644 new mode 100755 index afab0eb3..acd73e48 --- a/inc/3rdparty/site_config/standard/npr.org.txt +++ b/inc/3rdparty/site_config/standard/npr.org.txt @@ -1,32 +1,34 @@ -title: //div[contains(@class, 'storytitle')]//h1 -author: //p[@class="byline"]/span -body: //div[@id='storyspan02']//*[@class='duration' or @class='download' or contains(@class, 'photo')] | //div[@id='storytext'] | //div[@class='transcript'] -date: //meta[@name="date"]/@content - -strip: //div[@class='enlarge_measure'] -strip: //div[@class='enlarge_html'] 
-strip: //a[@class='enlargeicon'] -strip: //div[contains(@class, 'bookedition')] -strip: //div[@class='textsize'] -strip: //ul[@class='genres'] -strip: //span[@class='bull'] -strip_id_or_class: secondary -strip_id_or_class: con1col -strip: //h3[@class='conheader'] - -replace_string(<a name="more"> </a>): <!-- no more --> -replace_string(<div class="transcript">): <div class="transcript"><h2>Transcript</h2> - -prune: no -strip://div[@class="ecommercepop"] -strip://span[@class="bull"] -strip://span[@class="purchaseLink"] -strip://div[@class="enlarge_html"] -strip://div[@class="enlarge_measure"] -strip://div[@class="container con1col small"] -strip://a[contains(@class, "enlargebtn")] -strip://div[contains(@class, "bucketwrap internallink")] - -test_url: http://www.npr.org/blogs/thetwo-way/2011/07/12/137799301/sports-loses-its-escapist-gleam-in-a-summer-of-court-dates -test_url: http://www.npr.org/2012/07/04/156190948/feeling-under-siege-catholic-leadership-shifts-right -test_url: http://www.npr.org/2012/12/13/166480907/the-years-best-sci-fi-crosses-galaxies-and-genres \ No newline at end of file +title: //div[contains(@class, 'storytitle')]//h1 +author: //p[@class="byline"]/span +body: //div[@id='primaryaudio']//*[@class='duration' or @class='download' or contains(@class, 'photo')] | //div[@id='storytext' or @id='supplementarycontent' or contains(@class, 'transcript')] +date: //meta[@name="date"]/@content + +strip_id_or_class: enlarge_measure +strip_id_or_class: enlarge_html +strip: //a[contains(@class, 'enlargeicon')] +strip: //div[contains(@class, 'bookedition')] +strip: //div[@class='textsize'] +strip: //ul[@class='genres'] +strip: //span[@class='bull'] +strip_id_or_class: secondary +strip_id_or_class: con1col +strip: //h3[@class='conheader'] + +replace_string(<a name="more"> </a>): <!-- no more --> +replace_string(<div class="transcript">): <div class="transcript"><h2>Transcript</h2> +replace_string(<div class="transcript storytext">): <div class="transcript 
storytext"><h2>Transcript</h2> + +prune: no +strip://div[@class="ecommercepop"] +strip://span[@class="bull"] +strip://span[@class="purchaseLink"] +strip://div[@class="enlarge_html"] +strip://div[@class="enlarge_measure"] +strip://div[@class="container con1col small"] +strip://a[contains(@class, "enlargebtn")] +strip://div[contains(@class, "bucketwrap internallink")] + +test_url: http://www.npr.org/blogs/thetwo-way/2011/07/12/137799301/sports-loses-its-escapist-gleam-in-a-summer-of-court-dates +test_url: http://www.npr.org/2012/07/04/156190948/feeling-under-siege-catholic-leadership-shifts-right +test_url: http://www.npr.org/2012/12/13/166480907/the-years-best-sci-fi-crosses-galaxies-and-genres +test_url: http://www.npr.org/templates/story/story.php?storyId=229103221 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/nybooks.com.txt b/inc/3rdparty/site_config/standard/nybooks.com.txt old mode 100644 new mode 100755 index 8ecb8961..d95ec68e --- a/inc/3rdparty/site_config/standard/nybooks.com.txt +++ b/inc/3rdparty/site_config/standard/nybooks.com.txt @@ -1,13 +1,13 @@ -strip_id_or_class: sIFR-alternate -title: //div[@id='page-title-wrapper']/div[@id='page-title']/h2 -single_page_link: //a[contains(@href, 'pagination=false') and not(contains(@href, 'printpage=true'))] - -body: //div[@id = 'article-body'] -strip_id_or_class:article-tools -strip_id_or_class:js_target -strip_id_or_class:marker -author://div[@id = 'page-title']/h3 -date://div[@id = 'page-title']/h5/a[starts-with(@href,'/issues/')] - - +strip_id_or_class: sIFR-alternate +title: //div[@id='page-title-wrapper']/div[@id='page-title']/h2 +single_page_link: //a[contains(@href, 'pagination=false') and not(contains(@href, 'printpage=true'))] + +body: //div[@id = 'article-body'] +strip_id_or_class:article-tools +strip_id_or_class:js_target +strip_id_or_class:marker +author://div[@id = 'page-title']/h3 +date://div[@id = 'page-title']/h5/a[starts-with(@href,'/issues/')] + + test_url: 
http://www.nybooks.com/articles/archives/2012/feb/23/were-more-unequal-you-think/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/nymag.com.txt b/inc/3rdparty/site_config/standard/nymag.com.txt old mode 100644 new mode 100755 index f664c93d..7a1d62d9 --- a/inc/3rdparty/site_config/standard/nymag.com.txt +++ b/inc/3rdparty/site_config/standard/nymag.com.txt @@ -1,8 +1,8 @@ -title: //h2[contains(@class, 'primary')] -body: //div[@id='story'] -author: //*[@class='by']/a -date: substring-after(//*[@class='date'], 'Published') - -next_page_link: //div[@class='page-navigation']//li[@class='next']/a - +title: //h2[contains(@class, 'primary')] +body: //div[@id='story'] +author: //*[@class='by']/a +date: substring-after(//*[@class='date'], 'Published') + +next_page_link: //div[@class='page-navigation']//li[@class='next']/a + test_url: http://nymag.com/news/features/wall-street-2012-2/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/nyteknik.se.txt b/inc/3rdparty/site_config/standard/nyteknik.se.txt old mode 100644 new mode 100755 index 8c9e37f4..f4bedb6a --- a/inc/3rdparty/site_config/standard/nyteknik.se.txt +++ b/inc/3rdparty/site_config/standard/nyteknik.se.txt @@ -1,8 +1,8 @@ -title: //div[@class="article default-article"]/h1 -author: //p[@class="author"]/a[2] - -# Article introduction: -#move_into(//div[@class="article-bread"]): //p[@class="lead"] - +title: //div[@class="article default-article"]/h1 +author: //p[@class="author"]/a[2] + +# Article introduction: +#move_into(//div[@class="article-bread"]): //p[@class="lead"] + body: //div[@class="article-bread"] test_url: http://www.nyteknik.se/nyheter/energi_miljo/energi/article3391426.ece \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/nytimes.com.txt b/inc/3rdparty/site_config/standard/nytimes.com.txt old mode 100644 new mode 100755 index 8d9a794a..23c9ad11 --- a/inc/3rdparty/site_config/standard/nytimes.com.txt +++ 
b/inc/3rdparty/site_config/standard/nytimes.com.txt @@ -1,36 +1,49 @@ -title://h1[@class="articleHeadline"] -body://div[@id="article"] -strip_id_or_class:articleTools -strip_id_or_class:readerscomment -#strip://div[contains(@class, "articleInline runaroundLeft")] -strip: //div[contains(@class, "doubleRule")] -# strip image credit - appears as a bold heading -strip: //div[contains(@class, "articleInline")]//h6 -strip_id_or_class:enlargeThis -strip_id_or_class:pageLinks -strip_id_or_class:memberTools -strip_id_or_class:articleExtras -strip_id_or_class:singleAd -strip_id_or_class:byline -strip_id_or_class:dateline -strip_id_or_class:articleheadline -strip_id_or_class:articleBottomExtra -strip://a[contains(@href, 'nytimes.com/adx/')] -strip: //nyt_byline -strip: //span[contains(@class, 'slideshow') or contains(@class, 'video')] -strip: //p[@class='caption']//a[contains(., 'More Photos')] - -prune: no -tidy: no - -date: substring-after(//*[contains(@class, 'dateline')], 'Published:') - -single_page_link: //link[contains(@href, 'pagewanted=all')] -#single_page_link: //a[contains(@href, 'pagewanted=all') and not(contains(@href, 'login'))] - -strip://ul[@id = 'toolsList'] -strip://h6[@class = 'kicker'] -author:substring-after(//h6[@class='byline'],'By ') - -test_url: http://www.nytimes.com/2011/07/24/books/review/an-academic-authors-unintentional-masterpiece.html -test_url: http://www.nytimes.com/2012/06/10/arts/television/the-newsroom-aaron-sorkins-return-to-tv.html \ No newline at end of file +title://h1[@class="articleHeadline"] +body://div[@id="article"] +body://*[@itemprop="articleBody"] +strip_id_or_class:articleTools +strip_id_or_class:readerscomment +#strip://div[contains(@class, "articleInline runaroundLeft")] +strip: //div[contains(@class, "doubleRule")] +# strip image credit - appears as a bold heading +strip: //div[contains(@class, "articleInline")]//h6 +strip_id_or_class:enlargeThis +strip_id_or_class:pageLinks +strip_id_or_class:memberTools 
+strip_id_or_class:articleExtras +strip_id_or_class:singleAd +strip_id_or_class:byline +strip_id_or_class:dateline +strip_id_or_class:articleheadline +strip_id_or_class:articleBottomExtra +strip_id_or_class:shareTools +strip://a[contains(@href, 'nytimes.com/adx/')] +strip: //nyt_byline +strip: //span[contains(@class, 'slideshow') or contains(@class, 'video')] +strip: //p[@class='caption']//a[contains(., 'More Photos')] + +prune: no +tidy: no + +find_string: <script +replace_string: <div style="display:none" +find_string: </script> +replace_string: </div> + +date: substring-after(//*[contains(@class, 'dateline')], 'Published:') + +single_page_link: //link[contains(@href, 'pagewanted=all')] +single_page_link: //link[@rel='alternate' and contains(@href, 'mobile.nytimes.com')]/@href +single_page_link: concat(substring-before(//div[@id='pageLinks']//a[contains(@href, 'pagewanted=')]/@href, 'pagewanted='), 'pagewanted=all') +#single_page_link: //a[contains(@href, 'pagewanted=all') and not(contains(@href, 'login'))] + +strip://ul[@id = 'toolsList'] +strip://h6[@class = 'kicker'] +author:substring-after(//h6[@class='byline'],'By ') + +test_url: http://www.nytimes.com/2011/07/24/books/review/an-academic-authors-unintentional-masterpiece.html +test_url: http://www.nytimes.com/2012/06/10/arts/television/the-newsroom-aaron-sorkins-return-to-tv.html +test_url: http://www.nytimes.com/2013/03/25/world/middleeast/israeli-military-responds-after-patrols-come-under-fire-from-syria.html +test_url: http://www.nytimes.com/2013/08/15/nyregion/when-the-new-york-city-subway-ran-without-rails.html +test_url: http://www.nytimes.com/2004/02/29/weekinreview/correspondence-class-consciousness-china-s-wealthy-live-creed-hobbes-darwin-meet.html +test_url: http://www.nytimes.com/2014/06/19/opinion/gail-collins-romney-and-the-2016-contenders-huddle.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/nzz.ch.txt b/inc/3rdparty/site_config/standard/nzz.ch.txt old mode 
100644 new mode 100755 index 81faabae..749f4f2a --- a/inc/3rdparty/site_config/standard/nzz.ch.txt +++ b/inc/3rdparty/site_config/standard/nzz.ch.txt @@ -1,12 +1,12 @@ -body: //*[@class='article-full'] -title: //h3 -strip: //header[@class='group'] -#body: //p[@class='lead'] -#move_into(//p[@class='lead']): //*[@class='article-full']/figure -#move_into(//p[@class='lead']): //div[@id='articleBodyText'] -strip: //div[@id='social-media-floater'] -strip: //div[@class='advertisement'] -strip: //div[@class='infobox'] -strip: //div[@id='articleComments'] - +body: //*[@class='article-full'] +title: //h3 +strip: //header[@class='group'] +#body: //p[@class='lead'] +#move_into(//p[@class='lead']): //*[@class='article-full']/figure +#move_into(//p[@class='lead']): //div[@id='articleBodyText'] +strip: //div[@id='social-media-floater'] +strip: //div[@class='advertisement'] +strip: //div[@class='infobox'] +strip: //div[@id='articleComments'] + test_url: http://www.nzz.ch/wissen/wissenschaft/sonnenschutz-fuer-die-erde-1.17282213 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/observer.com.txt b/inc/3rdparty/site_config/standard/observer.com.txt old mode 100644 new mode 100755 index e409ca2e..0b107538 --- a/inc/3rdparty/site_config/standard/observer.com.txt +++ b/inc/3rdparty/site_config/standard/observer.com.txt @@ -1,7 +1,7 @@ -body: //article[contains(@class, 'instapaper_body')] - -prune: no - -single_page_link: //a[@id='print-button'] - +body: //article[contains(@class, 'instapaper_body')] + +prune: no + +single_page_link: //a[@id='print-button'] + test_url: http://www.observer.com/2008/would-you-take-tumblr-man \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/off.net.mk.txt b/inc/3rdparty/site_config/standard/off.net.mk.txt old mode 100644 new mode 100755 index a2fb5f21..bf107876 --- a/inc/3rdparty/site_config/standard/off.net.mk.txt +++ b/inc/3rdparty/site_config/standard/off.net.mk.txt @@ -1,7 +1,7 @@ -body: //div[(@id = 
"content")] -strip: //div[(@class = "links-bar")] -strip: //div[(@class = "povrzani")] -strip: //div[(@class = "povrzani-dolu")] -strip: //div[(@class = "tags")] +body: //div[(@id = "content")] +strip: //div[(@class = "links-bar")] +strip: //div[(@class = "povrzani")] +strip: //div[(@class = "povrzani-dolu")] +strip: //div[(@class = "tags")] strip: //h1[(@id = "page-title")] test_url: http://off.net.mk/zhivot-i-zabava/gadzheti/dzhabe-raboti-dzhabe-ne-dishi \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/omaha.com.txt b/inc/3rdparty/site_config/standard/omaha.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/omiliya.org.txt b/inc/3rdparty/site_config/standard/omiliya.org.txt old mode 100644 new mode 100755 index 1b39b625..4b3a7202 --- a/inc/3rdparty/site_config/standard/omiliya.org.txt +++ b/inc/3rdparty/site_config/standard/omiliya.org.txt @@ -1,9 +1,9 @@ -title: //div[@id='squeeze']/h1 -strip: //div[@id='squeeze']/h1 -author: //div[@class='submitted']/a -strip: //div[@class='submitted']/a -convert_double_br_tags: yes - - +title: //div[@id='squeeze']/h1 +strip: //div[@id='squeeze']/h1 +author: //div[@class='submitted']/a +strip: //div[@class='submitted']/a +convert_double_br_tags: yes + + test_url: http://omiliya.org/content/predchuvstvie.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/on.net.mk.txt b/inc/3rdparty/site_config/standard/on.net.mk.txt old mode 100644 new mode 100755 index be7a17ef..a95c2b0f --- a/inc/3rdparty/site_config/standard/on.net.mk.txt +++ b/inc/3rdparty/site_config/standard/on.net.mk.txt @@ -1,5 +1,5 @@ -body: //div[(@class = "statija")] -strip: //div[(@class = "relatedBlock")] -strip: //div[(@class = "swftools")] +body: //div[(@class = "statija")] +strip: //div[(@class = "relatedBlock")] +strip: //div[(@class = "swftools")] strip: //table[(@class = "links")] test_url: http://on.net.mk/video/na-trkala/lamborghini-aventador-avionot-shto-ne-leta \ No 
newline at end of file diff --git a/inc/3rdparty/site_config/standard/online.wsj.com.txt b/inc/3rdparty/site_config/standard/online.wsj.com.txt old mode 100644 new mode 100755 index edb52855..448bb7e1 --- a/inc/3rdparty/site_config/standard/online.wsj.com.txt +++ b/inc/3rdparty/site_config/standard/online.wsj.com.txt @@ -1,23 +1,25 @@ -title: //meta[@property="og:title"]/@content -body: //div[@id='article_story_body'] - -author: //h3[@class='byline']/a -# for slid show content -body: //ul[@id='imageSlide']//li[@class='firstSlide']//img | (//div[@class='txt_body']//p)[1] -date: //li[@class='dateStamp']/small - -strip_id_or_class: insetFullBracket -strip_id_or_class: insettipBox -#strip_id_or_class: legacyInset -strip_id_or_class: recipeACShopAndBuyText - -strip: //div[contains(@class, 'insetContent')]//cite -strip: //*[contains(@style, 'visibility: hidden;')] -strip: //div[contains(@class, 'insetContent') and not(contains(@class, 'image'))] - -prune: no -tidy: no - -test_url: http://online.wsj.com/article/SB10001424052970203363504577185322849515102.html -# slide show -test_url: http://online.wsj.com/article/SB10001424052970204791104577110550376458164.html \ No newline at end of file +title: //meta[@property="og:title"]/@content +body: //div[@id='article_story_body'] + +author: //h3[@class='byline']/a +# for slide show content +body: //ul[@id='imageSlide']//li[@class='firstSlide']//img | (//div[@class='txt_body']//p)[1] +date: //li[@class='dateStamp']/small + +strip_id_or_class: insetFullBracket +strip_id_or_class: insettipBox +#strip_id_or_class: legacyInset +strip_id_or_class: recipeACShopAndBuyText + +strip: //div[contains(@class, 'insetContent')]//cite +strip: //*[contains(@style, 'visibility: hidden;')] +strip: //div[contains(@class, 'insetContent') and not(contains(@class, 'image'))] +strip: //div[contains(@class, 'carousel')] + +prune: no +tidy: no + +test_url: http://online.wsj.com/news/articles/SB10001424052702304626304579509100018004342 +test_url: 
http://online.wsj.com/article/SB10001424052970203363504577185322849515102.html +# slide show +test_url: http://online.wsj.com/article/SB10001424052970204791104577110550376458164.html diff --git a/inc/3rdparty/site_config/standard/onlinewelten.com.txt b/inc/3rdparty/site_config/standard/onlinewelten.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/onstartups.com.txt b/inc/3rdparty/site_config/standard/onstartups.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/ontologicalgeek.com.txt b/inc/3rdparty/site_config/standard/ontologicalgeek.com.txt new file mode 100755 index 00000000..a9bf71ef --- /dev/null +++ b/inc/3rdparty/site_config/standard/ontologicalgeek.com.txt @@ -0,0 +1,8 @@ +title: //h1[@class='entry-title'] + +author: //a[@rel='author'] + +date: substring-before(//aside[@class='entry-meta'], '|') + +body: //div[@class='entry-content'] +test_url: http://ontologicalgeek.com/change-or-live-final-fantasy-x-as-catholic-dystopia/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/opensource.org.txt b/inc/3rdparty/site_config/standard/opensource.org.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/openthemagazine.com.txt b/inc/3rdparty/site_config/standard/openthemagazine.com.txt old mode 100644 new mode 100755 index 510eb252..6913eb0e --- a/inc/3rdparty/site_config/standard/openthemagazine.com.txt +++ b/inc/3rdparty/site_config/standard/openthemagazine.com.txt @@ -1,4 +1,4 @@ -body: //div[@id = 'content-inner'] -strip: //div[@id = 'content-bottom'] +body: //div[@id = 'content-inner'] +strip: //div[@id = 'content-bottom'] strip_id_or_class: print_sharebutton test_url: http://openthemagazine.com/article/nation/sania-vs-saina \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/openwebx.org.txt b/inc/3rdparty/site_config/standard/openwebx.org.txt old mode 100644 new mode 100755 index b7663540..a5dcdb59 --- 
a/inc/3rdparty/site_config/standard/openwebx.org.txt +++ b/inc/3rdparty/site_config/standard/openwebx.org.txt @@ -1,4 +1,4 @@ -body: //div[@class="chapter"] -prune: no -tidy: no +body: //div[@class="chapter"] +prune: no +tidy: no test_url: http://openwebx.org/docs/springext.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/orf.at.txt b/inc/3rdparty/site_config/standard/orf.at.txt old mode 100644 new mode 100755 index ff16ca79..fb4f2181 --- a/inc/3rdparty/site_config/standard/orf.at.txt +++ b/inc/3rdparty/site_config/standard/orf.at.txt @@ -1,11 +1,11 @@ -single_page_link: //div[@id='content']//p[@class='readMore']/a - -title: //div[@class='hidden offscreen']/h2 -body: //div[@id="storyText"] -move_into(//div[@id='storyText']): //div[@class='fact'] -strip: //small[@class='credit'] -strip: //small[@class='caption'] -date: substring-after(//div[@id='storyMeta']//p[@class='date'],'Publiziert am') -strip: //p[@class='toplink'] +single_page_link: //div[@id='content']//p[@class='readMore']/a + +title: //div[@class='hidden offscreen']/h2 +body: //div[@id="storyText"] +move_into(//div[@id='storyText']): //div[@class='fact'] +strip: //small[@class='credit'] +strip: //small[@class='caption'] +date: substring-after(//div[@id='storyMeta']//p[@class='date'],'Publiziert am') +strip: //p[@class='toplink'] test_url: http://orf.at/stories/2084731/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/origo.hu.txt b/inc/3rdparty/site_config/standard/origo.hu.txt old mode 100644 new mode 100755 index 0dedac3d..50717f25 --- a/inc/3rdparty/site_config/standard/origo.hu.txt +++ b/inc/3rdparty/site_config/standard/origo.hu.txt @@ -1,18 +1,18 @@ -title: /html/body/div[5]/div[2]/h1 -body: /html/body/div[5]/div[2]/div[6]/div/div -body: //*[@id="cikk"] -strip: /html/body/div[5]/div[2]/h1 -strip: /html/body/div[5]/div[2]/div[4] -strip: //*[@id="multidoboz"] -strip: /html/body/div[5]/div[2]/div[6]/div[2] -strip: //*[@id="comments"] -strip: 
//*[@id="rating-doboz"] -strip: /html/body/div[5]/div[2]/div[10] -strip: /html/body/div[5]/div[2]/a -strip: /html/body/div[5]/div[2]/span -strip: /html/body/div[5]/div[2]/span[2] -strip: /html/body/div[5]/div[2]/span[3] -strip: /html/body/div[5]/div[2]/span[4] -strip: /html/body/div[5]/div[2]/span[5] +title: /html/body/div[5]/div[2]/h1 +body: /html/body/div[5]/div[2]/div[6]/div/div +body: //*[@id="cikk"] +strip: /html/body/div[5]/div[2]/h1 +strip: /html/body/div[5]/div[2]/div[4] +strip: //*[@id="multidoboz"] +strip: /html/body/div[5]/div[2]/div[6]/div[2] +strip: //*[@id="comments"] +strip: //*[@id="rating-doboz"] +strip: /html/body/div[5]/div[2]/div[10] +strip: /html/body/div[5]/div[2]/a +strip: /html/body/div[5]/div[2]/span +strip: /html/body/div[5]/div[2]/span[2] +strip: /html/body/div[5]/div[2]/span[3] +strip: /html/body/div[5]/div[2]/span[4] +strip: /html/body/div[5]/div[2]/span[5] strip: //*[@id="kommentszam"] test_url: http://www.origo.hu/itthon/20110119-lemondott-a-kulturaert-felelos-helyettes-allamtitkar.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/oschina.net.txt b/inc/3rdparty/site_config/standard/oschina.net.txt new file mode 100755 index 00000000..56451539 --- /dev/null +++ b/inc/3rdparty/site_config/standard/oschina.net.txt @@ -0,0 +1,3 @@ +title: //h1 +strip_id_or_class: syntaxhighlighter +test_url: http://www.oschina.net/translate/event-based-programming-what-async-has-over-sync?print \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/pakistantvdekho.com.txt b/inc/3rdparty/site_config/standard/pakistantvdekho.com.txt old mode 100644 new mode 100755 index f03c9551..7e2985e0 --- a/inc/3rdparty/site_config/standard/pakistantvdekho.com.txt +++ b/inc/3rdparty/site_config/standard/pakistantvdekho.com.txt @@ -1,11 +1,11 @@ -#body: (//div[@class='ftr-yt-vid'])[1] -body: (//blockquote[contains(@class, 'postcontent')])[1] -body: (//div[starts-with(@id, 'post_message')])[1] - -prune: no -tidy: no - 
-#replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player" -#replace_string(</iframe>): </iframe> </div> - +#body: (//div[@class='ftr-yt-vid'])[1] +body: (//blockquote[contains(@class, 'postcontent')])[1] +body: (//div[starts-with(@id, 'post_message')])[1] + +prune: no +tidy: no + +#replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player" +#replace_string(</iframe>): </iframe> </div> + test_url: http://pakistantvdekho.com/showthread.php?647741-Sitam-Gar-by-HUM-TV-Episode-07&p=659080#post659080 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/pakmedia.tv.txt b/inc/3rdparty/site_config/standard/pakmedia.tv.txt new file mode 100755 index 00000000..5d6e4c8c --- /dev/null +++ b/inc/3rdparty/site_config/standard/pakmedia.tv.txt @@ -0,0 +1,17 @@ +title: //h1[@class='entry-title'] +body: //article//div[@class='entry'] +strip_id_or_class: addthis +strip_id_or_class: gdsrcacheloader +strip_id_or_class: entry-meta +strip_id_or_class: entry-tags +strip_id_or_class: authorbox +strip: //div[@class='entry']/p[1] +strip: //img[@width='600' and @height='70'] +# related posts +strip: //h3[contains(., 'Related posts')] +strip: //div[contains(@style, 'border: 0pt none ; margin: 0pt; padding: 0pt;')] + +prune: no +tidy: no + +test_url: http://pakmedia.tv/tv-one/feed \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/pandagon.net.txt b/inc/3rdparty/site_config/standard/pandagon.net.txt old mode 100644 new mode 100755 index d0d2a5d0..35121e14 --- a/inc/3rdparty/site_config/standard/pandagon.net.txt +++ b/inc/3rdparty/site_config/standard/pandagon.net.txt @@ -1,5 +1,5 @@ -title://h2 -author://div[@class="posted"]/a -date://div[@class="date"] +title://h2 +author://div[@class="posted"]/a +date://div[@class="date"] body://div[@class="entry"] test_url: 
http://pandagon.net/index.php/site/its-okay-to-admit-that-mass-hysteria-is-real \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/pandodaily.com.txt b/inc/3rdparty/site_config/standard/pandodaily.com.txt old mode 100644 new mode 100755 index 7d1c2183..a5d427af --- a/inc/3rdparty/site_config/standard/pandodaily.com.txt +++ b/inc/3rdparty/site_config/standard/pandodaily.com.txt @@ -1,5 +1,5 @@ -tidy: no -body: //article -date: //time/@datetime +tidy: no +body: //article +date: //time/@datetime strip_id_or_class: sharedaddy test_url: http://pandodaily.com/2012/01/19/ibooks-author-is-not-going-to-hurt-publishers-it-might-even-help-them/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/panic.com.txt b/inc/3rdparty/site_config/standard/panic.com.txt old mode 100644 new mode 100755 index 0361f06d..e0e2595c --- a/inc/3rdparty/site_config/standard/panic.com.txt +++ b/inc/3rdparty/site_config/standard/panic.com.txt @@ -1,3 +1,3 @@ -body: //div[@class='entry'] +body: //div[@class='entry'] date: //h3[@class='postDate'] test_url: http://www.panic.com/blog/2011/07/panic-is-ready-for-lion/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/papodehomem.com.br.txt b/inc/3rdparty/site_config/standard/papodehomem.com.br.txt new file mode 100755 index 00000000..2c522da4 --- /dev/null +++ b/inc/3rdparty/site_config/standard/papodehomem.com.br.txt @@ -0,0 +1,6 @@ +title: //h2[@class="page_title"] +body: //div[@class="entry arquivo"] +author: //span[@class="author"] +footnotes: yes +prune: yes +test_url: http://papodehomem.com.br/um-relato-confessional-sobre-a-maioridade-penal/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/parislemon.com.txt b/inc/3rdparty/site_config/standard/parislemon.com.txt old mode 100644 new mode 100755 index a3bd4b0f..cd9bd55d --- a/inc/3rdparty/site_config/standard/parislemon.com.txt +++ b/inc/3rdparty/site_config/standard/parislemon.com.txt @@ -1,6 +1,6 @@ 
-title: //h2[@class="post-title"] -author: substring-after(//div[@class="description"],'Words by ') -date: //li[@class="date"] -strip: //h2[@class="post-title"] +title: //h2[@class="post-title"] +author: substring-after(//div[@class="description"],'Words by ') +date: //li[@class="date"] +strip: //h2[@class="post-title"] body: //div[@class="copy"] test_url: http://parislemon.com/post/13462682469/the-15-inch-air \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/parliament.uk.txt b/inc/3rdparty/site_config/standard/parliament.uk.txt old mode 100644 new mode 100755 index 478a669f..caaa2e94 --- a/inc/3rdparty/site_config/standard/parliament.uk.txt +++ b/inc/3rdparty/site_config/standard/parliament.uk.txt @@ -1,3 +1,3 @@ -title: //h1 +title: //h1 body: //div[@id='news-article'] test_url: http://www.parliament.uk/business/committees/committees-a-z/commons-select/backbench-business-committee/news/guidance-for-e-petitioners/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/pastebin.com.txt b/inc/3rdparty/site_config/standard/pastebin.com.txt old mode 100644 new mode 100755 index 89d13b2a..03b67b7e --- a/inc/3rdparty/site_config/standard/pastebin.com.txt +++ b/inc/3rdparty/site_config/standard/pastebin.com.txt @@ -1,6 +1,6 @@ -title://div[@class="paste_box_line1"]/h1 -author://div[@class="paste_box_line2"]/a -body://div[@class="text"] -date:substring-before(substring-after(//div[@class="paste_box_line2"],'|'),'|') +title://div[@class="paste_box_line1"]/h1 +author://div[@class="paste_box_line2"]/a +body://div[@class="text"] +date:substring-before(substring-after(//div[@class="paste_box_line2"],'|'),'|') dissolve://li test_url: http://pastebin.com/LAykd1es \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/pastepad.fivefilters.org.txt b/inc/3rdparty/site_config/standard/pastepad.fivefilters.org.txt old mode 100644 new mode 100755 index 40a049e0..c535158d --- 
a/inc/3rdparty/site_config/standard/pastepad.fivefilters.org.txt +++ b/inc/3rdparty/site_config/standard/pastepad.fivefilters.org.txt @@ -1,5 +1,5 @@ -title: //h1 -body: //div[@id='ff-pastepad-content'] -prune: no +title: //h1 +body: //div[@id='ff-pastepad-content'] +prune: no # todo: add test file test_url: http://pastepad.fivefilters.org/test.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/pathawks.com.txt b/inc/3rdparty/site_config/standard/pathawks.com.txt old mode 100644 new mode 100755 index 1a4cd25b..25042224 --- a/inc/3rdparty/site_config/standard/pathawks.com.txt +++ b/inc/3rdparty/site_config/standard/pathawks.com.txt @@ -1,8 +1,8 @@ -title://*[contains(@class,'post-title')] -body://div[contains(@class,'post-body')] -body://div[contains(@class,'entry-content')] -strip_comments:no -prune:no -convert_double_br_tags:yes +title://*[contains(@class,'post-title')] +body://div[contains(@class,'post-body')] +body://div[contains(@class,'entry-content')] +strip_comments:no +prune:no +convert_double_br_tags:yes tidy:yes test_url: http://www.pathawks.com/2011/06/crazyawesomecoloradotrip.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/pcast.me.txt b/inc/3rdparty/site_config/standard/pcast.me.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/pcmag.com.txt b/inc/3rdparty/site_config/standard/pcmag.com.txt old mode 100644 new mode 100755 index cebea4d7..96bdd95a --- a/inc/3rdparty/site_config/standard/pcmag.com.txt +++ b/inc/3rdparty/site_config/standard/pcmag.com.txt @@ -1,10 +1,10 @@ -prune:yes - -date://*[contains(@class,'date')] - -body://div[contains(@id,'content')] - -next_page_link://a[contains(.,'Next >')] - +prune:yes + +date://*[contains(@class,'date')] + +body://div[contains(@id,'content')] + +next_page_link://a[contains(.,'Next >')] + strip_id_or_class:sponsors test_url: http://www.pcmag.com/article2/0,2817,2401676,00.asp \ No newline at end of file diff --git 
a/inc/3rdparty/site_config/standard/pcworld.com.txt b/inc/3rdparty/site_config/standard/pcworld.com.txt old mode 100644 new mode 100755 index 30ccbb5f..7193f87e --- a/inc/3rdparty/site_config/standard/pcworld.com.txt +++ b/inc/3rdparty/site_config/standard/pcworld.com.txt @@ -1,19 +1,19 @@ -title: //div[@class='articleHead']//h1 -author: //div[@class="author-name"]/a[1] -body: //div[@class="main"] - -# remove 'From the Lab' and 'Recent posts' text -strip: //div[@class='blogLabel'] - -# remove byline and meta info -strip: //h1 -strip: //div[@class="article-meta"] -strip: //div[@class="author-info"] - -#strip tags and categories -strip: //div[@class="department"] - -#strip product cap links -strip: //div[@class="cap-main"] -strip: //div[@id="compare-lede"] +title: //div[@class='articleHead']//h1 +author: //div[@class="author-name"]/a[1] +body: //div[@class="main"] + +# remove 'From the Lab' and 'Recent posts' text +strip: //div[@class='blogLabel'] + +# remove byline and meta info +strip: //h1 +strip: //div[@class="article-meta"] +strip: //div[@class="author-info"] + +#strip tags and categories +strip: //div[@class="department"] + +#strip product cap links +strip: //div[@class="cap-main"] +strip: //div[@id="compare-lede"] test_url: http://www.pcworld.com/article/262034/are-printer-companies-gouging-us-on-laser-toner-pricing.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/penny-arcade.com.txt b/inc/3rdparty/site_config/standard/penny-arcade.com.txt old mode 100644 new mode 100755 index f97615f1..a0d5099e --- a/inc/3rdparty/site_config/standard/penny-arcade.com.txt +++ b/inc/3rdparty/site_config/standard/penny-arcade.com.txt @@ -1,23 +1,23 @@ -# 2012-01-14 carlo@... 
- fixed title, body; added author, date - -title: //div[@class="title"]/h2/a -# body: //div[@class="post"] -# author: //p[@class="iconEmail"]/a -# date: //p[@class="iconDate"] - -# 1/24/2013 yosoyju - fixed author, date, and body, added support for PA Report - -# Penny Arcade - -author: //li[@class="iconEmail"]/a -date: //li[@class="iconDate"] -body: //div[@class="body"] - -# PA Report - -author: //div[@class="meta"]/p/a -date: substring-after(//div[@class="meta"]/p, '/ ') -title: substring-after(//title, '- ') - -test_url: http://penny-arcade.com/2012/01/13/i-put-some-news-in-your-news +# 2012-01-14 carlo@... - fixed title, body; added author, date + +title: //div[@class="title"]/h2/a +# body: //div[@class="post"] +# author: //p[@class="iconEmail"]/a +# date: //p[@class="iconDate"] + +# 1/24/2013 yosoyju - fixed author, date, and body, added support for PA Report + +# Penny Arcade + +author: //li[@class="iconEmail"]/a +date: //li[@class="iconDate"] +body: //div[@class="body"] + +# PA Report + +author: //div[@class="meta"]/p/a +date: substring-after(//div[@class="meta"]/p, '/ ') +title: substring-after(//title, '- ') + +test_url: http://penny-arcade.com/2012/01/13/i-put-some-news-in-your-news test_url: http://penny-arcade.com/report/editorial-article/the-dystopian-future-of-casual-games-personalized-targeted-pricing-and-mech \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/pentaxforums.com.txt b/inc/3rdparty/site_config/standard/pentaxforums.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/philadelphiaeagles.com.txt b/inc/3rdparty/site_config/standard/philadelphiaeagles.com.txt old mode 100644 new mode 100755 index a369fd65..5ba5f772 --- a/inc/3rdparty/site_config/standard/philadelphiaeagles.com.txt +++ b/inc/3rdparty/site_config/standard/philadelphiaeagles.com.txt @@ -1,6 +1,6 @@ -prune: no -tidy: no -body: //div[@class='article-content'] -dissolve: //nobr/a +prune: no +tidy: no +body: 
//div[@class='article-content'] +dissolve: //nobr/a dissolve: //nobr test_url: http://www.philadelphiaeagles.com/news/article-1/Jacksons-Light-Shined-On-Sunday-Night/51a862de-42b4-40f1-a5a8-ba0fb8a435b7 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/philly.com.txt b/inc/3rdparty/site_config/standard/philly.com.txt old mode 100644 new mode 100755 index 41318f63..accbd60b --- a/inc/3rdparty/site_config/standard/philly.com.txt +++ b/inc/3rdparty/site_config/standard/philly.com.txt @@ -1,10 +1,10 @@ -title: //h1[@class='entry-title'] -author: //p[@class='byline']/span -body: //@id='body-content' -date: //div[@class='article_timestamp']/span - -strip: //@class=b-group -strip: //*[contains(@style, 'none')] -strip: //a[contains(@href, 'comments')] +title: //h1[@class='entry-title'] +author: //p[@class='byline']/span +body: //@id='body-content' +date: //div[@class='article_timestamp']/span + +strip: //@class=b-group +strip: //*[contains(@style, 'none')] +strip: //a[contains(@href, 'comments')] strip: //*[contains(@class, 'comment')] test_url: http://www.philly.com/philly/sports/eagles/20120127_Ohio_State_s_Posey_didn_t_waste_time_lost_to_suspension.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/photo.tutsplus.com.txt b/inc/3rdparty/site_config/standard/photo.tutsplus.com.txt old mode 100644 new mode 100755 index 4e2ccb01..7f7e3830 --- a/inc/3rdparty/site_config/standard/photo.tutsplus.com.txt +++ b/inc/3rdparty/site_config/standard/photo.tutsplus.com.txt @@ -1,6 +1,6 @@ -author: substring-before(//div[@class='post_meta'],' on') -date: substring-after(substring-before(//div[@class='post_meta'],'with'),' on') -title: //h1[class='post_title'] -body: //div[@class='article'] +author: substring-before(//div[@class='post_meta'],' on') +date: substring-after(substring-before(//div[@class='post_meta'],'with'),' on') +title: //h1[class='post_title'] +body: //div[@class='article'] test_url: 
http://photo.tutsplus.com/articles/news/a-brilliant-beginners-guide-to-architectural-photography/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/php.net.txt b/inc/3rdparty/site_config/standard/php.net.txt old mode 100644 new mode 100755 index 7c57a84d..cc643f05 --- a/inc/3rdparty/site_config/standard/php.net.txt +++ b/inc/3rdparty/site_config/standard/php.net.txt @@ -1,6 +1,6 @@ -body: //div[@id='content'] -strip_id_or_class: manualnavbar - -prune: no +body: //div[@id='content'] +strip_id_or_class: manualnavbar + +prune: no test_url: http://www.php.net/manual/en/migration5.incompatible.php \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/physicstoday.org.txt b/inc/3rdparty/site_config/standard/physicstoday.org.txt old mode 100644 new mode 100755 index a8163995..624055b7 --- a/inc/3rdparty/site_config/standard/physicstoday.org.txt +++ b/inc/3rdparty/site_config/standard/physicstoday.org.txt @@ -1,7 +1,7 @@ -title: //div[@class='abstitle']//h1 -author: //div[@class='authorList'] -body: //div[@id='fulltext_body'] - -prune: no +title: //div[@class='abstitle']//h1 +author: //div[@class='authorList'] +body: //div[@id='fulltext_body'] + +prune: no test_url: http://www.physicstoday.org/resource/1/phtoad/v64/i10/p48_s1?bypassSSO=1 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/pinterest.com.txt b/inc/3rdparty/site_config/standard/pinterest.com.txt new file mode 100755 index 00000000..01b6df41 --- /dev/null +++ b/inc/3rdparty/site_config/standard/pinterest.com.txt @@ -0,0 +1,5 @@ +title: //title +body: //div[contains(@class, 'imageContainer')] + +test_url: http://pinterest.com/pin/380906080954441188/ +test_url: http://pinterest.com/michaelsorm/architecture/rss \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/pitchfork.com.txt b/inc/3rdparty/site_config/standard/pitchfork.com.txt old mode 100644 new mode 100755 index 3decc538..eee96a9c --- 
a/inc/3rdparty/site_config/standard/pitchfork.com.txt +++ b/inc/3rdparty/site_config/standard/pitchfork.com.txt @@ -1,16 +1,16 @@ -title:concat(//h1,' - ',//h2,' - ',//h3) -author://address -date://span[@class='pub-date'] -body://div[@id='main'] -single_page_link://link[@rel='canonical'] -strip://div[@class='info'] -strip_id_or_class:'object-grid related-content' -strip_id_or_class:'object-prevnext' -strip_id_or_class:'object-header' -strip_id_or_class:'source' -strip_id_or_class:'label' -strip_id_or_class:'title' -dissolve://ul -strip://li[@class='next'] +title:concat(//h1,' - ',//h2,' - ',//h3) +author://address +date://span[@class='pub-date'] +body://div[@id='main'] +single_page_link://link[@rel='canonical'] +strip://div[@class='info'] +strip_id_or_class:'object-grid related-content' +strip_id_or_class:'object-prevnext' +strip_id_or_class:'object-header' +strip_id_or_class:'source' +strip_id_or_class:'label' +strip_id_or_class:'title' +dissolve://ul +strip://li[@class='next'] strip://li[@class='prev'] test_url: http://pitchfork.com/features/why-we-fight/8796-on-the-far-slope-of-the-uncanny-valley/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/pittnews.com.txt b/inc/3rdparty/site_config/standard/pittnews.com.txt old mode 100644 new mode 100755 index 92777073..c302526d --- a/inc/3rdparty/site_config/standard/pittnews.com.txt +++ b/inc/3rdparty/site_config/standard/pittnews.com.txt @@ -1,8 +1,8 @@ -title: //h2[@class='post-title'] -author: substring-before(substring-after(//h3[@class='post-byline'],'By:'),'/') -date: substring-before(substring-after(//p[@class='post-details'],'Posted on '),'in') -strip: //h2[@class='post-title'] -strip: //p[@class='post-details'] -strip: //h3[@class='post-byline'] +title: //h2[@class='post-title'] +author: substring-before(substring-after(//h3[@class='post-byline'],'By:'),'/') +date: substring-before(substring-after(//p[@class='post-details'],'Posted on '),'in') +strip: //h2[@class='post-title'] +strip: 
//p[@class='post-details'] +strip: //h3[@class='post-byline'] body: //div[@id='content'] test_url: http://pittnews.com/newsstory/mens-basketball-pitt-recruit-robinson-to-bring-leadership/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/pittsburgh.pirates.mlb.com.txt b/inc/3rdparty/site_config/standard/pittsburgh.pirates.mlb.com.txt old mode 100644 new mode 100755 index 824cb064..f2948528 --- a/inc/3rdparty/site_config/standard/pittsburgh.pirates.mlb.com.txt +++ b/inc/3rdparty/site_config/standard/pittsburgh.pirates.mlb.com.txt @@ -1,15 +1,15 @@ -title: substring-before(//title,'pirates.com') -date: //span[@class='timeStamp'] -author: substring-before(substring-after(//div[@class='byLine'],'By'),'/') -body: //div[@id='article'] -#strip: //div[@class='inner'] -strip: //div[@id='article_head'] -strip: //p[@class='tagLine'] -strip: //div[@id='article_related_links'] -strip: //div[@id='article_related_mlb'] -strip: //div[@id='article_related_club'] -strip: //span[@class='more'] -strip: //div[@class='article_component'] -strip: //span[@class='screen_reader'] +title: substring-before(//title,'pirates.com') +date: //span[@class='timeStamp'] +author: substring-before(substring-after(//div[@class='byLine'],'By'),'/') +body: //div[@id='article'] +#strip: //div[@class='inner'] +strip: //div[@id='article_head'] +strip: //p[@class='tagLine'] +strip: //div[@id='article_related_links'] +strip: //div[@id='article_related_mlb'] +strip: //div[@id='article_related_club'] +strip: //span[@class='more'] +strip: //div[@class='article_component'] +strip: //span[@class='screen_reader'] strip: //ul[@class='columnists_blurb'] test_url: http://pittsburgh.pirates.mlb.com/news/article.jsp?ymd=20120330&content_id=27759040&vkey=news_pit&c_id=pit \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/pittsburghlive.com.txt b/inc/3rdparty/site_config/standard/pittsburghlive.com.txt old mode 100644 new mode 100755 index b3e66166..cc7891f3 --- 
a/inc/3rdparty/site_config/standard/pittsburghlive.com.txt +++ b/inc/3rdparty/site_config/standard/pittsburghlive.com.txt @@ -1,7 +1,7 @@ -title: substring-before(//title,'- Pittsburgh Tribune') -author: substring-before(substring-after(//div[@class='byline'],'By '),',') -date: substring-after(substring-after(//div[@class='byline'],','),',') -body: //div[@id='storyBody'] -strip: //div[@class='morestories'] +title: substring-before(//title,'- Pittsburgh Tribune') +author: substring-before(substring-after(//div[@class='byline'],'By '),',') +date: substring-after(substring-after(//div[@class='byline'],','),',') +body: //div[@id='storyBody'] +strip: //div[@class='morestories'] dissolve: //p[@class='subheader'] test_url: http://www.pittsburghlive.com/x/pittsburghtrib/sports/columnists/s_785654.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/pittsburghmagazine.com.txt b/inc/3rdparty/site_config/standard/pittsburghmagazine.com.txt old mode 100644 new mode 100755 index dd715d8f..4d02f6bb --- a/inc/3rdparty/site_config/standard/pittsburghmagazine.com.txt +++ b/inc/3rdparty/site_config/standard/pittsburghmagazine.com.txt @@ -1,8 +1,8 @@ -title: //title -author: substring-after(//div[@class='by-line'],'BY') - -body: //div[@id='article-body'] - -strip: //div[@class='by-line'] +title: //title +author: substring-after(//div[@class='by-line'],'BY') + +body: //div[@id='article-body'] + +strip: //div[@class='by-line'] strip: //div[@id='article-body']/h1 test_url: http://www.pittsburghmagazine.com/Pittsburgh-Magazine/May-2012/Verde-Lights-the-Night/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/pittsburghpanthers.com.txt b/inc/3rdparty/site_config/standard/pittsburghpanthers.com.txt old mode 100644 new mode 100755 index 6113b96e..c372284a --- a/inc/3rdparty/site_config/standard/pittsburghpanthers.com.txt +++ b/inc/3rdparty/site_config/standard/pittsburghpanthers.com.txt @@ -1,4 +1,4 @@ -title: 
//span[@class='StoryHeadline'] -strip: //div[@class='fivevert'] +title: //span[@class='StoryHeadline'] +strip: //div[@class='fivevert'] body: //div[@id='Content'] test_url: http://www.pittsburghpanthers.com/sports/m-baskbl/recaps/031412aaa.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/pittscriptblog.com.txt b/inc/3rdparty/site_config/standard/pittscriptblog.com.txt old mode 100644 new mode 100755 index 3936310d..571874a4 --- a/inc/3rdparty/site_config/standard/pittscriptblog.com.txt +++ b/inc/3rdparty/site_config/standard/pittscriptblog.com.txt @@ -1,8 +1,8 @@ -title: //h1[@class='articletitle'] -author: substring-after(//span[@class='author'],'by') -date: //span[@class='created'] -body: //div[@class='article'] -strip: //div[@class='headline'] -strip: //p[@class='articleinfo'] +title: //h1[@class='articletitle'] +author: substring-after(//span[@class='author'],'by') +date: //span[@class='created'] +body: //div[@class='article'] +strip: //div[@class='headline'] +strip: //p[@class='articleinfo'] #dissolve: //p[@class='subheader'] test_url: http://www.pittscriptblog.com/2012-articles/march/2012-football-opponents-set-and-the-attendance-dilemma.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/planetvita.de.txt b/inc/3rdparty/site_config/standard/planetvita.de.txt new file mode 100755 index 00000000..bfc3342d --- /dev/null +++ b/inc/3rdparty/site_config/standard/planetvita.de.txt @@ -0,0 +1,5 @@ +title: //div[@id='frnRahmen']/div/div[@id='content']/div[2]/h2 +author: //div[@id='content']/div[1]/div/a +body: //div[@id='content']/div[2]/span +strip: //div[@id='commenthead'] +test_url: http://www.planetvita.de/news/10389-psn-store-update-vom-03-april-neue-inhalte-fuer-psvita.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/playboy.com.txt b/inc/3rdparty/site_config/standard/playboy.com.txt old mode 100644 new mode 100755 index 07b347a0..92834947 --- 
a/inc/3rdparty/site_config/standard/playboy.com.txt +++ b/inc/3rdparty/site_config/standard/playboy.com.txt @@ -1,6 +1,6 @@ -author: //article//*[@class="author"] -date: //article//*[@class="publication-date"] -body: //article -strip: //article/header +author: //article//*[@class="author"] +date: //article//*[@class="publication-date"] +body: //article +strip: //article/header strip: //article/section test_url: http://www.playboy.com/playground/view/playboy-interview-jon-hamm \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/plus.google.com.txt b/inc/3rdparty/site_config/standard/plus.google.com.txt old mode 100644 new mode 100755 index 50a5dbf5..4a7ea126 --- a/inc/3rdparty/site_config/standard/plus.google.com.txt +++ b/inc/3rdparty/site_config/standard/plus.google.com.txt @@ -1,17 +1,17 @@ -body: //div[@id='contentPane']//div[@class='vg'] -body: //div[@id='contentPane'] - -# Grab the author by finding the first profile pic, then backing up a node and getting the title of <a> tag which will be the author hopefully. Sorry can't test this due to parser errors, thanks google :( - -author: //div[@id='contentPane']//img[contains(@alt, 'profile photo')][1]/../@title - - -strip: //*[@title="People who +1'd this"]/../.. -strip: //*[contains(@class, 'a-b-f-i-Hg-Uf')] -strip: //*[@role='menu'] -strip: //img[contains(@alt, 'profile photo')] -strip: //*[@class='a-f-i-Ad'] - -tidy: no - +body: //div[@id='contentPane']//div[@class='vg'] +body: //div[@id='contentPane'] + +# Grab the author by finding the first profile pic, then backing up a node and getting the title of <a> tag which will be the author hopefully. Sorry can't test this due to parser errors, thanks google :( + +author: //div[@id='contentPane']//img[contains(@alt, 'profile photo')][1]/../@title + + +strip: //*[@title="People who +1'd this"]/../.. 
+strip: //*[contains(@class, 'a-b-f-i-Hg-Uf')] +strip: //*[@role='menu'] +strip: //img[contains(@alt, 'profile photo')] +strip: //*[@class='a-f-i-Ad'] + +tidy: no + test_url: http://plus.google.com/u/0/117840649766034848455/posts/FddaP6jeCqp \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/plzkthxbai.com.txt b/inc/3rdparty/site_config/standard/plzkthxbai.com.txt old mode 100644 new mode 100755 index bb9be0a9..ec151b42 --- a/inc/3rdparty/site_config/standard/plzkthxbai.com.txt +++ b/inc/3rdparty/site_config/standard/plzkthxbai.com.txt @@ -1,4 +1,4 @@ -title: //h2[@class='jcw-pagetitle' -date: //p[@class='postinfo'] +title: //h2[@class='jcw-pagetitle' +date: //p[@class='postinfo'] body: //div[@class='contenttext'] test_url: http://plzkthxbai.com/blog/2011/06/28/1password-and-internet-security/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/pogue.blogs.nytimes.com.txt b/inc/3rdparty/site_config/standard/pogue.blogs.nytimes.com.txt old mode 100644 new mode 100755 index 880311d3..65ddba54 --- a/inc/3rdparty/site_config/standard/pogue.blogs.nytimes.com.txt +++ b/inc/3rdparty/site_config/standard/pogue.blogs.nytimes.com.txt @@ -1,4 +1,4 @@ -body: //div[@id="content"]/div[1] - +body: //div[@id="content"]/div[1] + title: //h1[@class="entry-title"] test_url: http://pogue.blogs.nytimes.com/2011/05/12/the-future-of-skype/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/politico.com.txt b/inc/3rdparty/site_config/standard/politico.com.txt index c5302d1b..d8f5e575 100755 --- a/inc/3rdparty/site_config/standard/politico.com.txt +++ b/inc/3rdparty/site_config/standard/politico.com.txt @@ -1,17 +1,13 @@ -title://div[contains(@class, "article")]/h1 -body://div[contains(@class,"story-text")] - -# Why doesn't this work? 
next_page_link://ul[contains(@class,"pagination")]/li/a[@rel="next"] - -next_page_link://ul[contains(@class,"pagination")]/li[contains(@class, "current")]/following-sibling::node()/a -next_page_link://div[contains(@class,"pagination")]/ol/li[contains(@class, "current")]/following-sibling::node()/a -date://meta[@name="publish_date"]/@content - -strip://div[contains(@class, "breadcrumbs")] -strip://a[contains(@class, "hidden")] -strip://div[contains(@class, "story-embed")] +title://div[contains(@class, "article")]/h1 +body://div[contains(@class,"story-text")] + +# Why doesn't this work? next_page_link://ul[contains(@class,"pagination")]/li/a[@rel="next"] + +next_page_link://ul[contains(@class,"pagination")]/li[contains(@class, "current")]/following-sibling::node()/a +date://meta[@name="publish_date"]/@content + +strip://div[contains(@class, "breadcrumbs")] +strip://a[contains(@class, "hidden")] +strip://div[contains(@class, "story-embed")] strip://div[contains(@class, "story-text")]//p/a[contains(text(), "Also on POLITICO:")]/.. 
-strip://div[contains(@class, "story-interrupt")] -strip://footer[contains(@class, "author-bio")] - test_url: http://www.politico.com/news/stories/0712/78105.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/politifact.com.txt b/inc/3rdparty/site_config/standard/politifact.com.txt old mode 100644 new mode 100755 index fd247b5b..65a8fc57 --- a/inc/3rdparty/site_config/standard/politifact.com.txt +++ b/inc/3rdparty/site_config/standard/politifact.com.txt @@ -1,4 +1,4 @@ -body: //div[@id="content"] - +body: //div[@id="content"] + strip: //div[@class="pfcontentmid"]/div[position()>4]|//div[@class="pfad"] test_url: http://www.politifact.com/truth-o-meter/statements/2011/may/30/barbara-boxer/barbara-boxer-says-medicare-overhead-far-lower-pri/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/politiken.dk.txt b/inc/3rdparty/site_config/standard/politiken.dk.txt old mode 100644 new mode 100755 index 8deecbca..b13f8f87 --- a/inc/3rdparty/site_config/standard/politiken.dk.txt +++ b/inc/3rdparty/site_config/standard/politiken.dk.txt @@ -1,13 +1,13 @@ -# 21/10-2011: -# Added Author+Date -# Remove fakta-boks if found -# Deleted 'L�s ogs�...' filter -# - Change in markup caused it to strip too much. - -author://span[@class='autor-name'] -date:substring-after(//div[@class='art-created'], ' ') -title: //h1[contains(@class, 'stor-type')] -body: //div[@id='art-body'] -strip: //div[@class='art-fakta article-box'] +# 21/10-2011: +# Added Author+Date +# Remove fakta-boks if found +# Deleted 'Læs også...' filter +# - Change in markup caused it to strip too much. 
+ +author://span[@class='autor-name'] +date:substring-after(//div[@class='art-created'], ' ') +title: //h1[contains(@class, 'stor-type')] +body: //div[@id='art-body'] +strip: //div[@class='art-fakta article-box'] test_url: http://politiken.dk/kultur/boger/skonlitteratur_boger/ECE1426386/makabre-tegneserie-zombier-aeder-alt-levende/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/polygon.com.txt b/inc/3rdparty/site_config/standard/polygon.com.txt new file mode 100755 index 00000000..8fe9b1be --- /dev/null +++ b/inc/3rdparty/site_config/standard/polygon.com.txt @@ -0,0 +1,34 @@ +body: //div[@id='article-content'] +body: //article[@id='entry-top']/div[@class='float_wrapper'] +author: //header/p[@class='byline']/em/a +date: //header/p[@class='byline']/span[@class='timestamp'] + +strip: //div[@id='article-content']//header +strip: //label + +#photos on left column (delete all) +strip: //div[@class='big_photo'] + +#photos on left column (remove extras used for scroll effect) +#strip: //div[@class='big_photo']/div[./img] +#strip: //div[@class='big_photo']/img[position()>1] + +strip_id_or_class: vox-lazy-load +strip_id_or_class: social_buttons +strip_id_or_class: feature_toc + +prune: no + +find_string: <noscript> +replace_string: <div> +find_string: </noscript> +replace_string: </div> + +#find_string: <script +#replace_string: <div style="display:none" +#find_string: </script> +#replace_string: </div> + +strip: //div[@class='float_wrapper']/header +test_url: http://www.polygon.com/2013/4/5/4189028/donkey-kong-country-returns-3d-new-content +test_url: http://www.polygon.com/features/2013/8/22/4602568/30-years-xbox-360-playstation-3-wii \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/popularmechanics.com.txt b/inc/3rdparty/site_config/standard/popularmechanics.com.txt old mode 100644 new mode 100755 index 85b7656b..2582e6fb --- a/inc/3rdparty/site_config/standard/popularmechanics.com.txt +++ 
b/inc/3rdparty/site_config/standard/popularmechanics.com.txt @@ -1,8 +1,8 @@ -next_page_link: //div[@id='longPagination']/a[@class='next'] - -title: //div[@id='contentHeader']//h1 - -body: //div[@id='articleBody'] -# this is so sad +next_page_link: //div[@id='longPagination']/a[@class='next'] + +title: //div[@id='contentHeader']//h1 + +body: //div[@id='articleBody'] +# this is so sad body: //div[@id='intelliTXT'] test_url: http://www.popularmechanics.com/technology/aviation/crashes/what-really-happened-aboard-air-france-447-6611877 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/portertech.ca.txt b/inc/3rdparty/site_config/standard/portertech.ca.txt new file mode 100755 index 00000000..2897cb57 --- /dev/null +++ b/inc/3rdparty/site_config/standard/portertech.ca.txt @@ -0,0 +1,3 @@ +author: //*[(@class = "author")] +date: //*[(@class = "date")] +test_url: http://portertech.ca/2012/12/10/iac-morning-market/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/positioningmag.com.txt b/inc/3rdparty/site_config/standard/positioningmag.com.txt old mode 100644 new mode 100755 index 21cd833c..f8eeb0a3 --- a/inc/3rdparty/site_config/standard/positioningmag.com.txt +++ b/inc/3rdparty/site_config/standard/positioningmag.com.txt @@ -1,19 +1,19 @@ -title: //div[@id="newsDetailTitle"] -author: //span[@id="showAuthor"] -date: //span[@id="showRefDate"] - -strip: //div[@id="breadcrumbs"] -strip: //span[@id="PageTitle"] -strip: //div[@id="newsDetailAuthorPublish"] - -strip: //div[@class="leadPix"] - -strip: //span[@id="ctl00_PageTitle"] -strip: //div[@id="newsDetailTitle"] -convert_double_br_tags:yes - -strip: //div[@id="newsDetailCredential"] -strip: //div[@id="sidebar2"] -strip: //div[@id="footer"] +title: //div[@id="newsDetailTitle"] +author: //span[@id="showAuthor"] +date: //span[@id="showRefDate"] + +strip: //div[@id="breadcrumbs"] +strip: //span[@id="PageTitle"] +strip: //div[@id="newsDetailAuthorPublish"] + +strip: 
//div[@class="leadPix"] + +strip: //span[@id="ctl00_PageTitle"] +strip: //div[@id="newsDetailTitle"] +convert_double_br_tags:yes + +strip: //div[@id="newsDetailCredential"] +strip: //div[@id="sidebar2"] +strip: //div[@id="footer"] test_url: http://www.positioningmag.com/magazine/details.aspx?id=41083 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/post-gazette.com.txt b/inc/3rdparty/site_config/standard/post-gazette.com.txt old mode 100644 new mode 100755 index 1ea945a0..baa9d69d --- a/inc/3rdparty/site_config/standard/post-gazette.com.txt +++ b/inc/3rdparty/site_config/standard/post-gazette.com.txt @@ -1,26 +1,26 @@ -title: //div[@class='story_headline'] -author: substring-before(substring-after(//div[@class='story_byline'],'By'),'/') -date: //div[@class='story_lastupdate'] -body: //div[@id='story'] -strip: //div[@class='story_byline'] -strip: //div[@class='story_lastupdate'] -strip: //div[@class='story_headline'] -strip: //div[@id='abuse'] -strip: //h2 -strip: //div[@class='pagenumbers_wrap'] -strip: //ul[@class='pagenumbers'] -strip: //div[starts-with(., 'To report inappropriate comments')] - -strip_id_or_class: story_share -strip_id_or_class: OUTBRAIN -strip_id_or_class: story_box_right -strip: //div[a[@href='http://www.post-gazette.com/pg/12062/1213990-42.stm']] -strip: //ul[@id='pikame']/li[position()>1] - -prune: no -tidy: no - -single_page_link: //a[contains(@href, '?p=0')] - -test_url: http://www.post-gazette.com/stories/sports/penguins/pens-crosby-expects-to-return-thursday-226648/ +title: //div[@class='story_headline'] +author: substring-before(substring-after(//div[@class='story_byline'],'By'),'/') +date: //div[@class='story_lastupdate'] +body: //div[@id='story'] +strip: //div[@class='story_byline'] +strip: //div[@class='story_lastupdate'] +strip: //div[@class='story_headline'] +strip: //div[@id='abuse'] +strip: //h2 +strip: //div[@class='pagenumbers_wrap'] +strip: //ul[@class='pagenumbers'] +strip: //div[starts-with(., 'To 
report inappropriate comments')] + +strip_id_or_class: story_share +strip_id_or_class: OUTBRAIN +strip_id_or_class: story_box_right +strip: //div[a[@href='http://www.post-gazette.com/pg/12062/1213990-42.stm']] +strip: //ul[@id='pikame']/li[position()>1] + +prune: no +tidy: no + +single_page_link: //a[contains(@href, '?p=0')] + +test_url: http://www.post-gazette.com/stories/sports/penguins/pens-crosby-expects-to-return-thursday-226648/ test_url: http://www.post-gazette.com/stories/sports/pirates/pirates-fork-over-changes-for-fans-at-pnc-park-629789 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/posta.com.tr.txt b/inc/3rdparty/site_config/standard/posta.com.tr.txt old mode 100644 new mode 100755 index 86cb5d0b..0f01149c --- a/inc/3rdparty/site_config/standard/posta.com.tr.txt +++ b/inc/3rdparty/site_config/standard/posta.com.tr.txt @@ -1,15 +1,15 @@ -title: //div[@id='divAdnetKeyword']/h1 -body: //div[@id='_middle_content_bottom'] - -wrap_in(fieldset)://div[@id='_middle_content_bottom_child2']/img - -strip: //div[@id='_middle_content_bottom_child1'] -strip: //div[@id='_middle_content_bottom_child4'] -strip: //div[@class='cls'] -strip: //div[@class='iphoneBox'] -strip: //ul[@class='ilgiliHaber'] -strip: //div[@class='yorumlar'] -strip: //div[@class='kategoriler'] -strip: //div[@class='textSize'] +title: //div[@id='divAdnetKeyword']/h1 +body: //div[@id='_middle_content_bottom'] + +wrap_in(fieldset)://div[@id='_middle_content_bottom_child2']/img + +strip: //div[@id='_middle_content_bottom_child1'] +strip: //div[@id='_middle_content_bottom_child4'] +strip: //div[@class='cls'] +strip: //div[@class='iphoneBox'] +strip: //ul[@class='ilgiliHaber'] +strip: //div[@class='yorumlar'] +strip: //div[@class='kategoriler'] +strip: //div[@class='textSize'] strip: //span[@class='tarih'] test_url: http://www.posta.com.tr/yasam/teknoloji/HaberDetay/Fedailer_Istanbul_da.htm?ArticleID=101044 \ No newline at end of file diff --git 
a/inc/3rdparty/site_config/standard/prb.org.txt b/inc/3rdparty/site_config/standard/prb.org.txt old mode 100644 new mode 100755 index 7f7a5031..3952ea99 --- a/inc/3rdparty/site_config/standard/prb.org.txt +++ b/inc/3rdparty/site_config/standard/prb.org.txt @@ -1,8 +1,8 @@ -title: //h1 -date: /html/head/meta[@name="date"]/@content -body: //div[@id="featuredlinksbox"] -strip: //div[@class="relatedbox"] -strip: //h1 -strip: //br +title: //h1 +date: /html/head/meta[@name="date"]/@content +body: //div[@id="featuredlinksbox"] +strip: //div[@class="relatedbox"] +strip: //h1 +strip: //br strip_image_src: "/images" test_url: http://www.prb.org/Journalists/Webcasts/2011/military-families.aspx \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/prog21.dadgum.com.txt b/inc/3rdparty/site_config/standard/prog21.dadgum.com.txt old mode 100644 new mode 100755 index 906c27a0..9a49557e --- a/inc/3rdparty/site_config/standard/prog21.dadgum.com.txt +++ b/inc/3rdparty/site_config/standard/prog21.dadgum.com.txt @@ -1,9 +1,9 @@ -title: //h1 -body: //div[@id='left'] -strip: //h1 -convert_double_br_tags: yes -strip_id_or_class: entry-footer -strip: //h1[. = 'Previously']/following::* -author: string('James Hague') +title: //h1 +body: //div[@id='left'] +strip: //h1 +convert_double_br_tags: yes +strip_id_or_class: entry-footer +strip: //h1[. 
= 'Previously']/following::* +author: string('James Hague') date: //div[@class = 'entry-footer']/text() test_url: http://prog21.dadgum.com/105.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/prolost.com.txt b/inc/3rdparty/site_config/standard/prolost.com.txt old mode 100644 new mode 100755 index cef811d4..82ebf6bb --- a/inc/3rdparty/site_config/standard/prolost.com.txt +++ b/inc/3rdparty/site_config/standard/prolost.com.txt @@ -1,4 +1,4 @@ -body: //div[@class='body'] -title: //h2[@class='title'] +body: //div[@class='body'] +title: //h2[@class='title'] date: //span[@class='posted-on'] test_url: http://prolost.com/blog/2011/10/13/real-men-comp-with-film.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/propublica.org.txt b/inc/3rdparty/site_config/standard/propublica.org.txt old mode 100644 new mode 100755 index 11e63bd0..d141ac90 --- a/inc/3rdparty/site_config/standard/propublica.org.txt +++ b/inc/3rdparty/site_config/standard/propublica.org.txt @@ -1,11 +1,11 @@ -title: //h1[@class="article-title"] -author: //meta[@name="author"]/@content -body: //div[@class="article-full"] -strip_id_or_class: sidebar_inject -strip_id_or_class: callout -strip_id_or_class: content-inset -strip_id_or_class: byline-block -strip_id_or_class: photo-caption -strip_id_or_class: foot-tools +title: //h1[@class="article-title"] +author: //meta[@name="author"]/@content +body: //div[@class="article-full"] +strip_id_or_class: sidebar_inject +strip_id_or_class: callout +strip_id_or_class: content-inset +strip_id_or_class: byline-block +strip_id_or_class: photo-caption +strip_id_or_class: foot-tools test_url: http://www.propublica.org/article/pardon-applicants-benefit-from-friends-in-high-places \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/prosa.dk.txt b/inc/3rdparty/site_config/standard/prosa.dk.txt old mode 100644 new mode 100755 index dedd33d3..ba9ce8b8 --- 
a/inc/3rdparty/site_config/standard/prosa.dk.txt +++ b/inc/3rdparty/site_config/standard/prosa.dk.txt @@ -1,4 +1,4 @@ -author: //p[@class='name'] -date: substring-before(//p[@class='date'], ' | ') +author: //p[@class='name'] +date: substring-before(//p[@class='date'], ' | ') body: //div[@class='news_single_item'] test_url: http://www.prosa.dk/aktuelt/nyhed/artikel/internetaktivisten-uden-maske/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/prospectmagazine.co.uk.txt b/inc/3rdparty/site_config/standard/prospectmagazine.co.uk.txt old mode 100644 new mode 100755 index 19059c4a..739d1b9e --- a/inc/3rdparty/site_config/standard/prospectmagazine.co.uk.txt +++ b/inc/3rdparty/site_config/standard/prospectmagazine.co.uk.txt @@ -1,26 +1,26 @@ -#basics -author: (//div[contains(@class,'author')])[1] -date: substring-before(//a[@class='issue'], '—') -#body://div[@class = 'entry'] -# use this until move_into support is ready -body: //div[@class = 'entry' or @class='standfirst' or @class='lead_image'] - -#moves header image and tagline into body -move_into(//div[@class='entry']/div)://div[@class = 'lead_image'] -move_into(//div[@class='entry']/div)://div[@class = 'standfirst'] - - -# moves author info to end of text -move_into(//p[strong[string(.) = 'Follow Prospect on Twitter']])://div[@id='sidebar_content']/p/em - -prune: no - -# strips social links -strip_id_or_class:login-status -strip_id_or_class:shareinpost -strip_id_or_class:content_subscribe -strip_id_or_class:postinfo -strip_id_or_class:postutils -strip_id_or_class:comments -strip://strong[string(.) 
= 'Follow Prospect on Twitter'] +#basics +author: (//div[contains(@class,'author')])[1] +date: substring-before(//a[@class='issue'], '—') +#body://div[@class = 'entry'] +# use this until move_into support is ready +body: //div[@class = 'entry' or @class='standfirst' or @class='lead_image'] + +#moves header image and tagline into body +move_into(//div[@class='entry']/div)://div[@class = 'lead_image'] +move_into(//div[@class='entry']/div)://div[@class = 'standfirst'] + + +# moves author info to end of text +move_into(//p[strong[string(.) = 'Follow Prospect on Twitter']])://div[@id='sidebar_content']/p/em + +prune: no + +# strips social links +strip_id_or_class:login-status +strip_id_or_class:shareinpost +strip_id_or_class:content_subscribe +strip_id_or_class:postinfo +strip_id_or_class:postutils +strip_id_or_class:comments +strip://strong[string(.) = 'Follow Prospect on Twitter'] test_url: http://www.prospectmagazine.co.uk/2011/07/postmodernism-is-dead-va-exhibition-age-of-authenticism/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/protothema.gr.txt b/inc/3rdparty/site_config/standard/protothema.gr.txt new file mode 100755 index 00000000..fae261b0 --- /dev/null +++ b/inc/3rdparty/site_config/standard/protothema.gr.txt @@ -0,0 +1,6 @@ +body: //a[contains(@rel, 'mainphotos')] | //div[contains(@class, 'article-content')] + +prune: no + +test_url: http://www.protothema.gr//politics/article/326464/diamadopoulou-floridis-kaminis-kai-boutaris-se-ekdilosi-ton-europaion-fileleutheron/ +test_url: http://www.protothema.gr/rss/news/politics/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/psychologytoday.com.txt b/inc/3rdparty/site_config/standard/psychologytoday.com.txt old mode 100644 new mode 100755 index 3da3cea3..1bb63c29 --- a/inc/3rdparty/site_config/standard/psychologytoday.com.txt +++ b/inc/3rdparty/site_config/standard/psychologytoday.com.txt @@ -1,9 +1,9 @@ -title: //div[@class="page-title"]/h1 -author: 
//a[@title="View Bio"] -date: substring-before(substring-after(//span[@class="submitted"], 'Published on '), ' by') -strip://div[@class="page-title"]/h1 -strip://div[@class="article-abstract"] -strip://div[@class="article-meta"] -strip://div[@id="rightColumn"] +title: //div[@class="page-title"]/h1 +author: //a[@title="View Bio"] +date: substring-before(substring-after(//span[@class="submitted"], 'Published on '), ' by') +strip://div[@class="page-title"]/h1 +strip://div[@class="article-abstract"] +strip://div[@class="article-meta"] +strip://div[@id="rightColumn"] strip://div[@id="inline-content-bottom-left"] test_url: http://www.psychologytoday.com/blog/how-happiness/201205/my-quibble-facebook \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/publications.parliament.uk.txt b/inc/3rdparty/site_config/standard/publications.parliament.uk.txt old mode 100644 new mode 100755 index fa099473..8f32d7a4 --- a/inc/3rdparty/site_config/standard/publications.parliament.uk.txt +++ b/inc/3rdparty/site_config/standard/publications.parliament.uk.txt @@ -1,4 +1,4 @@ -author: //meta[@name="Author"] -date: //meta[@name="Date"] +author: //meta[@name="Author"] +date: //meta[@name="Date"] strip: //h5 test_url: http://www.publications.parliament.uk/pa/ld201011/ldhansrd/text/111109-0003.htm \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/publico.pt.txt b/inc/3rdparty/site_config/standard/publico.pt.txt new file mode 100755 index 00000000..bb6a05e1 --- /dev/null +++ b/inc/3rdparty/site_config/standard/publico.pt.txt @@ -0,0 +1,12 @@ +title: //h1[@class="entry-title"] +author: //span[@class="author"] +body: //article[@itemtype="http://schema.org/Article"] +date: //time[@itemprop="dateCreated"] + +strip: //header[@class="entry-header single-header"] +strip: //aside[@class="entry-assets"] +strip: //div[@class="entry-options entry-options-above group"] +strip: //div[@class="entry-options entry-options-below group"] + +convert_double_br_tags: 
yes +test_url: http://www.publico.pt/politica/noticia/passos-diz-que-se-limitacao-de-mandatos-fosse-para-todos-os-concelhos-estaria-claro-na-lei-1577691 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/purpleplanetmedia.com.txt b/inc/3rdparty/site_config/standard/purpleplanetmedia.com.txt old mode 100644 new mode 100755 index 126f9e27..0f1392a4 --- a/inc/3rdparty/site_config/standard/purpleplanetmedia.com.txt +++ b/inc/3rdparty/site_config/standard/purpleplanetmedia.com.txt @@ -1,4 +1,4 @@ -title: //div[@class='title'] -body: //div[@class='body'] +title: //div[@class='title'] +body: //div[@class='body'] next_page_link: //div[@class='source']/text()[contains(., 'page')]/following-sibling::a test_url: http://purpleplanetmedia.com/eye/inte/ngaiman.php \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/qctimes.com.txt b/inc/3rdparty/site_config/standard/qctimes.com.txt new file mode 100755 index 00000000..3c3edfeb --- /dev/null +++ b/inc/3rdparty/site_config/standard/qctimes.com.txt @@ -0,0 +1,5 @@ +# this site seems to work OK in the web view, but only occasionally in the instapaper app itself. 
+ +body: //div[@class='entry-content'] +author: //span[@class='byline'] +test_url: http://qctimes.com/news/local/woman-faces-perjury-charges-in-meth-case/article_83f4c470-956a-11e2-a921-001a4bcf887a.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/quantumdiaries.org.txt b/inc/3rdparty/site_config/standard/quantumdiaries.org.txt old mode 100644 new mode 100755 index a366c1b3..c17fb312 --- a/inc/3rdparty/site_config/standard/quantumdiaries.org.txt +++ b/inc/3rdparty/site_config/standard/quantumdiaries.org.txt @@ -1,14 +1,14 @@ -title: //div[contains(@class, "hentry")]/h3 - -author: //div[contains(@class, "hentry")]/h2[contains(@class, "author_bio")] - -date: substring-before(substring-after(normalize-space(//p[contains(@class, "postmetadata")]/small), "was posted on "), " and is filed under") - -body: //div[contains(@class, "entry")] - -strip_id_or_class: addtoany_share_save_container -strip_id_or_class: postmetadata -strip_id_or_class: author_bio -strip_id_or_class: author_bio_2 +title: //div[contains(@class, "hentry")]/h3 + +author: //div[contains(@class, "hentry")]/h2[contains(@class, "author_bio")] + +date: substring-before(substring-after(normalize-space(//p[contains(@class, "postmetadata")]/small), "was posted on "), " and is filed under") + +body: //div[contains(@class, "entry")] + +strip_id_or_class: addtoany_share_save_container +strip_id_or_class: postmetadata +strip_id_or_class: author_bio +strip_id_or_class: author_bio_2 strip: //div[contains(@class, "hentry")]/h3 test_url: http://www.quantumdiaries.org/2011/10/25/piling-up/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/queerty.com.txt b/inc/3rdparty/site_config/standard/queerty.com.txt old mode 100644 new mode 100755 index 655f8b80..fc7ab37f --- a/inc/3rdparty/site_config/standard/queerty.com.txt +++ b/inc/3rdparty/site_config/standard/queerty.com.txt @@ -1,3 +1,3 @@ -body: //div[@class='copy'] +body: //div[@class='copy'] title: 
//h1[@class='hed'] test_url: http://www.queerty.com/rawhide-radicals-meet-five-heroes-from-the-leather-community-20120302/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/quepasa.cl.txt b/inc/3rdparty/site_config/standard/quepasa.cl.txt old mode 100644 new mode 100755 index fae4e6a3..fb09a8f3 --- a/inc/3rdparty/site_config/standard/quepasa.cl.txt +++ b/inc/3rdparty/site_config/standard/quepasa.cl.txt @@ -1,6 +1,6 @@ -title: //h1 - -body: //div[@class="cuerpoArticulo"] - +title: //h1 + +body: //div[@class="cuerpoArticulo"] + test_url: http://www.quepasa.cl/magazine/articulo/print.html?id=5299 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/quora.com.txt b/inc/3rdparty/site_config/standard/quora.com.txt old mode 100644 new mode 100755 index 3d34f2f8..732d12d7 --- a/inc/3rdparty/site_config/standard/quora.com.txt +++ b/inc/3rdparty/site_config/standard/quora.com.txt @@ -1,17 +1,17 @@ -tidy: no -prune: no -body: //div[contains(@class, 'main_col')] -title: //h1 - -strip_id_or_class: hidden -strip_id_or_class: item_action_bar -strip_id_or_class: answer_voters -strip_id_or_class: question_topics -strip_id_or_class: answer_header_text -strip_id_or_class: editor_link -strip_id_or_class: view_tag -strip_id_or_class: include_details -strip_id_or_class: sig_edit -strip_id_or_class: profile_photo_img +tidy: no +prune: no +body: //div[contains(@class, 'main_col')] +title: //h1 + +strip_id_or_class: hidden +strip_id_or_class: item_action_bar +strip_id_or_class: answer_voters +strip_id_or_class: question_topics +strip_id_or_class: answer_header_text +strip_id_or_class: editor_link +strip_id_or_class: view_tag +strip_id_or_class: include_details +strip_id_or_class: sig_edit +strip_id_or_class: profile_photo_img test_url: http://www.quora.com/What-everyday-habit-do-you-wish-you-had-developed-earlier-in-life \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/racjonalista.pl.txt 
b/inc/3rdparty/site_config/standard/racjonalista.pl.txt new file mode 100755 index 00000000..19c719d4 --- /dev/null +++ b/inc/3rdparty/site_config/standard/racjonalista.pl.txt @@ -0,0 +1,5 @@ +author: /html/body/center/b +date: /html/body/table/tr[2]/td/i +single_page_link: //*[@id='oTxt']/table[3]/tr[2]/td/a[1] + +test_url: http://www.racjonalista.pl/kk.php/s,7214/q,Geneza.szubrawstwa \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/radar.oreilly.com.txt b/inc/3rdparty/site_config/standard/radar.oreilly.com.txt old mode 100644 new mode 100755 index 99ab4bb1..fa66b815 --- a/inc/3rdparty/site_config/standard/radar.oreilly.com.txt +++ b/inc/3rdparty/site_config/standard/radar.oreilly.com.txt @@ -1,3 +1,3 @@ -date://span[@class='date'] +date://span[@class='date'] body://div[@class='entry-body'] test_url: http://radar.oreilly.com/2012/01/genome-cloud-digital-humanities-hadoop-world-strata.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/radionz.co.nz.txt b/inc/3rdparty/site_config/standard/radionz.co.nz.txt old mode 100644 new mode 100755 index e2617dc5..2496ddab --- a/inc/3rdparty/site_config/standard/radionz.co.nz.txt +++ b/inc/3rdparty/site_config/standard/radionz.co.nz.txt @@ -1,3 +1,3 @@ -body: //div[@class='body'] +body: //div[@class='body'] title: //div[@class='newsstory']/h2 test_url: http://www.radionz.co.nz/news/stories/2010/07/18/12481029a86d \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/randsinrepose.com.txt b/inc/3rdparty/site_config/standard/randsinrepose.com.txt old mode 100644 new mode 100755 index f0c91c51..6970a744 --- a/inc/3rdparty/site_config/standard/randsinrepose.com.txt +++ b/inc/3rdparty/site_config/standard/randsinrepose.com.txt @@ -1,11 +1,11 @@ -title: //div[@id='center-col']/h4 -author: substring-before(//title,'In') -date: substring-after(//div[@class='commenttext']/span,'#') -body: //div[@id='center-col'] -strip: //div[@id='center-col']/h4 -strip: 
//div[@class='graytext'] - -# Anthony Perez-Sanz 2012.3.14 -# Removed long gif from the end -strip: //img[@src='http://www.randsinrepose.com/spreader.gif'] +title: //div[@id='center-col']/h4 +author: substring-before(//title,'In') +date: substring-after(//div[@class='commenttext']/span,'#') +body: //div[@id='center-col'] +strip: //div[@id='center-col']/h4 +strip: //div[@class='graytext'] + +# Anthony Perez-Sanz 2012.3.14 +# Removed long gif from the end +strip: //img[@src='http://www.randsinrepose.com/spreader.gif'] test_url: http://www.randsinrepose.com/archives/2012/03/13/hacking_is_important.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/readability.com.txt b/inc/3rdparty/site_config/standard/readability.com.txt old mode 100644 new mode 100755 index 80337291..2d5aba76 --- a/inc/3rdparty/site_config/standard/readability.com.txt +++ b/inc/3rdparty/site_config/standard/readability.com.txt @@ -1,3 +1,3 @@ -single_page_link: //link[@rel='canonical']/@href +single_page_link: //link[@rel='canonical']/@href test_url: http://www.readability.com/read?url=http://feeds.gawker.com/~r/lifehacker/full/~3/jaxAjSay_Rw/add-a-rain-gutter-to-a-picnic-table-for-a-built+in-drink-cooler \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/readwriteweb.com.txt b/inc/3rdparty/site_config/standard/readwriteweb.com.txt old mode 100644 new mode 100755 index ff799aa0..e2aabda9 --- a/inc/3rdparty/site_config/standard/readwriteweb.com.txt +++ b/inc/3rdparty/site_config/standard/readwriteweb.com.txt @@ -1,8 +1,8 @@ -title: //h1[@class="titlelink"] -date: //span[@class="timestamp"]/@data-published -body: //div[@class="asset-content"] -strip_id_or_class: related-entries -strip_id_or_class: like-and-retweet - -author: //div[@id="submeta"]/a[1] +title: //h1[@class="titlelink"] +date: //span[@class="timestamp"]/@data-published +body: //div[@class="asset-content"] +strip_id_or_class: related-entries +strip_id_or_class: like-and-retweet + 
+author: //div[@id="submeta"]/a[1] test_url: http://www.readwriteweb.com/archives/why_facebook_terrifies_google.php \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/real.gr.txt b/inc/3rdparty/site_config/standard/real.gr.txt old mode 100644 new mode 100755 index fe5ab672..1a33610d --- a/inc/3rdparty/site_config/standard/real.gr.txt +++ b/inc/3rdparty/site_config/standard/real.gr.txt @@ -1,3 +1,3 @@ -body: //div[@id='_ctl12__ctl0_Article'] -prune: no +body: //div[@id='_ctl12__ctl0_Article'] +prune: no autodetect_on_failure: no \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/recipe.com.txt b/inc/3rdparty/site_config/standard/recipe.com.txt old mode 100644 new mode 100755 index 8c8f0e0c..a01aaef4 --- a/inc/3rdparty/site_config/standard/recipe.com.txt +++ b/inc/3rdparty/site_config/standard/recipe.com.txt @@ -1,10 +1,10 @@ -body: //div[@class='recipedetailsleft' or @id='recipePrepAndServe' or @id='recipeingredients'] - -strip_id_or_class: location -strip_id_or_class: savings -strip_id_or_class: recipeDetailDescButton - -prune: no -tidy: no - +body: //div[@class='recipedetailsleft' or @id='recipePrepAndServe' or @id='recipeingredients'] + +strip_id_or_class: location +strip_id_or_class: savings +strip_id_or_class: recipeDetailDescButton + +prune: no +tidy: no + test_url: http://www.recipe.com/avocado-basil-pasta/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/red-hot-girls.com.txt b/inc/3rdparty/site_config/standard/red-hot-girls.com.txt old mode 100644 new mode 100755 index 3ae959b1..0403ee86 --- a/inc/3rdparty/site_config/standard/red-hot-girls.com.txt +++ b/inc/3rdparty/site_config/standard/red-hot-girls.com.txt @@ -1,5 +1,5 @@ -body: //div[@class='short-text' or starts-with(@id, 'news-id-')] -prune: no -tidy: no - +body: //div[@class='short-text' or starts-with(@id, 'news-id-')] +prune: no +tidy: no + test_url: http://red-hot-girls.com/2011/06/10/the_red_hot_natalia_maria_53_pics.html \ 
No newline at end of file diff --git a/inc/3rdparty/site_config/standard/reddit.com.txt b/inc/3rdparty/site_config/standard/reddit.com.txt old mode 100644 new mode 100755 index 58ca9ece..8871f564 --- a/inc/3rdparty/site_config/standard/reddit.com.txt +++ b/inc/3rdparty/site_config/standard/reddit.com.txt @@ -1,16 +1,20 @@ -# This setup grabs the text from a Reddit self post. It ignores all comments etc. - -title: //p[@class="title"]/a/text() - -author: //p[@class="tagline"]/a - -# this doesn't work for some reason...? -date: //p[@class="tagline"]//@datetime - -body: //div[@class="expando"]//div[@class="usertext-body"] - -strip_id_or_class: tagline -strip_id_or_class: unvotable-message -strip_id_or_class: buttons - -test_url: http://www.reddit.com/r/truegaming/comments/wfe7r/i_wrote_about_the_problems_i_honestly_feel_that/ \ No newline at end of file +# This setup grabs the text from a Reddit self post. It ignores all comments etc. + +title: //p[@class="title"]/a/text() + +author: //p[@class="tagline"]/a + +# this doesn't work for some reason...? 
+date: //p[@class="tagline"]//@datetime + +body: //div[@class="expando"]//div[@class="usertext-body"] + +strip_id_or_class: tagline +strip_id_or_class: unvotable-message +strip_id_or_class: buttons + +# follow the posted link (unless it's a self post - relative URL, no http://) +single_page_link: //p[@class="title"]/a[contains(@href, 'http://')] + +test_url: http://www.reddit.com/r/truegaming/comments/wfe7r/i_wrote_about_the_problems_i_honestly_feel_that/ +test_url: http://www.reddit.com/r/worldnews/comments/1as37r/twelve_north_korean_soldiers_attempting_to_defect/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/redmondpie.com.txt b/inc/3rdparty/site_config/standard/redmondpie.com.txt old mode 100644 new mode 100755 index 12a96187..66cc1707 --- a/inc/3rdparty/site_config/standard/redmondpie.com.txt +++ b/inc/3rdparty/site_config/standard/redmondpie.com.txt @@ -1,13 +1,13 @@ -title: //div[@class='posthead']//h2 -body: //div[contains(@class, 'postcontent') or @class='posthead'] -author: //div[@class='posthead']//a[@rel='author'] - -strip: //div[@class='posthead']//h2 -replace_string(>Advertisements</div>): ></div> -replace_string(<p>You can follow us on): <p style="display:none;"> -strip_id_or_class: likeThisPost - -prune: no -tidy: no - +title: //div[@class='posthead']//h2 +body: //div[contains(@class, 'postcontent') or @class='posthead'] +author: //div[@class='posthead']//a[@rel='author'] + +strip: //div[@class='posthead']//h2 +replace_string(>Advertisements</div>): ></div> +replace_string(<p>You can follow us on): <p style="display:none;"> +strip_id_or_class: likeThisPost + +prune: no +tidy: no + test_url: http://www.redmondpie.com/how-to-play-music-directly-from-home-screen-folders-on-iphone/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/redtape.msnbc.msn.com.txt b/inc/3rdparty/site_config/standard/redtape.msnbc.msn.com.txt old mode 100644 new mode 100755 index 4f195a06..8541a0d4 --- 
a/inc/3rdparty/site_config/standard/redtape.msnbc.msn.com.txt +++ b/inc/3rdparty/site_config/standard/redtape.msnbc.msn.com.txt @@ -1,20 +1,20 @@ -# Think there might be something up with your parser that it strips out 'print' from the title :) - -title: //meta[@name='title']/@content -author: //meta[@name='author']/@content -date: //meta[@name='date']/@content - -body: //div[@class='articleText'] - -strip: //div[contains(@class, 'day')] -strip: //div[contains(@class, 'month')] -strip: //div[contains(@class, 'year')] -strip: //div[contains(@class, 'time')] -strip: //h1[@class='gl_headline'] -strip: //div[@class='byline'] -strip: //div[@id='left_ear'] -strip: //div[@id='right_ear'] -strip: //div[contains(@class, 'PopularPosts')] -strip ://div[@class='discuss_page_break'] +# Think there might be something up with your parser that it strips out 'print' from the title :) + +title: //meta[@name='title']/@content +author: //meta[@name='author']/@content +date: //meta[@name='date']/@content + +body: //div[@class='articleText'] + +strip: //div[contains(@class, 'day')] +strip: //div[contains(@class, 'month')] +strip: //div[contains(@class, 'year')] +strip: //div[contains(@class, 'time')] +strip: //h1[@class='gl_headline'] +strip: //div[@class='byline'] +strip: //div[@id='left_ear'] +strip: //div[@id='right_ear'] +strip: //div[contains(@class, 'PopularPosts')] +strip ://div[@class='discuss_page_break'] strip ://div[contains(@class, 'p-content_TagList')] test_url: http://redtape.msnbc.msn.com/_news/2011/09/28/8020661-sprint-raises-fee-but-wont-free-users-from-two-year-contracts?preview=true \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/reflets.info.txt b/inc/3rdparty/site_config/standard/reflets.info.txt old mode 100644 new mode 100755 index 4a9fab67..98a2bbfc --- a/inc/3rdparty/site_config/standard/reflets.info.txt +++ b/inc/3rdparty/site_config/standard/reflets.info.txt @@ -1,5 +1,5 @@ -body://div[@class='storycontent'] 
-date://div[@class='date'] -strip://li[@class='sharing_label'] +body://div[@class='storycontent'] +date://div[@class='date'] +strip://li[@class='sharing_label'] strip://a[@class='FlattrButton'] test_url: http://reflets.info/orange-nokia-siemens-deep-packet-inspection/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/renenekuda.cz.txt b/inc/3rdparty/site_config/standard/renenekuda.cz.txt old mode 100644 new mode 100755 index 0b3dee1d..a5361fd0 --- a/inc/3rdparty/site_config/standard/renenekuda.cz.txt +++ b/inc/3rdparty/site_config/standard/renenekuda.cz.txt @@ -1,3 +1,3 @@ -title: //*[@class='entry-title'] +title: //*[@class='entry-title'] body: //div[@class='entry-content'] test_url: http://www.renenekuda.cz/recept-na-produktivitu/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/resume.se.txt b/inc/3rdparty/site_config/standard/resume.se.txt new file mode 100755 index 00000000..17122a9b --- /dev/null +++ b/inc/3rdparty/site_config/standard/resume.se.txt @@ -0,0 +1,9 @@ +date: //meta[@name='bi3dPubDate']/@content +body: //div[contains(@class, 'articleBody')] + +prune: no + +test_url: http://www.resume.se/nyheter/media/2013/09/18/kvallspress-och-tv-slass-om-playtittarna-men-youtube-ohotat-storst/ +test_url: http://www.resume.se/nyheter/media/2013/09/18/cecilia-blankens-lamnar-mama-for-konkurrent/ +test_url: http://www.resume.se/nyheter/reklam/2013/09/18/ravelli-trodde-jag-var-med-i-blasningen/ +test_url: http://www.resume.se/rss-nyheter \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/retrieverweekly.com.txt b/inc/3rdparty/site_config/standard/retrieverweekly.com.txt old mode 100644 new mode 100755 index 1264ee3f..a0a23940 --- a/inc/3rdparty/site_config/standard/retrieverweekly.com.txt +++ b/inc/3rdparty/site_config/standard/retrieverweekly.com.txt @@ -1,6 +1,6 @@ -single_page_link://a[contains(@href, 'print')] - -# Grab metadata from the "printer-friendly" page, after specifying 
single_page_link -title://h2 +single_page_link://a[contains(@href, 'print')] + +# Grab metadata from the "printer-friendly" page, after specifying single_page_link +title://h2 date://cite test_url: http://www.retrieverweekly.com/?cmd=displaystory&story_id=7548&format=html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/reuters.com.txt b/inc/3rdparty/site_config/standard/reuters.com.txt old mode 100644 new mode 100755 index c5c94a4f..7411e62b --- a/inc/3rdparty/site_config/standard/reuters.com.txt +++ b/inc/3rdparty/site_config/standard/reuters.com.txt @@ -1,10 +1,10 @@ -title: //h1[@class='headline3'] -author: substring-after(//p[@class="byline"], 'By ') -date: //meta[@name="REVISION_DATE"]/@content -body: //div[@id='articleImage' or @id='frame_fd1fade'] | //span[@id='articleText'] | //div[@class='pageNavigation'] -strip: //li[@class='next'] -strip: //span[@class='articleLocation'] -prune: no -tidy: no - +title: //h1[@class='headline3'] +author: substring-after(//p[@class="byline"], 'By ') +date: //meta[@name="REVISION_DATE"]/@content +body: //div[@id='articleImage' or @id='frame_fd1fade'] | //span[@id='articleText'] | //div[@class='pageNavigation'] +strip: //li[@class='next'] +strip: //span[@class='articleLocation'] +prune: no +tidy: no + test_url: http://www.reuters.com/article/2011/04/08/us-ivorycoast-killings-idUSTRE73732A20110408 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/revistapiaui.estadao.com.br.txt b/inc/3rdparty/site_config/standard/revistapiaui.estadao.com.br.txt old mode 100644 new mode 100755 index dbe42932..30e627dc --- a/inc/3rdparty/site_config/standard/revistapiaui.estadao.com.br.txt +++ b/inc/3rdparty/site_config/standard/revistapiaui.estadao.com.br.txt @@ -1,10 +1,10 @@ -title: //div[@class="article_header"]/h3 -author: //div[@class="autor"]/p/* -date: substring-after(substring-after(//div[@class="flt-left"],"> "), "> ") - -move_into(//div[@class="new_article"]): 
//div[@class="img_article"]/img - -body: //div[@class="article_content"] -convert_double_br_tags: yes +title: //div[@class="article_header"]/h3 +author: //div[@class="autor"]/p/* +date: substring-after(substring-after(//div[@class="flt-left"],"> "), "> ") + +move_into(//div[@class="new_article"]): //div[@class="img_article"]/img + +body: //div[@class="article_content"] +convert_double_br_tags: yes test_url: http://revistapiaui.estadao.com.br/edicao-68/questoes-latino-americanas/filhos-da-guerra-suja \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/rezeptwelt.de.txt b/inc/3rdparty/site_config/standard/rezeptwelt.de.txt new file mode 100644 index 00000000..2093573b --- /dev/null +++ b/inc/3rdparty/site_config/standard/rezeptwelt.de.txt @@ -0,0 +1,5 @@ +body: //div[@class='step-content'] | //div[@class='global-active ingredients-box'] +title: //div[@class='step-1-container'] + +tidy: no +test_url: http://www.rezeptwelt.de/backen-herzhaft-rezepte/w%C3%BCrstchen-schlangen/530372 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/richardmuscat.wordpress.com.txt b/inc/3rdparty/site_config/standard/richardmuscat.wordpress.com.txt old mode 100644 new mode 100755 index 904a11dd..b0ee92dc --- a/inc/3rdparty/site_config/standard/richardmuscat.wordpress.com.txt +++ b/inc/3rdparty/site_config/standard/richardmuscat.wordpress.com.txt @@ -1,5 +1,5 @@ -body: //div[@id="post"] -strip: //div[@id="author-description"] -date: //span[@class="entry-date"] +body: //div[@id="post"] +strip: //div[@id="author-description"] +date: //span[@class="entry-date"] author: //span[@class="author vcard"] test_url: http://richardmuscat.wordpress.com/2011/06/20/the-price-of-free/?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+TheBrooksReview+%28The+Brooks+Review%29 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/ritemail.blogspot.com.txt b/inc/3rdparty/site_config/standard/ritemail.blogspot.com.txt old mode 100644 
new mode 100755 index 82cfaf27..ed72915c --- a/inc/3rdparty/site_config/standard/ritemail.blogspot.com.txt +++ b/inc/3rdparty/site_config/standard/ritemail.blogspot.com.txt @@ -1,5 +1,5 @@ -body: //div[@class='post-body entry-content'] -strip: //div[@id='lws_0'] -prune: no - +body: //div[@class='post-body entry-content'] +strip: //div[@id='lws_0'] +prune: no + test_url: http://ritemail.blogspot.com/2011/06/hayden-panettiere-candids-in-los.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/ritholtz.com.txt b/inc/3rdparty/site_config/standard/ritholtz.com.txt new file mode 100755 index 00000000..d598479e --- /dev/null +++ b/inc/3rdparty/site_config/standard/ritholtz.com.txt @@ -0,0 +1,5 @@ +title: //div[@class='post']/h2 +author: substring-before(substring-after(//div[@class='alignright']/small, 'By '),'-') +date: substring-after(//div[@class='alignright']/small, '-') +strip: //div[@class='alignleft'] +test_url: http://www.ritholtz.com/blog/2012/09/situational-awareness/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/robertsspaceindustries.com.txt b/inc/3rdparty/site_config/standard/robertsspaceindustries.com.txt new file mode 100755 index 00000000..b0b90fb7 --- /dev/null +++ b/inc/3rdparty/site_config/standard/robertsspaceindustries.com.txt @@ -0,0 +1,4 @@ +strip_id_or_class: 'sharedaddy' +strip_id_or_class: 'respond' +strip_id_or_class: 'meta' +test_url: http://www.robertsspaceindustries.com/news-update-ai-pilots/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/robots.thoughtbot.com.txt b/inc/3rdparty/site_config/standard/robots.thoughtbot.com.txt new file mode 100755 index 00000000..da5b7bd8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/robots.thoughtbot.com.txt @@ -0,0 +1,5 @@ +body: //section[@class='post text'] +title: //h1[@class='title'] +date: //p[@class='post-date'] +strip: //section[@class='meta-info'] +test_url: 
http://robots.thoughtbot.com/post/32455387133/four-phase-test \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/rockpapershotgun.com.txt b/inc/3rdparty/site_config/standard/rockpapershotgun.com.txt old mode 100644 new mode 100755 index 3035527c..f8c9541f --- a/inc/3rdparty/site_config/standard/rockpapershotgun.com.txt +++ b/inc/3rdparty/site_config/standard/rockpapershotgun.com.txt @@ -1,8 +1,8 @@ -title: //h2 - -strip: //div[ contains(@class, 'respond') ] | //h2 | //h1 - -date: substring-after(//p[@class='info'], ' on ') - +title: //h2 + +strip: //div[ contains(@class, 'respond') ] | //h2 | //h1 + +date: substring-after(//p[@class='info'], ' on ') + author: //p[@class='info']//a test_url: http://www.rockpapershotgun.com/2010/07/29/rps-half-verdict-starcraft-2/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/rodrigo.sharpcube.com.txt b/inc/3rdparty/site_config/standard/rodrigo.sharpcube.com.txt old mode 100644 new mode 100755 index abe70351..eef8b11c --- a/inc/3rdparty/site_config/standard/rodrigo.sharpcube.com.txt +++ b/inc/3rdparty/site_config/standard/rodrigo.sharpcube.com.txt @@ -1,7 +1,7 @@ -author: //article/header/span[@class='author'] -title://article/header/h1 -body: //article -strip: //article/header -strip: //article/p[@class='metadata'] +author: //article/header/span[@class='author'] +title://article/header/h1 +body: //article +strip: //article/header +strip: //article/p[@class='metadata'] footnotes: yes test_url: http://rodrigo.sharpcube.com/2010/06/20/using-and-sharing-a-vpn-connection-on-your-mac/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/rogerebert.com.txt b/inc/3rdparty/site_config/standard/rogerebert.com.txt old mode 100644 new mode 100755 index 26792330..da215109 --- a/inc/3rdparty/site_config/standard/rogerebert.com.txt +++ b/inc/3rdparty/site_config/standard/rogerebert.com.txt @@ -1,8 +1,8 @@ -title: substring-before(//title,':') -author: 
substring-after(substring-before(//div[@class='text']/b,'/'),'BY') - -body: //div[@class='text'] - -strip: //a[contains(@href,'printart')] +title: substring-before(//title,':') +author: substring-after(substring-before(//div[@class='text']/b,'/'),'BY') + +body: //div[@class='text'] + +strip: //a[contains(@href,'printart')] strip_id_or_class: enlarge_photo test_url: http://rogerebert.com/apps/pbcs.dll/article?AID=/20120411/REVIEWS/120419998/1005/GLOSSARY \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/rolfinjapan.nl.txt b/inc/3rdparty/site_config/standard/rolfinjapan.nl.txt old mode 100644 new mode 100755 index d618c23f..2365c42a --- a/inc/3rdparty/site_config/standard/rolfinjapan.nl.txt +++ b/inc/3rdparty/site_config/standard/rolfinjapan.nl.txt @@ -1,6 +1,6 @@ -body: //div[contains(@class, 'inhoud')] -date: //span[@class ='published'] -author: //span[@class ='author'] -strip: //div[@class = 'grid_2'] -strip: //div[@class = 'block-citation-text'] +body: //div[contains(@class, 'inhoud')] +date: //span[@class ='published'] +author: //span[@class ='author'] +strip: //div[@class = 'grid_2'] +strip: //div[@class = 'block-citation-text'] test_url: http://www.rolfinjapan.nl/2011/06/duizend-kraanvogels/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/rollingstone.com.txt b/inc/3rdparty/site_config/standard/rollingstone.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/rottentomatoes.com.txt b/inc/3rdparty/site_config/standard/rottentomatoes.com.txt old mode 100644 new mode 100755 index b5b29fe4..ef327691 --- a/inc/3rdparty/site_config/standard/rottentomatoes.com.txt +++ b/inc/3rdparty/site_config/standard/rottentomatoes.com.txt @@ -1,11 +1,11 @@ -body: //div[@class='movie_content_area'] -strip_id_or_class: tomatometer_bar_help -strip_id_or_class: critic-links -strip_id_or_class: top-critics-numbers -strip_id_or_class: fan_side -strip_id_or_class: fblike -strip_id_or_class: 
rating_widget -strip_id_or_class: friend_reviews -prune: no +body: //div[@class='movie_content_area'] +strip_id_or_class: tomatometer_bar_help +strip_id_or_class: critic-links +strip_id_or_class: top-critics-numbers +strip_id_or_class: fan_side +strip_id_or_class: fblike +strip_id_or_class: rating_widget +strip_id_or_class: friend_reviews +prune: no test_url: http://www.rottentomatoes.com/m/thor/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/roughtype.com.txt b/inc/3rdparty/site_config/standard/roughtype.com.txt old mode 100644 new mode 100755 index f2f00392..a012a67d --- a/inc/3rdparty/site_config/standard/roughtype.com.txt +++ b/inc/3rdparty/site_config/standard/roughtype.com.txt @@ -1,5 +1,5 @@ -body: //div[@class='content'] -strip: //p[@class='postmeta']/following::* -strip: //p[@class='postmeta'] +body: //div[@class='content'] +strip: //p[@class='postmeta']/following::* +strip: //p[@class='postmeta'] strip: //p[@align='left'] test_url: http://www.roughtype.com/archives/2012/01/power_to_the_da.php \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/roy.gbiv.com.txt b/inc/3rdparty/site_config/standard/roy.gbiv.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/rpgsite.net.txt b/inc/3rdparty/site_config/standard/rpgsite.net.txt old mode 100644 new mode 100755 index e7f29bbe..9ddbf0f2 --- a/inc/3rdparty/site_config/standard/rpgsite.net.txt +++ b/inc/3rdparty/site_config/standard/rpgsite.net.txt @@ -1,4 +1,4 @@ -body: //div[@id='news-text'] -prune: no -test_url: http://www.rpgsite.net/news/1964-tetsuya-nomura-says-hell-soon-show-the-future-of-final-fantasy +body: //div[@id='news-text'] +prune: no +test_url: http://www.rpgsite.net/news/1964-tetsuya-nomura-says-hell-soon-show-the-future-of-final-fantasy test_url: http://www.rpgsite.net/news/1965-new-atelier-totori-plus-screens-and-artwork \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/rubysfera.pl.txt 
b/inc/3rdparty/site_config/standard/rubysfera.pl.txt old mode 100644 new mode 100755 index d9df7684..d9d9a431 --- a/inc/3rdparty/site_config/standard/rubysfera.pl.txt +++ b/inc/3rdparty/site_config/standard/rubysfera.pl.txt @@ -1,9 +1,9 @@ -author: //div[contains(@class, 'author_text')]/h4/text() -date: //li[@class='date'] - -# stripping excessive tags -strip: //div[contains(@class, 'entry_meta')] -strip: //div[contains(@class, 'single_meta')] -strip: //br[contains(@class, 'clear')] +author: //div[contains(@class, 'author_text')]/h4/text() +date: //li[@class='date'] + +# stripping excessive tags +strip: //div[contains(@class, 'entry_meta')] +strip: //div[contains(@class, 'single_meta')] +strip: //br[contains(@class, 'clear')] strip: //h3[contains(., 'Komentarz')] test_url: http://rubysfera.pl/2011/09/10-porad-o-rvm/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/ruhlman.com.txt b/inc/3rdparty/site_config/standard/ruhlman.com.txt old mode 100644 new mode 100755 index 7a21c4af..e54b0f0e --- a/inc/3rdparty/site_config/standard/ruhlman.com.txt +++ b/inc/3rdparty/site_config/standard/ruhlman.com.txt @@ -1,6 +1,6 @@ -title: //h1[@class='entry-title'] -author: ///span[@class='author vcard'] -date: //abbr[@class='published'] -body: //div[@class='entry-content'] +title: //h1[@class='entry-title'] +author: ///span[@class='author vcard'] +date: //abbr[@class='published'] +body: //div[@class='entry-content'] test_url: http://ruhlman.com/2009/05/cookbooks-that-teach/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/ruttloff.org.txt b/inc/3rdparty/site_config/standard/ruttloff.org.txt old mode 100644 new mode 100755 index c036dcf8..43e130a4 --- a/inc/3rdparty/site_config/standard/ruttloff.org.txt +++ b/inc/3rdparty/site_config/standard/ruttloff.org.txt @@ -1,3 +1,3 @@ -author: //a[@class='author'] +author: //a[@class='author'] tidy: no test_url: http://ruttloff.org/2012/06/13/intervention \ No newline at end of file diff 
--git a/inc/3rdparty/site_config/standard/salon.com.txt b/inc/3rdparty/site_config/standard/salon.com.txt old mode 100644 new mode 100755 index 04f8afd5..2b47f744 --- a/inc/3rdparty/site_config/standard/salon.com.txt +++ b/inc/3rdparty/site_config/standard/salon.com.txt @@ -1,11 +1,11 @@ -title: //meta[@property='og:title']/@content -author: (//span[@class="byline"]/a)[1] -date: //span[contains(@class, "toLocalTime")] -body: (//div[contains(@class, "articleInner")]//img[contains(@src, 'media.salon.com') and contains(@src, '460x')])[1] | //div[contains(@class, "articleContent") or contains(@class, "writerMeta")] - -prune: no - -# deal with singleton links -single_page_link: (//h1/a[contains(@href, '/singleton')])[1] - +title: //meta[@property='og:title']/@content +author: (//span[@class="byline"]/a)[1] +date: //span[contains(@class, "toLocalTime")] +body: (//div[contains(@class, "articleInner")]//img[contains(@src, 'media.salon.com') and contains(@src, '460x')])[1] | //div[contains(@class, "articleContent") or contains(@class, "writerMeta")] + +prune: no + +# deal with singleton links +single_page_link: (//h1/a[contains(@href, '/singleton')])[1] + test_url: http://www.salon.com/2011/10/25/occupying_the_rust_belt/singleton/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/salzburg.com.txt b/inc/3rdparty/site_config/standard/salzburg.com.txt old mode 100644 new mode 100755 index 31067481..464f99f1 --- a/inc/3rdparty/site_config/standard/salzburg.com.txt +++ b/inc/3rdparty/site_config/standard/salzburg.com.txt @@ -1,6 +1,6 @@ -body: //p[@class='teaser1 darkgrey myriad'] -move_into(//p[@class='teaser1 darkgrey myriad']): //div[@class='artikel clear'] -strip: //div[@class='hidden'] -strip: //div[@id='article_related_source'] - +body: //p[@class='teaser1 darkgrey myriad'] +move_into(//p[@class='teaser1 darkgrey myriad']): //div[@class='artikel clear'] +strip: //div[@class='hidden'] +strip: //div[@id='article_related_source'] + test_url: 
http://www.salzburg.com/nachrichten/oesterreich/politik/sn/artikel/deutliche-nachbesserungen-bei-lehrerdienstrecht-19469/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/sanpedrosun.com.txt b/inc/3rdparty/site_config/standard/sanpedrosun.com.txt new file mode 100755 index 00000000..3f19cced --- /dev/null +++ b/inc/3rdparty/site_config/standard/sanpedrosun.com.txt @@ -0,0 +1,10 @@ +title: //div[contains(@class, 'post')]//h1 +date: //div[contains(@class, 'post')]//h6 +body: //div[contains(@class, 'entry')] +strip_id_or_class: post_stats +strip_id_or_class: related-posts +strip_id_or_class: after_story +prune: no + +test_url: http://www.sanpedrosun.com/community-and-society/2013/06/05/little-angelspre-school-talent-show/ +test_url: http://www.sanpedrosun.com/feed/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/saveyourself.ca.txt b/inc/3rdparty/site_config/standard/saveyourself.ca.txt old mode 100644 new mode 100755 index 354f5911..5a5605d9 --- a/inc/3rdparty/site_config/standard/saveyourself.ca.txt +++ b/inc/3rdparty/site_config/standard/saveyourself.ca.txt @@ -1,25 +1,25 @@ -title://h1 - -# my section divs seem to interfere with the Instapaper parser, so I ditch 'em -dissolve://div[contains(@class, 'section')] - -#these don't seem to be necessary, but just in case -strip_id_or_class:'masthead' -strip_id_or_class:'footer' - -#again, Instapaper seems to understand where my content is, but just in case -body://div[@id='content'] - -# in general, I want the Instapaper view to look like my print CSS, so I remove things specified for the screen or non-printing -strip_id_or_class:'screen-only' -strip_id_or_class:'no-print' - -#other misc removals and simplifications -strip_id_or_class:'popup' -strip_id_or_class:'ZoomSpin' - -#I have a lot of content in sidebars and "meta" asides that can work inline just fine, but has to be distinguished somehow with some minimal formatting, so I put them in blockquotes 
-wrap_in(blockquote)://div[contains(@class, 'sidebar')] -wrap_in(blockquote)://div[contains(@class, 'meta')] +title://h1 + +# my section divs seem to interfere with the Instapaper parser, so I ditch 'em +dissolve://div[contains(@class, 'section')] + +#these don't seem to be necessary, but just in case +strip_id_or_class:'masthead' +strip_id_or_class:'footer' + +#again, Instapaper seems to understand where my content is, but just in case +body://div[@id='content'] + +# in general, I want the Instapaper view to look like my print CSS, so I remove things specified for the screen or non-printing +strip_id_or_class:'screen-only' +strip_id_or_class:'no-print' + +#other misc removals and simplifications +strip_id_or_class:'popup' +strip_id_or_class:'ZoomSpin' + +#I have a lot of content in sidebars and "meta" asides that can work inline just fine, but has to be distinguished somehow with some minimal formatting, so I put them in blockquotes +wrap_in(blockquote)://div[contains(@class, 'sidebar')] +wrap_in(blockquote)://div[contains(@class, 'meta')] wrap_in(blockquote)://p[contains(@class, 'meta')] test_url: http://saveyourself.ca/tutorials/low-back-pain.php \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/sayidaty.net.txt b/inc/3rdparty/site_config/standard/sayidaty.net.txt new file mode 100755 index 00000000..2d9f1884 --- /dev/null +++ b/inc/3rdparty/site_config/standard/sayidaty.net.txt @@ -0,0 +1,4 @@ +date: //meta[@property='article:published_time']/@content +body: (//div[contains(@class, 'article-slider')]//img)[1] | //div[contains(@class, 'bottom-article-con')] + +test_url: http://www.sayidaty.net/taxonomy/term/10/all/feed \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/sbnation.com.txt b/inc/3rdparty/site_config/standard/sbnation.com.txt old mode 100644 new mode 100755 index c213843c..41b36755 --- a/inc/3rdparty/site_config/standard/sbnation.com.txt +++ b/inc/3rdparty/site_config/standard/sbnation.com.txt @@ -1,28 
+1,28 @@ -title: //h1[@id='stream_title'] - -# Author and date don't work -author: //div[@class='byline'] -date: //div[@class='date-stamp'] - -body: //div[@class='node-article'] - -strip_id_or_class: fb-like-box -strip_id_or_class: stream-fb-like -strip_id_or_class: social-meta -strip_id_or_class: social-spoken -strip_id_or_class: twitter-share-button -strip_id_or_class: twitter-follow-button -strip_id_or_class: spinner_node_list -strip_id_or_class: node-sort-link -strip_id_or_class: stream_title -strip_id_or_class: stream_summary -strip_id_or_class: update-count-container -strip_id_or_class: major-updates -strip_id_or_class: newsletter-slide -strip_id_or_class: author-mini-profile -strip_id_or_class: byline -strip_id_or_class: header -strip_id_or_class: footer - +title: //h1[@id='stream_title'] + +# Author and date don't work +author: //div[@class='byline'] +date: //div[@class='date-stamp'] + +body: //div[@class='node-article'] + +strip_id_or_class: fb-like-box +strip_id_or_class: stream-fb-like +strip_id_or_class: social-meta +strip_id_or_class: social-spoken +strip_id_or_class: twitter-share-button +strip_id_or_class: twitter-follow-button +strip_id_or_class: spinner_node_list +strip_id_or_class: node-sort-link +strip_id_or_class: stream_title +strip_id_or_class: stream_summary +strip_id_or_class: update-count-container +strip_id_or_class: major-updates +strip_id_or_class: newsletter-slide +strip_id_or_class: author-mini-profile +strip_id_or_class: byline +strip_id_or_class: header +strip_id_or_class: footer + # Works, but "no text" errors on: http://www.sbnation.com/nba/2012/3/9/2856780/nba-scores-dwight-howard-bulls-magic-mavs-suns test_url: http://www.sbnation.com/nba/2012/3/13/2867226/dwight-howard-trade-rumors-2012-faq-orlando-magic \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/schneier.com.txt b/inc/3rdparty/site_config/standard/schneier.com.txt old mode 100644 new mode 100755 index 67181b65..0074a86a --- 
a/inc/3rdparty/site_config/standard/schneier.com.txt +++ b/inc/3rdparty/site_config/standard/schneier.com.txt @@ -1,25 +1,25 @@ -author: //p[@class='mastname'] - -body: //div[@class='indivbody'] -date: //div[@class='indivbody']/h2[1] - -# Remove blog title. Specify first occurrence in case h1 is used in article -strip: //div[@class='indivbody']/h1[1] - -# Remove blog description (the first p element) -strip: //div[@class='indivbody']/p[1] - -# Remove navigation (second p element) -strip: //div[@class='indivbody']/p[2] - -# Remove duplicate of article title. Specify first occurrence in case h3 is used in article -strip: //div[@class='indivbody']/h3[1] - -# Remove publishing date, it's extracted by rule above -strip: //div[@class='indivbody']/h2[1] - -# Remove duplicate of date at end, and newsletter signup -strip: //p[@class='posted'] - -# Leave date at top +author: //p[@class='mastname'] + +body: //div[@class='indivbody'] +date: //div[@class='indivbody']/h2[1] + +# Remove blog title. Specify first occurrence in case h1 is used in article +strip: //div[@class='indivbody']/h1[1] + +# Remove blog description (the first p element) +strip: //div[@class='indivbody']/p[1] + +# Remove navigation (second p element) +strip: //div[@class='indivbody']/p[2] + +# Remove duplicate of article title. 
Specify first occurrence in case h3 is used in article +strip: //div[@class='indivbody']/h3[1] + +# Remove publishing date, it's extracted by rule above +strip: //div[@class='indivbody']/h2[1] + +# Remove duplicate of date at end, and newsletter signup +strip: //p[@class='posted'] + +# Leave date at top test_url: http://www.schneier.com/blog/archives/2010/12/security_in_202.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/science.orf.at.txt b/inc/3rdparty/site_config/standard/science.orf.at.txt old mode 100644 new mode 100755 index 89ebfe08..c4b21834 --- a/inc/3rdparty/site_config/standard/science.orf.at.txt +++ b/inc/3rdparty/site_config/standard/science.orf.at.txt @@ -1,11 +1,11 @@ -body: //div[@class="storybox"] -title: //div[@class="storybox"]//h1 -strip: //p[@class='metaline'] -date: substring-after(//*[@class='time'],'Erstellt am') -strip: //div[@class='fact'] -strip: //p[@class='backlink'] -strip: //div[@class='mailto'] -strip: //div[@id='forumDisclaimer'] -strip: //div[@class='forum'] +body: //div[@class="storybox"] +title: //div[@class="storybox"]//h1 +strip: //p[@class='metaline'] +date: substring-after(//*[@class='time'],'Erstellt am') +strip: //div[@class='fact'] +strip: //p[@class='backlink'] +strip: //div[@class='mailto'] +strip: //div[@id='forumDisclaimer'] +strip: //div[@class='forum'] test_url: http://science.orf.at/stories/1700900/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/scienceblogs.de.txt b/inc/3rdparty/site_config/standard/scienceblogs.de.txt old mode 100644 new mode 100755 index 08c16842..b0dec3d2 --- a/inc/3rdparty/site_config/standard/scienceblogs.de.txt +++ b/inc/3rdparty/site_config/standard/scienceblogs.de.txt @@ -1,12 +1,12 @@ -single_page_link: //div[@class='c2c1']/div[@class='toptheme further line']//ul//li/a - -author: //div[@class='details clear']//a[@class='hi'] -body: //div[@class='title'] -strip: //p[@class='entrypagination'] -strip: //p[@class='details_top'] -date: 
//p[@class='details_top'] -title: //div[@class='title']/h1 -strip: //p[@class='details'] -strip: //p[@class='details_bottom'] +single_page_link: //div[@class='c2c1']/div[@class='toptheme further line']//ul//li/a + +author: //div[@class='details clear']//a[@class='hi'] +body: //div[@class='title'] +strip: //p[@class='entrypagination'] +strip: //p[@class='details_top'] +date: //p[@class='details_top'] +title: //div[@class='title']/h1 +strip: //p[@class='details'] +strip: //p[@class='details_bottom'] test_url: http://www.scienceblogs.de/astrodicticum-simplex/2011/10/weltuntergang-reloaded-das-jungste-gericht-findet-am-21-oktober-statt.php \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/scienceticker.info.txt b/inc/3rdparty/site_config/standard/scienceticker.info.txt old mode 100644 new mode 100755 index 75a52824..2a06f734 --- a/inc/3rdparty/site_config/standard/scienceticker.info.txt +++ b/inc/3rdparty/site_config/standard/scienceticker.info.txt @@ -1,11 +1,11 @@ -body: //div[@class='post'] -title: //h1[@id='singlePageTitle'] -date: substring-before(//small,'• Rubrik') - -strip: //div[@class='post-ratings'] -strip: //div[@class='post-ratings-loading'] -strip: //a[@title='Empfehlen Sie den Text weiter!'] -strip: //a[@title='Drucken'] -strip: //div[@class='share'] +body: //div[@class='post'] +title: //h1[@id='singlePageTitle'] +date: substring-before(//small,'• Rubrik') + +strip: //div[@class='post-ratings'] +strip: //div[@class='post-ratings-loading'] +strip: //a[@title='Empfehlen Sie den Text weiter!'] +strip: //a[@title='Drucken'] +strip: //div[@class='share'] test_url: http://www.scienceticker.info/2011/11/24/forscher-finden-gedachtnismolekul/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/scientificamerican.com.txt b/inc/3rdparty/site_config/standard/scientificamerican.com.txt old mode 100644 new mode 100755 index d510407d..1b3f31cf --- a/inc/3rdparty/site_config/standard/scientificamerican.com.txt +++ 
b/inc/3rdparty/site_config/standard/scientificamerican.com.txt @@ -1,25 +1,25 @@ -# -# After site revisions at SciAm, this configuration does -# not work, especially for multi-page articles. For -# every article there is now a "Print" link which -# is far more reliable. So this configuration should be -# removed or disabled. -# 2/3/13 -# - -# meta data -title://h1[@class = 'articleTitle'] -author:substring-after(//span[@class = 'byline'],'By ') -date:substring-before(//span[@class = 'datestamp'],'|') - -#body content -body://div[@id = 'articleContent'] -#next_page_link://li[@id = 'flairPagination']/a[last()] - -single_page_link: //a[contains(@href, 'print=true')] - -#cleanup -strip://div[@class = 'fsgBooks'] - -test_url: http://www.scientificamerican.com/article.cfm?id=do-brain-scans-comatose-patients-reveal-conscious-state +# +# After site revisions at SciAm, this configuration does +# not work, especially for multi-page articles. For +# every article there is now a "Print" link which +# is far more reliable. So this configuration should be +# removed or disabled. 
+# 2/3/13 +# + +# meta data +title://h1[@class = 'articleTitle'] +author:substring-after(//span[@class = 'byline'],'By ') +date:substring-before(//span[@class = 'datestamp'],'|') + +#body content +body://div[@id = 'articleContent'] +#next_page_link://li[@id = 'flairPagination']/a[last()] + +single_page_link: //a[contains(@href, 'print=true')] + +#cleanup +strip://div[@class = 'fsgBooks'] + +test_url: http://www.scientificamerican.com/article.cfm?id=do-brain-scans-comatose-patients-reveal-conscious-state test_url: http://www.scientificamerican.com/article.cfm?id=solar-wind-transforms-venus-into-shape-of-comet \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/scilogs.de.txt b/inc/3rdparty/site_config/standard/scilogs.de.txt new file mode 100755 index 00000000..b24d7844 --- /dev/null +++ b/inc/3rdparty/site_config/standard/scilogs.de.txt @@ -0,0 +1,15 @@ +title: //h1 +author: //div[@class='date']/a +date: substring-after(//div[@class='date'], ',') +body: //div[@class='entrybody'] + +strip_id_or_class: socialshareprivacy +strip: //div[@class='entrybody']/br[1] + +# Strip related articles +# 'p'-Tag strips 'Ähnliche Artikel: ' (<br> tags become <p>) +strip: //div[@class='entrybody']/p[last()] +strip: //div[@class='entrybody']/ul[last()] + +convert_double_br_tags: yes +test_url: http://www.scilogs.de/wblogs/blog/formbar/fusion/2012-10-08/rundgang-durch-deutschlands-gr-tes-fusionsexperiment \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/scotusblog.com.txt b/inc/3rdparty/site_config/standard/scotusblog.com.txt old mode 100644 new mode 100755 index f29e37f9..8881bb45 --- a/inc/3rdparty/site_config/standard/scotusblog.com.txt +++ b/inc/3rdparty/site_config/standard/scotusblog.com.txt @@ -1,8 +1,8 @@ -title: //title -author: //p[@id='author-name-role']/a -date: substring-after(//p[@class='time'],'Posted') -body: //div[@id='main'] -strip: //div[@id='author-info'] -strip: //div[@id='author-links'] +title: //title +author: 
//p[@id='author-name-role']/a +date: substring-after(//p[@class='time'],'Posted') +body: //div[@id='main'] +strip: //div[@id='author-info'] +strip: //div[@id='author-links'] strip: //h1 test_url: http://www.scotusblog.com/2012/04/shaken-baby-case-an-update/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/scraplab.net.txt b/inc/3rdparty/site_config/standard/scraplab.net.txt old mode 100644 new mode 100755 index 84be27f9..ca7ec195 --- a/inc/3rdparty/site_config/standard/scraplab.net.txt +++ b/inc/3rdparty/site_config/standard/scraplab.net.txt @@ -1,3 +1,3 @@ -title: //h2 +title: //h2 body: //div[@class='body'] test_url: http://scraplab.net/2010/10/26/please-keep-your-belongings-with-you-at-all-times/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/scripting.com.txt b/inc/3rdparty/site_config/standard/scripting.com.txt old mode 100644 new mode 100755 index d8b969b1..5fb0ee79 --- a/inc/3rdparty/site_config/standard/scripting.com.txt +++ b/inc/3rdparty/site_config/standard/scripting.com.txt @@ -1,8 +1,8 @@ -strip: //a[starts-with(@href, '#')] -strip: //*[@class='storyByline'] -body: //*[@class='storyPageText']/.. -author: string('Dave Winer') -date: substring-before(substring-after(//*[@class='storyByline'], 'on'), 'at') -title: //h1 +strip: //a[starts-with(@href, '#')] +strip: //*[@class='storyByline'] +body: //*[@class='storyPageText']/.. 
+author: string('Dave Winer') +date: substring-before(substring-after(//*[@class='storyByline'], 'on'), 'at') +title: //h1 footnotes: no test_url: http://scripting.com/stories/2011/07/08/yeahImStillYawning.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/sct.temple.edu.txt b/inc/3rdparty/site_config/standard/sct.temple.edu.txt old mode 100644 new mode 100755 index 9927675b..55f24173 --- a/inc/3rdparty/site_config/standard/sct.temple.edu.txt +++ b/inc/3rdparty/site_config/standard/sct.temple.edu.txt @@ -1,5 +1,5 @@ -body: //*[@class="entry-content"] -title: //h1[@class="entry-title"] -date: //*[@class="entry-date"] +body: //*[@class="entry-content"] +title: //h1[@class="entry-title"] +date: //*[@class="entry-date"] author: //*[@class="author vcard"] test_url: http://sct.temple.edu/blogs/news-events/2011/05/congratulations-sct-class-of-2011/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/searchenginejournal.com.txt b/inc/3rdparty/site_config/standard/searchenginejournal.com.txt new file mode 100755 index 00000000..dc98af3c --- /dev/null +++ b/inc/3rdparty/site_config/standard/searchenginejournal.com.txt @@ -0,0 +1,5 @@ +strip: //ul[contains(@id, "social")] +strip: //div[contains(@class, "ts-fab-wrapper")] +strip: //div[contains(@id, 'gpt-ad')] + +test_url: http://www.searchenginejournal.com/web-design-vs-seo-it-doesnt-make-much-sense/62294/ diff --git a/inc/3rdparty/site_config/standard/searchengineland.com.txt b/inc/3rdparty/site_config/standard/searchengineland.com.txt old mode 100644 new mode 100755 index f176d7c7..fb6a1074 --- a/inc/3rdparty/site_config/standard/searchengineland.com.txt +++ b/inc/3rdparty/site_config/standard/searchengineland.com.txt @@ -1,20 +1,20 @@ -body: //div[@class="storyBox"] -title: //div[@class="storyBox"]/h1 -author: //a[@rel="author"] -date: substring-before(//span[@class="dateline"], 'by') - -#Removes related content but cleans up article text -strip: //h1 -strip: 
//p[@class="homeStory tdmSideInfo"] -strip: //div[@id="bylineShare"] -strip: //script -strip: //hr - -strip_id_or_class: homeStory -strip_id_or_class: authorpic -strip_id_or_class: insideComments -strip_id_or_class: authorbio -strip_id_or_class: gpt-ad-sel-cube -strip_id_or_class: smxTextAd +body: //div[@class="storyBox"] +title: //div[@class="storyBox"]/h1 +author: //a[@rel="author"] +date: substring-before(//span[@class="dateline"], 'by') + +#Removes related content but cleans up article text +strip: //h1 +strip: //p[@class="homeStory tdmSideInfo"] +strip: //div[@id="bylineShare"] +strip: //script +strip: //hr + +strip_id_or_class: homeStory +strip_id_or_class: authorpic +strip_id_or_class: insideComments +strip_id_or_class: authorbio +strip_id_or_class: gpt-ad-sel-cube +strip_id_or_class: smxTextAd test_url: http://searchengineland.com/googles-jaw-dropping-sponsored-post-campaign-for-chrome-106348 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/seattletransitblog.com.txt b/inc/3rdparty/site_config/standard/seattletransitblog.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/sebbo.net.txt b/inc/3rdparty/site_config/standard/sebbo.net.txt old mode 100644 new mode 100755 index 3e800a16..b6d9c92d --- a/inc/3rdparty/site_config/standard/sebbo.net.txt +++ b/inc/3rdparty/site_config/standard/sebbo.net.txt @@ -1,4 +1,4 @@ -title: substring-before(//title, '�') -body: //div[@class = 'entry'] +title: substring-before(//title, '«') +body: //div[@class = 'entry'] strip_id_or_class: 'postmetabox' test_url: http://sebbo.net/2010/12/akkus/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/select.yeeyan.org.txt b/inc/3rdparty/site_config/standard/select.yeeyan.org.txt new file mode 100755 index 00000000..6e98b149 --- /dev/null +++ b/inc/3rdparty/site_config/standard/select.yeeyan.org.txt @@ -0,0 +1,18 @@ +# This filter is tested on: +# http://select.yeeyan.org/view/18312/332365 +# 
http://select.yeeyan.org/view/365295/333788 +# http://select.yeeyan.org/view/174464/332336 + +tidy:no +prune:no +title://h1 +author: //div[@class='sa_author']/span/a +date: substring-after(//div[@class='sa_author']/span/following-sibling::span, ':') +body: //div[@class='sa_left closetag'] +wrap_in(b)://div[@class='sa_abstract'] + +strip://ul[@class='sa_next clearfix'] +strip: //div[@class='sa_author'] +strip: //div[@class='sa_title_box'] + +test_url: http://select.yeeyan.org/view/258033/333481 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/seriouseats.com.txt b/inc/3rdparty/site_config/standard/seriouseats.com.txt old mode 100644 new mode 100755 index d7b4788c..5e633470 --- a/inc/3rdparty/site_config/standard/seriouseats.com.txt +++ b/inc/3rdparty/site_config/standard/seriouseats.com.txt @@ -1,15 +1,15 @@ -body: //div[@id='content'] - -# clean up recipe pages -strip: //h2[@class='fn'] | //h2[@class='double-lined'] | //h3 | //div[@id='threeColumn2'] | //div[@id='threeColumn3'] - -#recipe pages -strip_id_or_class: "recipe-feedback" -strip_id_or_class: "comments" -strip_id_or_class: "procedure-number" -strip_id_or_class: "more-with-author" - -#slice -strip_id_or_class: "inner" +body: //div[@id='content'] + +# clean up recipe pages +strip: //h2[@class='fn'] | //h2[@class='double-lined'] | //h3 | //div[@id='threeColumn2'] | //div[@id='threeColumn3'] + +#recipe pages +strip_id_or_class: "recipe-feedback" +strip_id_or_class: "comments" +strip_id_or_class: "procedure-number" +strip_id_or_class: "more-with-author" + +#slice +strip_id_or_class: "inner" test_url: http://www.seriouseats.com/recipes/2010/09/peking-duck-mandarin-pancakes-plum-sauce-recipe.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/sf.curbed.com.txt b/inc/3rdparty/site_config/standard/sf.curbed.com.txt old mode 100644 new mode 100755 index 9f443d5c..4c10e9c7 --- a/inc/3rdparty/site_config/standard/sf.curbed.com.txt +++ 
b/inc/3rdparty/site_config/standard/sf.curbed.com.txt @@ -1,7 +1,7 @@ -title: //h1[@class='post-title'] -author: //div[@class='post-byline']/a -date: substring-before(//div[@class='post-byline'], ', by') - -body: //div[@class='post-body'] +title: //h1[@class='post-title'] +author: //div[@class='post-byline']/a +date: substring-before(//div[@class='post-byline'], ', by') + +body: //div[@class='post-body'] dissolve: //noscript test_url: http://sf.curbed.com/archives/2011/10/17/lower_haight_loft_would_really_really_really_like_a_buyer.php \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/sf.eater.com.txt b/inc/3rdparty/site_config/standard/sf.eater.com.txt old mode 100644 new mode 100755 index fca656d2..1e7c85a0 --- a/inc/3rdparty/site_config/standard/sf.eater.com.txt +++ b/inc/3rdparty/site_config/standard/sf.eater.com.txt @@ -1,7 +1,7 @@ -title: //h1[@class="post-title"] -author: //div[@class="post-byline"]/a -date: substring-before(//div[@class='post-byline'], ', by') - -body: //div[@class='post-body'] +title: //h1[@class="post-title"] +author: //div[@class="post-byline"]/a +date: substring-before(//div[@class='post-byline'], ', by') + +body: //div[@class='post-body'] strip_id_or_class: post-kicker test_url: http://sf.eater.com/archives/2012/05/22/nate_pollack_talks_about_the_american_grilled_cheese_kitchen_moving_into_the_mission.php \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/sfgate.com.txt b/inc/3rdparty/site_config/standard/sfgate.com.txt old mode 100644 new mode 100755 index 5f73fbcb..54691122 --- a/inc/3rdparty/site_config/standard/sfgate.com.txt +++ b/inc/3rdparty/site_config/standard/sfgate.com.txt @@ -1,12 +1,12 @@ -title: /html/head/title - -body: //div[@id = 'articlecontent']/div[contains(@class, 'bodytext')] -author: //div[@class = 'articleheadings']/p[contains(@class,'author')]/span[@class = 'fn'] -date: //div[@class = 'articleheadings']/span[@class = 'updated'] -strip: //div[div[contains(@class, 
'imgbox')]] - -body: //div[@class = 'blogitem'] -author: //p[@class="credit"]/span[@class="author"]/a[position() = 1] -date: //span[@class = 'pubdate'] +title: /html/head/title + +body: //div[@id = 'articlecontent']/div[contains(@class, 'bodytext')] +author: //div[@class = 'articleheadings']/p[contains(@class,'author')]/span[@class = 'fn'] +date: //div[@class = 'articleheadings']/span[@class = 'updated'] +strip: //div[div[contains(@class, 'imgbox')]] + +body: //div[@class = 'blogitem'] +author: //p[@class="credit"]/span[@class="author"]/a[position() = 1] +date: //span[@class = 'pubdate'] test_url: http://www.sfgate.com/columnists/garchik/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/sfweekly.com.txt b/inc/3rdparty/site_config/standard/sfweekly.com.txt old mode 100644 new mode 100755 index a11fe4cb..73c3017e --- a/inc/3rdparty/site_config/standard/sfweekly.com.txt +++ b/inc/3rdparty/site_config/standard/sfweekly.com.txt @@ -1,3 +1,3 @@ -body: //div[contains(@class, 'content_body')] +body: //div[contains(@class, 'content_body')] strip_id_or_class: det_rel test_url: http://www.sfweekly.com/2012-03-14/news/cia-lsd-wayne-ritchie-george-h-white-mk-ultra/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/shabayek.com.txt b/inc/3rdparty/site_config/standard/shabayek.com.txt old mode 100644 new mode 100755 index b175720e..9a0d60ae --- a/inc/3rdparty/site_config/standard/shabayek.com.txt +++ b/inc/3rdparty/site_config/standard/shabayek.com.txt @@ -1,3 +1,3 @@ -date: //span[@class='date'] +date: //span[@class='date'] body: //div[@class='post_content'] test_url: http://www.shabayek.com/blog/2011/10/16/%D8%AF%D8%B1%D9%88%D8%B3-%D9%85%D9%86-%D9%82%D8%B5%D8%A9-%D8%AA%D8%A3%D8%B3%D9%8A%D8%B3-%D8%AA%D9%88%D9%8A%D8%AA%D8%B1-%E2%80%93%D8%AC3/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/shawnblanc.net.txt b/inc/3rdparty/site_config/standard/shawnblanc.net.txt old mode 100644 new mode 100755 index 
b536fc3a..bd8438f7 --- a/inc/3rdparty/site_config/standard/shawnblanc.net.txt +++ b/inc/3rdparty/site_config/standard/shawnblanc.net.txt @@ -1,11 +1,11 @@ -title://*[@class='primary']/h1 -date: //*[@class='articledate'] -author: substring-before(substring-after(//*[@class='block first']/p,'2012 '),'.') -body: //div[@class='primary'] -footnotes: yes -strip: //*[@class='primary']/h1 -strip: //*[@class='articledate'] -strip: //*[@class='detailsarticle'] -strip: //*[@class='endnav'] -strip: //*[@class='endmeta'] +title://*[@class='primary']/h1 +date: //*[@class='articledate'] +author: substring-before(substring-after(//*[@class='block first']/p,'2012 '),'.') +body: //div[@class='primary'] +footnotes: yes +strip: //*[@class='primary']/h1 +strip: //*[@class='articledate'] +strip: //*[@class='detailsarticle'] +strip: //*[@class='endnav'] +strip: //*[@class='endmeta'] test_url: http://shawnblanc.net/2011/11/kindle-touch-review/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/shifteleven.com.txt b/inc/3rdparty/site_config/standard/shifteleven.com.txt old mode 100644 new mode 100755 index 68059ae1..43fd871d --- a/inc/3rdparty/site_config/standard/shifteleven.com.txt +++ b/inc/3rdparty/site_config/standard/shifteleven.com.txt @@ -1,6 +1,6 @@ -body: //div[ @class='entry-content' ] - -strip: //div[ contains(@class, 'sharing') ] - +body: //div[ @class='entry-content' ] + +strip: //div[ contains(@class, 'sharing') ] + date: //div[ @class='entry-meta' ]/a test_url: http://shifteleven.com/articles/2008/05/10/issue-tracking-git-ticgit \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/siasat.pk.txt b/inc/3rdparty/site_config/standard/siasat.pk.txt old mode 100644 new mode 100755 index a82ce69c..b10e12de --- a/inc/3rdparty/site_config/standard/siasat.pk.txt +++ b/inc/3rdparty/site_config/standard/siasat.pk.txt @@ -1,11 +1,11 @@ -#body: (//div[@class='ftr-yt-vid'])[1] -body: (//blockquote[contains(@class, 'postcontent')])[1] -body: 
(//div[starts-with(@id, 'post_message')])[1] - -prune: no -tidy: no - -#replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player" -#replace_string(</iframe>): </iframe> </div> - +#body: (//div[@class='ftr-yt-vid'])[1] +body: (//blockquote[contains(@class, 'postcontent')])[1] +body: (//div[starts-with(@id, 'post_message')])[1] + +prune: no +tidy: no + +#replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player" +#replace_string(</iframe>): </iframe> </div> + test_url: http://www.siasat.pk/forum/showthread.php?107668-Policy-Matters-17th-March-2012-Dr-Shahid-Masood-Gen-Hameed-gul-amp-Fawad-Chudhary-Pak-US-Relationship&p=787733 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/signalscv.com.txt b/inc/3rdparty/site_config/standard/signalscv.com.txt new file mode 100755 index 00000000..2d3c388e --- /dev/null +++ b/inc/3rdparty/site_config/standard/signalscv.com.txt @@ -0,0 +1,10 @@ +author: //span[contains(@class, 'byline_1')] +date: //span[@class='posted_date'] +body: //*[contains(@class, 'bigimage_container') or contains(@class, 'overlay_text') or contains(@id, 'articlebody')] + +strip_id_or_class: leftWrapper + +prune: no + +test_url: http://www.signalscv.com/section/46/article/102948/ +test_url: http://www.signalscv.com/syndication/feeds/rss/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/simonwillison.net.txt b/inc/3rdparty/site_config/standard/simonwillison.net.txt old mode 100644 new mode 100755 index e3ad6e41..69999698 --- a/inc/3rdparty/site_config/standard/simonwillison.net.txt +++ b/inc/3rdparty/site_config/standard/simonwillison.net.txt @@ -1,5 +1,5 @@ -body: //div[contains(@class, "entry")] - -date: //div[contains(@class, "entryFooter")]/a +body: //div[contains(@class, "entry")] + +date: //div[contains(@class, "entryFooter")]/a test_url: http://simonwillison.net/2009/Oct/22/redis/ \ No newline 
at end of file diff --git a/inc/3rdparty/site_config/standard/singaporeanstocksinvestor.blogspot.com.txt b/inc/3rdparty/site_config/standard/singaporeanstocksinvestor.blogspot.com.txt old mode 100644 new mode 100755 index a1b6b673..46e2d5f2 --- a/inc/3rdparty/site_config/standard/singaporeanstocksinvestor.blogspot.com.txt +++ b/inc/3rdparty/site_config/standard/singaporeanstocksinvestor.blogspot.com.txt @@ -1,5 +1,5 @@ -body: //div[@class='post-body'] -strip: //div[@id='lws_0'] -prune: no +body: //div[@class='post-body'] +strip: //div[@id='lws_0'] +prune: no test_url: http://singaporeanstocksinvestor.blogspot.com/2011/04/aims-amp-capital-industrial-reit.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/singularityhub.com.txt b/inc/3rdparty/site_config/standard/singularityhub.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/sintagoulis.gr.txt b/inc/3rdparty/site_config/standard/sintagoulis.gr.txt old mode 100644 new mode 100755 index 822bbeb0..0d05c40e --- a/inc/3rdparty/site_config/standard/sintagoulis.gr.txt +++ b/inc/3rdparty/site_config/standard/sintagoulis.gr.txt @@ -1,6 +1,6 @@ -title: //div[@class='headline']//h2 -body: //div[contains(@class, 'storycontent')] - -prune: no - +title: //div[@class='headline']//h2 +body: //div[contains(@class, 'storycontent')] + +prune: no + test_url: http://sintagoulis.gr/sokolatenia/sokolatenia-mpompa-me-amaretti- \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/sivers.org.txt b/inc/3rdparty/site_config/standard/sivers.org.txt new file mode 100755 index 00000000..a88f30d7 --- /dev/null +++ b/inc/3rdparty/site_config/standard/sivers.org.txt @@ -0,0 +1,6 @@ +title: //article[@class='post']/header[@class='wrapper']//h1/a +author: //header[@id='masthead']//h1/a +date: //article[@class='post']/header[@class='wrapper']//p[@class='postdate'] +body: //div[@id='body-content'] + +test_url: http://sivers.org/delegate/ \ No newline at end of file 
diff --git a/inc/3rdparty/site_config/standard/skanesfria.se.txt b/inc/3rdparty/site_config/standard/skanesfria.se.txt new file mode 100755 index 00000000..a0ddac79 --- /dev/null +++ b/inc/3rdparty/site_config/standard/skanesfria.se.txt @@ -0,0 +1,7 @@ +body: //div[contains(@class, 'layout__inner')]//div[contains(@class, 'file-image') or contains(@class, 'node__content')] +author: //article//div[contains(@class, 'field-byline')] +strip_id_or_class: rekommenderade +strip_id_or_class: disqus +strip_id_or_class: annonser + +test_url: http://www.skanesfria.se/artikel/112045 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/slashfilm.com.txt b/inc/3rdparty/site_config/standard/slashfilm.com.txt old mode 100644 new mode 100755 index 78d38ecf..4d17176a --- a/inc/3rdparty/site_config/standard/slashfilm.com.txt +++ b/inc/3rdparty/site_config/standard/slashfilm.com.txt @@ -1,15 +1,15 @@ -title: substring-before(//title,'| /Film') -date: substring-before(substring-after(//p[@class='post-info'],'Posted on '),'by') -strip: //div[@class='pm-left'] -strip: //div[@class='pm-right'] -strip: //h2/span -next_page_link: //h2/strong/a -strip: //h2/strong/a -strip: //p[contains(text(),'we have to split this post over')] -strip: //p[@class='post-info'] -strip: //h1/a -strip: //img[contains(@src,'siteimages/authors')] -strip: //div[@id='header'] -strip: //div[@class='topad-right'] -strip: //strong[contains(text(),'Cool Posts From Around the Web:')] +title: substring-before(//title,'| /Film') +date: substring-before(substring-after(//p[@class='post-info'],'Posted on '),'by') +strip: //div[@class='pm-left'] +strip: //div[@class='pm-right'] +strip: //h2/span +next_page_link: //h2/strong/a +strip: //h2/strong/a +strip: //p[contains(text(),'we have to split this post over')] +strip: //p[@class='post-info'] +strip: //h1/a +strip: //img[contains(@src,'siteimages/authors')] +strip: //div[@id='header'] +strip: //div[@class='topad-right'] +strip: 
//strong[contains(text(),'Cool Posts From Around the Web:')] test_url: http://www.slashfilm.com/superhero-bits-206/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/slate.com.txt b/inc/3rdparty/site_config/standard/slate.com.txt old mode 100644 new mode 100755 index e92f6a06..d5798e01 --- a/inc/3rdparty/site_config/standard/slate.com.txt +++ b/inc/3rdparty/site_config/standard/slate.com.txt @@ -1,19 +1,19 @@ -title: //h1[@class="sl-art-head-dek"] -body: //article//div[@class='sl-art-body']/div[contains(@class, 'body')] -strip: //div[@class="department_kicker"] -strip: //div[@id="insider_ad_wrapper" or @id="insider_ad_inner"] -strip: //div[@id="bottom_sponsored_links"] -strip: //div[@class="sl-art-ad-midflex"] -#strip: //dl -#strip: //p[em/a[contains(@href, 'facebook.com')]] -prune: no - -author: //div[@id='author_bio']//a[contains(@href, '/author/')] -author: //a[contains(@href, '/authors.')] - -date: substring-before(substring-after(//span[@class='sl-art-byline'], 'Posted '), ', at ') - -single_page_link: //a[@class='sl-art-sinpage'] - -test_url: http://www.slate.com/id/2274583/pagenum/all/ +title: //h1[@class="sl-art-head-dek"] +body: //article//div[@class='sl-art-body']/div[contains(@class, 'body')] +strip: //div[@class="department_kicker"] +strip: //div[@id="insider_ad_wrapper" or @id="insider_ad_inner"] +strip: //div[@id="bottom_sponsored_links"] +strip: //div[@class="sl-art-ad-midflex"] +#strip: //dl +#strip: //p[em/a[contains(@href, 'facebook.com')]] +prune: no + +author: //div[@id='author_bio']//a[contains(@href, '/author/')] +author: //a[contains(@href, '/authors.')] + +date: substring-before(substring-after(//span[@class='sl-art-byline'], 'Posted '), ', at ') + +single_page_link: //a[@class='sl-art-sinpage'] + +test_url: http://www.slate.com/id/2274583/pagenum/all/ test_url: http://www.slate.com/id/2293116/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/slice.seriouseats.com.txt 
b/inc/3rdparty/site_config/standard/slice.seriouseats.com.txt old mode 100644 new mode 100755 index 1a902b96..e62a3966 --- a/inc/3rdparty/site_config/standard/slice.seriouseats.com.txt +++ b/inc/3rdparty/site_config/standard/slice.seriouseats.com.txt @@ -1,15 +1,15 @@ -body: //div[@id='content'] - -# clean up recipe pages -strip: //h2[@class='fn'] | //h2[@class='double-lined'] | //h3 | //div[@id='threeColumn2'] | //div[@id='threeColumn3'] - -#recipe pages -strip_id_or_class: "recipe-feedback" -strip_id_or_class: "comments" -strip_id_or_class: "procedure-number" -strip_id_or_class: "more-with-author" - -#slice -strip_id_or_class: "inner" +body: //div[@id='content'] + +# clean up recipe pages +strip: //h2[@class='fn'] | //h2[@class='double-lined'] | //h3 | //div[@id='threeColumn2'] | //div[@id='threeColumn3'] + +#recipe pages +strip_id_or_class: "recipe-feedback" +strip_id_or_class: "comments" +strip_id_or_class: "procedure-number" +strip_id_or_class: "more-with-author" + +#slice +strip_id_or_class: "inner" test_url: http://slice.seriouseats.com/archives/2010/10/the-pizza-lab-how-to-make-great-new-york-style-pizza.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/slog.thestranger.com.txt b/inc/3rdparty/site_config/standard/slog.thestranger.com.txt old mode 100644 new mode 100755 index daa5e31b..f9526945 --- a/inc/3rdparty/site_config/standard/slog.thestranger.com.txt +++ b/inc/3rdparty/site_config/standard/slog.thestranger.com.txt @@ -1,4 +1,4 @@ -strip_id_or_class: postCategory -title: //h3[@class='postTitle'] +strip_id_or_class: postCategory +title: //h3[@class='postTitle'] body: //div[@class='postBody'] test_url: http://slog.thestranger.com/slog/archives/2010/10/12/sl-letter-of-the-day-leave-it-alone \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/smartinvestor.de.txt b/inc/3rdparty/site_config/standard/smartinvestor.de.txt old mode 100644 new mode 100755 index ec6c55c8..85ca46de --- 
a/inc/3rdparty/site_config/standard/smartinvestor.de.txt +++ b/inc/3rdparty/site_config/standard/smartinvestor.de.txt @@ -1,5 +1,5 @@ -title: //td[@class='hweissblau2'] -body: //p[@class='copy'] | //div[@class='Section1'] -prune: no +title: //td[@class='hweissblau2'] +body: //p[@class='copy'] | //div[@class='Section1'] +prune: no test_url: http://www.smartinvestor.de/news/smartinvestor/detail.hbs?itemid=item949496655&recnr=14593 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/sme.sk.txt b/inc/3rdparty/site_config/standard/sme.sk.txt old mode 100644 new mode 100755 index c3d01ffb..d41612cc --- a/inc/3rdparty/site_config/standard/sme.sk.txt +++ b/inc/3rdparty/site_config/standard/sme.sk.txt @@ -1,3 +1,3 @@ -title: //meta[@property='og:title']/@content +title: //meta[@property='og:title']/@content date: //p[@class='autor_line']/b/text() test_url: http://www.sme.sk/c/6268206/lipsic-vidi-malcharkove-uplatky.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/smithsonianmag.com.txt b/inc/3rdparty/site_config/standard/smithsonianmag.com.txt old mode 100644 new mode 100755 index 10a3f717..3e8fee95 --- a/inc/3rdparty/site_config/standard/smithsonianmag.com.txt +++ b/inc/3rdparty/site_config/standard/smithsonianmag.com.txt @@ -1,20 +1,20 @@ -# meta data -title://h1[@id = 'articleTitle'] -author:substring-after(//ul[@id = 'byLine']/li[1],'By ') -date:substring-before(substring-after(//ul[@id = 'byLine']/li[last()],','),',') -body://div[@id = 'article-body'] - -# full content -single_page_link://td/li[@class = 'article-singlepage']/a - -# caption clean up -wrap_in(i)://span[@class='articleImageCaptionwide'] -move_into (//span[@class='articleImageCaptionwide'])://div[@id = 'articleImage']/p - - -# clean up -strip://p[@id = 'articlePaginationWrapper'] -strip://ul[contains(@class, 'cat-breadcrumb')] -strip://div [@class= 'viewMorePhotos'] +# meta data +title://h1[@id = 'articleTitle'] +author:substring-after(//ul[@id = 
'byLine']/li[1],'By ') +date:substring-before(substring-after(//ul[@id = 'byLine']/li[last()],','),',') +body://div[@id = 'article-body'] + +# full content +single_page_link://td/li[@class = 'article-singlepage']/a + +# caption clean up +wrap_in(i)://span[@class='articleImageCaptionwide'] +move_into (//span[@class='articleImageCaptionwide'])://div[@id = 'articleImage']/p + + +# clean up +strip://p[@id = 'articlePaginationWrapper'] +strip://ul[contains(@class, 'cat-breadcrumb')] +strip://div [@class= 'viewMorePhotos'] test_url: http://www.smithsonianmag.com/history-archaeology/The-Goddess-Goes-Home.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/smokingapples.com.txt b/inc/3rdparty/site_config/standard/smokingapples.com.txt old mode 100644 new mode 100755 index e22af7a9..c68c1321 --- a/inc/3rdparty/site_config/standard/smokingapples.com.txt +++ b/inc/3rdparty/site_config/standard/smokingapples.com.txt @@ -1,5 +1,5 @@ -title: //h2[@class='custom-entry-title'] -author: substring-after(//span[@class='author vcard'],'by ') -date: substring-after(//span[@class='publ'],'Published on ') -body: //div[@class='postentry-content'] +title: //h2[@class='custom-entry-title'] +author: substring-after(//span[@class='author vcard'],'by ') +date: substring-after(//span[@class='publ'],'Published on ') +body: //div[@class='postentry-content'] test_url: http://smokingapples.com/software/popclip-for-mac/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/somethingawful.com.txt b/inc/3rdparty/site_config/standard/somethingawful.com.txt new file mode 100755 index 00000000..48547948 --- /dev/null +++ b/inc/3rdparty/site_config/standard/somethingawful.com.txt @@ -0,0 +1,17 @@ +title: //h1 +body: //div[@id = 'content-area'] +author: //p[contains(@class, 'byline')]/a +autodetect_next_page: yes +tidy: no + +strip_id_or_class: articleid +strip_id_or_class: logo +strip_id_or_class: pagebar +strip_id_or_class: featurenavlinks 
+strip_id_or_class: featured_frontpage +strip_id_or_class: sidebar +strip_id_or_class: footer +strip_id_or_class: byline +strip_id_or_class: logo +strip_id_or_class: nav_network +test_url: http://www.somethingawful.com/d/dungeons-and-dragons/wtf-monster-manual.php \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/songshuhui.net.txt b/inc/3rdparty/site_config/standard/songshuhui.net.txt new file mode 100755 index 00000000..a9233593 --- /dev/null +++ b/inc/3rdparty/site_config/standard/songshuhui.net.txt @@ -0,0 +1,7 @@ +# This filter is tested on: +# http://songshuhui.net/archives/65522 +# http://songshuhui.net/archives/75760 +title://h2/span/a +date:substring-before(substring-after(//div[@class='atrctitle']/div, '发表于'),' |') +body://div[@class='entry'] +test_url: http://songshuhui.net/archives/74819 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/sourcebooks.com.txt b/inc/3rdparty/site_config/standard/sourcebooks.com.txt old mode 100644 new mode 100755 index 668fc44a..b52169da --- a/inc/3rdparty/site_config/standard/sourcebooks.com.txt +++ b/inc/3rdparty/site_config/standard/sourcebooks.com.txt @@ -1,4 +1,4 @@ -#grab the actual content div -body: //div[@class='rt-article'] - +#grab the actual content div +body: //div[@class='rt-article'] + test_url: http://www.sourcebooks.com/next/sourcebooks-next-our-blog/1601-another-piece-of-the-e-puzzle-or-when-good-ebook-promotions-go-bad.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/spectator.co.uk.txt b/inc/3rdparty/site_config/standard/spectator.co.uk.txt old mode 100644 new mode 100755 index a05c8395..d0605ed2 --- a/inc/3rdparty/site_config/standard/spectator.co.uk.txt +++ b/inc/3rdparty/site_config/standard/spectator.co.uk.txt @@ -1,7 +1,7 @@ -author: /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body']/a[@class='author-link']/child::text() - -body: 
/html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body'] - -# Not very helpfull, the title and author are container by the same element that contains the body +author: /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body']/a[@class='author-link']/child::text() + +body: /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body'] + +# Not very helpfull, the title and author are container by the same element that contains the body strip: /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body']/h2 | /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body']/a[@class='author-link'] test_url: http://www.spectator.co.uk/arts-and-culture/night-and-day/7449683/spotify-sunday-my-personal-soundtrack.thtml \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/spectrum.ieee.org.txt b/inc/3rdparty/site_config/standard/spectrum.ieee.org.txt old mode 100644 new mode 100755 index 4b0704a8..aea3627e --- a/inc/3rdparty/site_config/standard/spectrum.ieee.org.txt +++ b/inc/3rdparty/site_config/standard/spectrum.ieee.org.txt @@ -1,3 +1,3 @@ -body://div[@class="articleBody"] +body://div[@class="articleBody"] author://p[@class="articleBodyTtl"] test_url: http://spectrum.ieee.org/semiconductors/processors/behind-intels-new-randomnumber-generator/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/speirs.org.txt b/inc/3rdparty/site_config/standard/speirs.org.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/spiegel.de.txt b/inc/3rdparty/site_config/standard/spiegel.de.txt old mode 100644 new mode 100755 index 390c075c..413e0155 --- a/inc/3rdparty/site_config/standard/spiegel.de.txt +++ b/inc/3rdparty/site_config/standard/spiegel.de.txt @@ -1,75 +1,75 @@ -# A. Niepel, narya.de@... 
-# - added single_page_link -# - added author for default and single page view -# - added date for single page view -# fforst@... -# - Fixed it -# bode2104@... -# - Fixed single_page_link -# - Included intro text in single page view -# - Added body in default view - -# set body -tidy: no -# body in single page view -body: //div[@id="spArticleContent"] -# body in default view -body: //div[@id="spArticleSection"] -# body in "Fotostrecke" -body: //div[@id="spBigaContent"] - -# set date in single page view -date: //div[@id="spArticleContent"]/h3 -# strip date -strip: //div[@id="spArticleContent"]/h3 -# set date in "Fotostrecke" -date: //div[@id="spBigaDatum"] - -#set title in single page view -title: //div[@id='spArticleContent']/h2 -# strip title -strip: //div[@id='spArticleContent']/h1 -strip: //div[@id='spArticleContent']/h2 -#set title in "Fotostrecke" -title: //div[@class='spBigaHeadline'] - -# set author -author: //p[@class="spAuthor"]/a -author: substring-after(//p[@class="spAuthor"], 'Von ') -# strip author -strip: //p[@class='spAuthor'] - -# remove captions -strip: //*/span[@class='spPicLayerText'] -strip: //*/div[@class='spPanoPlayerPaneControl'] -strip: //*/div[@class='spCredit'] -strip: //*/div[@class='spCredit']/following-sibling::p - -# remove ads -strip: //div[@class='spMInline'] - -# remove photogalleries and extras -strip: //div[@class='spPhotoGallery'] -strip: //div[@class='spPhotoGallery']/following-sibling::br -strip: //div[@class='spAssetAlignleft'] -strip: //div[contains(@class,'spAsset')] -strip: //br[@clear='all'] - -# remove community functions -strip: //div[@id='spSocialBookmark'] -strip: //div[contains(@class, 'spCommunityBox')] -strip: //div[contains(@class, 'spArticleNewsfeedBox')] -strip: //div[@class='spArticleCredit'] - -# remove clutter in "Fotostrecke" -strip: //div[@id='spBreadcrumb'] -strip: //div[@id='spBigaLatestEntries'] -strip: //div[contains(@class, 'spBigaNavi')] -strip: //div[@class='spDottedLine'] - -# Use link to print 
article for single page view -single_page_link: //a[contains(@href, '-druck')] - -# use next link in "Fotostrecke" -next_page_link: //a[@class='spBigaControlForw'] +# A. Niepel, narya.de@... +# - added single_page_link +# - added author for default and single page view +# - added date for single page view +# fforst@... +# - Fixed it +# bode2104@... +# - Fixed single_page_link +# - Included intro text in single page view +# - Added body in default view + +# set body +tidy: no +# body in single page view +body: //div[@id="spArticleContent"] +# body in default view +body: //div[@id="spArticleSection"] +# body in "Fotostrecke" +body: //div[@id="spBigaContent"] + +# set date in single page view +date: //div[@id="spArticleContent"]/h3 +# strip date +strip: //div[@id="spArticleContent"]/h3 +# set date in "Fotostrecke" +date: //div[@id="spBigaDatum"] + +#set title in single page view +title: //div[@id='spArticleContent']/h2 +# strip title +strip: //div[@id='spArticleContent']/h1 +strip: //div[@id='spArticleContent']/h2 +#set title in "Fotostrecke" +title: //div[@class='spBigaHeadline'] + +# set author +author: //p[@class="spAuthor"]/a +author: substring-after(//p[@class="spAuthor"], 'Von ') +# strip author +strip: //p[@class='spAuthor'] + +# remove captions +strip: //*/span[@class='spPicLayerText'] +strip: //*/div[@class='spPanoPlayerPaneControl'] +strip: //*/div[@class='spCredit'] +strip: //*/div[@class='spCredit']/following-sibling::p + +# remove ads +strip: //div[@class='spMInline'] + +# remove photogalleries and extras +strip: //div[@class='spPhotoGallery'] +strip: //div[@class='spPhotoGallery']/following-sibling::br +strip: //div[@class='spAssetAlignleft'] +strip: //div[contains(@class,'spAsset')] +strip: //br[@clear='all'] + +# remove community functions +strip: //div[@id='spSocialBookmark'] +strip: //div[contains(@class, 'spCommunityBox')] +strip: //div[contains(@class, 'spArticleNewsfeedBox')] +strip: //div[@class='spArticleCredit'] + +# remove clutter in 
"Fotostrecke" +strip: //div[@id='spBreadcrumb'] +strip: //div[@id='spBigaLatestEntries'] +strip: //div[contains(@class, 'spBigaNavi')] +strip: //div[@class='spDottedLine'] + +# Use link to print article for single page view +single_page_link: //a[contains(@href, '-druck')] + +# use next link in "Fotostrecke" +next_page_link: //a[@class='spBigaControlForw'] test_url: http://www.spiegel.de/politik/deutschland/0,1518,787602,00.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/spiked-online.com.txt b/inc/3rdparty/site_config/standard/spiked-online.com.txt new file mode 100755 index 00000000..7ec39c2b --- /dev/null +++ b/inc/3rdparty/site_config/standard/spiked-online.com.txt @@ -0,0 +1,7 @@ +title: //div[@id='articleTitleWrapper' or @id='mainFeature']//h1 +author: //*[@id='authorNameJob']//a +date: //div[@id='articleMeta']/p +body: //div[@id='mainFeature']//img | //div[contains(@class, 'fullText')] + +test_url: http://www.spiked-online.com/newsite/article/standing_up_to_the_white-coated_gods_of_fortune/13785 +test_url: http://www.spiked-online.com/newsite/article/sex_box_and_the_crisis_of_intimacy/14168 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/spin.com.txt b/inc/3rdparty/site_config/standard/spin.com.txt old mode 100644 new mode 100755 index 66f6192b..88eb454c --- a/inc/3rdparty/site_config/standard/spin.com.txt +++ b/inc/3rdparty/site_config/standard/spin.com.txt @@ -1,5 +1,5 @@ -tidy: no -body: //section[contains(@class, 'main')] -strip: //footer +tidy: no +body: //section[contains(@class, 'main')] +strip: //footer strip: //a[@class='paginated'] test_url: http://www.spin.com/articles/bathlands-deep-heart-americas-new-drug-nightmare \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/splatf.com.txt b/inc/3rdparty/site_config/standard/splatf.com.txt old mode 100644 new mode 100755 index d5671652..3e05a225 --- a/inc/3rdparty/site_config/standard/splatf.com.txt +++ 
b/inc/3rdparty/site_config/standard/splatf.com.txt @@ -1,5 +1,5 @@ -author:string('Dan Frommer/SplatF') -date://div[@class='postdate'] -body://div[@class='entry'] +author:string('Dan Frommer/SplatF') +date://div[@class='postdate'] +body://div[@class='entry'] title://div[@class='post']/h1 test_url: http://www.splatf.com/2012/02/month-six/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/splitsider.com.txt b/inc/3rdparty/site_config/standard/splitsider.com.txt old mode 100644 new mode 100755 index d1d392e7..4bbc7aac --- a/inc/3rdparty/site_config/standard/splitsider.com.txt +++ b/inc/3rdparty/site_config/standard/splitsider.com.txt @@ -1,4 +1,4 @@ -author: //div[@class='byline']/a -date: //div[@id='date'] +author: //div[@class='byline']/a +date: //div[@id='date'] body: //div[@class='entry'] test_url: http://splitsider.com/2011/10/saturday-nights-children-rob-riggle-2004-2005/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/sport.detik.com.txt b/inc/3rdparty/site_config/standard/sport.detik.com.txt old mode 100644 new mode 100755 index b404b829..18552d1e --- a/inc/3rdparty/site_config/standard/sport.detik.com.txt +++ b/inc/3rdparty/site_config/standard/sport.detik.com.txt @@ -1,8 +1,8 @@ -title://div[@class="content_detail"]/h1 - -author://div[@class="author"]/strong - -date:substring-before(substring-after(//div[@class="content_detail"]/*[@class="date"], ','), ' WIB') - +title://div[@class="content_detail"]/h1 + +author://div[@class="author"]/strong + +date:substring-before(substring-after(//div[@class="content_detail"]/*[@class="date"], ','), ' WIB') + body://div[@class='text_detail'] test_url: http://sport.detik.com/sepakbola/read/2012/05/23/065011/1922350/71/agen-silva-ingin-bertahan-di-milan?b99220270 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/sport.orf.at.txt b/inc/3rdparty/site_config/standard/sport.orf.at.txt old mode 100644 new mode 100755 index a794ded9..f0be85c7 --- 
a/inc/3rdparty/site_config/standard/sport.orf.at.txt +++ b/inc/3rdparty/site_config/standard/sport.orf.at.txt @@ -1,11 +1,11 @@ -single_page_link: //div[@id='content']//p[@class='readMore']/a - -title: //div[@class='hidden offscreen']/h2 -body: //div[@id="storyText"] -move_into(//div[@id='storyText']): //div[@class='fact'] -strip: //small[@class='credit'] -strip: //small[@class='caption'] -date: substring-after(//div[@id='storyMeta']//p[@class='date'],'Publiziert am') -strip: //p[@class='toplink'] +single_page_link: //div[@id='content']//p[@class='readMore']/a + +title: //div[@class='hidden offscreen']/h2 +body: //div[@id="storyText"] +move_into(//div[@id='storyText']): //div[@class='fact'] +strip: //small[@class='credit'] +strip: //small[@class='caption'] +date: substring-after(//div[@id='storyMeta']//p[@class='date'],'Publiziert am') +strip: //p[@class='toplink'] test_url: http://sport.orf.at/stories/2084851/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/sport365.fr.txt b/inc/3rdparty/site_config/standard/sport365.fr.txt new file mode 100755 index 00000000..8688f40b --- /dev/null +++ b/inc/3rdparty/site_config/standard/sport365.fr.txt @@ -0,0 +1,8 @@ +body: //h2[contains(@class, 'body_head')] | //div[@id='img_article' or contains(@class, 'body_content')] +body: //div[contains(@class, 'cpanel')]//div[contains(@class, 'thumbnails')] +prune: no +strip: //div[starts-with(@class, 'actu_')] +strip: //div[contains(@class, 'data')] + +test_url: http://www.sport365.fr/basketball/nba/new-york-accord-avec-toronto-pour-bargnani-1038773.shtml +test_url: http://www.sport365.fr/rss.xml \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/sports.espn.go.com.txt b/inc/3rdparty/site_config/standard/sports.espn.go.com.txt old mode 100644 new mode 100755 index e0f8223c..8c21ef2b --- a/inc/3rdparty/site_config/standard/sports.espn.go.com.txt +++ b/inc/3rdparty/site_config/standard/sports.espn.go.com.txt @@ -1,12 +1,12 @@ -title: 
//div[@class='headline'] | //div[@class='mod-header']/h3 -body: //div[contains(@class, 'article')] -strip: //div[contains(@class, 'mod-inline')] -strip: //*/span[@class='page-actions']/a -strip: //*/span[@class='page-actions']/a -strip: //div[@class='page-actions']/* -strip: //div[@class='headline'] | //div[@class='mod-header']/h3 -strip: //div[@class='mod-blog-navigation'] -strip: //div[@class='monthday'] -strip: //div[@class='time'] -strip: //div[@class='timeofday'] +title: //div[@class='headline'] | //div[@class='mod-header']/h3 +body: //div[contains(@class, 'article')] +strip: //div[contains(@class, 'mod-inline')] +strip: //*/span[@class='page-actions']/a +strip: //*/span[@class='page-actions']/a +strip: //div[@class='page-actions']/* +strip: //div[@class='headline'] | //div[@class='mod-header']/h3 +strip: //div[@class='mod-blog-navigation'] +strip: //div[@class='monthday'] +strip: //div[@class='time'] +strip: //div[@class='timeofday'] test_url: http://sports.espn.go.com/espn/page2/story?page=simmonsnfl2010/lebron_james_return_clevelend&sportCat=nba \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/sports.yahoo.com.txt b/inc/3rdparty/site_config/standard/sports.yahoo.com.txt old mode 100644 new mode 100755 index 96a3bb71..b0f57e2c --- a/inc/3rdparty/site_config/standard/sports.yahoo.com.txt +++ b/inc/3rdparty/site_config/standard/sports.yahoo.com.txt @@ -1,9 +1,9 @@ -title: //div[@id='article']/div[@class='hd']/h1 -body: //p[@id='byline'] | //div[@id='article']//div[@class='body_copy 0'] -strip: //div[@class='foot'] -strip: //div[@id='sidebar']//div[@class='ft'] -strip: //p[@id='byline']//em -tidy: no -prune: no +title: //div[@id='article']/div[@class='hd']/h1 +body: //p[@id='byline'] | //div[@id='article']//div[@class='body_copy 0'] +strip: //div[@class='foot'] +strip: //div[@id='sidebar']//div[@class='ft'] +strip: //p[@id='byline']//em +tidy: no +prune: no test_url: http://sports.yahoo.com/nba/news?slug=ap-nbafinals \ No newline at 
end of file diff --git a/inc/3rdparty/site_config/standard/sportschau.de.txt b/inc/3rdparty/site_config/standard/sportschau.de.txt old mode 100644 new mode 100755 index 6500e75c..1e58b520 --- a/inc/3rdparty/site_config/standard/sportschau.de.txt +++ b/inc/3rdparty/site_config/standard/sportschau.de.txt @@ -1,22 +1,22 @@ -title://div[@id='ardContent']/h1 - -author://p[@id='ardAutor'] -author://span[@id='ardQuelle'] -author:string('sportschau.de') - -date:substring-after(//span[@id='ardStand'], 'Stand: ') - -body://div[@id='ardContent'] - -strip://div[@id='ardContent']/h1 -strip://p[@id='ardAutor'] -strip: //div[@class='embeddedPlayer_clipinfo'] -strip: //div[@class='ardMehrZumThemaRechts'] -strip: //*[contains(@class, 'inv')] - -strip: //p[@id='ardAbbinder'] -strip: //div[@class='socialBookmarks'] -strip: //div[@id='ardContentEnd'] -strip: //div[@id='ardDisclaimer'] +title://div[@id='ardContent']/h1 + +author://p[@id='ardAutor'] +author://span[@id='ardQuelle'] +author:string('sportschau.de') + +date:substring-after(//span[@id='ardStand'], 'Stand: ') + +body://div[@id='ardContent'] + +strip://div[@id='ardContent']/h1 +strip://p[@id='ardAutor'] +strip: //div[@class='embeddedPlayer_clipinfo'] +strip: //div[@class='ardMehrZumThemaRechts'] +strip: //*[contains(@class, 'inv')] + +strip: //p[@id='ardAbbinder'] +strip: //div[@class='socialBookmarks'] +strip: //div[@id='ardContentEnd'] +strip: //div[@id='ardDisclaimer'] strip: //div[@id='ardRechteSpalte'] test_url: http://www.sportschau.de/sp/fussball/news201203/17/analyse_leverkusen_gladbach.jsp \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/sportsillustrated.cnn.com.txt b/inc/3rdparty/site_config/standard/sportsillustrated.cnn.com.txt old mode 100644 new mode 100755 index afc5879f..b3da8138 --- a/inc/3rdparty/site_config/standard/sportsillustrated.cnn.com.txt +++ b/inc/3rdparty/site_config/standard/sportsillustrated.cnn.com.txt @@ -1,26 +1,26 @@ -# main sportsillustrated.com articles -# -body: 
//div[@id="cnnStoryContent"] -title: //div[@id="cnnStoryHeadline"]//h1 -author: //div[@id="cnnSubBanner"]//strong -date: substring-after(//div[@id="cnnTimeStamp"], "Updated: ") -date: substring-after(//div[@id="cnnTimeStamp"], "Posted: ") - -# kill ugly font buttons -strip: //div[@id="cnnSCFontButtons"] - -# kill misc filler videos & etc -strip: //div[@class="cnnDivideContent"] -strip: //*[@class="cnnTMbox"] - -# si vault articles -# ------------- -body: //div[@class="siv_artPara"] -title: //div[@class="siv_artHeader"]//h1 -author: //div[@class="byline"] -date: //div[@class="date"] - -next_page_link: //div[@id='cnnStoryContinue']/a -strip_id_or_class: cnnstorypagination - +# main sportsillustrated.com articles +# +body: //div[@id="cnnStoryContent"] +title: //div[@id="cnnStoryHeadline"]//h1 +author: //div[@id="cnnSubBanner"]//strong +date: substring-after(//div[@id="cnnTimeStamp"], "Updated: ") +date: substring-after(//div[@id="cnnTimeStamp"], "Posted: ") + +# kill ugly font buttons +strip: //div[@id="cnnSCFontButtons"] + +# kill misc filler videos & etc +strip: //div[@class="cnnDivideContent"] +strip: //*[@class="cnnTMbox"] + +# si vault articles +# ------------- +body: //div[@class="siv_artPara"] +title: //div[@class="siv_artHeader"]//h1 +author: //div[@class="byline"] +date: //div[@class="date"] + +next_page_link: //div[@id='cnnStoryContinue']/a +strip_id_or_class: cnnstorypagination + test_url: http://sportsillustrated.cnn.com/2012/writers/peter_king/02/27/combine/index.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/sprengsatz.de.txt b/inc/3rdparty/site_config/standard/sprengsatz.de.txt old mode 100644 new mode 100755 index 16636bc5..5b683811 --- a/inc/3rdparty/site_config/standard/sprengsatz.de.txt +++ b/inc/3rdparty/site_config/standard/sprengsatz.de.txt @@ -1,5 +1,5 @@ -title: //h2 -author: string('Michael Spreng') -date: //div[@class='date'] +title: //h2 +author: string('Michael Spreng') +date: //div[@class='date'] body: 
//div[@class='entry'] test_url: http://www.sprengsatz.de/?p=3691 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/sqlite.org.txt b/inc/3rdparty/site_config/standard/sqlite.org.txt old mode 100644 new mode 100755 index 4872519a..15763c32 --- a/inc/3rdparty/site_config/standard/sqlite.org.txt +++ b/inc/3rdparty/site_config/standard/sqlite.org.txt @@ -1,7 +1,7 @@ -body: //div[@id='ff-body'] - -replace_string(<h1 align=center>): <div id="ff-body"><h1 align=center> - -prune: no - +body: //div[@id='ff-body'] + +replace_string(<h1 align=center>): <div id="ff-body"><h1 align=center> + +prune: no + test_url: http://www.sqlite.org/fileformat2.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/squashed.tumblr.com.txt b/inc/3rdparty/site_config/standard/squashed.tumblr.com.txt old mode 100644 new mode 100755 index 388209a9..8eae13ed --- a/inc/3rdparty/site_config/standard/squashed.tumblr.com.txt +++ b/inc/3rdparty/site_config/standard/squashed.tumblr.com.txt @@ -1,4 +1,4 @@ -body: //div[@class='content'] -date: substring-before( //div[@class='unit dateAndNotes'], 'with') +body: //div[@class='content'] +date: substring-before( //div[@class='unit dateAndNotes'], 'with') title: //h3 test_url: http://squashed.tumblr.com/post/17613522228/lets-stop-blaming-the-victims-of-predatory-lending \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/stackoverflow.com.txt b/inc/3rdparty/site_config/standard/stackoverflow.com.txt old mode 100644 new mode 100755 index e5317bac..bb95e93a --- a/inc/3rdparty/site_config/standard/stackoverflow.com.txt +++ b/inc/3rdparty/site_config/standard/stackoverflow.com.txt @@ -1,14 +1,14 @@ -body: //div[@class='post-text' or @class='user-action-time' or @class='user-details' or @class='vote'] | //div[@id='answers-header']//h2 - -replace_string(<div class="user-details"><br></div>): <!-- nothing --> -replace_string(<div class="vote">): <div class="vote"><h3>Vote count: - 
-strip_id_or_class: vote-up -strip_id_or_class: vote-down -strip_id_or_class: star-off -strip_id_or_class: favoritecount -strip_id_or_class: -share -strip_id_or_class: badgecount - +body: //div[@class='post-text' or @class='user-action-time' or @class='user-details' or @class='vote'] | //div[@id='answers-header']//h2 + +replace_string(<div class="user-details"><br></div>): <!-- nothing --> +replace_string(<div class="vote">): <div class="vote"><h3>Vote count: + +strip_id_or_class: vote-up +strip_id_or_class: vote-down +strip_id_or_class: star-off +strip_id_or_class: favoritecount +strip_id_or_class: -share +strip_id_or_class: badgecount + test_url: http://stackoverflow.com/questions/4484289/id-like-to-understand-the-jquery-plugin-syntax \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/stalbansreview.co.uk.txt b/inc/3rdparty/site_config/standard/stalbansreview.co.uk.txt old mode 100644 new mode 100755 index bde14217..a0f1587c --- a/inc/3rdparty/site_config/standard/stalbansreview.co.uk.txt +++ b/inc/3rdparty/site_config/standard/stalbansreview.co.uk.txt @@ -1,14 +1,14 @@ -title: //div[@class='articleLeft']/h3 - -author: substring-after(//span[@class='articleAuthor']/a,'By ') - -date: substring-before(//span[@class='articleDateTime'],'in ') - -body: //div[@class='articleLeft'] -strip: //div[@class='articleMoreNews'] -strip: //div[@class='articleLeft']/h3 -strip: //div[@class='articleLeft']/p[@class='articleInfo clearfix'] - -# Remove duplicate title from text -strip: //div[@id='site']/div[5][@class='holder']/div[1][@class='hBlock']/div[1][@class='sglCol article']/h3 +title: //div[@class='articleLeft']/h3 + +author: substring-after(//span[@class='articleAuthor']/a,'By ') + +date: substring-before(//span[@class='articleDateTime'],'in ') + +body: //div[@class='articleLeft'] +strip: //div[@class='articleMoreNews'] +strip: //div[@class='articleLeft']/h3 +strip: //div[@class='articleLeft']/p[@class='articleInfo clearfix'] + +# Remove duplicate 
title from text +strip: //div[@id='site']/div[5][@class='holder']/div[1][@class='hBlock']/div[1][@class='sglCol article']/h3 test_url: http://www.stalbansreview.co.uk/news/9581446.New_roundabout_in_King_Harry_Lane/r/?ref=rss \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/standard.co.uk.txt b/inc/3rdparty/site_config/standard/standard.co.uk.txt old mode 100644 new mode 100755 index 22a33484..71a2bda1 --- a/inc/3rdparty/site_config/standard/standard.co.uk.txt +++ b/inc/3rdparty/site_config/standard/standard.co.uk.txt @@ -1,16 +1,16 @@ -autodetect_next_page: no -footnotes: no -dissolve: //div[@class="column-2"]//div[@class="widget"] -dissolve: //div[@class="column-2"]//div - -author: //div[@class="innerbyline"]/a -strip: //div[@class="innerbyline"]/a - -strip: //p[@class="dateline"] -date: //p[@class="dateline"] - -title: //h1[@class="title"] -author: //div[@class="innerbyline"]/a -date: //p[@class="dateline"] +autodetect_next_page: no +footnotes: no +dissolve: //div[@class="column-2"]//div[@class="widget"] +dissolve: //div[@class="column-2"]//div + +author: //div[@class="innerbyline"]/a +strip: //div[@class="innerbyline"]/a + +strip: //p[@class="dateline"] +date: //p[@class="dateline"] + +title: //h1[@class="title"] +author: //div[@class="innerbyline"]/a +date: //p[@class="dateline"] body: //div[@class="column-2"] test_url: http://www.standard.co.uk/lifestyle/esmagazine/grace-and-flavour-pizarro-7938350.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/staradvertiser.com.txt b/inc/3rdparty/site_config/standard/staradvertiser.com.txt old mode 100644 new mode 100755 index 0579455f..254e2c2b --- a/inc/3rdparty/site_config/standard/staradvertiser.com.txt +++ b/inc/3rdparty/site_config/standard/staradvertiser.com.txt @@ -1,11 +1,11 @@ -title: //h1[@id='storyTitle'] -author: substring-after(//span[@class='hsa_postCredit'], 'By ') -date://span[@class='hsa_dateStamp'] -body: //div[@class='storytext'] 
-strip_id_or_class: insideStoryAd -strip_id_or_class: printDesc -strip_id_or_class: sb_2010_story_tools -strip_id_or_class: FBConnectButton_Text -strip_id_or_class: breadcrumbs -prune: no +title: //h1[@id='storyTitle'] +author: substring-after(//span[@class='hsa_postCredit'], 'By ') +date://span[@class='hsa_dateStamp'] +body: //div[@class='storytext'] +strip_id_or_class: insideStoryAd +strip_id_or_class: printDesc +strip_id_or_class: sb_2010_story_tools +strip_id_or_class: FBConnectButton_Text +strip_id_or_class: breadcrumbs +prune: no test_url: http://www.staradvertiser.com/news/20111112_World_leaders_step_onto_isle_stage.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/stephenfry.com.txt b/inc/3rdparty/site_config/standard/stephenfry.com.txt old mode 100644 new mode 100755 index 1169984f..efd1ec2b --- a/inc/3rdparty/site_config/standard/stephenfry.com.txt +++ b/inc/3rdparty/site_config/standard/stephenfry.com.txt @@ -1,8 +1,8 @@ -title: /html/head/meta[@name='title']/@content -author: //span[contains(concat(' ',normalize-space(@class),' '),' article_author ')]/a -date: //span[contains(concat(' ',normalize-space(@class),' '),' article_date ')] - -body: //div[@class='entry-content'] - +title: /html/head/meta[@name='title']/@content +author: //span[contains(concat(' ',normalize-space(@class),' '),' article_author ')]/a +date: //span[contains(concat(' ',normalize-space(@class),' '),' article_date ')] + +body: //div[@class='entry-content'] + single_page_link: //p[@class='pagination']/a test_url: http://www.stephenfry.com/2011/10/06/steve-jobs/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/stlbeacon.org.txt b/inc/3rdparty/site_config/standard/stlbeacon.org.txt old mode 100644 new mode 100755 index d66fee9f..75379a9c --- a/inc/3rdparty/site_config/standard/stlbeacon.org.txt +++ b/inc/3rdparty/site_config/standard/stlbeacon.org.txt @@ -1,5 +1,5 @@ -title: article/h1 -author: //p[@class='byline'] -date: 
//p[@class='date'] +title: article/h1 +author: //p[@class='byline'] +date: //p[@class='date'] body: //div[@class='body'] test_url: https://www.stlbeacon.org/#!/content/23404/mogop_caucus_031712 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/stockholm.etc.se.txt b/inc/3rdparty/site_config/standard/stockholm.etc.se.txt old mode 100644 new mode 100755 index 073043d5..2f4f8cb8 --- a/inc/3rdparty/site_config/standard/stockholm.etc.se.txt +++ b/inc/3rdparty/site_config/standard/stockholm.etc.se.txt @@ -1,5 +1,5 @@ -strip_id_or_class: 'left' -strip_id_or_class: 'right' -strip_id_or_class: 'block-belowcontent' +strip_id_or_class: 'left' +strip_id_or_class: 'right' +strip_id_or_class: 'block-belowcontent' test_url: http://stockholm.etc.se/reportage/bakom-stangda-dorrar-pa-fas-3-massa \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/stockholmsfria.nu.txt b/inc/3rdparty/site_config/standard/stockholmsfria.nu.txt new file mode 100755 index 00000000..cc8c28b8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/stockholmsfria.nu.txt @@ -0,0 +1,7 @@ +body: //div[contains(@class, 'layout__inner')]//div[contains(@class, 'file-image') or contains(@class, 'node__content')] +author: //article//div[contains(@class, 'field-byline')] +strip_id_or_class: rekommenderade +strip_id_or_class: disqus +strip_id_or_class: annonser + +test_url: http://www.stockholmsfria.nu/artikel/112068 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/straightdope.com.txt b/inc/3rdparty/site_config/standard/straightdope.com.txt new file mode 100755 index 00000000..f01d7ad1 --- /dev/null +++ b/inc/3rdparty/site_config/standard/straightdope.com.txt @@ -0,0 +1,6 @@ +body: //div[@id='article' or @id='current_illustration'] +title: //div[@id='article']//h1 +date: //div[@id='article']//div[@class='date'] +prune: no + +test_url: http://www.straightdope.com/columns/read/947/whatever-happened-to-adoption-of-the-metric-system-in-the-u-s \ No 
newline at end of file diff --git a/inc/3rdparty/site_config/standard/streetsblog.net.txt b/inc/3rdparty/site_config/standard/streetsblog.net.txt old mode 100644 new mode 100755 index 0b62a3d6..6cf03ca6 --- a/inc/3rdparty/site_config/standard/streetsblog.net.txt +++ b/inc/3rdparty/site_config/standard/streetsblog.net.txt @@ -1,7 +1,7 @@ -title: //h2[@class="post-title"] -date: //span[@class="post-date"] -body: //div[@class="post-entry"] - -#This is also good for *.streetsblog.org, for example: +title: //h2[@class="post-title"] +date: //span[@class="post-date"] +body: //div[@class="post-entry"] + +#This is also good for *.streetsblog.org, for example: #http://dc.streetsblog.org/2011/10/21/friday-job-market/ test_url: http://streetsblog.net/2011/10/20/look-out-below-one-in-nine-bridges-structurally-deficient-reports-t4a/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/stuff.co.nz.txt b/inc/3rdparty/site_config/standard/stuff.co.nz.txt old mode 100644 new mode 100755 index 12fd0939..3756092c --- a/inc/3rdparty/site_config/standard/stuff.co.nz.txt +++ b/inc/3rdparty/site_config/standard/stuff.co.nz.txt @@ -1,22 +1,22 @@ -title://div[@id='left_col']/h1 -author:substring-after(//span[contains(@class,'storycredit')],'BY ') -author://span[contains(@class,'storycredit')] -date:substring-after(//div[contains(@class,'toolbox_date')],'Last updated ') -date://div[contains(@class,'toolbox_date')] -body://div[@id='left_col'] - -strip_id_or_class: toolbox -strip_id_or_class: story_features -strip_id_or_class: sharebox_new -strip_id_or_class: related_box -strip_id_or_class: sponsored_links -strip_id_or_class: hidden_ad -strip_id_or_class: story_content_top -strip_id_or_class: total_number -strip_id_or_class: sort_order -strip_id_or_class: subscribe_order - -strip://div[contains(@class,'ad_story')] - -test_url: http://www.stuff.co.nz/national/politics/3930344/PM-issues-challenge +title://div[@id='left_col']/h1 
+author:substring-after(//span[contains(@class,'storycredit')],'BY ') +author://span[contains(@class,'storycredit')] +date:substring-after(//div[contains(@class,'toolbox_date')],'Last updated ') +date://div[contains(@class,'toolbox_date')] +body://div[@id='left_col'] + +strip_id_or_class: toolbox +strip_id_or_class: story_features +strip_id_or_class: sharebox_new +strip_id_or_class: related_box +strip_id_or_class: sponsored_links +strip_id_or_class: hidden_ad +strip_id_or_class: story_content_top +strip_id_or_class: total_number +strip_id_or_class: sort_order +strip_id_or_class: subscribe_order + +strip://div[contains(@class,'ad_story')] + +test_url: http://www.stuff.co.nz/national/politics/3930344/PM-issues-challenge test_url: http://www.stuff.co.nz/entertainment/7045944/International-praise-for-Ladyhawke \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/stumbleupon.com.txt b/inc/3rdparty/site_config/standard/stumbleupon.com.txt old mode 100644 new mode 100755 index 85682166..9adc3c50 --- a/inc/3rdparty/site_config/standard/stumbleupon.com.txt +++ b/inc/3rdparty/site_config/standard/stumbleupon.com.txt @@ -1,3 +1,3 @@ -single_page_link: //iframe[@id='stumbleFrame']/@src - -test_url: www.stumbleupon.com/su/35V0wB/zouchmagazine.com/poetry-violet/ \ No newline at end of file +single_page_link: //iframe[@id='tb-stumble-frame']/@src + +test_url: http://www.stumbleupon.com/su/35V0wB/zouchmagazine.com/poetry-violet/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/subtraction.com.txt b/inc/3rdparty/site_config/standard/subtraction.com.txt old mode 100644 new mode 100755 index 454e37b1..9ba6eb77 --- a/inc/3rdparty/site_config/standard/subtraction.com.txt +++ b/inc/3rdparty/site_config/standard/subtraction.com.txt @@ -1,17 +1,17 @@ -title: //*[@id='posts']/div[1]/h2 -author: //*[@id='posts']/div[1]/div[2]/span[2]/a -date: //*[@class='date'] -body: //div[@class='body-lead'] - -# take out the bit saying 'body' -strip: 
//div[@class='body-lead']/div[@class='info-label'] - - - - - - - - - +title: //*[@id='posts']/div[1]/h2 +author: //*[@id='posts']/div[1]/div[2]/span[2]/a +date: //*[@class='date'] +body: //div[@class='body-lead'] + +# take out the bit saying 'body' +strip: //div[@class='body-lead']/div[@class='info-label'] + + + + + + + + + test_url: http://www.subtraction.com/2011/02/01/unnecessary-explanations \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/sueddeutsche.de.txt b/inc/3rdparty/site_config/standard/sueddeutsche.de.txt old mode 100644 new mode 100755 index 4aa9410c..74b8d451 --- a/inc/3rdparty/site_config/standard/sueddeutsche.de.txt +++ b/inc/3rdparty/site_config/standard/sueddeutsche.de.txt @@ -1,18 +1,18 @@ -# 2012-12-04: complete rewrite after S�ddeutsche.de relaunch - carlo@... - -single_page_link: //a[ contains( @href, "/2.220/" ) ] - -body: //article[@id="sitecontent"]/section[@class="body"] -author: //address[@class="author"] -date: //div[@class="header"]//h1//span[@class="updated"] -wrap_in(small): //div[@class="footer"] -wrap_in(i): //figcaption/h3 -dissolve: //figcaption//h3 -dissolve: //figure/div[@class="body"] -dissolve: //figure/a - -strip: //figure[ not( contains(@class, "zoomimage" ) ) ] -strip: //div[@data-onlineonly="true"] -strip: //address[@class="author"] - +# 2012-12-04: complete rewrite after Süddeutsche.de relaunch - carlo@... 
+ +single_page_link: //a[ contains( @href, "/2.220/" ) ] + +body: //article[@id="sitecontent"]/section[@class="body"] +author: //address[@class="author"] +date: //div[@class="header"]//h1//span[@class="updated"] +wrap_in(small): //div[@class="footer"] +wrap_in(i): //figcaption/h3 +dissolve: //figcaption//h3 +dissolve: //figure/div[@class="body"] +dissolve: //figure/a + +strip: //figure[ not( contains(@class, "zoomimage" ) ) ] +strip: //div[@data-onlineonly="true"] +strip: //address[@class="author"] + test_url: http://www.sueddeutsche.de/muenchen/mietshaus-am-gaertnerplatz-alles-muss-raus-1.1556693 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/summify.com.txt b/inc/3rdparty/site_config/standard/summify.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/suntimes.com.txt b/inc/3rdparty/site_config/standard/suntimes.com.txt old mode 100644 new mode 100755 index 13390e4f..6d4594cf --- a/inc/3rdparty/site_config/standard/suntimes.com.txt +++ b/inc/3rdparty/site_config/standard/suntimes.com.txt @@ -1,14 +1,14 @@ -title: //div[@class='story-details']/h1 -date: //span[@class='date-time'] -Author: substring-after(//p[@class='by-line'], 'By ') - -strip: //div[@class='videoThumbnails'] -strip: //div[@class='ad-square2-container'] -strip: //div[@class='homeDeliveryContainer5'] - -strip: //div[@class='image-description'] -strip: //div[@id='internal-side-bar'] - -strip: //span[@class='hide'] +title: //div[@class='story-details']/h1 +date: //span[@class='date-time'] +Author: substring-after(//p[@class='by-line'], 'By ') + +strip: //div[@class='videoThumbnails'] +strip: //div[@class='ad-square2-container'] +strip: //div[@class='homeDeliveryContainer5'] + +strip: //div[@class='image-description'] +strip: //div[@id='internal-side-bar'] + +strip: //span[@class='hide'] strip: //div[@class='date'] test_url: 
http://www.suntimes.com/technology/ihnatko/8816567-452/review-kindle-fire-is-no-ipad-killer-but-it-is-a-killer-device.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/svd.se.txt b/inc/3rdparty/site_config/standard/svd.se.txt old mode 100644 new mode 100755 index 02b5b8ca..bc0a1ca0 --- a/inc/3rdparty/site_config/standard/svd.se.txt +++ b/inc/3rdparty/site_config/standard/svd.se.txt @@ -1,4 +1,14 @@ -# Ads -strip_id_or_class: articlead +body: //div[@id='article-content'] +author: //div[@id='article']//div[@class='byline']/p -test_url: http://www.svd.se/nyheter/inrikes/manga-huggormsbitna-golfare_5004031.svd \ No newline at end of file +# Ads +strip_id_or_class: articlead + +# Sharing +strip_id_or_class: share + +prune: no + +test_url: http://www.svd.se/nyheter/inrikes/oppositionen-stoppar-skattesankning_8531228.svd +test_url: http://www.svd.se/nyheter/inrikes/manga-huggormsbitna-golfare_5004031.svd +test_url: http://www.svd.se/?service=rss&type=senastenytt \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/svt.se.txt b/inc/3rdparty/site_config/standard/svt.se.txt new file mode 100755 index 00000000..ba35f7d1 --- /dev/null +++ b/inc/3rdparty/site_config/standard/svt.se.txt @@ -0,0 +1,16 @@ +title: //article[@role='main']//h1 +body: //article[@role='main'] +strip: //aside +replace_string(<noscript>): <div> +replace_string(</noscript>): </div> +strip_id_or_class: svtHide-No-Js +strip_id_or_class: aside +strip_id_or_class: Aside +strip_id_or_class: hidden +strip_id_or_class: Share +tidy: no +prune: no + +test_url: http://www.svt.se/ug/framtidsdrommar-om-jobb-blev-lackande-gifthal +test_url: http://www.svt.se/nyheter/het-debatt-mellan-borg-och-andersson +test_url: http://www.svt.se/nyheter/regionalt/svtsormland/sj-tag-evakuerades-efter-rokdrama \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/sydsvenskan.se.txt b/inc/3rdparty/site_config/standard/sydsvenskan.se.txt old mode 100644 new mode 
100755 index da6772aa..24ba1426 --- a/inc/3rdparty/site_config/standard/sydsvenskan.se.txt +++ b/inc/3rdparty/site_config/standard/sydsvenskan.se.txt @@ -1,11 +1,18 @@ -title: //h1 - -author: //a[contains(@href, '/sok/?')]/text() - -date: substring-after(//span[@class='date'], 'Publicerad ') - -body: //div[@class='two_column_left'] -strip_id_or_class: story -strip: //div[@class='leadText saplo:lead']/h5 - -test_url: http://www.sydsvenskan.se/kultur-och-nojen/-jag-vill-garna--stanna-- \ No newline at end of file +title: //h1 + +author: //a[contains(@href, '/sok/?')]/text() + +date: //meta[@name='bi3dPubDate']/@content + +body: (//div[contains(@class, 'slider_wrapper')])[1] | //div[@id='article_image' or @class='two_column_left'] +strip_id_or_class: story +strip_id_or_class: article_body_ad +strip: //div[@class='leadText saplo:lead']/h5 + +replace_string(<br />): <br /><br /> + +prune: no + +test_url: http://www.sydsvenskan.se/malmo/allt-jag-ager-ligger-pa-botten/ +test_url: http://www.sydsvenskan.se/kultur-och-nojen/-jag-vill-garna--stanna-- +test_url: http://www.sydsvenskan.se/rss.xml \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/symmetrymagazine.org.txt b/inc/3rdparty/site_config/standard/symmetrymagazine.org.txt old mode 100644 new mode 100755 index 3109c0e7..5bcfb9ef --- a/inc/3rdparty/site_config/standard/symmetrymagazine.org.txt +++ b/inc/3rdparty/site_config/standard/symmetrymagazine.org.txt @@ -1,12 +1,12 @@ -title: //div[contains(@class, "post")]/h2 - -author: //div[contains(@class, "post")]/p[position()=last()]/text()[1] - -date: //div[contains(@class, "post")]/p[1] - -body: //div[contains(@class, "post")] - -strip: //div[contains(@class, "post")]/h2[1] -strip: //div[contains(@class, "post")]/p[1] +title: //div[contains(@class, "post")]/h2 + +author: //div[contains(@class, "post")]/p[position()=last()]/text()[1] + +date: //div[contains(@class, "post")]/p[1] + +body: //div[contains(@class, "post")] + +strip: 
//div[contains(@class, "post")]/h2[1] +strip: //div[contains(@class, "post")]/p[1] strip: //div[contains(@class, "post")]/p[position()=last()] test_url: http://www.symmetrymagazine.org/breaking/?p=12784 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/sz-magazin.sueddeutsche.de.txt b/inc/3rdparty/site_config/standard/sz-magazin.sueddeutsche.de.txt old mode 100644 new mode 100755 index c3e34977..e058032c --- a/inc/3rdparty/site_config/standard/sz-magazin.sueddeutsche.de.txt +++ b/inc/3rdparty/site_config/standard/sz-magazin.sueddeutsche.de.txt @@ -1,15 +1,15 @@ -title: //h1 -body://div[@class='drucken'] -author: substring-after(//span[@class='autor'], 'Von ') -author: //span[@class='autor'] - -single_page_link://a[contains(@href, '/drucken/')] -convert_double_br_tags:yes - -dissolve://div[@class='vorspann'] - -strip://h1 -strip_id_or_class: klassifizierung -strip_id_or_class: source +title: //h1 +body://div[@class='drucken'] +author: substring-after(//span[@class='autor'], 'Von ') +author: //span[@class='autor'] + +single_page_link://a[contains(@href, '/drucken/')] +convert_double_br_tags:yes + +dissolve://div[@class='vorspann'] + +strip://h1 +strip_id_or_class: klassifizierung +strip_id_or_class: source strip_id_or_class: autor test_url: http://sz-magazin.sueddeutsche.de/texte/anzeigen/37567 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/sz.de.txt b/inc/3rdparty/site_config/standard/sz.de.txt new file mode 100755 index 00000000..f67637d2 --- /dev/null +++ b/inc/3rdparty/site_config/standard/sz.de.txt @@ -0,0 +1,18 @@ +# 2012-12-04: complete rewrite after Süddeutsche.de relaunch - carlo@... 
+ +single_page_link: //a[ contains( @href, "/2.220/" ) ] + +body: //article[@id="sitecontent"]/section[@class="body"] +author: //address[@class="author"] +date: //div[@class="header"]//h1//span[@class="updated"] +wrap_in(small): //div[@class="footer"] +wrap_in(i): //figcaption/h3 +dissolve: //figcaption//h3 +dissolve: //figure/div[@class="body"] +dissolve: //figure/a + +strip: //figure[ not( contains(@class, "zoomimage" ) ) ] +strip: //div[@data-onlineonly="true"] +strip: //address[@class="author"] + +test_url: http://sz.de/1.1556693 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/tagesschau.de.txt b/inc/3rdparty/site_config/standard/tagesschau.de.txt old mode 100644 new mode 100755 index 8ce8a90e..be76cd05 --- a/inc/3rdparty/site_config/standard/tagesschau.de.txt +++ b/inc/3rdparty/site_config/standard/tagesschau.de.txt @@ -1,23 +1,23 @@ -title://h1[1] - -author: substring-after(//em, 'Von ') -author:string('tagesschau.de') - -date:substring-after(//div[@class='standDatum'], 'Stand: ') - -body://div[contains(@class, 'article')] | //div[contains(@class, 'centerCol')] - -strip://h1[1] -strip: //div[contains(@class, 'directLinks')] -strip: //div[contains(@class, 'zitatBox')] -strip: //div[contains(@class, 'teaserBox metaBlock')] -strip: //*[contains(@class, 'inv')] -strip: //span[@class='imgSubline'] -strip: //*[contains(@class, 'topline')][1] -strip: //div[@id='rightCol'][1] -strip: //div[@id="footer"][1] -strip: //div[@class="fPlayer"] -strip: //div[@id='seitenanfang'] -strip: //div[@class='standDatum'] +title://h1[1] + +author: substring-after(//em, 'Von ') +author:string('tagesschau.de') + +date:substring-after(//div[@class='standDatum'], 'Stand: ') + +body://div[contains(@class, 'article')] | //div[contains(@class, 'centerCol')] + +strip://h1[1] +strip: //div[contains(@class, 'directLinks')] +strip: //div[contains(@class, 'zitatBox')] +strip: //div[contains(@class, 'teaserBox metaBlock')] +strip: //*[contains(@class, 'inv')] +strip: 
//span[@class='imgSubline'] +strip: //*[contains(@class, 'topline')][1] +strip: //div[@id='rightCol'][1] +strip: //div[@id="footer"][1] +strip: //div[@class="fPlayer"] +strip: //div[@id='seitenanfang'] +strip: //div[@class='standDatum'] strip: //em test_url: http://www.tagesschau.de/ausland/wahlkampffrankreich102.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/tampabay.com.txt b/inc/3rdparty/site_config/standard/tampabay.com.txt old mode 100644 new mode 100755 index bfe841c6..47a6ffab --- a/inc/3rdparty/site_config/standard/tampabay.com.txt +++ b/inc/3rdparty/site_config/standard/tampabay.com.txt @@ -1,5 +1,5 @@ -title: //span[@class="entry-title"] -author: //*[contains(@class, 'item')]/p/a/text() -date: substring-after(//*[contains(@class, 'item')]/p/text()[3], 'Posted:') +title: //span[@class="entry-title"] +author: //*[contains(@class, 'item')]/p/a/text() +date: substring-after(//*[contains(@class, 'item')]/p/text()[3], 'Posted:') body: //div[@class="entry-content"] test_url: http://www.tampabay.com/news/salvador-dali-leaders-want-st-petersburg-city-council-to-put-brakes-on/1236349 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/taptaptap.com.txt b/inc/3rdparty/site_config/standard/taptaptap.com.txt old mode 100644 new mode 100755 index 13de70e9..e1e79428 --- a/inc/3rdparty/site_config/standard/taptaptap.com.txt +++ b/inc/3rdparty/site_config/standard/taptaptap.com.txt @@ -1,4 +1,4 @@ -title: //h3[@class="storytitle"] -body: //div[@class="post"] +title: //h3[@class="storytitle"] +body: //div[@class="post"] strip: //div[@class="blurbBox"] test_url: http://taptaptap.com/blog/apples-precedents-vs-apples-guidelines/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/tasteofhome.com.txt b/inc/3rdparty/site_config/standard/tasteofhome.com.txt old mode 100644 new mode 100755 index 77773363..f3234f34 --- a/inc/3rdparty/site_config/standard/tasteofhome.com.txt +++ 
b/inc/3rdparty/site_config/standard/tasteofhome.com.txt @@ -1,15 +1,11 @@ -title: //span[@id='ctl00_ctl00_MainContent_MainContent_RecipeImage1_lblRecipeTitle'] -body: //div[@id='RDNEW']//*[@class='Recipe-imgCon' or @class='Recipe-Intro' or @class='recipeDetails'] -strip_id_or_class: rec-ExRightPanel -strip_id_or_class: divCarousel -strip_id_or_class: preptimeOuter -strip_id_or_class: cooktimeOuter -strip_id_or_class: durationOuter -strip_id_or_class: divImageFooter -strip_id_or_class: microFormatFnIngred -strip: //span[@class='Recipe-Intro']//*[@class='link' or @class='rating'] - -prune: no -tidy: no - -test_url: http://www.tasteofhome.com/recipes/Grinch-Punch \ No newline at end of file +title: //div[@id='ctl00_MainContent_ctl00_Div1']//h2 +body: //div[@id='ctl00_MainContent_ctl00_Div1'] + +single_page_link: //div[contains(@class, 'recipeHeader')]//a[contains(@href, '/print')] + +strip_image_src: tohPrintL.png + +prune: no + +test_url: http://www.tasteofhome.com/recipes/Grinch-Punch +test_url: http://www.tasteofhome.com/recipes/lactose-free-chocolate-chip-cookies \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/taz.de.txt b/inc/3rdparty/site_config/standard/taz.de.txt old mode 100644 new mode 100755 index 6e84527b..cf853662 --- a/inc/3rdparty/site_config/standard/taz.de.txt +++ b/inc/3rdparty/site_config/standard/taz.de.txt @@ -1,8 +1,8 @@ -date: //div[@class='secthead'] -body: //div[@class='sectbody'] -title: concat(//div[@class='sectbody']/h4,': ',//div[@class='sectbody']/h1) -author: //span[@class='author'] -strip: //p[@class='caption'] -strip_id_or_class: rack +date: //div[@class='secthead'] +body: //div[@class='sectbody'] +title: concat(//div[@class='sectbody']/h4,': ',//div[@class='sectbody']/h1) +author: //span[@class='author'] +strip: //p[@class='caption'] +strip_id_or_class: rack test_url: http://www.taz.de/Protestbewegung-Occupy/!80188/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/tbray.org.txt 
b/inc/3rdparty/site_config/standard/tbray.org.txt old mode 100644 new mode 100755 index fbe94fa4..558dc9c8 --- a/inc/3rdparty/site_config/standard/tbray.org.txt +++ b/inc/3rdparty/site_config/standard/tbray.org.txt @@ -1,5 +1,5 @@ -body: //div[@id='centercontent'] -strip: //div[@id='rightcontent'] -date: substring-before( //div[@id='cats'], '�') +body: //div[@id='centercontent'] +strip: //div[@id='rightcontent'] +date: substring-before( //div[@id='cats'], '·') title: //h1 test_url: http://www.tbray.org/ongoing/When/201x/2012/03/04/Mobile-Money \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/tcmanila.tk.txt b/inc/3rdparty/site_config/standard/tcmanila.tk.txt new file mode 100755 index 00000000..f6032ec3 --- /dev/null +++ b/inc/3rdparty/site_config/standard/tcmanila.tk.txt @@ -0,0 +1,7 @@ +title: //h2 +body: //div[@class="post_content"] +author: //span[@class="fn"] +date: //time[@class="updated"] +strip_comments: //yes +footnotes: //yes +test_url: http://tcmanila.tk/post/29189064358/my-2012-roadmap-is-almost-complete-look-at-the \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/tcng.org.txt b/inc/3rdparty/site_config/standard/tcng.org.txt old mode 100644 new mode 100755 index 765224e4..4873b50d --- a/inc/3rdparty/site_config/standard/tcng.org.txt +++ b/inc/3rdparty/site_config/standard/tcng.org.txt @@ -1,4 +1,4 @@ -title: //div[@id='main-content']/h1 -body: //div[@id='main-content'] +title: //div[@id='main-content']/h1 +body: //div[@id='main-content'] strip: //div[@id='main-content']/h1 test_url: http://www.tcng.org/index.php/blog/view/teaching-basic-health-cutting-down-costs \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/tech.fortune.cnn.com.txt b/inc/3rdparty/site_config/standard/tech.fortune.cnn.com.txt old mode 100644 new mode 100755 index b6d17da4..da198622 --- a/inc/3rdparty/site_config/standard/tech.fortune.cnn.com.txt +++ 
b/inc/3rdparty/site_config/standard/tech.fortune.cnn.com.txt @@ -1,4 +1,4 @@ -title: //h1[@class='storyheadline'] -body: //div[@class='storytext'] +title: //h1[@class='storyheadline'] +body: //div[@class='storytext'] strip: //strong test_url: http://tech.fortune.cnn.com/2011/03/17/why-startups-dont-go-public-anymore/?section=money_topstories&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fmoney_topstories+%28Top+Stories%29 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/tech.gilt.com.txt b/inc/3rdparty/site_config/standard/tech.gilt.com.txt new file mode 100755 index 00000000..ab564606 --- /dev/null +++ b/inc/3rdparty/site_config/standard/tech.gilt.com.txt @@ -0,0 +1,5 @@ +title: //div[@class="title"]/h1 +title: //div[@class="caption"]/h1 +author: substring-after(//div[@class="metadata"]/div[@class="date"]/a[2], 'by ') +date: //div[@class="metadata"]/div[@class="date"]/a +test_url: http://tech.gilt.com/post/46359463184/26-3-13-todays-noon-outage-and-what-were-doing-to \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/tech.sina.com.cn.txt b/inc/3rdparty/site_config/standard/tech.sina.com.cn.txt old mode 100644 new mode 100755 index f7228ebf..75126f9c --- a/inc/3rdparty/site_config/standard/tech.sina.com.cn.txt +++ b/inc/3rdparty/site_config/standard/tech.sina.com.cn.txt @@ -1,11 +1,11 @@ -title://h1[contains(@id,'artibodyTitle')] - -date://span[contains(@id,'pub_date')] - -body://div[contains(@id,'artibody')] - -strip://div[contains(@class,'otherContent')] - -next_page_link://p[@class='page']/a[contains(.,'下一页')] +title://h1[contains(@id,'artibodyTitle')] + +date://span[contains(@id,'pub_date')] + +body://div[contains(@id,'artibody')] + +strip://div[contains(@class,'otherContent')] + +next_page_link://p[@class='page']/a[contains(.,'下一页')] test_url: http://tech.sina.com.cn/mobile/n/2012-03-22/07476863046.shtml \ No newline at end of file diff --git 
a/inc/3rdparty/site_config/standard/techcrunch.com.txt b/inc/3rdparty/site_config/standard/techcrunch.com.txt old mode 100644 new mode 100755 index f436acb5..1509c46e --- a/inc/3rdparty/site_config/standard/techcrunch.com.txt +++ b/inc/3rdparty/site_config/standard/techcrunch.com.txt @@ -1,18 +1,18 @@ -body: //div[contains(@class, 'media-container') or contains(@class, 'body-copy')] - -author: //a[@class="name"] - -date: //div[@class="post-time"] - -title: //h1[@class="headline"] -strip_id_or_class: module-crunchbase - -# The following is for the mobile site -body: //div[@id="singlentry"] -author: substring-after(//span[@class="single-post-meta-top"],'rsaquo; ') -date: substring-before(//div[@class="single-post-meta-top"],' @') -title: //a[@class="sh2"] - -prune: no - +body: //div[contains(@class, 'media-container') or contains(@class, 'body-copy')] + +author: //a[@class="name"] + +date: //div[@class="post-time"] + +title: //h1[@class="headline"] +strip_id_or_class: module-crunchbase + +# The following is for the mobile site +body: //div[@id="singlentry"] +author: substring-after(//span[@class="single-post-meta-top"],'rsaquo; ') +date: substring-before(//div[@class="single-post-meta-top"],' @') +title: //a[@class="sh2"] + +prune: no + test_url: http://techcrunch.com/2011/10/18/apples-insanely-great-q1-2012/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/techdirt.com.txt b/inc/3rdparty/site_config/standard/techdirt.com.txt old mode 100644 new mode 100755 index 727f3701..7db2f95b --- a/inc/3rdparty/site_config/standard/techdirt.com.txt +++ b/inc/3rdparty/site_config/standard/techdirt.com.txt @@ -1,12 +1,12 @@ -body: //div[@class='story'] -title: //div[@class='story']/h1 -strip: //div[@class='story']/h1 - -author: //div[@class='details']/p[contains(., 'by ')]/a -date: //p[@class='storydate'] - -strip: //p[a[contains(., 'Leave a Comment')]] -strip_id_or_class: share -strip_id_or_class: maincolumn_head +body: //div[@class='story'] +title: 
//div[@class='story']/h1 +strip: //div[@class='story']/h1 + +author: //div[@class='details']/p[contains(., 'by ')]/a +date: //p[@class='storydate'] + +strip: //p[a[contains(., 'Leave a Comment')]] +strip_id_or_class: share +strip_id_or_class: maincolumn_head strip_id_or_class: maincolmod test_url: http://www.techdirt.com/articles/20120112/17455117394/sega-gets-it-right-about-sopa-its-time-hard-reset-copyright-law-congress.shtml \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/techhive.com.txt b/inc/3rdparty/site_config/standard/techhive.com.txt new file mode 100755 index 00000000..29720b0b --- /dev/null +++ b/inc/3rdparty/site_config/standard/techhive.com.txt @@ -0,0 +1,18 @@ +title: //div[@class='articleHead']//h1 +author: //div[@class="author-name"]/a[1] +body: //div[@class="main"] + +# remove 'From the Lab' and 'Recent posts' text +strip: //div[@class='blogLabel'] + +# remove byline and meta info +strip: //div[@class="article-meta"] +strip: //div[@class="author-info"] + +#strip tags and categories +strip: //div[@class="department"] + +#strip product cap links +strip: //div[@class="cap-main"] +strip: //div[@id="compare-lede"] +test_url: http://www.techhive.com/article/2010549/up-close-with-blackberry-10.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/techmeme.com.txt b/inc/3rdparty/site_config/standard/techmeme.com.txt old mode 100644 new mode 100755 index 8644e00f..0b4bfbd6 --- a/inc/3rdparty/site_config/standard/techmeme.com.txt +++ b/inc/3rdparty/site_config/standard/techmeme.com.txt @@ -1,3 +1,3 @@ -single_page_link_in_feed: //b/a - +single_page_link_in_feed: //b/a + test_url_feed: http://www.techmeme.com/feed.xml \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/technicallyjordan.tumblr.com.txt b/inc/3rdparty/site_config/standard/technicallyjordan.tumblr.com.txt old mode 100644 new mode 100755 index cc26ee4c..d871b603 --- 
a/inc/3rdparty/site_config/standard/technicallyjordan.tumblr.com.txt +++ b/inc/3rdparty/site_config/standard/technicallyjordan.tumblr.com.txt @@ -1,8 +1,8 @@ -title: //h2 -author: //meta[@name="author"]/@content -date: //h3 -body: //div[@class="postBody"] -strip: //h1 -strip: //h2 -strip: //h3 +title: //h2 +author: //meta[@name="author"]/@content +date: //h3 +body: //div[@class="postBody"] +strip: //h1 +strip: //h2 +strip: //h3 test_url: http://technicallyjordan.tumblr.com/post/22914659822/facebook-to-launch-app-store-knock-off \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/technologizer.com.txt b/inc/3rdparty/site_config/standard/technologizer.com.txt new file mode 100755 index 00000000..179bf5a6 --- /dev/null +++ b/inc/3rdparty/site_config/standard/technologizer.com.txt @@ -0,0 +1,5 @@ +next_page_link: //a[contains(., 'NEXT PAGE')] +# following::node() selects text nodes too whereas following::* selects only elements. +strip: //span[@class='pageo']/following::node() +strip: //span[@class='pageo'] +test_url: http://technologizer.com/2010/03/08/the-secret-origin-of-windows/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/technologyreview.com.txt b/inc/3rdparty/site_config/standard/technologyreview.com.txt old mode 100644 new mode 100755 index 41f21d46..d405eb18 --- a/inc/3rdparty/site_config/standard/technologyreview.com.txt +++ b/inc/3rdparty/site_config/standard/technologyreview.com.txt @@ -1,16 +1,16 @@ -title: //header[@class='article-meta']/h1 -title: substring-before(//title, '|') - -body: //section[contains(@class, 'body')] - -# Author & Date for News and Featured Stories -author: //ul[@class='byline']/li/a -author: substring-before(substring-after(//ul[@class='byline']/li, 'By '), ' on') -date: substring-after(//ul[@class='byline']/li, 'on ') - -# Author & Date for "Views" -author: //div[@class='view-byline']/div[@class='meta']/h2[1] -date: //div[@class='view-byline']/div[@class='meta']/h2[2] - 
-next_page_link: //section[@class='pagination']/a[contains(@class, 'continue')] +title: //header[@class='article-meta']/h1 +title: substring-before(//title, '|') + +body: //section[contains(@class, 'body')] + +# Author & Date for News and Featured Stories +author: //ul[@class='byline']/li/a +author: substring-before(substring-after(//ul[@class='byline']/li, 'By '), ' on') +date: substring-after(//ul[@class='byline']/li, 'on ') + +# Author & Date for "Views" +author: //div[@class='view-byline']/div[@class='meta']/h2[1] +date: //div[@class='view-byline']/div[@class='meta']/h2[2] + +next_page_link: //section[@class='pagination']/a[contains(@class, 'continue')] test_url: http://www.technologyreview.com/news/427567/facebooks-telescope-on-human-behavior/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/techpinions.com.txt b/inc/3rdparty/site_config/standard/techpinions.com.txt old mode 100644 new mode 100755 index 89ed8349..8e1aa96c --- a/inc/3rdparty/site_config/standard/techpinions.com.txt +++ b/inc/3rdparty/site_config/standard/techpinions.com.txt @@ -1,7 +1,7 @@ -body: //div[@class="post"] - -strip: //div[@class="post-meta"] -strip: //div[@id="socialicons"] -strip: //div[@id="authorbox"] +body: //div[@class="post"] + +strip: //div[@class="post-meta"] +strip: //div[@id="socialicons"] +strip: //div[@id="authorbox"] test_url: http://techpinions.com/why-google-and-microsoft-hate-siri/3572 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/techradar.com.txt b/inc/3rdparty/site_config/standard/techradar.com.txt old mode 100644 new mode 100755 index ed92a974..0a0ca619 --- a/inc/3rdparty/site_config/standard/techradar.com.txt +++ b/inc/3rdparty/site_config/standard/techradar.com.txt @@ -1,12 +1,12 @@ -# Title without news/reviews etc. 
appended -title: //div[@id='subColumn1Pad']/div[1][@class='article']/div[1][@class='articleHead']/h1 - -# Remove home link -strip: //div[@id='page_logo']/a - -# Remove utilities -strip: //*[(@id = "utilities")] - -# Remove comments link +# Title without news/reviews etc. appended +title: //div[@id='subColumn1Pad']/div[1][@class='article']/div[1][@class='articleHead']/h1 + +# Remove home link +strip: //div[@id='page_logo']/a + +# Remove utilities +strip: //*[(@id = "utilities")] + +# Remove comments link strip: //div[@id='subColumn1Pad']/div[1][@class='article']/div[1][@class='articleHead']/p[@class='tiny'] test_url: http://www.techradar.com/news/television/sky-to-rebrand-living-as-sky-living-903105 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/telegraaf.nl.txt b/inc/3rdparty/site_config/standard/telegraaf.nl.txt old mode 100644 new mode 100755 index ff3cd06e..91b5baf9 --- a/inc/3rdparty/site_config/standard/telegraaf.nl.txt +++ b/inc/3rdparty/site_config/standard/telegraaf.nl.txt @@ -1,9 +1,9 @@ -body: //div[@id='artikelKolom'] -strip: //div[@class='broodMediaBox']/div[@class='docbox' or @class='artBannerWrapper'] -strip: //div[@id='artikeltoolbar'] -strip: //div[@class='reactiebalk artspacer' or @class='bannercenter clearfix artspacer'] -strip: //div[@id='artikelKolomRechts' or @id='TMGTweetWidget'] -tidy: no -prune: no +body: //div[@id='artikelKolom'] +strip: //div[@class='broodMediaBox']/div[@class='docbox' or @class='artBannerWrapper'] +strip: //div[@id='artikeltoolbar'] +strip: //div[@class='reactiebalk artspacer' or @class='bannercenter clearfix artspacer'] +strip: //div[@id='artikelKolomRechts' or @id='TMGTweetWidget'] +tidy: no +prune: no test_url: http://www.telegraaf.nl/binnenland/10275097/__Identiteit_man_in_sloot_onbekend__.html?cid=rss \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/telegraph.co.uk.txt b/inc/3rdparty/site_config/standard/telegraph.co.uk.txt old mode 100644 new mode 100755 index 
e1faf23b..8dcdb42b --- a/inc/3rdparty/site_config/standard/telegraph.co.uk.txt +++ b/inc/3rdparty/site_config/standard/telegraph.co.uk.txt @@ -1,10 +1,10 @@ -body: //div[@class='byline' or @id='storyEmbSlide' or @id='mainBodyArea'] -strip: //p[@class='comments'] -strip: //div[@id='storyEmbSlide']//div[contains(@class, "hide")] -strip: //div[@id='tmg-related-links' or @id='outbrain-related-links' or @id='onespot-related-links'] -strip: //p[@class='bbpTweet']/span[@class='timestamp'] -strip: //p[@class='bbpTweet']/span[@class='metadata']//img -tidy: no -prune: no +body: //div[@class='byline' or @id='storyEmbSlide' or @id='mainBodyArea'] +strip: //p[@class='comments'] +strip: //div[@id='storyEmbSlide']//div[contains(@class, "hide")] +strip: //div[@id='tmg-related-links' or @id='outbrain-related-links' or @id='onespot-related-links'] +strip: //p[@class='bbpTweet']/span[@class='timestamp'] +strip: //p[@class='bbpTweet']/span[@class='metadata']//img +tidy: no +prune: no test_url: http://www.telegraph.co.uk/news/worldnews/europe/ireland/8663451/Is-Ireland-divorcing-from-the-Catholic-Church.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/thanhnien.com.vn.txt b/inc/3rdparty/site_config/standard/thanhnien.com.vn.txt new file mode 100755 index 00000000..596ecc90 --- /dev/null +++ b/inc/3rdparty/site_config/standard/thanhnien.com.vn.txt @@ -0,0 +1,4 @@ +body://div[@id="print-news"] +strip://a +strip://span[@class="date-line"] +test_url: http://www.thanhnien.com.vn/pages/20121006/hon-90-trieu-usd-nang-cap-do-thi-can-tho.aspx \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/the-magazine.org.txt b/inc/3rdparty/site_config/standard/the-magazine.org.txt new file mode 100755 index 00000000..08864657 --- /dev/null +++ b/inc/3rdparty/site_config/standard/the-magazine.org.txt @@ -0,0 +1,3 @@ +tidy: no + +test_url: http://the-magazine.org/1/alone-together-again \ No newline at end of file diff --git 
a/inc/3rdparty/site_config/standard/theage.com.au.txt b/inc/3rdparty/site_config/standard/theage.com.au.txt new file mode 100755 index 00000000..ea27c314 --- /dev/null +++ b/inc/3rdparty/site_config/standard/theage.com.au.txt @@ -0,0 +1,5 @@ +author: //h3[@class='authorName'] +date: //time +body: //div[@class='articleBody'] +strip_id_or_class: adspot +test_url: http://www.theage.com.au/victoria/top-cops-warns-outlaw-bikies-we-have-a-gang-too-20130331-2h1l8.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/theamericanscholar.org.txt b/inc/3rdparty/site_config/standard/theamericanscholar.org.txt new file mode 100755 index 00000000..38b96672 --- /dev/null +++ b/inc/3rdparty/site_config/standard/theamericanscholar.org.txt @@ -0,0 +1,13 @@ +# Article Metadata +title: //meta[@property="og:title"]/@content +author: substring-after(//h3, 'By ') +date: //h4/a[2] + +# Content Pruning +strip: //h4 +strip: //a[@id="print_button"] +strip: //p[@class="excerpt"] +strip: //h3 +strip: //div[@class="caption"] +strip: //center/a/img +test_url: http://theamericanscholar.org/too-big-to-fail-and-too-risky-to-exist/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/theappleblog.com.txt b/inc/3rdparty/site_config/standard/theappleblog.com.txt old mode 100644 new mode 100755 index 3bd555f1..caa5ae0c --- a/inc/3rdparty/site_config/standard/theappleblog.com.txt +++ b/inc/3rdparty/site_config/standard/theappleblog.com.txt @@ -1,3 +1,3 @@ -# Remove home link +# Remove home link strip: //div[@id='blog-title']/a test_url: http://theappleblog.com/2010/10/21/the-new-macbook-air-is-underwhelming/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/theatlantic.com.txt b/inc/3rdparty/site_config/standard/theatlantic.com.txt old mode 100644 new mode 100755 index 267fd39c..aa41b153 --- a/inc/3rdparty/site_config/standard/theatlantic.com.txt +++ b/inc/3rdparty/site_config/standard/theatlantic.com.txt @@ -1,18 +1,20 @@ -title: 
//div[@id='article']/h1 -title: //h1 - -body: //div[@class='articleText'] -body: //div[@class='articleContent'] -body: //div[@id='article'] -date: //*[contains(@class, 'date')] -author: //div[@id='profile']//*[@class='authors']//a[1] -author: //*[@class='author']/span -prune: no - -strip: //div[@class='moreOnBoxWithImages'] - -single_page_link: //a[@class='print'] - -test_url: http://www.theatlantic.com/technology/archive/2011/04/want-to-see-how-crazy-a-bot-run-market-can-be/237773/ -test_url: http://www.theatlantic.com/magazine/archive/2007/11/the-autumn-of-the-multitaskers/6342/ +title: //div[contains(@class, 'articleHead')]//h1 + +body: //div[@class='articleText'] +body: //div[@class='articleContent'] +body: //div[@id='article'] +date: //*[contains(@class, 'date')] +author: //div[@id='profile']//*[@class='authors']//a[1] +author: //*[@class='author']/span +prune: no + +strip: //div[@class='moreOnBoxWithImages'] +strip: //p[contains(., 'This article available online at:')] +strip: //p[contains(., 'This article available online at:')]/following::* +strip: //div[@class='earthbox'] + +single_page_link: //article//a[contains(@class, 'print')] + +test_url: http://www.theatlantic.com/technology/archive/2011/04/want-to-see-how-crazy-a-bot-run-market-can-be/237773/ +test_url: http://www.theatlantic.com/magazine/archive/2007/11/the-autumn-of-the-multitaskers/6342/ test_url: http://www.theatlantic.com/entertainment/archive/2012/04/30-rock-live-a-funny-reminder-of-why-sitcoms-arent-shot-live-anymore/256447/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/theatlanticcities.com.txt b/inc/3rdparty/site_config/standard/theatlanticcities.com.txt new file mode 100755 index 00000000..880f207d --- /dev/null +++ b/inc/3rdparty/site_config/standard/theatlanticcities.com.txt @@ -0,0 +1,17 @@ +# To administrator: +# Please replace the hostname with "*.theatlanticcities.com" + +# This filter is tested on: +# 
http://m.theatlanticcities.com/arts-and-lifestyle/2012/04/invisible-borders-define-american-culture/1839/ +# http://www.theatlanticcities.com/housing/2012/11/chinas-holdouts/3981/ +# http://www.theatlanticcities.com/arts-and-lifestyle/2012/12/christmas-time-here/4133/ + +title://h1 +author: //ul[@class='meta']/li/a +date: //ul[@class='meta']/li/following-sibling::li +body://article[@class='post'] + +strip://h1 +strip://ul[@class='meta'] +strip://div[@class='newsletter-slug'] +test_url: http://www.theatlanticcities.com/arts-and-lifestyle/2012/12/christmas-time-here/4133/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/thebostonchannel.com.txt b/inc/3rdparty/site_config/standard/thebostonchannel.com.txt old mode 100644 new mode 100755 index 64df90c1..b74442de --- a/inc/3rdparty/site_config/standard/thebostonchannel.com.txt +++ b/inc/3rdparty/site_config/standard/thebostonchannel.com.txt @@ -1,7 +1,7 @@ -title: //meta[@name='og:title']/@content -date: //meta[@name='created']/@content -body: //div[@class="StoryBody" or @class="storyTeaser"] - -replace_string(<p></p>): <br /><br /> - +title: //meta[@name='og:title']/@content +date: //meta[@name='created']/@content +body: //div[@class="StoryBody" or @class="storyTeaser"] + +replace_string(<p></p>): <br /><br /> + test_url: http://www.thebostonchannel.com/slideshow/news/28210648/detail.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/thebrowser.com.txt b/inc/3rdparty/site_config/standard/thebrowser.com.txt old mode 100644 new mode 100755 index c3c20504..807e7dad --- a/inc/3rdparty/site_config/standard/thebrowser.com.txt +++ b/inc/3rdparty/site_config/standard/thebrowser.com.txt @@ -1,10 +1,10 @@ -title: //h2[contains(@class, 'page-title')] -body: //div[@id='content']/div[contains(@id, 'node-')]/div[@class='content'] - -prune: no - -strip: //div[contains(@class, 'node-book')]//a[@class='button'] - -single_page_link: //a[@class='tool-print'] +title: 
//h2[contains(@class, 'page-title')] +body: //div[@id='content']/div[contains(@id, 'node-')]/div[@class='content'] + +prune: no + +strip: //div[contains(@class, 'node-book')]//a[@class='button'] + +single_page_link: //a[@class='tool-print'] test_url: http://thebrowser.com/interviews/yotam-ottolenghi-on-his-favourite-cookery-books \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/thecarton.net.txt b/inc/3rdparty/site_config/standard/thecarton.net.txt old mode 100644 new mode 100755 index 9ef4ed8b..13fa35a0 --- a/inc/3rdparty/site_config/standard/thecarton.net.txt +++ b/inc/3rdparty/site_config/standard/thecarton.net.txt @@ -1,10 +1,10 @@ -title: substring-before(//title, ' – ') -author:string('Shawn') -date: //*/time/@pubdate - - -strip: //header -strip: //div[@id='prev_next'] -strip: //div[@id='masthead'] - +title: substring-before(//title, ' – ') +author:string('Shawn') +date: //*/time/@pubdate + + +strip: //header +strip: //div[@id='prev_next'] +strip: //div[@id='masthead'] + test_url: http://thecarton.net/2012/12/20/imdb \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/thedaily.com.txt b/inc/3rdparty/site_config/standard/thedaily.com.txt old mode 100644 new mode 100755 index 24ebbbac..e255e6a8 --- a/inc/3rdparty/site_config/standard/thedaily.com.txt +++ b/inc/3rdparty/site_config/standard/thedaily.com.txt @@ -1,24 +1,24 @@ -#keep all body text -prune: no - -#title, body, metadata -title: //div[@class='story_header']/h1 -body: //div[@id='content'] -author: substring-after(//span[@class='byline'], "by ") -author: substring-after(//span[@class='byline'], "By ") -author: //span[@class='byline'] -date: //span[@class='date'] - -#formatting -convert_double_br_tags: yes -dissolve: //div[@class='slides_full']/ul/li - -# cleanup -strip: //a[@id='story_note'] -strip: //br -strip: //div[@class='intro'] -strip: //div[@class='share-block'] -strip: //div[@class='sidebar-social'] -strip: //div[@class='top-stories'] -strip: 
//div[@class='prevnext'] +#keep all body text +prune: no + +#title, body, metadata +title: //div[@class='story_header']/h1 +body: //div[@id='content'] +author: substring-after(//span[@class='byline'], "by ") +author: substring-after(//span[@class='byline'], "By ") +author: //span[@class='byline'] +date: //span[@class='date'] + +#formatting +convert_double_br_tags: yes +dissolve: //div[@class='slides_full']/ul/li + +# cleanup +strip: //a[@id='story_note'] +strip: //br +strip: //div[@class='intro'] +strip: //div[@class='share-block'] +strip: //div[@class='sidebar-social'] +strip: //div[@class='top-stories'] +strip: //div[@class='prevnext'] test_url: http://www.thedaily.com/page/2012/01/09/010912-news-college-costs-1-5/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/thedailybeast.com.txt b/inc/3rdparty/site_config/standard/thedailybeast.com.txt old mode 100644 new mode 100755 index 4781c65a..f5e938ae --- a/inc/3rdparty/site_config/standard/thedailybeast.com.txt +++ b/inc/3rdparty/site_config/standard/thedailybeast.com.txt @@ -1,7 +1,7 @@ -title: //h1 -body: //article/div[contains(@class, 'article-body')] -#strip: //header/hgroup/h1 -strip: //footer[@class='storyFooter'] -single_page_link: //li[@class='print']/a -prune: no +title: //h1 +body: //article/div[contains(@class, 'article-body')] +#strip: //header/hgroup/h1 +strip: //footer[@class='storyFooter'] +single_page_link: //li[@class='print']/a +prune: no test_url: http://www.thedailybeast.com/articles/2010/04/06/how-mastercard-predicts-divorce.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/thedailymash.co.uk.txt b/inc/3rdparty/site_config/standard/thedailymash.co.uk.txt old mode 100644 new mode 100755 index 0f15558d..a83a6cf6 --- a/inc/3rdparty/site_config/standard/thedailymash.co.uk.txt +++ b/inc/3rdparty/site_config/standard/thedailymash.co.uk.txt @@ -1,14 +1,14 @@ -# Remove duplicated title -strip: //div[@id='content']/div[1][@class='full_intro']/h2 - 
-# Remove links, ads etc. -strip: //*[(@class= "aside")] - -# Remove the date and add it to the date published field in Instapaper -strip: //div[@class="date"] -date: //div[@class="date"] - -# There is no byline on The Daily Mash. - -convert_double_br_tags: yes +# Remove duplicated title +strip: //div[@id='content']/div[1][@class='full_intro']/h2 + +# Remove links, ads etc. +strip: //*[(@class= "aside")] + +# Remove the date and add it to the date published field in Instapaper +strip: //div[@class="date"] +date: //div[@class="date"] + +# There is no byline on The Daily Mash. + +convert_double_br_tags: yes test_url: http://www.thedailymash.co.uk/index.php?option=com_content&task=view&id=4994&Itemid=81&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+thedailymash+%28The+Daily+Mash.+It%27s+news+to+us.%29 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/thedisneyblog.com.txt b/inc/3rdparty/site_config/standard/thedisneyblog.com.txt new file mode 100755 index 00000000..57b3254a --- /dev/null +++ b/inc/3rdparty/site_config/standard/thedisneyblog.com.txt @@ -0,0 +1,7 @@ +title: //h1[contains(@class, 'entry-title')] +author: //span[contains(@class, 'author vcard')] +date: //span[@class = 'entry-date'] +body: //div[@class='entry-content'] +strip_id_or_class: bottomcontainerBox +strip_id_or_class: lightsocial_container +test_url: http://thedisneyblog.com/2012/11/17/videopolis-one-woman-disney-musical-beauty-and-the-beast/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/theeuropean-magazine.com.txt b/inc/3rdparty/site_config/standard/theeuropean-magazine.com.txt new file mode 100755 index 00000000..a19bae15 --- /dev/null +++ b/inc/3rdparty/site_config/standard/theeuropean-magazine.com.txt @@ -0,0 +1,17 @@ +# Tested on: +# http://theeuropean-magazine.com/352-dyson-george/353-evolution-and-innovation +# http://theeuropean-magazine.com/522-casertano-stefano/919-morsi-and-the-future-of-egypt + 
+title://h2[@class='article-title'] +author:substring-before(substring-after(//p[@class='article-meta'], 'by'), '—') +date:substring-after(//p[@class='article-meta'], '—') +body://div[@class='article'] + +wrap_in(strong)://p[@class='article-teaser'] +move_into(//div[@class='article-head'])://li/img + +strip://h2[@class='article-title'] +strip://p[@class='article-meta'] +strip://div[@class='copyright'] +strip://div[@class='opinions-of-readers'] +test_url: http://theeuropean-magazine.com/522-casertano-stefano/919-morsi-and-the-future-of-egypt \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/thefilmexperience.net.txt b/inc/3rdparty/site_config/standard/thefilmexperience.net.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/thegamedesignforum.com.txt b/inc/3rdparty/site_config/standard/thegamedesignforum.com.txt new file mode 100755 index 00000000..849ede77 --- /dev/null +++ b/inc/3rdparty/site_config/standard/thegamedesignforum.com.txt @@ -0,0 +1,14 @@ +## ERROR: Removes all images. Please fix, have no idea why (bad HTML?) 
+ +title: //h1[@class='featuretitle'] +body: //div[@id='nobordercontentarea'] + +# remove Twitter badge +strip: //img[@alt='Follow tgdfweb on Twitter'] + +# fix for headers not showing for some reason +wrap_in(h2): //h2[@class='sectionheader'] +dissolve: //h2[@class='sectionheader'] + +tidy: yes +test_url: http://thegamedesignforum.com/features/acceleration_flow_1.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/theglobalmail.org.txt b/inc/3rdparty/site_config/standard/theglobalmail.org.txt old mode 100644 new mode 100755 index fae0fb29..da1c84f9 --- a/inc/3rdparty/site_config/standard/theglobalmail.org.txt +++ b/inc/3rdparty/site_config/standard/theglobalmail.org.txt @@ -1,41 +1,41 @@ -title: //h1[@id="headline"] -author: //div[contains(@class, "editorial-byline-author")]/a -date: substring-after(//div[contains(@class, "editorial-byline-meta")], " | ") - -# The article body contains a mix or article and non-article elements, so lot of manual tweaks are needed -body: //div[@id="template"] -strip_id_or_class: editorial-byline-pic -strip_id_or_class: editorial-byline -strip_id_or_class: headline - -# Include the leadin paragraph in the body text, but remove quotes because they're out of context -dissolve: //div[contains(@id, "leadin")] -strip_id_or_class: pullquote - -# Image captions removed because they're confusing in body text -strip_id_or_class: image-caption-content - -# Remove header and footer -strip_id_or_class: header -strip_id_or_class: footer - -# Remove the hidden logo that seems to be used to cause Facebook to show the logo instead of a random article image -strip: /html/body/span[contains(@style, "display: none")] - -# Remove search box -strip_id_or_class: searchContainer -strip: //div[contains(@class, "searchInstruction")] -strip: //div[contains(@class, "searchResults")]/h4 - -# Remove the 'Letters to the Editor' section -strip_id_or_class: letter-text -strip_id_or_class: letter-from -strip_id_or_class: letter-date - -# 
Remove Like/Tweet links -strip_id_or_class: social-tab - -# Remove 'divider' which causes an inexplicable slash to appear in the article body -strip_id_or_class: divider +title: //h1[@id="headline"] +author: //div[contains(@class, "editorial-byline-author")]/a +date: substring-after(//div[contains(@class, "editorial-byline-meta")], " | ") + +# The article body contains a mix or article and non-article elements, so lot of manual tweaks are needed +body: //div[@id="template"] +strip_id_or_class: editorial-byline-pic +strip_id_or_class: editorial-byline +strip_id_or_class: headline + +# Include the leadin paragraph in the body text, but remove quotes because they're out of context +dissolve: //div[contains(@id, "leadin")] +strip_id_or_class: pullquote + +# Image captions removed because they're confusing in body text +strip_id_or_class: image-caption-content + +# Remove header and footer +strip_id_or_class: header +strip_id_or_class: footer + +# Remove the hidden logo that seems to be used to cause Facebook to show the logo instead of a random article image +strip: /html/body/span[contains(@style, "display: none")] + +# Remove search box +strip_id_or_class: searchContainer +strip: //div[contains(@class, "searchInstruction")] +strip: //div[contains(@class, "searchResults")]/h4 + +# Remove the 'Letters to the Editor' section +strip_id_or_class: letter-text +strip_id_or_class: letter-from +strip_id_or_class: letter-date + +# Remove Like/Tweet links +strip_id_or_class: social-tab + +# Remove 'divider' which causes an inexplicable slash to appear in the article body +strip_id_or_class: divider test_url: http://www.theglobalmail.org/feature/tiramisu-time-in-pyongyang/88/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/theglobeandmail.com.txt b/inc/3rdparty/site_config/standard/theglobeandmail.com.txt old mode 100644 new mode 100755 index 90634a08..750f8473 --- a/inc/3rdparty/site_config/standard/theglobeandmail.com.txt +++ 
b/inc/3rdparty/site_config/standard/theglobeandmail.com.txt @@ -1,5 +1,5 @@ -single_page_link: //div[contains(@class, 'pagination')]//a[contains(@title, 'ingle page')] -tidy: no -prune: no +single_page_link: //div[contains(@class, 'pagination')]//a[contains(@title, 'ingle page')] +tidy: no +prune: no test_url: http://www.theglobeandmail.com/report-on-business/rob-magazine/how-a-novice-miner-survived-a-summer-in-the-klondike/article2345350/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/thegreatdiscontent.com.txt b/inc/3rdparty/site_config/standard/thegreatdiscontent.com.txt new file mode 100755 index 00000000..12442b40 --- /dev/null +++ b/inc/3rdparty/site_config/standard/thegreatdiscontent.com.txt @@ -0,0 +1,6 @@ +title: //h1[@id='headline'] +author: substring-after(//section[@class="credits"]/ul/li[1],"Interview by ") +date: //time[@pubdate] +body: //article[@class='interview'] +strip: //article[@class='interview']/footer +test_url: http://thegreatdiscontent.com/jeffrey-zeldman \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/theguardian.com.txt b/inc/3rdparty/site_config/standard/theguardian.com.txt new file mode 100755 index 00000000..c803e4e4 --- /dev/null +++ b/inc/3rdparty/site_config/standard/theguardian.com.txt @@ -0,0 +1,13 @@ +title: //div[@id='main-article-info']//h1 +body: //div[@id='article-wrapper'] +date: //li[@class='publication']//time[@pubdate] | //li[@class='publication']//data[@pubdate] +strip: //div[contains(@class, 'email-subscription')] +strip: //div[contains(@class, 'kindleWidget')] +#strip: //a[not(text())] +strip_id_or_class: pocket-btn +author: //li[@class='byline'] +prune: no +tidy: no +test_url: http://www.theguardian.com/world/2013/oct/04/nsa-gchq-attack-tor-network-encryption +test_url: http://www.theguardian.com/world/2013/oct/03/edward-snowden-files-john-lanchester +test_url: 
http://www.theguardian.com/commentisfree/2014/jun/15/britishness-search-identity-my-part-in-camerons-odyssey \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/theindychannel.com.txt b/inc/3rdparty/site_config/standard/theindychannel.com.txt old mode 100644 new mode 100755 index 3544f247..2cd865bb --- a/inc/3rdparty/site_config/standard/theindychannel.com.txt +++ b/inc/3rdparty/site_config/standard/theindychannel.com.txt @@ -1,13 +1,13 @@ -title: //h1[@class="Headline"] -date: substring-after(//div[@class="posted"], 'EDT ') -body: //div[@class="storyBody"] - -strip: //td[@class="AssocContentTD"] -strip: //div[@id="pageTitle"] -strip: //div[@class="posted"] -strip: //div[@class="updated"] -strip: //div[@class="js-kit-disclaimer"] -strip: //table[@class="row3table"] -strip: //div[@class="container2"] +title: //h1[@class="Headline"] +date: substring-after(//div[@class="posted"], 'EDT ') +body: //div[@class="storyBody"] + +strip: //td[@class="AssocContentTD"] +strip: //div[@id="pageTitle"] +strip: //div[@class="posted"] +strip: //div[@class="updated"] +strip: //div[@class="js-kit-disclaimer"] +strip: //table[@class="row3table"] +strip: //div[@class="container2"] strip: //div[@id="delta"] test_url: http://www.theindychannel.com/news/31050840/detail.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/themarker.com.txt b/inc/3rdparty/site_config/standard/themarker.com.txt new file mode 100755 index 00000000..141b1a3b --- /dev/null +++ b/inc/3rdparty/site_config/standard/themarker.com.txt @@ -0,0 +1,11 @@ +title: //h1[contains(@class, 'mainTitle')] +author: //ul[@class='author']//a[@rel='author'] +body: //div[@id='article-box'] +prune: no +tidy: no +strip_id_or_class: head +strip_id_or_class: social-nav +strip_id_or_class: rate +strip_id_or_class: video + +test_url: http://www.themarker.com/markerweek/1.2093167 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/themillions.com.txt 
b/inc/3rdparty/site_config/standard/themillions.com.txt old mode 100644 new mode 100755 index e3e57fea..4d46daee --- a/inc/3rdparty/site_config/standard/themillions.com.txt +++ b/inc/3rdparty/site_config/standard/themillions.com.txt @@ -1,10 +1,10 @@ -title: /html/body/div/div[2]/div/div/div/h3 - -body: /html/body/div/div[2]/div/div/div/div[2] - -strip: /html/body/div/div[2]/div/div/div/div[6]/div[3]/div/div/div - -tidy: no - +title: /html/body/div/div[2]/div/div/div/h3 + +body: /html/body/div/div[2]/div/div/div/div[2] + +strip: /html/body/div/div[2]/div/div/div/div[6]/div[3]/div/div/div + +tidy: no + # any way to get rid of this word character garbage? test_url: http://www.themillions.com/2010/07/at-the-movies-with-david-mitchell-the-thousand-autumns-of-jacob-de-zoet.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/themuseumofinnocence.com.txt b/inc/3rdparty/site_config/standard/themuseumofinnocence.com.txt old mode 100644 new mode 100755 index 518bff93..80aba441 --- a/inc/3rdparty/site_config/standard/themuseumofinnocence.com.txt +++ b/inc/3rdparty/site_config/standard/themuseumofinnocence.com.txt @@ -1,7 +1,7 @@ -body: single-review -strip_id_or_class: featured-review -strip_id_or_class: resources -strip_id_or_class: rate-the-book -strip_id_or_class: write-review +body: single-review +strip_id_or_class: featured-review +strip_id_or_class: resources +strip_id_or_class: rate-the-book +strip_id_or_class: write-review test_url: http://themuseumofinnocence.com/review.php?id=1179 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/thenation.com.txt b/inc/3rdparty/site_config/standard/thenation.com.txt old mode 100644 new mode 100755 index d88bcdd6..dab17f0b --- a/inc/3rdparty/site_config/standard/thenation.com.txt +++ b/inc/3rdparty/site_config/standard/thenation.com.txt @@ -1,11 +1,13 @@ -title: //h1[@class='print-title'] -body: //div[@class='print-content'] -author: //a[contains(@href, '/authors')] -author: 
substring-before(//div[@class='print-created'], '|') -date: //span[@class='article-date'] -date: substring-after(//div[@class='print-created'], '|') -prune: no - -single_page_link: //ul[contains(@class, 'article-actions-bar')]//a[contains(@href, '/print/article/')] - +title: //h2[@property='dc:title'] +#body: //div[@class='print-content'] +body: //div[@id='wysiwyg'] +author: //a[contains(@href, '/authors')] +author: substring-before(//div[@class='print-created'], '|') +date: //span[@class='article-date'] +date: substring-after(//div[@class='print-created'], '|') +prune: no + +#single_page_link: //ul[contains(@class, 'article-actions-bar')]//a[contains(@href, '/print/article/')] +single_page_link: //ul[contains(@class, 'article-actions-bar')]//a[contains(@href, '?page=full')] + test_url: http://www.thenation.com/article/162331/hard-against-time-roy-fisher \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/thenetworkgarden.blogs.com.txt b/inc/3rdparty/site_config/standard/thenetworkgarden.blogs.com.txt old mode 100644 new mode 100755 index 846b8a8a..b7f5f0f0 --- a/inc/3rdparty/site_config/standard/thenetworkgarden.blogs.com.txt +++ b/inc/3rdparty/site_config/standard/thenetworkgarden.blogs.com.txt @@ -1,4 +1,4 @@ -body: //div[@id="beta-inner"] -title: //h3[@class="entry-header"] +body: //div[@id="beta-inner"] +title: //h3[@class="entry-header"] test_url: http://thenetworkgarden.blogs.com/weblog/2011/09/microsoft-metro-and-the-next-wave-in-computing.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/thenextgeneration.org.txt b/inc/3rdparty/site_config/standard/thenextgeneration.org.txt new file mode 100755 index 00000000..dedd989f --- /dev/null +++ b/inc/3rdparty/site_config/standard/thenextgeneration.org.txt @@ -0,0 +1,8 @@ +title: //h1[@class='interior-page-title'] +author: //span[@class='author']/a +date: //div[@class='byline']/time +body: //div[@class='rich-text-body'] + +strip: //div[@class='byline'] +strip: 
//div[@class='offscreen-menu'] +test_url: http://thenextgeneration.org/blog/post/rebrand-announce/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/thenextweb.com.txt b/inc/3rdparty/site_config/standard/thenextweb.com.txt old mode 100644 new mode 100755 index fdc70005..684fe82d --- a/inc/3rdparty/site_config/standard/thenextweb.com.txt +++ b/inc/3rdparty/site_config/standard/thenextweb.com.txt @@ -1,12 +1,12 @@ -body: //div[@class= 'article-body'] -author: //div[@class='featured mb-1']//a[starts-with(@href,'/author')] - -strip: //div[@class = 'bargo'] -strip: //div[@class = 'tf'] -strip: //div[@class = 'article']/div[@class = 'blue-box'] -strip_id_or_class: respond - -tidy: no -next_page_link: //div[@class='pages-wrapper']//span/following-sibling::a/@href - +body: //div[@class= 'article-body'] +author: //div[@class='featured mb-1']//a[starts-with(@href,'/author')] + +strip: //div[@class = 'bargo'] +strip: //div[@class = 'tf'] +strip: //div[@class = 'article']/div[@class = 'blue-box'] +strip_id_or_class: respond + +tidy: no +next_page_link: //div[@class='pages-wrapper']//span/following-sibling::a/@href + test_url: http://thenextweb.com/apple/2011/10/12/tnw-review-a-complete-guide-to-apples-ios-5-with-icloud-an-os-14-years-in-the-making/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/theoaklandpress.com.txt b/inc/3rdparty/site_config/standard/theoaklandpress.com.txt old mode 100644 new mode 100755 index c7132321..c9abda71 --- a/inc/3rdparty/site_config/standard/theoaklandpress.com.txt +++ b/inc/3rdparty/site_config/standard/theoaklandpress.com.txt @@ -1,3 +1,3 @@ -body: //div[@id='fullstory'] +body: //div[@id='fullstory'] strip: //div[@id='page_leftbar'] test_url: http://theoaklandpress.com/articles/2011/04/25/news/doc4db5330e0bce9220005852.txt \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/theonion.com.txt b/inc/3rdparty/site_config/standard/theonion.com.txt old mode 100644 new mode 
100755 index 12918b88..90e8d658 --- a/inc/3rdparty/site_config/standard/theonion.com.txt +++ b/inc/3rdparty/site_config/standard/theonion.com.txt @@ -1,11 +1,11 @@ -title: //h2[@class='title'] -date: substring-before(//p[@class='meta'], '|') -body: //div[@class='story'] -#body: //div[@class='article_body'] - -strip: //h2[@class='title'] -strip: //p[@class='meta'] -strip: //div[@class='ga_section'] -strip: //div[@id='recent_slider'] +title: //h2[@class='title'] +date: substring-before(//p[@class='meta'], '|') +body: //div[@class='story'] +#body: //div[@class='article_body'] + +strip: //h2[@class='title'] +strip: //p[@class='meta'] +strip: //div[@class='ga_section'] +strip: //div[@id='recent_slider'] test_url: http://www.theonion.com/articles/pathetic-bobcats-owner-again-regaling-players-with,27572/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/thepioneerwoman.com.txt b/inc/3rdparty/site_config/standard/thepioneerwoman.com.txt old mode 100644 new mode 100755 index f89f3a87..75583cd3 --- a/inc/3rdparty/site_config/standard/thepioneerwoman.com.txt +++ b/inc/3rdparty/site_config/standard/thepioneerwoman.com.txt @@ -1,11 +1,11 @@ -title: //h1[@class='post-title'] -body: //div[@class='post'] -author: //p[@class='posted-by'] -date: //div[@class='sprite post-date'] - -# The body of the post doesn't have it's own div so we have to strip out the metadata -strip: //div[@class='author_avatar'] -strip: //div[@class='sprite post-date'] -strip: //h1[@class='post-title'] +title: //h1[@class='post-title'] +body: //div[@class='post'] +author: //p[@class='posted-by'] +date: //div[@class='sprite post-date'] + +# The body of the post doesn't have it's own div so we have to strip out the metadata +strip: //div[@class='author_avatar'] +strip: //div[@class='sprite post-date'] +strip: //h1[@class='post-title'] strip: //p[@class='posted-by'] test_url: http://thepioneerwoman.com/cooking/2011/08/pie-fats-a-comparison/ \ No newline at end of file diff --git 
a/inc/3rdparty/site_config/standard/theregister.co.uk.txt b/inc/3rdparty/site_config/standard/theregister.co.uk.txt old mode 100644 new mode 100755 index ebcc55d5..5d30230d --- a/inc/3rdparty/site_config/standard/theregister.co.uk.txt +++ b/inc/3rdparty/site_config/standard/theregister.co.uk.txt @@ -1,5 +1,8 @@ -title: //div[@id="article"]/h2 -author: //div[@id="article"]/p[@class="byline"]/a[1] -date: //div[@id="article"]/p[@class="dateline"]/a[2] -body: //div[@id="article"]/div[@id="body"] -test_url: http://www.theregister.co.uk/2011/10/06/gas_bill_shocker/ \ No newline at end of file +# Updated 25-Jan-2014 +single_page_link: //a[contains(@href, '/Print/')] + +title: //div[@id="article"]/h2 +author: //p[@class="byline"]/a +date: //p[@class="dateline"]/a[last()] + +test_url: http://www.theregister.co.uk/2014/01/24/thirty_years_of_the_apple_macintosh_part_2/ diff --git a/inc/3rdparty/site_config/standard/theroot.com.txt b/inc/3rdparty/site_config/standard/theroot.com.txt old mode 100644 new mode 100755 index ebff662d..1f56316d --- a/inc/3rdparty/site_config/standard/theroot.com.txt +++ b/inc/3rdparty/site_config/standard/theroot.com.txt @@ -1,3 +1,3 @@ -body: //div[@id='node-content'] +body: //div[@id='node-content'] strip_id_or_class: pager test_url: http://www.theroot.com/views/why-i-am-male-feminist \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/therumpus.net.txt b/inc/3rdparty/site_config/standard/therumpus.net.txt old mode 100644 new mode 100755 index d01a89bb..84d0e783 --- a/inc/3rdparty/site_config/standard/therumpus.net.txt +++ b/inc/3rdparty/site_config/standard/therumpus.net.txt @@ -1,4 +1,4 @@ -title: /html/body/div/div[2]/div/div/h1 - +title: /html/body/div/div[2]/div/div/h1 + body: /html/body/div/div[2]/div/div/div[2] test_url: http://therumpus.net/2010/07/the-rumpus-interview-with-david-means/?full=yes \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/thesiasat.com.txt 
b/inc/3rdparty/site_config/standard/thesiasat.com.txt old mode 100644 new mode 100755 index ab9a99e8..68a8bc8e --- a/inc/3rdparty/site_config/standard/thesiasat.com.txt +++ b/inc/3rdparty/site_config/standard/thesiasat.com.txt @@ -1,11 +1,11 @@ -#body: (//div[@class='ftr-yt-vid'])[1] -body: (//blockquote[contains(@class, 'postcontent')])[1] -body: (//div[starts-with(@id, 'post_message')])[1] - -prune: no -tidy: no - -#replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player" -#replace_string(</iframe>): </iframe> </div> - +#body: (//div[@class='ftr-yt-vid'])[1] +body: (//blockquote[contains(@class, 'postcontent')])[1] +body: (//div[starts-with(@id, 'post_message')])[1] + +prune: no +tidy: no + +#replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player" +#replace_string(</iframe>): </iframe> </div> + test_url: http://www.thesiasat.com/showthread.php?19220-Dunya-News-HASB-E-HAAL-16-06-2012-Part-1-5 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/thesimpledollar.com.txt b/inc/3rdparty/site_config/standard/thesimpledollar.com.txt old mode 100644 new mode 100755 index d5c6c9e0..dcdf2572 --- a/inc/3rdparty/site_config/standard/thesimpledollar.com.txt +++ b/inc/3rdparty/site_config/standard/thesimpledollar.com.txt @@ -1,4 +1,4 @@ -title: //h3[@class='post-title']/a[@class='post-title-link'] -body: //div[@class='post-content'] +title: //h3[@class='post-title']/a[@class='post-title-link'] +body: //div[@class='post-content'] author: //div[@class='post-meta-under-title']/a test_url: http://www.thesimpledollar.com/2011/09/13/determining-the-size-of-your-emergency-fund/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/thespoiler.co.uk.txt b/inc/3rdparty/site_config/standard/thespoiler.co.uk.txt old mode 100644 new mode 100755 index e2ed1e63..ca983281 --- a/inc/3rdparty/site_config/standard/thespoiler.co.uk.txt +++ 
b/inc/3rdparty/site_config/standard/thespoiler.co.uk.txt @@ -1,3 +1,3 @@ -strip: //*[(@id = "content")]/h2 +strip: //*[(@id = "content")]/h2 strip: //*[(@class = "wp-notable-line")] test_url: http://www.thespoiler.co.uk/index.php/2010/10/21/wayne-rooney-tells-man-utd-its-not-me-its-you \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/thespoof.com.txt b/inc/3rdparty/site_config/standard/thespoof.com.txt old mode 100644 new mode 100755 index 409dc0c9..f71cfb6b --- a/inc/3rdparty/site_config/standard/thespoof.com.txt +++ b/inc/3rdparty/site_config/standard/thespoof.com.txt @@ -1,9 +1,9 @@ -title: //h1[contains(@class, 'cTitle')] -body: //div[contains(@class, 'KonaBody') or @id='articleimageright'] -author: //meta[@name='Author']/@content -date: //meta[@name='OriginalPublicationDate']/@content - -prune: no -tidy: no - +title: //h1[contains(@class, 'cTitle')] +body: //div[contains(@class, 'KonaBody') or @id='articleimageright'] +author: //meta[@name='Author']/@content +date: //meta[@name='OriginalPublicationDate']/@content + +prune: no +tidy: no + test_url: http://www.thespoof.com/news/spoof.cfm?headline=s8i108389 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/thestranger.com.txt b/inc/3rdparty/site_config/standard/thestranger.com.txt old mode 100644 new mode 100755 index 0f9855c8..6fcf4fdf --- a/inc/3rdparty/site_config/standard/thestranger.com.txt +++ b/inc/3rdparty/site_config/standard/thestranger.com.txt @@ -1,12 +1,12 @@ -# savage* filtering is for Savage Love, such as: http://www.thestranger.com/seattle/SavageLove?oid=5135029 - -#other filtering are plain articles, such as: http://www.thestranger.com/seattle/the-stranger-election-control-board/Content?oid=5142885 - -title: //div[@id='savageColumn_head']/h1 -title: //h1[@class="headlineLarge"] - -strip: //div[@id='savage_right'] | //div[@id='savageColumn_head'] | //div[@id='savageArticleRight'] | //div[@id='articleRight'] | //div[@class='savAppBanner'] - -body: 
//div[@id='savageColumn'] +# savage* filtering is for Savage Love, such as: http://www.thestranger.com/seattle/SavageLove?oid=5135029 + +#other filtering are plain articles, such as: http://www.thestranger.com/seattle/the-stranger-election-control-board/Content?oid=5142885 + +title: //div[@id='savageColumn_head']/h1 +title: //h1[@class="headlineLarge"] + +strip: //div[@id='savage_right'] | //div[@id='savageColumn_head'] | //div[@id='savageArticleRight'] | //div[@id='articleRight'] | //div[@class='savAppBanner'] + +body: //div[@id='savageColumn'] body: //div[@id='story_text'] test_url: http://www.thestranger.com/seattle/SavageLove?oid=5135029 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/thestreet.com.txt b/inc/3rdparty/site_config/standard/thestreet.com.txt old mode 100644 new mode 100755 index 5de75637..58eabf00 --- a/inc/3rdparty/site_config/standard/thestreet.com.txt +++ b/inc/3rdparty/site_config/standard/thestreet.com.txt @@ -1,25 +1,25 @@ -title: //div[@id='storyHdr']/h1 -title: //div[@id='print']//h2 -body: //div[@class="virtualpage"] -body: //div[@id='print']//div[@id='bd'] -author: //meta[@name="AUTHOR"]/@content -author: (//div[@id='print']//div[@id='bd']/h4)[1] -date: //meta[@name="DATE"]/@content -date: //div[@id='print']//div[@id='dte'] - -strip_id_or_class: articleFooter -strip_id_or_class: sidebar -strip_id_or_class: ie6PrintSubhead -strip_id_or_class: subHdr - - -replace_string(<P/>): </p><p> - -prune: no - -#TODO: redirects back - perhaps needs referer to work -single_page_link: //div[@id='storyDetail']//a[contains(@href, '/print/')] - -test_url: http://www.thestreet.com/story/11386556/1/which-of-these-10-dividend-stocks-is-worth-the-risk.html -# multi page +title: //div[@id='storyHdr']/h1 +title: //div[@id='print']//h2 +body: //div[@class="virtualpage"] +body: //div[@id='print']//div[@id='bd'] +author: //meta[@name="AUTHOR"]/@content +author: (//div[@id='print']//div[@id='bd']/h4)[1] +date: 
//meta[@name="DATE"]/@content +date: //div[@id='print']//div[@id='dte'] + +strip_id_or_class: articleFooter +strip_id_or_class: sidebar +strip_id_or_class: ie6PrintSubhead +strip_id_or_class: subHdr + + +replace_string(<P/>): </p><p> + +prune: no + +#TODO: redirects back - perhaps needs referer to work +single_page_link: //div[@id='storyDetail']//a[contains(@href, '/print/')] + +test_url: http://www.thestreet.com/story/11386556/1/which-of-these-10-dividend-stocks-is-worth-the-risk.html +# multi page test_url: http://www.thestreet.com/story/11387090/1/7-ubs-stock-picks-for-2012.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/thethaovanhoa.vn.txt b/inc/3rdparty/site_config/standard/thethaovanhoa.vn.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/theverge.com.txt b/inc/3rdparty/site_config/standard/theverge.com.txt old mode 100644 new mode 100755 index 11c5c153..1e1ce58f --- a/inc/3rdparty/site_config/standard/theverge.com.txt +++ b/inc/3rdparty/site_config/standard/theverge.com.txt @@ -1,31 +1,48 @@ -title: //h1[contains(@class, "headline")] - -author: //p[contains(@class, "byline")]/a[contains(@class, "author")] - -date: substring-after(normalize-space(//p[contains(@class, "byline")]/span[contains(@class, "publish-date")]), "on ") - -body: //article[contains(@class, 'feature-entry')] -body: //article -prune: no -tidy: no - -strip: //article/header -strip: //*[@id='sticky-menu'] -strip: //aside -strip: //nav - -strip_id_or_class: gallery -strip_id_or_class: article-meta -strip_id_or_class: story-navigation -strip_id_or_class: slegend -strip_id_or_class: related-product-meta -strip_id_or_class: comments -strip_id_or_class: ui-jump-list -strip_id_or_class: pullquote - -strip: //q - -strip: //a[contains(@class, 'entry-section-title')] - -test_url: http://www.theverge.com/2012/2/29/2821763/lytro-review -test_url: http://www.theverge.com/2011/11/3/2534861/nokia-lumia-800-review \ No newline at end of file 
+author: //p[contains(@class, "byline")]/a[contains(@class, "author")] + +date: //span[contains(@class, "publish-date")]/time[@pubdate]/@datetime + +body: //div[contains(@class, 'entry-content')] +# for vergecasts, e.g. http://www.theverge.com/2013/8/22/4648566/the-vergecast-090-august-22th-2013-video +body: //article +prune: no +#tidy: no + +strip: //article/header +strip: //*[@id='sticky-menu'] +strip: //aside +strip: //nav +strip: //img[contains(@class, 'vox-lazy-load')] +# deal with bad parsing +strip: //div[contains(@class, 'story-image')]//div[contains(., 'function(')] + +strip_id_or_class: gallery +strip_id_or_class: article-meta +strip_id_or_class: story-navigation +strip_id_or_class: slegend +strip_id_or_class: related-product-meta +strip_id_or_class: comments +strip_id_or_class: ui-jump-list +strip_id_or_class: pullquote +strip_id_or_class: m-ad +strip_id_or_class: social-sharing +strip_id_or_class: m-video-entry__excerpt +strip_id_or_class: hidden + +replace_string(<noscript>): <div> +replace_string(</noscript>): </div> + +find_string: <script +replace_string: <div style="display:none" +find_string: </script> +replace_string: </div> + +strip: //q + +strip: //a[contains(@class, 'entry-section-title')] + +test_url: http://www.theverge.com/2012/2/29/2821763/lytro-review +test_url: http://www.theverge.com/2011/11/3/2534861/nokia-lumia-800-review +test_url: http://www.theverge.com/2013/2/24/4026114/barnes-noble-shifting-focus-away-from-nook-hardware +test_url: http://www.theverge.com/2014/6/19/5824072/top-shelf-living-the-dream +test_url: http://www.theverge.com/rss/frontpage \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/theweek.com.txt b/inc/3rdparty/site_config/standard/theweek.com.txt old mode 100644 new mode 100755 index 27281ceb..f98749e2 --- a/inc/3rdparty/site_config/standard/theweek.com.txt +++ b/inc/3rdparty/site_config/standard/theweek.com.txt @@ -1,4 +1,4 @@ -body: //div[@class="briefingEntry"] -prune: no +body: 
//div[@class="briefingEntry"] +prune: no test_url: http://theweek.com/article/index/215763/insider-trading-on-capitol-hill \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/thinkprogress.org.txt b/inc/3rdparty/site_config/standard/thinkprogress.org.txt old mode 100644 new mode 100755 index 8934b68e..1eec4e3c --- a/inc/3rdparty/site_config/standard/thinkprogress.org.txt +++ b/inc/3rdparty/site_config/standard/thinkprogress.org.txt @@ -1,4 +1,4 @@ -author: //p[@class="byline"]/a -body: //div[@class="post"] +author: //p[@class="byline"]/a +body: //div[@class="post"] test_url: http://thinkprogress.org/special/2011/11/12/367040/harvard-law-professor-criticizes-homeland-security-feel-of-overreaction-to-occupy-harvard/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/thisdaylive.com.txt b/inc/3rdparty/site_config/standard/thisdaylive.com.txt old mode 100644 new mode 100755 index 958d4b27..73b3c9ed --- a/inc/3rdparty/site_config/standard/thisdaylive.com.txt +++ b/inc/3rdparty/site_config/standard/thisdaylive.com.txt @@ -1,2 +1,2 @@ -body: //div[@class='main-content-panel']/div[@class='img'] | //div[@id='page_content_Content9_oModuleContent_2_div_Body'] +body: //div[@class='main-content-panel']/div[@class='img'] | //div[@id='page_content_Content9_oModuleContent_2_div_Body'] test_url: http://www.thisdaylive.com/articles/australia-pm-talks-human-rights-with-chinas-wen/90394/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/thisismynext.com.txt b/inc/3rdparty/site_config/standard/thisismynext.com.txt old mode 100644 new mode 100755 index 6850b4be..70b53995 --- a/inc/3rdparty/site_config/standard/thisismynext.com.txt +++ b/inc/3rdparty/site_config/standard/thisismynext.com.txt @@ -1,8 +1,8 @@ -author: //div[@class='meta clearfix']/a -body: //div[@class='post'] - -strip: //div[@class='metaCat'] -strip: //div[@class='post']/h1 -strip: //div[@class='post']/div[@class='meta clearfix'] +author: 
//div[@class='meta clearfix']/a +body: //div[@class='post'] + +strip: //div[@class='metaCat'] +strip: //div[@class='post']/h1 +strip: //div[@class='post']/div[@class='meta clearfix'] strip: //div[@class='post']/div[@class='social-bar clearfix'] test_url: http://thisismynext.com/2011/10/18/galaxy-nexus-android-ice-cream-sandwich-pictures-video-hands-on/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/tidbits.com.txt b/inc/3rdparty/site_config/standard/tidbits.com.txt old mode 100644 new mode 100755 index 8bcf2ec1..1950e58e --- a/inc/3rdparty/site_config/standard/tidbits.com.txt +++ b/inc/3rdparty/site_config/standard/tidbits.com.txt @@ -1,3 +1,3 @@ -author: //span[@class='fn'] -date: substring-before(substring-after(//*[@id='center_ajax_sub']/div/div[3],'|'),'|') +author: //span[@class='fn'] +date: substring-before(substring-after(//*[@id='center_ajax_sub']/div/div[3],'|'),'|') test_url: http://tidbits.com/article/12651 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/time.com.txt b/inc/3rdparty/site_config/standard/time.com.txt old mode 100644 new mode 100755 index fd3fe08c..f3f886bc --- a/inc/3rdparty/site_config/standard/time.com.txt +++ b/inc/3rdparty/site_config/standard/time.com.txt @@ -1,14 +1,12 @@ -# 2011-10-25 - carlo@... - Initial setup. 
- -single_page_link: //li[@class='print']/a/@href - -title: //h1 -author: //meta[@name="byline"]/@content -date: //meta[@name="date"]/@content - -strip: //span[@class="see"] -strip: //div[@class="byline"] -strip: //div[@id="date2"] -strip: //h1 - -test_url: http://www.time.com/time/specials/packages/article/0,28804,2094921_2094923_2094924,00.html \ No newline at end of file +title: //h1[contains(@class, 'article-title')] +author: //article//span[contains(@class, 'byline')] +date: //time[@pubdate]/@datetime +body: //section[contains(@class, 'article-body')] +prune: no +tidy: no + +strip: //figcaption +strip: //p[contains(., 'MORE:') and ./a] +strip: //aside + +test_url: http://time.com/14478/emotions-may-not-be-so-universal-after-all/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/timeshighereducation.co.uk.txt b/inc/3rdparty/site_config/standard/timeshighereducation.co.uk.txt old mode 100644 new mode 100755 index 17297732..af1c23ce --- a/inc/3rdparty/site_config/standard/timeshighereducation.co.uk.txt +++ b/inc/3rdparty/site_config/standard/timeshighereducation.co.uk.txt @@ -1,6 +1,6 @@ -title: //h1 -body: //div[@class="storytext"] -strip: //div[@id="thelogin"] -strip: //*[@class="hide"] +title: //h1 +body: //div[@class="storytext"] +strip: //div[@id="thelogin"] +strip: //*[@class="hide"] strip: //div[@id="anchored"] test_url: http://www.timeshighereducation.co.uk/story.asp?sectioncode=26&storycode=416124&c=1 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/tipb.com.txt b/inc/3rdparty/site_config/standard/tipb.com.txt old mode 100644 new mode 100755 index 9533eb0f..b8474d97 --- a/inc/3rdparty/site_config/standard/tipb.com.txt +++ b/inc/3rdparty/site_config/standard/tipb.com.txt @@ -1,9 +1,9 @@ -body: //div[@id='content'] - -strip_id_or_class: featured-box -strip_id_or_class: postmeta -strip_id_or_class: respond - -author: //a[contains(@href, '/author/') and contains(@title, 'Posts by')] -date: 
substring-before(//a[contains(@href, '/author/') and contains(@title, 'Posts by')]/.., ' by ') +body: //div[@id='content'] + +strip_id_or_class: featured-box +strip_id_or_class: postmeta +strip_id_or_class: respond + +author: //a[contains(@href, '/author/') and contains(@title, 'Posts by')] +date: substring-before(//a[contains(@href, '/author/') and contains(@title, 'Posts by')]/.., ' by ') test_url: http://www.tipb.com/2011/10/17/iphone-4s-review/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/tnr.com.txt b/inc/3rdparty/site_config/standard/tnr.com.txt old mode 100644 new mode 100755 index 65a1899f..199f5d13 --- a/inc/3rdparty/site_config/standard/tnr.com.txt +++ b/inc/3rdparty/site_config/standard/tnr.com.txt @@ -1,17 +1,17 @@ -title: //div[contains(@class, 'article_detail')]/div[@class='entry_header']/h1 -title: //div[contains(@class, 'article_detail')]//h1 -title: //h1 - -body: //div[contains(@class, 'article_detail')] - -author: //div[@class='article_detail']/div[@class='entry_header']/li/div[@class='author']//h3 -author: div[@class='author']//h3 -strip: //div[contains(@class, 'field-field-book-cover')] - -date: translate(//*[@class='post_date' and contains(., ' 20')], '|', '') - -prune: no - -single_page_link: //a[@class='print-page'] - +title: //div[contains(@class, 'article_detail')]/div[@class='entry_header']/h1 +title: //div[contains(@class, 'article_detail')]//h1 +title: //h1 + +body: //div[contains(@class, 'article_detail')] + +author: //div[@class='article_detail']/div[@class='entry_header']/li/div[@class='author']//h3 +author: div[@class='author']//h3 +strip: //div[contains(@class, 'field-field-book-cover')] + +date: translate(//*[@class='post_date' and contains(., ' 20')], '|', '') + +prune: no + +single_page_link: //a[@class='print-page'] + test_url: http://www.tnr.com/blog/jonathan-chait/92991/did-obama-get-rolled \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/tomdispatch.com.txt 
b/inc/3rdparty/site_config/standard/tomdispatch.com.txt old mode 100644 new mode 100755 index d8548c78..701a2122 --- a/inc/3rdparty/site_config/standard/tomdispatch.com.txt +++ b/inc/3rdparty/site_config/standard/tomdispatch.com.txt @@ -1,6 +1,6 @@ -title: //div[@id='maincontent']//div[@class='title'] -body: //div[@id='maincontent']//div[@class='byline'] | //div[@id='maincontent']//div[@class='meat'] - -tidy: no +title: //div[@id='maincontent']//div[@class='title'] +body: //div[@id='maincontent']//div[@class='byline'] | //div[@id='maincontent']//div[@class='meat'] + +tidy: no test_url: http://www.tomdispatch.com/post/175436/tomgram:_noam_chomsky%2C_the_imperial_mentality_and_9_11/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/tomshardware.com.txt b/inc/3rdparty/site_config/standard/tomshardware.com.txt old mode 100644 new mode 100755 index 2bba6de8..2b437574 --- a/inc/3rdparty/site_config/standard/tomshardware.com.txt +++ b/inc/3rdparty/site_config/standard/tomshardware.com.txt @@ -1,8 +1,8 @@ -tidy: no -title: //title -author: //a[@itemprop = 'author'] -date: //time[@itemprop = 'datePublished'] -body: //div[@id = 'intelliTXT'] - +tidy: no +title: //title +author: //a[@itemprop = 'author'] +date: //time[@itemprop = 'datePublished'] +body: //div[@id = 'intelliTXT'] + next_page_link: //li[@class="pagin next"]/a test_url: http://www.tomshardware.com/reviews/gaming-graphics-card-review,3107.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/tomshardware.de.txt b/inc/3rdparty/site_config/standard/tomshardware.de.txt old mode 100644 new mode 100755 index e910003c..eee57ccf --- a/inc/3rdparty/site_config/standard/tomshardware.de.txt +++ b/inc/3rdparty/site_config/standard/tomshardware.de.txt @@ -1,12 +1,12 @@ -body://div[@id="news-content"]/div[@id="intelliTXT"][1] - -author://div[@id="header-news-infos"]/a[1] - -date: //div[@id="header-news-infos"]/span[1] - -title://h1[@id="header-news-title" and 
@class="hardwareTitle"][1] - -strip://div[@id="news-content"]/div[@id="intelliTXT"]/table - +body://div[@id="news-content"]/div[@id="intelliTXT"][1] + +author://div[@id="header-news-infos"]/a[1] + +date: //div[@id="header-news-infos"]/span[1] + +title://h1[@id="header-news-title" and @class="hardwareTitle"][1] + +strip://div[@id="news-content"]/div[@id="intelliTXT"]/table + footnotes: no test_url: http://www.tomshardware.de/DDR4-DDR3-ISSCC-Samsung-Hynix,news-247133.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/toolsandtoys.net.txt b/inc/3rdparty/site_config/standard/toolsandtoys.net.txt old mode 100644 new mode 100755 index dbe60b15..bb45d890 --- a/inc/3rdparty/site_config/standard/toolsandtoys.net.txt +++ b/inc/3rdparty/site_config/standard/toolsandtoys.net.txt @@ -1,6 +1,6 @@ -body: //div[@class='post'] - -strip: //div[@class='social'] -strip: //span[@class='next'] +body: //div[@class='post'] + +strip: //div[@class='social'] +strip: //span[@class='next'] strip: //span[@class='previous'] test_url: http://toolsandtoys.net/noble-tonic-02/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/tracks.ranea.org.txt b/inc/3rdparty/site_config/standard/tracks.ranea.org.txt new file mode 100755 index 00000000..5a386470 --- /dev/null +++ b/inc/3rdparty/site_config/standard/tracks.ranea.org.txt @@ -0,0 +1,14 @@ +# Metadata +title: substring-after(//title, 'Coyote Tracks - ') +author: //meta[@name="author"]/@content +date: //div[@class="post_header"]/a + +# Content Pruning +strip: //div[@class="column left"] +strip: //div[@class="pages"] +strip: //a[@class="text_title"] +strip: //ol[@class="notes"] + +dissolve: //div[@class='column right']/ul +dissolve: //li[@class='post'] +test_url: http://tracks.ranea.org/post/31431060205/the-next-big-uh-slightly-taller-thing \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/trailer.web-view.net.txt b/inc/3rdparty/site_config/standard/trailer.web-view.net.txt 
old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/trailerzone.de.txt b/inc/3rdparty/site_config/standard/trailerzone.de.txt new file mode 100755 index 00000000..02151a63 --- /dev/null +++ b/inc/3rdparty/site_config/standard/trailerzone.de.txt @@ -0,0 +1,9 @@ +body: //div[@id='video' or @id='main'] + +strip_id_or_class: socialshareprivacy2 +strip_id_or_class: wp_rp_first + +find_string: Genre</strong> +replace_string: </strong></p><p><strong>Genre</strong> + +test_url: http://www.trailerzone.de/g-i-joe-2-die-abrechnung/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/traningslara.se.txt b/inc/3rdparty/site_config/standard/traningslara.se.txt old mode 100644 new mode 100755 index 96e491fa..d6cfb6db --- a/inc/3rdparty/site_config/standard/traningslara.se.txt +++ b/inc/3rdparty/site_config/standard/traningslara.se.txt @@ -1,8 +1,8 @@ -title: //div[@class="Post-body"]//span[@class="PostHeader"] -author: //div[@class="PostHeaderIcons metadata"]/a[@title="Author"] -date: substring-before(//div[@class="PostHeaderIcons metadata"], '|') -body: //div[@class="Post-body"] -strip_id_or_class: print1 -strip_id_or_class: metadata +title: //div[@class="Post-body"]//span[@class="PostHeader"] +author: //div[@class="PostHeaderIcons metadata"]/a[@title="Author"] +date: substring-before(//div[@class="PostHeaderIcons metadata"], '|') +body: //div[@class="Post-body"] +strip_id_or_class: print1 +strip_id_or_class: metadata strip_id_or_class: authorbox test_url: http://traningslara.se/skoinlagg-och-skador-finns-det-nagot-samband/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/triblive.com.txt b/inc/3rdparty/site_config/standard/triblive.com.txt old mode 100644 new mode 100755 index 82797db9..663cafe1 --- a/inc/3rdparty/site_config/standard/triblive.com.txt +++ b/inc/3rdparty/site_config/standard/triblive.com.txt @@ -1,13 +1,13 @@ -title: //title -author: //span/a -date: substring-after(//small,'Published:') 
- -strip: //h1[@class='vert_class'] -strip: //h1[@class='headline'] -strip: //img[contains(@src,'logo_triblive.gif')] - -#strip: //h6 -#strip_img_src: logo_triblive.gif - -single_page_link: //a[@class='stprint'] +title: //title +author: //span/a +date: substring-after(//small,'Published:') + +strip: //h1[@class='vert_class'] +strip: //h1[@class='headline'] +strip: //img[contains(@src,'logo_triblive.gif')] + +#strip: //h6 +#strip_img_src: logo_triblive.gif + +single_page_link: //a[@class='stprint'] test_url: http://triblive.com/sports/2819913-85/lemieux-deal-penguins-burkle-nhl-owners-team-mario-bettman-case \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/truthdig.com.txt b/inc/3rdparty/site_config/standard/truthdig.com.txt old mode 100644 new mode 100755 index e7c1a4bc..9e0663b0 --- a/inc/3rdparty/site_config/standard/truthdig.com.txt +++ b/inc/3rdparty/site_config/standard/truthdig.com.txt @@ -1,10 +1,12 @@ -title: //div[@class='printbody']/h1 -body: //div[@class='printbody'] -prune: no - -strip: //div[@class='printbody']/a[@href='http://www.truthdig.com/'] -strip: //table[@class='footer'] - -single_page_link: //div[@class='article_tools']//a[contains(@href, '/print/')] - -test_url: http://www.truthdig.com/report/item/the_election_march_of_the_trolls_20110829/ \ No newline at end of file +title: //div[@class='printbody']/h1 +body: //div[@class='printbody'] +prune: no + +strip: //div[@class='printbody']/a[@href='http://www.truthdig.com/'] +strip: //table[@class='footer'] +strip: //h6[contains(., 'http://')] + +single_page_link: //a[contains(@href, '/print/')] + +test_url: http://www.truthdig.com/report/item/the_election_march_of_the_trolls_20110829/ +test_url: http://www.truthdig.com/dig/item/the_death_of_truth_20130505/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/tthfanfic.org.txt b/inc/3rdparty/site_config/standard/tthfanfic.org.txt old mode 100644 new mode 100755 index 0dab5b0f..63537c10 --- 
a/inc/3rdparty/site_config/standard/tthfanfic.org.txt +++ b/inc/3rdparty/site_config/standard/tthfanfic.org.txt @@ -1,4 +1,4 @@ -title: //h2 -author: //a[starts-with(@href, '/AuthorStories')] +title: //h2 +author: //a[starts-with(@href, '/AuthorStories')] body: //div[@id='storyinnerbody'] test_url: http://www.tthfanfic.org/Story-6512/Kudra+Journeys.htm \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/tthor.com.txt b/inc/3rdparty/site_config/standard/tthor.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/tuaw.com.txt b/inc/3rdparty/site_config/standard/tuaw.com.txt old mode 100644 new mode 100755 index b86f8ccb..2af00c27 --- a/inc/3rdparty/site_config/standard/tuaw.com.txt +++ b/inc/3rdparty/site_config/standard/tuaw.com.txt @@ -1,6 +1,6 @@ -title: //h1[@class='posttitle'] -author: //span[@class='author']/a -date: //span[@class='timestamp'] -body: //div[@class='body'] +title: //h1[@class='posttitle'] +author: //span[@class='author']/a +date: //span[@class='timestamp'] +body: //div[@class='body'] test_url: http://www.tuaw.com/2011/10/19/apple-posts-fans-memories-of-steve-jobs/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/tuckreview.com.txt b/inc/3rdparty/site_config/standard/tuckreview.com.txt old mode 100644 new mode 100755 index a3946cbc..6e18e3da --- a/inc/3rdparty/site_config/standard/tuckreview.com.txt +++ b/inc/3rdparty/site_config/standard/tuckreview.com.txt @@ -1,6 +1,6 @@ -title: //h1[@class='post-title'] -author: //div[@class='display-name'] -date: //div[@class='date'] -body: //div[@class='body'] -footnotes: no +title: //h1[@class='post-title'] +author: //div[@class='display-name'] +date: //div[@class='date'] +body: //div[@class='body'] +footnotes: no test_url: http://tuckreview.com/2012/8/14/migrating-to-v6 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/tvtropes.org.txt b/inc/3rdparty/site_config/standard/tvtropes.org.txt old mode 
100644 new mode 100755 index 08dbba59..3cc3a9cf --- a/inc/3rdparty/site_config/standard/tvtropes.org.txt +++ b/inc/3rdparty/site_config/standard/tvtropes.org.txt @@ -1,20 +1,20 @@ -# Google Custom Search -strip_id_or_class: google_branding_style - -# Avoid double title -strip_id_or_class: pagetitle - -# external links are labelled -strip_image_src: http://static.mediatropes.info/pmwiki/pub/external_link.gif - -title: //div[@class="pagetitle"] -body: //div[@id="wikitext"] - -# don't get clever. -strip_comments: no -prune: no - -# navigation in footer lives inside the wikitext div, annoyingly. -strip_id_or_class: pathholder +# Google Custom Search +strip_id_or_class: google_branding_style + +# Avoid double title +strip_id_or_class: pagetitle + +# external links are labelled +strip_image_src: http://static.mediatropes.info/pmwiki/pub/external_link.gif + +title: //div[@class="pagetitle"] +body: //div[@id="wikitext"] + +# don't get clever. +strip_comments: no +prune: no + +# navigation in footer lives inside the wikitext div, annoyingly. 
+strip_id_or_class: pathholder test_url: http://tvtropes.org/pmwiki/pmwiki.php/Main/WithinParameters \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/twitter.com.txt b/inc/3rdparty/site_config/standard/twitter.com.txt old mode 100644 new mode 100755 index 12ab1546..520ebd85 --- a/inc/3rdparty/site_config/standard/twitter.com.txt +++ b/inc/3rdparty/site_config/standard/twitter.com.txt @@ -1,9 +1,9 @@ -title: //title -body: (//p[contains(@class, 'js-tweet-text')])[1] -author: (//strong[contains(@class, 'fullname')])[1] -date: //span[contains(@class, 'js-short-timestamp')]/@data-time - -prune: no -tidy: no - +title: //title +body: (//p[contains(@class, 'js-tweet-text')])[1] +author: (//strong[contains(@class, 'fullname')])[1] +date: //span[contains(@class, 'js-short-timestamp')]/@data-time + +prune: no +tidy: no + test_url: https://twitter.com/medialens/status/216883678582804480 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/uefa.com.txt b/inc/3rdparty/site_config/standard/uefa.com.txt old mode 100644 new mode 100755 index 088d6586..3469be03 --- a/inc/3rdparty/site_config/standard/uefa.com.txt +++ b/inc/3rdparty/site_config/standard/uefa.com.txt @@ -1,6 +1,6 @@ -body: //div[@class='d3cmsCBody']//div[@class='pubText pubDate' or @class='newsComment' or contains(@class, 'newsPhoto') or @class='newsText'] -strip: //div[contains(@class, 'mpindex')] -prune: no -tidy: no - +body: //div[@class='d3cmsCBody']//div[@class='pubText pubDate' or @class='newsComment' or contains(@class, 'newsPhoto') or @class='newsText'] +strip: //div[contains(@class, 'mpindex')] +prune: no +tidy: no + test_url: http://www.uefa.com/uefaeuropaleague/news/newsid=1617320.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/uk.xbox360.ign.com.txt b/inc/3rdparty/site_config/standard/uk.xbox360.ign.com.txt old mode 100644 new mode 100755 index 29e19565..cd9c1361 --- a/inc/3rdparty/site_config/standard/uk.xbox360.ign.com.txt 
+++ b/inc/3rdparty/site_config/standard/uk.xbox360.ign.com.txt @@ -1,23 +1,23 @@ -# applies to uk.ds.ign.com, uk.wii.ign.com etc. -# possibly to non-UK versions, but I can’t test that - -title: //h1[@class="headline"] -author: //div[@class="hdr-sub byline"]/a -date: //h2[@class="publish-date"]/span -body: //div[@id="main-article-content"] - -strip: //ul[@class="lnks-readmore"] - -strip: //div[@class="inlineImageCaption"] -# can’t make the images appear, so remove the captions - -strip: //div[@style="width:468px"] -# video caption links - -convert_double_br_tags: yes - -strip_comments: no -# otherwise the ‘Closing Comments’ are removed - +# applies to uk.ds.ign.com, uk.wii.ign.com etc. +# possibly to non-UK versions, but I can’t test that + +title: //h1[@class="headline"] +author: //div[@class="hdr-sub byline"]/a +date: //h2[@class="publish-date"]/span +body: //div[@id="main-article-content"] + +strip: //ul[@class="lnks-readmore"] + +strip: //div[@class="inlineImageCaption"] +# can’t make the images appear, so remove the captions + +strip: //div[@style="width:468px"] +# video caption links + +convert_double_br_tags: yes + +strip_comments: no +# otherwise the ‘Closing Comments’ are removed + # Ratings box could do with some rearranging, but it’s tricky test_url: http://uk.xbox360.ign.com/articles/121/1210717p1.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/uni-watch.com.txt b/inc/3rdparty/site_config/standard/uni-watch.com.txt old mode 100644 new mode 100755 index cbe87d19..4a5ae344 --- a/inc/3rdparty/site_config/standard/uni-watch.com.txt +++ b/inc/3rdparty/site_config/standard/uni-watch.com.txt @@ -1,17 +1,17 @@ -author: substring-before(substring-after(//div[@class='post-byline'], 'By '), ', on') -date: substring-after(//div[@class='post-byline'], ', on') - -# for some reason, the following is producing a "no text [48]" error -#title: //div[@class='post-headline'] - -# for some reason, the following doesn't appear to isolate just 
the body copy -body: //div[@class='post-bodycopy'] - -# we solve the above issue by stripping out everything else we don't want -# these can probably all be removed if the body: command above worked -strip_id_or_class: reply -strip_id_or_class: left -strip_id_or_class: post-headline -strip_id_or_class: post-byline +author: substring-before(substring-after(//div[@class='post-byline'], 'By '), ', on') +date: substring-after(//div[@class='post-byline'], ', on') + +# for some reason, the following is producing a "no text [48]" error +#title: //div[@class='post-headline'] + +# for some reason, the following doesn't appear to isolate just the body copy +body: //div[@class='post-bodycopy'] + +# we solve the above issue by stripping out everything else we don't want +# these can probably all be removed if the body: command above worked +strip_id_or_class: reply +strip_id_or_class: left +strip_id_or_class: post-headline +strip_id_or_class: post-byline strip_id_or_class: footer test_url: http://www.uni-watch.com/2011/10/18/the-curious-case-of-steve-debergs-microphone-and-speaker/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/unwinnable.com.txt b/inc/3rdparty/site_config/standard/unwinnable.com.txt new file mode 100755 index 00000000..05ad86a5 --- /dev/null +++ b/inc/3rdparty/site_config/standard/unwinnable.com.txt @@ -0,0 +1,9 @@ +title: //h1[@class='postTitle'] +author: //a[@rel='author'] +date: substring-before(//h4[@class='postAuthor'], '|') +body: //div[@class='postContent'] + +strip: //div[@class='simplePullQuote'] + +wrap_in(figure): //img +test_url: http://www.unwinnable.com/2013/04/23/gratifying-play/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/uppsalafria.se.txt b/inc/3rdparty/site_config/standard/uppsalafria.se.txt new file mode 100755 index 00000000..79c59ece --- /dev/null +++ b/inc/3rdparty/site_config/standard/uppsalafria.se.txt @@ -0,0 +1,7 @@ +body: //div[contains(@class, 
'layout__inner')]//div[contains(@class, 'file-image') or contains(@class, 'node__content')] +author: //article//div[contains(@class, 'field-byline')] +strip_id_or_class: rekommenderade +strip_id_or_class: disqus +strip_id_or_class: annonser + +test_url: http://www.uppsalafria.se/artikel/97167 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/urbandictionary.com.txt b/inc/3rdparty/site_config/standard/urbandictionary.com.txt old mode 100644 new mode 100755 index 86061f77..385c95ca --- a/inc/3rdparty/site_config/standard/urbandictionary.com.txt +++ b/inc/3rdparty/site_config/standard/urbandictionary.com.txt @@ -1,3 +1,3 @@ -title: //title -body: //td[@id='content'] -test_url: http://www.urbandictionary.com/define.php?term=Grown-Ass \ No newline at end of file +title: //title +body: //table[@id='entries'] +test_url: http://www.urbandictionary.com/define.php?term=Grown-Ass diff --git a/inc/3rdparty/site_config/standard/usatoday.com.txt b/inc/3rdparty/site_config/standard/usatoday.com.txt new file mode 100755 index 00000000..710a7b37 --- /dev/null +++ b/inc/3rdparty/site_config/standard/usatoday.com.txt @@ -0,0 +1,8 @@ +date: //meta[@itemprop="datePublished"]/@content +author: //div[@itemprop="author"] +body: //div[@itemprop='articleBody'] + +strip_id_or_class: share-tools + +test_url: http://www.usatoday.com/story/news/world/2014/03/18/malaysia-plane-search/6552429/ +test_url: http://rssfeeds.usatoday.com/usatoday-NewsTopStories \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/usccb.org.txt b/inc/3rdparty/site_config/standard/usccb.org.txt old mode 100644 new mode 100755 index eb10a48f..30c28823 --- a/inc/3rdparty/site_config/standard/usccb.org.txt +++ b/inc/3rdparty/site_config/standard/usccb.org.txt @@ -1,6 +1,6 @@ -body: //div[@id='CS_Element_maincontent'] - -tidy: no -prune: no +body: //div[@id='CS_Element_maincontent'] + +tidy: no +prune: no test_url: http://www.usccb.org/bible/readings/072412.cfm \ No newline at 
end of file diff --git a/inc/3rdparty/site_config/standard/useit.com.txt b/inc/3rdparty/site_config/standard/useit.com.txt old mode 100644 new mode 100755 index f6be84c4..b8511c7c --- a/inc/3rdparty/site_config/standard/useit.com.txt +++ b/inc/3rdparty/site_config/standard/useit.com.txt @@ -1,8 +1,8 @@ -title: //h1 - -date: substring-after(//p[@class='overline']/strong, ',') -body: //div[@class="maintext"] -strip: //p[@class='overline'] -strip: //h1 +title: //h1 + +date: substring-after(//p[@class='overline']/strong, ',') +body: //div[@class="maintext"] +strip: //p[@class='overline'] +strip: //h1 tidy: no test_url: http://www.useit.com/alertbox/mobile-startup-screen.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/usfirst.org.txt b/inc/3rdparty/site_config/standard/usfirst.org.txt new file mode 100755 index 00000000..f02b2d3e --- /dev/null +++ b/inc/3rdparty/site_config/standard/usfirst.org.txt @@ -0,0 +1,6 @@ +title: //meta[@property='dc:title']/@content +date: //div[@class='content']//span[@property='dc:date']/@content +body: //div[@property='content:encoded'] +prune: no + +test_url: http://www.usfirst.org/roboticsprograms/frc/Photo-From-Kickoff-Filming \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/utdailybeacon.com.txt b/inc/3rdparty/site_config/standard/utdailybeacon.com.txt new file mode 100755 index 00000000..d37911bc --- /dev/null +++ b/inc/3rdparty/site_config/standard/utdailybeacon.com.txt @@ -0,0 +1,5 @@ +title: //h1 +author: //*[@class='byline'] +date: substring-after(//*[@class='pubdatetime'], 'Published: ') +body: //*[@class='body-block'] +test_url: http://utdailybeacon.com/news/2012/oct/8/energy-forum-continues/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/ux.artu.tv.txt b/inc/3rdparty/site_config/standard/ux.artu.tv.txt old mode 100644 new mode 100755 index a893bda0..c69f2df9 --- a/inc/3rdparty/site_config/standard/ux.artu.tv.txt +++ 
b/inc/3rdparty/site_config/standard/ux.artu.tv.txt @@ -1,7 +1,7 @@ -author: ("Arturo Toledo") -title: //div[@class="post"]/h2 -body: //div[@class="entry"] - -# Remove Twitter button +author: ("Arturo Toledo") +title: //div[@class="post"]/h2 +body: //div[@class="entry"] + +# Remove Twitter button strip: //div[@class="entry"]/p[2]/a/img test_url: http://ux.artu.tv/?p=192 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/uzivatelsketestovani.cz.txt b/inc/3rdparty/site_config/standard/uzivatelsketestovani.cz.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/vanityfair.com.txt b/inc/3rdparty/site_config/standard/vanityfair.com.txt old mode 100644 new mode 100755 index bfc47d1f..efa38224 --- a/inc/3rdparty/site_config/standard/vanityfair.com.txt +++ b/inc/3rdparty/site_config/standard/vanityfair.com.txt @@ -1,30 +1,30 @@ -title: //meta[@property="og:title"]/@content -author: //div[contains(@class, 'byline')]//span[contains(@class, 'name')] -date: //div[contains(@class, 'cn_date_time')] -body: //div[contains(@class, 'pageContainers')] -body: //article[@id='items-container'] -#body: //h2[@class='sub-header'] | //div[contains(@class, 'contributor-type') or @class='display-date' or @class='content-container'] - -strip_id_or_class: bc -strip_id_or_class: utilities -strip_id_or_class: list-supporting -strip_id_or_class: yrail -strip_id_or_class: urail - -prune: no -#tidy: no - -strip_id_or_class: super-rubric-section -strip_id_or_class: cn_date_time -strip_id_or_class: cn_contributors -strip_id_or_class: cn_pagination_controls -strip_id_or_class: cn_features_container -strip_id_or_class: global-footer -strip_id_or_class: cn_ecom_placement -strip: //li[@class='blogNavPrev'] - -single_page_link: //a[@title='Print this page'] - -test_url: http://www.vanityfair.com/politics/features/2011/05/egypt-revolutionaries-201105 -test_url: http://www.vanityfair.com/politics/features/2008/08/hitchens200808 +title: 
//meta[@property="og:title"]/@content +author: //div[contains(@class, 'byline')]//span[contains(@class, 'name')] +date: //div[contains(@class, 'cn_date_time')] +body: //div[contains(@class, 'pageContainers')] +body: //article[@id='items-container'] +#body: //h2[@class='sub-header'] | //div[contains(@class, 'contributor-type') or @class='display-date' or @class='content-container'] + +strip_id_or_class: bc +strip_id_or_class: utilities +strip_id_or_class: list-supporting +strip_id_or_class: yrail +strip_id_or_class: urail + +prune: no +#tidy: no + +strip_id_or_class: super-rubric-section +strip_id_or_class: cn_date_time +strip_id_or_class: cn_contributors +strip_id_or_class: cn_pagination_controls +strip_id_or_class: cn_features_container +strip_id_or_class: global-footer +strip_id_or_class: cn_ecom_placement +strip: //li[@class='blogNavPrev'] + +single_page_link: //a[@title='Print this page'] + +test_url: http://www.vanityfair.com/politics/features/2011/05/egypt-revolutionaries-201105 +test_url: http://www.vanityfair.com/politics/features/2008/08/hitchens200808 test_url: http://www.vanityfair.com/style/2012/01/prisoners-of-style-201201 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/varingen.no.txt b/inc/3rdparty/site_config/standard/varingen.no.txt old mode 100644 new mode 100755 index 6b5e0ae0..c0133c95 --- a/inc/3rdparty/site_config/standard/varingen.no.txt +++ b/inc/3rdparty/site_config/standard/varingen.no.txt @@ -1,5 +1,5 @@ -title: //div[@class='ArticleHeadlineDetailedView'] -date: //span[@class='ArticlePublicationDateTimeDetailedView'] -author://span[@class='ArticleBylineDetailedView'] +title: //div[@class='ArticleHeadlineDetailedView'] +date: //span[@class='ArticlePublicationDateTimeDetailedView'] +author://span[@class='ArticleBylineDetailedView'] body: //div[@class='ArticleTextDetailedView'] test_url: http://www.varingen.no/Nyheter/tabid/392/Default.aspx?ModuleId=56651&articleView=true \ No newline at end of file diff --git 
a/inc/3rdparty/site_config/standard/varsity.co.uk.txt b/inc/3rdparty/site_config/standard/varsity.co.uk.txt old mode 100644 new mode 100755 index b1db4c35..dfbf69cf --- a/inc/3rdparty/site_config/standard/varsity.co.uk.txt +++ b/inc/3rdparty/site_config/standard/varsity.co.uk.txt @@ -1,4 +1,4 @@ -# FB comments are inside an h2. Weird. Without this, the line 'Comments' is preserved by the text parser - +# FB comments are inside an h2. Weird. Without this, the line 'Comments' is preserved by the text parser + strip: //h2 test_url: http://www.varsity.co.uk/reviews/2662 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/vea.gov.vn.txt b/inc/3rdparty/site_config/standard/vea.gov.vn.txt new file mode 100755 index 00000000..9c8420ce --- /dev/null +++ b/inc/3rdparty/site_config/standard/vea.gov.vn.txt @@ -0,0 +1,7 @@ +title://div[@class="detail-new-title"] +body://div[@class="innerpad"] +strip://div[@class="ArticleUtility"] +strip://div[@class="commentPost"] +strip://div[@class="comment-box"] +strip://div[@id="TinLienQuan"] +test_url: http://vea.gov.vn/vn/tintuc/tintuchangngay/Pages/T%C4%83ng-c%C6%B0%E1%BB%9Dng-b%E1%BA%A3o-t%E1%BB%93n-%C4%91%E1%BB%99ng-v%E1%BA%ADt-hoang-d%C3%A3-%E1%BB%9F-Vi%E1%BB%87t-Nam.aspx \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/vedomosti.ru.txt b/inc/3rdparty/site_config/standard/vedomosti.ru.txt old mode 100644 new mode 100755 index ba999171..265f9fc7 --- a/inc/3rdparty/site_config/standard/vedomosti.ru.txt +++ b/inc/3rdparty/site_config/standard/vedomosti.ru.txt @@ -1,3 +1,3 @@ -title: //td[@class='second_content']/h1 +title: //td[@class='second_content']/h1 body: //td[@class='second_content']/div[@class='article_text'] test_url: http://www.vedomosti.ru/newspaper/article/259377/rasprodazha_mailru \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/veggbilder.no.txt b/inc/3rdparty/site_config/standard/veggbilder.no.txt old mode 100644 new mode 100755 index 
14144c0f..2a44c317 --- a/inc/3rdparty/site_config/standard/veggbilder.no.txt +++ b/inc/3rdparty/site_config/standard/veggbilder.no.txt @@ -1,5 +1,5 @@ -author: //div[@class="blogginnleggForfatter"] -date: concat(//div[@class='blogginnleggDatoDag'],' ',//div[@class='blogginnleggDatoMnd']) -strip: //div[contains(@id,"bloggDelingslenker")] +author: //div[@class="blogginnleggForfatter"] +date: concat(//div[@class='blogginnleggDatoDag'],' ',//div[@class='blogginnleggDatoMnd']) +strip: //div[contains(@id,"bloggDelingslenker")] strip: //div[contains(@id,"bloggDelingslenker")] test_url: http://veggbilder.no/blogginnlegg/fristelser \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/vemedio.com.txt b/inc/3rdparty/site_config/standard/vemedio.com.txt old mode 100644 new mode 100755 index 294ace9c..d22fc5cf --- a/inc/3rdparty/site_config/standard/vemedio.com.txt +++ b/inc/3rdparty/site_config/standard/vemedio.com.txt @@ -1,6 +1,6 @@ -title: //h2 -date: substring-before(//small," • Permalink") -author:string('Martin Hering') - +title: //h2 +date: substring-before(//small," • Permalink") +author:string('Martin Hering') + Strip: //p/small test_url: http://vemedio.com/blog/posts/state-of-support-and-icloud \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/venturebeat.com.txt b/inc/3rdparty/site_config/standard/venturebeat.com.txt old mode 100644 new mode 100755 index 41bfa8c5..d6321d79 --- a/inc/3rdparty/site_config/standard/venturebeat.com.txt +++ b/inc/3rdparty/site_config/standard/venturebeat.com.txt @@ -1,6 +1,6 @@ -title: //h1[@class="entry-title"] -author: //div[@class="author-name"] -date: //span[@class="the-time"] -body: //div[@class="entry-content"] +title: //h1[@class="entry-title"] +author: //div[@class="author-name"] +date: //span[@class="the-time"] +body: //div[@class="entry-content"] strip: //div[@class="vb-gallery"] test_url: http://venturebeat.com/2012/07/17/marissa-mayer-yahoo/#s:mayer-1 \ No newline at end of file 
diff --git a/inc/3rdparty/site_config/standard/version2.dk.txt b/inc/3rdparty/site_config/standard/version2.dk.txt old mode 100644 new mode 100755 index 74203cad..418b83a1 --- a/inc/3rdparty/site_config/standard/version2.dk.txt +++ b/inc/3rdparty/site_config/standard/version2.dk.txt @@ -1,12 +1,12 @@ -title: //article/header/h1 - -author: //article/header/section[@class='byline']/span[contains(@class, 'author')]/a -date: //article/header/section[@class='byline']/span[@class='published']/span - -body: //article/section[@class='body'] - -convert_double_br_tags: yes - -# This is required, because Tidy chokes on the HTML5 tags... +title: //article/header/h1 + +author: //article/header/section[@class='byline']/span[contains(@class, 'author')]/a +date: //article/header/section[@class='byline']/span[@class='published']/span + +body: //article/section[@class='body'] + +convert_double_br_tags: yes + +# This is required, because Tidy chokes on the HTML5 tags... tidy: no test_url: http://www.version2.dk/artikel/17069-amerikansk-hit-investor-er-vild-med-danske-net-ivaerksaettere \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/verybestbaking.com.txt b/inc/3rdparty/site_config/standard/verybestbaking.com.txt old mode 100644 new mode 100755 index 4cdd0c0f..ad0fec66 --- a/inc/3rdparty/site_config/standard/verybestbaking.com.txt +++ b/inc/3rdparty/site_config/standard/verybestbaking.com.txt @@ -1,7 +1,7 @@ -title: //title -body: //div[contains(@class, 'printRecipe')] -strip: //div[@class='recipeHeader'] -prune: no -tidy: no +title: //title +body: //div[contains(@class, 'printRecipe')] +strip: //div[@class='recipeHeader'] +prune: no +tidy: no single_page_link: //ul[@class='printOptions']//a[contains(@href, 'detail.aspx?p=1&showphoto=true')] test_url: http://www.verybestbaking.com/recipes/143190/Penne-Pasta-with-Sun-dried-Tomato-Cream-Sauce/detail.aspx \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/vg.no.txt 
b/inc/3rdparty/site_config/standard/vg.no.txt old mode 100644 new mode 100755 index fceeea09..bfadb4a7 --- a/inc/3rdparty/site_config/standard/vg.no.txt +++ b/inc/3rdparty/site_config/standard/vg.no.txt @@ -1,3 +1,3 @@ -body: //div[@id='artikkelspalte'] +body: //div[@id='artikkelspalte'] strip_id_or_class: 'breadcrumb' test_url: http://www.vg.no/spill/artikkel.php?artid=10003628 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/video.forbes.com.txt b/inc/3rdparty/site_config/standard/video.forbes.com.txt old mode 100644 new mode 100755 index 1dca55a3..5db77463 --- a/inc/3rdparty/site_config/standard/video.forbes.com.txt +++ b/inc/3rdparty/site_config/standard/video.forbes.com.txt @@ -1,9 +1,9 @@ -title: concat("Video: ", //div[@id='currentVideoTitleDivId']) -body: //div[@id='currentVideoDescriptionId'] -author: //meta[@name='author']/@content - -replace_string(<div id="currentVideoDescriptionId" style="display): <div id="currentVideoDescriptionId" style="displayitplease - -replace_string(<div id="currentVideoTitleDivId" style="display): <div id="currentVideoTitleDivId" style="displayitplease - +title: concat("Video: ", //div[@id='currentVideoTitleDivId']) +body: //div[@id='currentVideoDescriptionId'] +author: //meta[@name='author']/@content + +replace_string(<div id="currentVideoDescriptionId" style="display): <div id="currentVideoDescriptionId" style="displayitplease + +replace_string(<div id="currentVideoTitleDivId" style="display): <div id="currentVideoTitleDivId" style="displayitplease + test_url: http://video.forbes.com/fvn/business/wells-fargo-inside-the-bank-that-works \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/videogum.com.txt b/inc/3rdparty/site_config/standard/videogum.com.txt old mode 100644 new mode 100755 index a1663813..d93780ca --- a/inc/3rdparty/site_config/standard/videogum.com.txt +++ b/inc/3rdparty/site_config/standard/videogum.com.txt @@ -1,6 +1,6 @@ -title: //h2[@class='posttitle'] -date: 
substring-before(substring-after(//span[@class='postdate'], 'on '), ' by') -date: //span[@class='postdate'] -author: //span[@class='postdate']/a +title: //h2[@class='posttitle'] +date: substring-before(substring-after(//span[@class='postdate'], 'on '), ' by') +date: //span[@class='postdate'] +author: //span[@class='postdate']/a body: //div[@class='entry line_top'] test_url: http://videogum.com/395042/here-are-some-afternoon-links-92/list/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/villagevoice.com.txt b/inc/3rdparty/site_config/standard/villagevoice.com.txt old mode 100644 new mode 100755 index df374602..36e4a2f5 --- a/inc/3rdparty/site_config/standard/villagevoice.com.txt +++ b/inc/3rdparty/site_config/standard/villagevoice.com.txt @@ -1,9 +1,9 @@ -title: //h2[@class='headline'] - +title: //h2[@class='headline'] + body: //div[@class='ContentPrint'] - -prune: no - -single_page_link: //a[contains(@href, '/printVersion/')] - + +prune: no + +single_page_link: //a[contains(@href, '/printVersion/')] + test_url: http://www.villagevoice.com/2010-03-16/news/new-york-s-ten-worst-landlords/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/vimeo.com.txt b/inc/3rdparty/site_config/standard/vimeo.com.txt old mode 100644 new mode 100755 index d6c6701a..f36c9c57 --- a/inc/3rdparty/site_config/standard/vimeo.com.txt +++ b/inc/3rdparty/site_config/standard/vimeo.com.txt @@ -1,17 +1,17 @@ -title: //title -body: //iframe - -find_string: <html><iframe -replace_string: <iframe id="video" - -find_string: ></iframe></html> -replace_string: ></iframe> - -replace_string("): " - -single_page_link: //link[@type='text/xml+oembed'] - -prune: no -tidy: no - +title: //title +body: //iframe + +find_string: <html><iframe +replace_string: <iframe id="video" + +find_string: ></iframe></html> +replace_string: ></iframe> + +replace_string("): " + +single_page_link: //link[@type='text/xml+oembed'] + +prune: no +tidy: no + test_url: 
http://vimeo.com/35941909 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/viply.de.txt b/inc/3rdparty/site_config/standard/viply.de.txt new file mode 100755 index 00000000..e3599c9d --- /dev/null +++ b/inc/3rdparty/site_config/standard/viply.de.txt @@ -0,0 +1,12 @@ +title: //div[@id='singletext']//h1 +body: //div[contains(@class, 'mypictureborder')] | //div[@id='singletext'] +prune: no + +strip_id_or_class: singletostart +strip_id_or_class: navigation +strip_id_or_class: social +strip_id_or_class: single_topwrapper +strip: //a[contains(., 'Nächster Artikel')] + +test_url: http://www.viply.de/?p=87973 +test_url: http://www.viply.de/?feed=rss2 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/visir.is.txt b/inc/3rdparty/site_config/standard/visir.is.txt old mode 100644 new mode 100755 index 0f03198e..04e09102 --- a/inc/3rdparty/site_config/standard/visir.is.txt +++ b/inc/3rdparty/site_config/standard/visir.is.txt @@ -1,14 +1,14 @@ -# Author's name, when present, has 'skrifar:' ('writes:') appended to it. -# In case of multiple authors, this would be 'skrifa:', hence only 7 characters -# are stripped off. -author: substring(//div[@class='paragraph']/div[@class='meta'], 0, string-length(//div[@class='paragraph']/div[@class='meta']) - 7) - -date: //span[@class='date'] -title: //h1 -body: //div[@class='paragraph'] - -# Strip out author string when present -strip: //div[@class='paragraph']/div[@class='meta'] - +# Author's name, when present, has 'skrifar:' ('writes:') appended to it. +# In case of multiple authors, this would be 'skrifa:', hence only 7 characters +# are stripped off. 
+author: substring(//div[@class='paragraph']/div[@class='meta'], 0, string-length(//div[@class='paragraph']/div[@class='meta']) - 7) + +date: //span[@class='date'] +title: //h1 +body: //div[@class='paragraph'] + +# Strip out author string when present +strip: //div[@class='paragraph']/div[@class='meta'] + convert_double_br_tags: yes test_url: http://visir.is/esb,-ipa,-bhm-og-bsrb/article/2012701319997 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/vitispr.com.txt b/inc/3rdparty/site_config/standard/vitispr.com.txt old mode 100644 new mode 100755 index 8b2a300e..f2d11c7c --- a/inc/3rdparty/site_config/standard/vitispr.com.txt +++ b/inc/3rdparty/site_config/standard/vitispr.com.txt @@ -1,6 +1,6 @@ -strip: //*[(@id = "ja-search")] -body: //*[(@id = "ja-mainbody")] -body: //*[(@id = "content-mass-bottom")] -strip://h3[contains(span,'Related Posts')] +strip: //*[(@id = "ja-search")] +body: //*[(@id = "ja-mainbody")] +body: //*[(@id = "content-mass-bottom")] +strip://h3[contains(span,'Related Posts')] strip://img test_url: http://vitispr.com/blog/coventry-is-a-technology-hotspot \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/vivirmexico.com.txt b/inc/3rdparty/site_config/standard/vivirmexico.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/vnexpress.net.txt b/inc/3rdparty/site_config/standard/vnexpress.net.txt old mode 100644 new mode 100755 index 23c928bf..e5ebc435 --- a/inc/3rdparty/site_config/standard/vnexpress.net.txt +++ b/inc/3rdparty/site_config/standard/vnexpress.net.txt @@ -1,8 +1,8 @@ -body: //div[@cpms_content]//h2[@class='Lead'] | //div[@cpms_content]//p[@class='Normal'] | //div[@cpms_content]//table -strip://div[@class="box-item"] -strip://div[@id="ARTICLE_BANNER"] -strip://a -strip://div[@class="tag-parent"] -strip://div[@class="email-print txtr"] - +body: //div[@cpms_content]//h2[@class='Lead'] | //div[@cpms_content]//p[@class='Normal'] | 
//div[@cpms_content]//table +strip://div[@class="box-item"] +strip://div[@id="ARTICLE_BANNER"] +strip://a +strip://div[@class="tag-parent"] +strip://div[@class="email-print txtr"] + test_url: http://vnexpress.net/gl/xa-hoi/2011/04/tim-thay-nan-nhan-cuoi-cung-vu-sap-mo-da-o-len-co/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/voices.washingtonpost.com.txt b/inc/3rdparty/site_config/standard/voices.washingtonpost.com.txt old mode 100644 new mode 100755 index 6bd0e855..b754aeb8 --- a/inc/3rdparty/site_config/standard/voices.washingtonpost.com.txt +++ b/inc/3rdparty/site_config/standard/voices.washingtonpost.com.txt @@ -1,3 +1,3 @@ -title: //h1 +title: //h1 body: //div[@class='entrytext'] test_url: http://voices.washingtonpost.com/ezra-klein/2010/10/why_isnt_monetary_policy_discr.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/vworker.com.txt b/inc/3rdparty/site_config/standard/vworker.com.txt old mode 100644 new mode 100755 index a39c9f4e..cfb9ea1c --- a/inc/3rdparty/site_config/standard/vworker.com.txt +++ b/inc/3rdparty/site_config/standard/vworker.com.txt @@ -1,3 +1,3 @@ -body: //div[contains(@class, 'KonaBody')] +body: //div[contains(@class, 'KonaBody')] test_url: http://www.vworker.com/RentACoder/misc/BidRequests/ShowBidRequest.asp?lngBidRequestId=1634186 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/waffle.wootest.net.txt b/inc/3rdparty/site_config/standard/waffle.wootest.net.txt old mode 100644 new mode 100755 index afcba0f3..e92757d7 --- a/inc/3rdparty/site_config/standard/waffle.wootest.net.txt +++ b/inc/3rdparty/site_config/standard/waffle.wootest.net.txt @@ -1,4 +1,4 @@ -title: //h2[@class="title"] -body: //div[@class="post"] +title: //h2[@class="title"] +body: //div[@class="post"] test_url: http://waffle.wootest.net/2011/06/22/on-reading-news/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/walrusmagazine.com.txt 
b/inc/3rdparty/site_config/standard/walrusmagazine.com.txt old mode 100644 new mode 100755 index 3ab22172..c53eb0dd --- a/inc/3rdparty/site_config/standard/walrusmagazine.com.txt +++ b/inc/3rdparty/site_config/standard/walrusmagazine.com.txt @@ -1,14 +1,14 @@ -title: //div[@id='pr']/h3 -author: //div[@class='dateline']//a[contains(@href, '/author/')] - -# print page -body: //div[@id='prbody'] -# standard page -body: //div[@id='pgbody'] - -# for multi-page articles -single_page_link: //div[@class='tipjar']//a[contains(@href, '/printerFriendly.php?')] - -prune: no +title: //div[@id='pr']/h3 +author: //div[@class='dateline']//a[contains(@href, '/author/')] + +# print page +body: //div[@id='prbody'] +# standard page +body: //div[@id='pgbody'] + +# for multi-page articles +single_page_link: //div[@class='tipjar']//a[contains(@href, '/printerFriendly.php?')] + +prune: no test_url: http://www.walrusmagazine.com/articles/2011.12-memoir-kidnapped \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/warnerbros.fr.txt b/inc/3rdparty/site_config/standard/warnerbros.fr.txt old mode 100644 new mode 100755 index a41a3511..21f56352 --- a/inc/3rdparty/site_config/standard/warnerbros.fr.txt +++ b/inc/3rdparty/site_config/standard/warnerbros.fr.txt @@ -1,3 +1,3 @@ -title: //h3 +title: //h3 body: //div[@class="content_wysiwyg"] test_url: http://www.warnerbros.fr/game-of-thrones-un-junket-vu-de-l-interieur-268.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/washingtoninstitute.org.txt b/inc/3rdparty/site_config/standard/washingtoninstitute.org.txt new file mode 100755 index 00000000..17f45677 --- /dev/null +++ b/inc/3rdparty/site_config/standard/washingtoninstitute.org.txt @@ -0,0 +1,6 @@ +body: //div[@class='main']//article + +prune: no + +test_url: http://www.washingtoninstitute.org/policy-analysis/view/striking-syria-lessons-from-the-israeli-experience?goback=.gde_3822158_member_273623672 +test_url: 
http://www.washingtoninstitute.org/rss/11/10 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/washingtonmonthly.com.txt b/inc/3rdparty/site_config/standard/washingtonmonthly.com.txt old mode 100644 new mode 100755 index edf16422..8f8902a5 --- a/inc/3rdparty/site_config/standard/washingtonmonthly.com.txt +++ b/inc/3rdparty/site_config/standard/washingtonmonthly.com.txt @@ -1,10 +1,10 @@ -title://a[@class = 'headline-article'] - -author: substring-after(//div[@class = 'article']/p[@class = 'author'], 'By ') -date://div[@class = 'article']/span[@class = 'date'] -body://div[@class = 'article'] -single_page_link://a[@class = 'print'] -strip://p[@class = 'author'] -strip://a[@class = 'headline-article'] +title://a[@class = 'headline-article'] + +author: substring-after(//div[@class = 'article']/p[@class = 'author'], 'By ') +date://div[@class = 'article']/span[@class = 'date'] +body://div[@class = 'article'] +single_page_link://a[@class = 'print'] +strip://p[@class = 'author'] +strip://a[@class = 'headline-article'] strip://span[@class = 'date'] test_url: http://www.washingtonmonthly.com/magazine/julyaugust_2011/features/the_trinity_sisters030380.php \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/washingtonpost.com.txt b/inc/3rdparty/site_config/standard/washingtonpost.com.txt old mode 100644 new mode 100755 index 2931ca5f..0aa9f1d8 --- a/inc/3rdparty/site_config/standard/washingtonpost.com.txt +++ b/inc/3rdparty/site_config/standard/washingtonpost.com.txt @@ -1,21 +1,32 @@ -body: //div[@class="article_body"] -author://meta[@name='DC.creator']/@content -title://meta[@name='title']/@content -date://div[contains(@class,'byline')]//span[contains(@class,'published')]/@title -date://meta[@name="DC.date.issued"]/@content -strip://div[@class="relative primary-slot padding-top img-border gallery-container photo-wrapper"] -strip://div[@id="wp-column six end"] -strip://div[contains(@class,'hidden')] 
-strip://div[@id='article-side-rail'] -strip://div[@class="module component todays-paper-module curved"] -strip://div[@class="module component live-qa curved img-border"] -strip://div[@class="module component newsletter-signup curved"] -strip://div[@class="module featured-stories component curved img-border"] - -strip_id_or_class: carousel -strip_id_or_class: toolbar -strip_id_or_class: module - -test_url: http://www.washingtonpost.com/world/europe/in-europe-new-fears-of-german-might/2011/10/19/gIQA3baZ7L_story.html?hpid=z1 -test_url: http://www.washingtonpost.com/national/health-science/radical-theory-of-first-americans-places-stone-age-europeans-in-delmarva-20000-years-ago/2012/02/28/gIQA4mriiR_story.html +# Seems to be redirecting to articles.washingtonpost.com for many users + +body: //div[contains(@class, "article_body")] +# print view +body: //div[@id='print_facet']//div[@id='body'] + +author://meta[@name='DC.creator']/@content +title://meta[@name='title']/@content +date://div[contains(@class,'byline')]//span[contains(@class,'published')]/@title +date://meta[@name="DC.date.issued"]/@content +strip://div[@class="relative primary-slot padding-top img-border gallery-container photo-wrapper"] +strip://div[@id="wp-column six end"] +strip://div[contains(@class,'hidden')] +strip://div[@id='article-side-rail'] +strip://div[@class="module component todays-paper-module curved"] +strip://div[@class="module component live-qa curved img-border"] +strip://div[@class="module component newsletter-signup curved"] +strip://div[@class="module featured-stories component curved img-border"] + +strip_id_or_class: carousel +strip_id_or_class: toolbar +strip_id_or_class: module + +# Change gJQAwdJG4U_story.html to gJQAwdJG4U_print.html +single_page_link: concat(substring-before(//link[@rel="canonical"]/@href, "_story.html"), "_print.html") + +# [OLD] Change gJQAwdJG4U_story.html to gJQAwdJG4U_story_print.html +#single_page_link: 
concat(substring-before(//link[@rel="canonical"]/@href, "_story.html"), "_story_print.html") + +test_url: http://www.washingtonpost.com/world/europe/in-europe-new-fears-of-german-might/2011/10/19/gIQA3baZ7L_story.html?hpid=z1 +test_url: http://www.washingtonpost.com/national/health-science/radical-theory-of-first-americans-places-stone-age-europeans-in-delmarva-20000-years-ago/2012/02/28/gIQA4mriiR_story.html test_url: http://www.washingtonpost.com/lifestyle/magazine/the-sorry-fate-of-a-tech-pioneer-halsey-minor-and-historic-virginia-estate-carters-grove/2012/05/30/gJQAwdJG4U_story.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/web-libre.org.txt b/inc/3rdparty/site_config/standard/web-libre.org.txt old mode 100644 new mode 100755 index dfcd0081..9ed43a25 --- a/inc/3rdparty/site_config/standard/web-libre.org.txt +++ b/inc/3rdparty/site_config/standard/web-libre.org.txt @@ -1,6 +1,6 @@ -body: //div[@id='template_article'] - -strip_id_or_class: article_more -strip: //hr +body: //div[@id='template_article'] + +strip_id_or_class: article_more +strip: //hr test_url: http://www.web-libre.org/dossiers/jacuzzi-gonflable,8493.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/weblog.bignerdranch.com.txt b/inc/3rdparty/site_config/standard/weblog.bignerdranch.com.txt old mode 100644 new mode 100755 index 9e75a8a8..578ba523 --- a/inc/3rdparty/site_config/standard/weblog.bignerdranch.com.txt +++ b/inc/3rdparty/site_config/standard/weblog.bignerdranch.com.txt @@ -1,5 +1,5 @@ -title://div[@class="post"]/h2 -author://p[@class="postinfo"]/a -date:substring-before(substring-after(//p[@class="postinfo"],' on '),' under ') +title://div[@class="post"]/h2 +author://p[@class="postinfo"]/a +date:substring-before(substring-after(//p[@class="postinfo"],' on '),' under ') body://div[@class="contenttext"] test_url: http://weblog.bignerdranch.com/?p=304 \ No newline at end of file diff --git 
a/inc/3rdparty/site_config/standard/weblogs.asp.net.txt b/inc/3rdparty/site_config/standard/weblogs.asp.net.txt old mode 100644 new mode 100755 index 3fabda0b..7cfa49d2 --- a/inc/3rdparty/site_config/standard/weblogs.asp.net.txt +++ b/inc/3rdparty/site_config/standard/weblogs.asp.net.txt @@ -1,9 +1,9 @@ -title: //h2[@class="pageTitle"] -strip: //div[@class="postfoot"] -strip: //h2[@class="pageTitle"] -strip: //h3[@class="pageTitle"] -body: //div[@class="post"] -author: substring-before(substring-after(//div[@class="postfoot"], 'by'), 'Filed') -date: substring-before(substring-after(//div[@class="postfoot"], 'Published'), 'by') +title: //h2[@class="pageTitle"] +strip: //div[@class="postfoot"] +strip: //h2[@class="pageTitle"] +strip: //h3[@class="pageTitle"] +body: //div[@class="post"] +author: substring-before(substring-after(//div[@class="postfoot"], 'by'), 'Filed') +date: substring-before(substring-after(//div[@class="postfoot"], 'Published'), 'by') test_url: http://weblogs.asp.net/scottgu/archive/2011/08/31/html-editor-smart-tasks-and-event-handler-generation-asp-net-vnext-series.aspx \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/webpaper.nzz.ch.txt b/inc/3rdparty/site_config/standard/webpaper.nzz.ch.txt old mode 100644 new mode 100755 index 8922b02f..cea10147 --- a/inc/3rdparty/site_config/standard/webpaper.nzz.ch.txt +++ b/inc/3rdparty/site_config/standard/webpaper.nzz.ch.txt @@ -1,8 +1,8 @@ -tidy: no -dissolve: //div[@id="content"]/div/article/header -body: //div[@id="content"]/div/article -title: //div[@id="content"]/div/article/h1 -date: //div[@id="content"]/div/article/header/div[@id="issueSelectTrigger"] -strip: //div[@id="content"]/div/article/h1 +tidy: no +dissolve: //div[@id="content"]/div/article/header +body: //div[@id="content"]/div/article +title: //div[@id="content"]/div/article/h1 +date: //div[@id="content"]/div/article/header/div[@id="issueSelectTrigger"] +strip: //div[@id="content"]/div/article/h1 test_url: 
http://webpaper.nzz.ch/2012/06/23/front/JJKMS/aphrodite-und-die-kommunisten?guest_pass=24a3ca5b6d%3AJJKMS%3Ad30e1be8628c099669671d4da56cdce4187790ba \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/webwereld.nl.txt b/inc/3rdparty/site_config/standard/webwereld.nl.txt new file mode 100755 index 00000000..40a5aa36 --- /dev/null +++ b/inc/3rdparty/site_config/standard/webwereld.nl.txt @@ -0,0 +1,8 @@ +strip: //*[@class="paginator"] +body: //*[@id="articleText"] +next_page_link: //a[@class="next"] + +# No author detection +# No publishing date detection +# No author and intro deduplication over multiple pages +test_url: http://webwereld.nl/analyse/111452/de-code-van-dorifel-nader-bekeken.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/welt.de.txt b/inc/3rdparty/site_config/standard/welt.de.txt old mode 100644 new mode 100755 index 6e4f828f..42e65e97 --- a/inc/3rdparty/site_config/standard/welt.de.txt +++ b/inc/3rdparty/site_config/standard/welt.de.txt @@ -1,22 +1,22 @@ -# set body -tidy: no -body: //div[contains(@class, 'articleContent')] - -# remove clutter -strip: //div[@class='advertising'] -strip: //div[@class='themenalarm'] -strip: //div[contains(@class, 'inTextTeaser')] - -# remove captions -strip: //span[@class='copyRight'] - -# remove photo galleries and extras -strip: //div[contains(@class, 'textGallery')] -strip: //div[contains(@class, 'videoGallery')] -strip: //div[contains(@class, 'imageGallery')] -strip: //div[contains(@class, 'openContent')] - -# remove comments -strip: //div[@id = 'writeComment'] - +# set body +tidy: no +body: //div[contains(@class, 'articleContent')] + +# remove clutter +strip: //div[@class='advertising'] +strip: //div[@class='themenalarm'] +strip: //div[contains(@class, 'inTextTeaser')] + +# remove captions +strip: //span[@class='copyRight'] + +# remove photo galleries and extras +strip: //div[contains(@class, 'textGallery')] +strip: //div[contains(@class, 'videoGallery')] 
+strip: //div[contains(@class, 'imageGallery')] +strip: //div[contains(@class, 'openContent')] + +# remove comments +strip: //div[@id = 'writeComment'] + test_url: http://www.welt.de/vermischtes/weltgeschehen/article11050589/27-Bergleute-in-neuseelaendischer-Mine-vermisst.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/westhamtillidie.com.txt b/inc/3rdparty/site_config/standard/westhamtillidie.com.txt old mode 100644 new mode 100755 index b9343029..3132e98a --- a/inc/3rdparty/site_config/standard/westhamtillidie.com.txt +++ b/inc/3rdparty/site_config/standard/westhamtillidie.com.txt @@ -1,6 +1,6 @@ -title: substring-before(//title, '�') - -body: //div[@class='entry'] -strip: //div[@class='sharing_label'] +title: substring-before(//title, '«') + +body: //div[@class='entry'] +strip: //div[@class='sharing_label'] strip: //div[@class='snap_nopreview sharing robots-nocontent'] test_url: http://www.westhamtillidie.com/2012/03/11/twelve-things-we-learned-from-the-doncaster-game/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/what-if.xkcd.com.txt b/inc/3rdparty/site_config/standard/what-if.xkcd.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/whatever.scalzi.com.txt b/inc/3rdparty/site_config/standard/whatever.scalzi.com.txt old mode 100644 new mode 100755 index 52c5cf1b..100a8c88 --- a/inc/3rdparty/site_config/standard/whatever.scalzi.com.txt +++ b/inc/3rdparty/site_config/standard/whatever.scalzi.com.txt @@ -1,7 +1,7 @@ -strip: //div[@class="navigation"] -strip: //div[@id="sidebar"] -strip: //div[@id="post-extra-content"] -strip: //div[@id="footer"] -strip: //div[contains(@class, "sharing")] +strip: //div[@class="navigation"] +strip: //div[@id="sidebar"] +strip: //div[@id="post-extra-content"] +strip: //div[@id="footer"] +strip: //div[contains(@class, "sharing")] test_url: http://whatever.scalzi.com/2011/01/09/quick-giffords-follow-up/ \ No newline at end of file diff --git 
a/inc/3rdparty/site_config/standard/wheelyric.com.txt b/inc/3rdparty/site_config/standard/wheelyric.com.txt old mode 100644 new mode 100755 index aa9783cf..b9eeaa0c --- a/inc/3rdparty/site_config/standard/wheelyric.com.txt +++ b/inc/3rdparty/site_config/standard/wheelyric.com.txt @@ -1,11 +1,11 @@ -body://div[contains(@class,'oAndtLyrics')] -strip://div[contains(@class,'info')] -strip://div[contains(@id,'romanization')] -strip://div[contains(@id,'youtube')] -strip://div[contains(@id,'romanizationSelector')] -strip://div[contains(@id,'langSelectWrap')] -strip://div[contains(@id,'requestTranslationWrap')] -strip://div[contains(@id,'viewMore')] -strip://div[contains(@class,'lyricsListInMainContent')] +body://div[contains(@class,'oAndtLyrics')] +strip://div[contains(@class,'info')] +strip://div[contains(@id,'romanization')] +strip://div[contains(@id,'youtube')] +strip://div[contains(@id,'romanizationSelector')] +strip://div[contains(@id,'langSelectWrap')] +strip://div[contains(@id,'requestTranslationWrap')] +strip://div[contains(@id,'viewMore')] +strip://div[contains(@class,'lyricsListInMainContent')] strip://div[contains(@class,'descIpNoti')] test_url: http://wheelyric.com/lyrics/121#2 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/wiki.guildwars.com.txt b/inc/3rdparty/site_config/standard/wiki.guildwars.com.txt old mode 100644 new mode 100755 index 1f262a0a..b80fe5d1 --- a/inc/3rdparty/site_config/standard/wiki.guildwars.com.txt +++ b/inc/3rdparty/site_config/standard/wiki.guildwars.com.txt @@ -1,8 +1,8 @@ -title: //h1 -body: //div[@id='content'] -strip_id_or_class: editsection -strip_id_or_class: toc -strip: //div[@id='siteNotice'] -strip: //div[@id='content']//table[last()] +title: //h1 +body: //div[@id='content'] +strip_id_or_class: editsection +strip_id_or_class: toc +strip: //div[@id='siteNotice'] +strip: //div[@id='content']//table[last()] prune: no test_url: http://wiki.guildwars.com/wiki/Monk \ No newline at end of file diff --git 
a/inc/3rdparty/site_config/standard/wiki.guildwars2.com.txt b/inc/3rdparty/site_config/standard/wiki.guildwars2.com.txt old mode 100644 new mode 100755 index e176907e..e9233998 --- a/inc/3rdparty/site_config/standard/wiki.guildwars2.com.txt +++ b/inc/3rdparty/site_config/standard/wiki.guildwars2.com.txt @@ -1,8 +1,8 @@ -title: //h1 -body: //div[@id='content'] -strip_id_or_class: editsection -strip_id_or_class: toc -strip: //div[@id='siteNotice'] -strip: //div[@id='content']//table[last()] +title: //h1 +body: //div[@id='content'] +strip_id_or_class: editsection +strip_id_or_class: toc +strip: //div[@id='siteNotice'] +strip: //div[@id='content']//table[last()] prune: no test_url: http://wiki.guildwars2.com/wiki/Guardian \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/wikihow.com.txt b/inc/3rdparty/site_config/standard/wikihow.com.txt new file mode 100755 index 00000000..fe95d3f9 --- /dev/null +++ b/inc/3rdparty/site_config/standard/wikihow.com.txt @@ -0,0 +1,15 @@ +# ...&printable=yes +body: //div[@id='bodycontents'] +prune: no +tidy: no +strip_id_or_class: gatEditSection +strip_id_or_class: relatedwikihows +#strip: //div[contains(@class, 'step_num')] + +replace_string(<script ): <div style="display: none" +replace_string(</script>): </div> + +single_page_link: //a[@id='gatPrintView'] +single_page_link: concat(//link[@rel='canonical']/@href, '?printable=yes') + +test_url: http://www.wikihow.com/Start-Your-Own-Country \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/wikitravel.org.txt b/inc/3rdparty/site_config/standard/wikitravel.org.txt old mode 100644 new mode 100755 index da5bd0b5..1f32a372 --- a/inc/3rdparty/site_config/standard/wikitravel.org.txt +++ b/inc/3rdparty/site_config/standard/wikitravel.org.txt @@ -1,14 +1,14 @@ -# copied from .wikipedia.org.txt -title: //h1[@id='firstHeading' or @class='firstHeading'] -body: //div[@id = 'bodyContent'] -strip_id_or_class: editsection -#strip_id_or_class: toc 
-strip_id_or_class: vertical-navbox -strip: //table[@id='toc'] | //div[@id='p-toc'] -strip: //div[@id='catlinks' or @id='contentSub'] -strip: //div[@id='jump-to-nav'] -strip: //div[@class='thumbcaption']//div[@class='magnify'] -strip: //table[@class='navbox'] -prune: no +# copied from .wikipedia.org.txt +title: //h1[@id='firstHeading' or @class='firstHeading'] +body: //div[@id = 'bodyContent'] +strip_id_or_class: editsection +#strip_id_or_class: toc +strip_id_or_class: vertical-navbox +strip: //table[@id='toc'] | //div[@id='p-toc'] +strip: //div[@id='catlinks' or @id='contentSub'] +strip: //div[@id='jump-to-nav'] +strip: //div[@class='thumbcaption']//div[@class='magnify'] +strip: //table[@class='navbox'] +prune: no tidy: no test_url: http://wikitravel.org/wiki/en/index.php?title=Bangkok&printable=yes \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/will-self.com.txt b/inc/3rdparty/site_config/standard/will-self.com.txt old mode 100644 new mode 100755 index 24467c22..394f9ca4 --- a/inc/3rdparty/site_config/standard/will-self.com.txt +++ b/inc/3rdparty/site_config/standard/will-self.com.txt @@ -1,4 +1,4 @@ -strip: //div[@class="widget-area"] -title: //*[@class="entry-title"] +strip: //div[@class="widget-area"] +title: //*[@class="entry-title"] date: //time[@class="entry-date"] test_url: http://will-self.com/2012/02/01/real-meals-dominos-pizza/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/williampfaff.com.txt b/inc/3rdparty/site_config/standard/williampfaff.com.txt old mode 100644 new mode 100755 index fb5f92ed..cefabec0 --- a/inc/3rdparty/site_config/standard/williampfaff.com.txt +++ b/inc/3rdparty/site_config/standard/williampfaff.com.txt @@ -1,3 +1,3 @@ -title: substring-after(//span[@class='itemTitle'], ':') +title: substring-after(//span[@class='itemTitle'], ':') body: //div[@id='content'] test_url: http://www.williampfaff.com/modules/news/article.php?storyid=491 \ No newline at end of file diff --git 
a/inc/3rdparty/site_config/standard/winfuture.de.txt b/inc/3rdparty/site_config/standard/winfuture.de.txt old mode 100644 new mode 100755 index bc936370..dddc6f9e --- a/inc/3rdparty/site_config/standard/winfuture.de.txt +++ b/inc/3rdparty/site_config/standard/winfuture.de.txt @@ -1,12 +1,12 @@ -title: //h1/span - -body: //div[@id="news_content"] - -author: //div[@class="bookmarks_btm"]/p[1]/a[1]/text() - -date: //span[@class='date'] - -# Rubrikenbild entfernen -strip: //div[@id="news_content"]/a[1] +title: //h1/span + +body: //div[@id="news_content"] + +author: //div[@class="bookmarks_btm"]/p[1]/a[1]/text() + +date: //span[@class='date'] + +# Rubrikenbild entfernen +strip: //div[@id="news_content"]/a[1] test_url: http://winfuture.de/news,69672.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/winrumors.com.txt b/inc/3rdparty/site_config/standard/winrumors.com.txt old mode 100644 new mode 100755 index cedb4390..f25f9c9e --- a/inc/3rdparty/site_config/standard/winrumors.com.txt +++ b/inc/3rdparty/site_config/standard/winrumors.com.txt @@ -1,6 +1,6 @@ -title: //h1[@class='page-heading'] -author: //small/strong/a -#their date string is relative, so if you save the page 2 hours after it is posted it may say 'two hours ago, instead of providing a useful date/time' -date: substring-before(substring-after(//small,'on'),'with') -body: //div[@class='entry'] +title: //h1[@class='page-heading'] +author: //small/strong/a +#their date string is relative, so if you save the page 2 hours after it is posted it may say 'two hours ago, instead of providing a useful date/time' +date: substring-before(substring-after(//small,'on'),'with') +body: //div[@class='entry'] test_url: http://www.winrumors.com/chinese-windows-phone-launch-still-on-track-for-early-2012/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/winsupersite.com.txt b/inc/3rdparty/site_config/standard/winsupersite.com.txt old mode 100644 new mode 100755 index 
db6a6fc9..f725b67a --- a/inc/3rdparty/site_config/standard/winsupersite.com.txt +++ b/inc/3rdparty/site_config/standard/winsupersite.com.txt @@ -1,3 +1,3 @@ -date: //*[@class='kicker'] -body: //*[@class='KonaBody'] +date: //*[@class='kicker'] +body: //*[@class='KonaBody'] test_url: http://www.winsupersite.com/article/paul-thurrotts-wininfo/android-malware-surges-separate-studies-141364 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/wired.com.txt b/inc/3rdparty/site_config/standard/wired.com.txt old mode 100644 new mode 100755 index 69bbf5b7..f5a72d14 --- a/inc/3rdparty/site_config/standard/wired.com.txt +++ b/inc/3rdparty/site_config/standard/wired.com.txt @@ -1,22 +1,25 @@ -title: //meta[@property="og:title"]/@content -title: //h1 -title: //*[@class='posttitle'] -author: //*[@class='entryAuthor']/a[1] -author://*[@class='member-title'] -author://li[@class='author']/a[contains(@href, '/author/')] -date: substring-after(//div[@class='entryAuthor'], '�') -date: substring-before(//*[@class='entryDate'], '|') -body: //div[@class='entry'] -strip: //span[contains(@class, 'nextprev')] -#strip_id_or_class: ngg-galleryoverview -# ngg-galleryoverview is the whole content sometimes, e.g. 
http://www.wired.com/underwire/2011/12/best-mixtapes-of-2011/?pid=5736&viewall=true - -strip: //p[span[contains(@class, 'contentjump')]] -strip: //text()[contains(., 'nextpage')] - -prune: no - -single_page_link: //a[contains(@href, '/all/1') and contains(@class, 'contentjumpall')] - -test_url: http://www.wired.com/cloudline/2011/10/meet-arms-cortex-a15-the-future-of-the-ipad-and-possibly-the-macbook-air/ -test_url: http://www.wired.com/threatlevel/2012/05/ff_counterfeiter/all/1 \ No newline at end of file +title: //meta[@name='Title']/@content +author: //meta[@name='Author']/@content +date: //meta[@name='DisplayDate']/@content +body: //div[@class='entry'] +strip: //p[contains(., 'Pages:') and contains(., 'View All')] +strip: //p[@class='caption'] +strip: //div[@class='desc' or @class='slide' or @id='slide-info'] + +strip_id_or_class: pullquote +strip_id_or_class: left_rail +strip_id_or_class: related-container +strip_id_or_class: radvert-caption-wrap + +# Remove gallery? +strip_id_or_class: wpgallery + +#strip: //text()[contains(., 'nextpage')] + +prune: no + +single_page_link: //a[.='View All' and contains(@href, '/all/')] + +test_url: http://www.wired.com/cloudline/2011/10/meet-arms-cortex-a15-the-future-of-the-ipad-and-possibly-the-macbook-air/ +test_url: http://www.wired.com/wiredenterprise/2013/09/docker/ +test_url: http://www.wired.com/threatlevel/2012/05/ff_counterfeiter/all/ diff --git a/inc/3rdparty/site_config/standard/wmnf.org.txt b/inc/3rdparty/site_config/standard/wmnf.org.txt old mode 100644 new mode 100755 index ffb6b2d1..1d403a91 --- a/inc/3rdparty/site_config/standard/wmnf.org.txt +++ b/inc/3rdparty/site_config/standard/wmnf.org.txt @@ -1,13 +1,13 @@ -title: //div[@class="bodyText"]/h1/text() -body: //div[@class="bodyText"] - -# author and date are separated by only a newline -# can't figure out how to tokenize that yet -author: //div[@class="bodyText"]/span[@class="info"]/text() -date: //div[@class="bodyText"]/span[@class="info"]/text() - -# 
strip metdata from body text -strip: //div[@class="bodyText"]/h1/text() -strip: //div[@class="bodyText"]/span[@class="info"] +title: //div[@class="bodyText"]/h1/text() +body: //div[@class="bodyText"] + +# author and date are separated by only a newline +# can't figure out how to tokenize that yet +author: //div[@class="bodyText"]/span[@class="info"]/text() +date: //div[@class="bodyText"]/span[@class="info"]/text() + +# strip metdata from body text +strip: //div[@class="bodyText"]/h1/text() +strip: //div[@class="bodyText"]/span[@class="info"] strip: //div[@class="bodyText"]/span[@class="info"] test_url: http://www.wmnf.org/news_stories/light-rail-advocates-join-forces-to-combat-opposition-in-pinellas \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/wmpoweruser.com.txt b/inc/3rdparty/site_config/standard/wmpoweruser.com.txt old mode 100644 new mode 100755 index d9011d24..70168fbe --- a/inc/3rdparty/site_config/standard/wmpoweruser.com.txt +++ b/inc/3rdparty/site_config/standard/wmpoweruser.com.txt @@ -1,4 +1,4 @@ -date://*[@class="entry-date"] -author://*[@class="author vcard"] +date://*[@class="entry-date"] +author://*[@class="author vcard"] strip://*[@style="position:relative;left:72px;top:2px;"]|//*[@id="authorbox"] test_url: http://wmpoweruser.com/breaking-nokia-announces-nfc-support-in-lumia-610-windows-phone-device/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/worldpoultry.net.txt b/inc/3rdparty/site_config/standard/worldpoultry.net.txt old mode 100644 new mode 100755 index 0e42ca5e..b88f9279 --- a/inc/3rdparty/site_config/standard/worldpoultry.net.txt +++ b/inc/3rdparty/site_config/standard/worldpoultry.net.txt @@ -1,5 +1,5 @@ -title: //div[@class="content article"]/h1 -date: substring-after(//*[@class='date'], '//') -body: //*[@class='article-content'] +title: //div[@class="content article"]/h1 +date: substring-after(//*[@class='date'], '//') +body: //*[@class='article-content'] strip: //*[@id='nomodal'] 
test_url: http://www.worldpoultry.net/news/kyrgyzstan-restricts-poultry-imports-from-russia-and-kazakhstan-9332.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/worldwidewords.org.txt b/inc/3rdparty/site_config/standard/worldwidewords.org.txt old mode 100644 new mode 100755 index 733d607f..4682e0d3 --- a/inc/3rdparty/site_config/standard/worldwidewords.org.txt +++ b/inc/3rdparty/site_config/standard/worldwidewords.org.txt @@ -1,4 +1,4 @@ -title: //p[@id='content'] - +title: //p[@id='content'] + body: //div[@class='contentblock'] test_url: http://www.worldwidewords.org/weirdwords/ww-gro1.htm \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/wow.joystiq.com.txt b/inc/3rdparty/site_config/standard/wow.joystiq.com.txt old mode 100644 new mode 100755 index 759fb81f..44add9c9 --- a/inc/3rdparty/site_config/standard/wow.joystiq.com.txt +++ b/inc/3rdparty/site_config/standard/wow.joystiq.com.txt @@ -1,6 +1,6 @@ -title: //h2[@class="posttitle"] -body: //div[@class="post"] -strip: //h2[@class="posttitle"] -strip: //p[@class="filed-under"] +title: //h2[@class="posttitle"] +body: //div[@class="post"] +strip: //h2[@class="posttitle"] +strip: //p[@class="filed-under"] convert_double_br_tags: yes test_url: http://wow.joystiq.com/2011/06/20/the-overachiever-guide-to-midsummer-festival-2011-achievements/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/wpmayor.com.txt b/inc/3rdparty/site_config/standard/wpmayor.com.txt new file mode 100755 index 00000000..bb4fffc7 --- /dev/null +++ b/inc/3rdparty/site_config/standard/wpmayor.com.txt @@ -0,0 +1,8 @@ +body: //div[@id='nrelate_flyout_placeholder'] + +strip_id_or_class: share + +prune: no + +test_url: http://www.wpmayor.com/themes/wordpress-portfolio-resume-themes/ +test_url: http://www.wpmayor.com/feed/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/wtatennis.com.txt b/inc/3rdparty/site_config/standard/wtatennis.com.txt new 
file mode 100755 index 00000000..1000ab26 --- /dev/null +++ b/inc/3rdparty/site_config/standard/wtatennis.com.txt @@ -0,0 +1,7 @@ +title: //h1[contains(@class, 'header-2')] +body: //article//*[contains(@class, 'teaserText') or contains(@class, 'lastUpdated') or contains(@class, 'image') or contains(@class, 'body')] +strip_id_or_class: articleIndex +prune: no + +test_url: http://www.wtatennis.com/news/article/3190914 +test_url: http://www.wtatennis.com/news/article/3190244 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/www1.folha.uol.com.br.txt b/inc/3rdparty/site_config/standard/www1.folha.uol.com.br.txt old mode 100644 new mode 100755 index 0846be2c..97a5c19d --- a/inc/3rdparty/site_config/standard/www1.folha.uol.com.br.txt +++ b/inc/3rdparty/site_config/standard/www1.folha.uol.com.br.txt @@ -1,15 +1,15 @@ -body://div[@id='articleNew'] -strip://div[@id='articleBy'] -strip://div[@id='articleDate'] -strip://td[@class='articleGraphicCredit'] -strip://h1 -strip://div[@id='articleEnd'] -strip://p[@class='tagline'] -strip://div[@class='openBox adslibraryArticle'] -strip_id_or_class:ad-180x150-1 - - -title: //div[@id="articleNew"]/h1 -author: //div[@id="articleBy"]/p/b -date: substring-before(//div[@id="articleDate"], "-") +body://div[@id='articleNew'] +strip://div[@id='articleBy'] +strip://div[@id='articleDate'] +strip://td[@class='articleGraphicCredit'] +strip://h1 +strip://div[@id='articleEnd'] +strip://p[@class='tagline'] +strip://div[@class='openBox adslibraryArticle'] +strip_id_or_class:ad-180x150-1 + + +title: //div[@id="articleNew"]/h1 +author: //div[@id="articleBy"]/p/b +date: substring-before(//div[@id="articleDate"], "-") test_url: http://www1.folha.uol.com.br/mundo/1115805-ex-ditador-argentino-videla-e-condenado-a-50-anos-de-prisao.shtml \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/www3.imperial.ac.uk.txt b/inc/3rdparty/site_config/standard/www3.imperial.ac.uk.txt old mode 100644 new mode 100755 diff 
--git a/inc/3rdparty/site_config/standard/wyborcza.pl.txt b/inc/3rdparty/site_config/standard/wyborcza.pl.txt old mode 100644 new mode 100755 index f99467c2..638583dc --- a/inc/3rdparty/site_config/standard/wyborcza.pl.txt +++ b/inc/3rdparty/site_config/standard/wyborcza.pl.txt @@ -1,11 +1,9 @@ -title:h1 -author: //*[@class = 'author'] -date: //*[@class = 'date'] -body: //*[@id = 'art'] -next_page_link: //*[@id='Str']/a[contains(text(), 'nastepne')] -strip: //*[@class = 'rel_zdjTOP'] -strip: //*[@id = 'rel'] -strip: //*[@class = 'txt_upl'] -strip: //*[@id='Str'] -strip: //*[@id='source'] -test_url: http://wyborcza.pl/1,123455,11536088,Gdy_peknie_fejs__obryzga_wszystko.html?as=1&startsz=x \ No newline at end of file +body: //div[@id='article'] +strip: //div[@class='head'] + +strip_id_or_class: txt_upl + +single_page_link: //div[@id='gazeta_article_tools']//a[contains(@class, 'print')] + +test_url: http://wyborcza.pl/1,123455,11536088,Gdy_peknie_fejs__obryzga_wszystko.html?as=1&startsz=x +test_url: http://wyborcza.pl/1,75478,14880255,Biskup_Dydycz_o_pedofilii_i_tajemnicy_spowiedzi__Zamiast.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/wyctim.com.txt b/inc/3rdparty/site_config/standard/wyctim.com.txt old mode 100644 new mode 100755 index d8c8713b..bd7ecf2a --- a/inc/3rdparty/site_config/standard/wyctim.com.txt +++ b/inc/3rdparty/site_config/standard/wyctim.com.txt @@ -1,3 +1,3 @@ -body: //div[@class='article-body'] +body: //div[@class='article-body'] title: //h1 test_url: http://wyctim.com/icloud-sync-regebbi-rendszereken/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/wz-newsline.de.txt b/inc/3rdparty/site_config/standard/wz-newsline.de.txt old mode 100644 new mode 100755 index fbc1d3d2..5b2be744 --- a/inc/3rdparty/site_config/standard/wz-newsline.de.txt +++ b/inc/3rdparty/site_config/standard/wz-newsline.de.txt @@ -1,5 +1,5 @@ -title://h1 - -date://p[@class='articleDate'] +title://h1 + 
+date://p[@class='articleDate'] body://div[@class='articleBody wzStandardArticle'] test_url: http://www.wz-newsline.de/home/sport/tennis/federer-zum-vierten-mal-sieger-in-indian-wells-1.938050 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/xfgjls.com.txt b/inc/3rdparty/site_config/standard/xfgjls.com.txt new file mode 100755 index 00000000..2dc247a0 --- /dev/null +++ b/inc/3rdparty/site_config/standard/xfgjls.com.txt @@ -0,0 +1,11 @@ +# This filter is tested on: +# http://www.xfgjls.com/magazine/html/?131.html +# http://www.xfgjls.com/magazine/html/?170.html + +body://h3/following-sibling::div +title: //h3 +date: substring-before(//h3/following-sibling::div/p, ' ') +author: substring-before(substring-after(//h3/following-sibling::div/p, '作者:'), '来源') +wrap_in(strong)://span[contains(@style, "FONT-WEIGHT: bold")] +dissolve://span[@style="FONT-FAMILY: '宋体'; FONT-SIZE: 10.5pt; FONT-WEIGHT: bold; mso-spacerun: 'yes'"] +test_url: http://www.xfgjls.com/magazine/html/?170.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/xoeb.us.txt b/inc/3rdparty/site_config/standard/xoeb.us.txt old mode 100644 new mode 100755 index e02960e0..c09fa4df --- a/inc/3rdparty/site_config/standard/xoeb.us.txt +++ b/inc/3rdparty/site_config/standard/xoeb.us.txt @@ -1,4 +1,4 @@ -title: //h1[@class="entry-title"] -author: //span[@class="fn"] +title: //h1[@class="entry-title"] +author: //span[@class="fn"] date: //p[@class="meta"] test_url: http://xoeb.us/blog/2012/03/16/my-mistakes-with-our-first-release/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/yated.com.txt b/inc/3rdparty/site_config/standard/yated.com.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/ynet.co.il.txt b/inc/3rdparty/site_config/standard/ynet.co.il.txt new file mode 100755 index 00000000..aa86566a --- /dev/null +++ b/inc/3rdparty/site_config/standard/ynet.co.il.txt @@ -0,0 +1,26 @@ +body: 
//span[@id='article_content' or @class='text16g'] + +# ads +strip: //div[.//div[contains(@id, 'ads.')]] +# related content heading +strip: //p[contains(., 'עוד בערוץ החדשות של ynet:')] +strip: //p[contains(., 'כותרות אחרונות מהעולם בחדשות ynet:')] +strip: //div[contains(., 'אינציקלופדיית ynet:')] +# related content links +strip: //a[@class='bluelink'] +# strip image bullets +strip_image_src: ynet_manual_bullet.png + +prune: no +tidy: no + +# prevent JS issues +find_string: <script type='text/javascript'> +replace_string: <div style="display:none;"> +find_string: </script> +replace_string: </div> + +test_url: http://www.ynet.co.il/articles/0,7340,L-4354266,00.html +test_url: http://www.ynet.co.il/articles/0,7340,L-4354268,00.html +#feed +test_url: http://www.ynet.co.il/Integration/StoryRss2.xml \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/yostivanich.com.txt b/inc/3rdparty/site_config/standard/yostivanich.com.txt old mode 100644 new mode 100755 index 9e24db3c..2aeb7e05 --- a/inc/3rdparty/site_config/standard/yostivanich.com.txt +++ b/inc/3rdparty/site_config/standard/yostivanich.com.txt @@ -1,5 +1,5 @@ -title://div[@class='entry-title'] -body://div[@class='entry-content'] -strip_comments:yes +title://div[@class='entry-title'] +body://div[@class='entry-content'] +strip_comments:yes convert_double_br_tags:yes test_url: http://www.yostivanich.com/2010/07/11/wired-com-with-world-watching-wikileaks-falls-into-disrepair/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/yourerie.com.txt b/inc/3rdparty/site_config/standard/yourerie.com.txt new file mode 100755 index 00000000..b46b09e8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/yourerie.com.txt @@ -0,0 +1,2 @@ +body: //div[@class="nxFullTextData"] +test_url: http://yourerie.com/fulltext?nxd_id=306552 diff --git a/inc/3rdparty/site_config/standard/youtube.com.txt b/inc/3rdparty/site_config/standard/youtube.com.txt old mode 100644 new mode 100755 index 
d52b7356..b0d95f1f --- a/inc/3rdparty/site_config/standard/youtube.com.txt +++ b/inc/3rdparty/site_config/standard/youtube.com.txt @@ -1,15 +1,15 @@ -title: //title -body: //iframe - -find_string: <html><iframe -replace_string: <iframe id="video" - -find_string: ></iframe></html> -replace_string: ></iframe> - -single_page_link: //link[@type='text/xml+oembed'] - -prune: no -tidy: no - +title: //title +body: //iframe + +find_string: <html><iframe +replace_string: <iframe id="video" + +find_string: ></iframe></html> +replace_string: ></iframe> + +single_page_link: //link[@type='text/xml+oembed'] + +prune: no +tidy: no + test_url: http://www.youtube.com/watch?v=F6gLH0r3iVU \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/zcommunications.org.txt b/inc/3rdparty/site_config/standard/zcommunications.org.txt new file mode 100755 index 00000000..4deb49bf --- /dev/null +++ b/inc/3rdparty/site_config/standard/zcommunications.org.txt @@ -0,0 +1,7 @@ +title: //h1[@id='view_title'] +author: //div[contains(@class, 'content_authors')]//a +body: //div[@id='view_body'] + +prune: no + +test_url: http://www.zcommunications.org/orwellian-language-update-by-edward-s-herman.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/zdnet.com.txt b/inc/3rdparty/site_config/standard/zdnet.com.txt old mode 100644 new mode 100755 index b244b229..939fb0e3 --- a/inc/3rdparty/site_config/standard/zdnet.com.txt +++ b/inc/3rdparty/site_config/standard/zdnet.com.txt @@ -1,10 +1,10 @@ -title: //h1[@class="h s-1"] -author: substring-before(substring-after(//p[@class="meta s-10"], 'By'), '|') -author: substring-after(//div[@class="bio"]//h3, 'About ') -date: substring-after(//p[@class="meta s-10"], '|') -date: substring-after(//p[@class="meta"], '|') -body: //div[@class="content-1 entry space-1 clear"] -body: //div[@class="storyBody"] - -test_url: http://www.zdnet.com/blog/microsoft/the-bing-back-end-more-on-cosmos-tiger-and-scope/10920 +title: 
//h1[@class="h s-1"] +author: substring-before(substring-after(//p[@class="meta s-10"], 'By'), '|') +author: substring-after(//div[@class="bio"]//h3, 'About ') +date: substring-after(//p[@class="meta s-10"], '|') +date: substring-after(//p[@class="meta"], '|') +body: //div[@class="content-1 entry space-1 clear"] +body: //div[@class="storyBody"] + +test_url: http://www.zdnet.com/blog/microsoft/the-bing-back-end-more-on-cosmos-tiger-and-scope/10920 test_url: http://www.zdnet.com/researchers-find-web-tracking-up-privacy-down-7000000358/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/zeit.de.txt b/inc/3rdparty/site_config/standard/zeit.de.txt old mode 100644 new mode 100755 index 66a7f1ac..9815d478 --- a/inc/3rdparty/site_config/standard/zeit.de.txt +++ b/inc/3rdparty/site_config/standard/zeit.de.txt @@ -1,44 +1,45 @@ -# 2012-12-23 [carlo@...] fixed half-assed headlines in articles, removed inline author profiles, adjusted picture captions -# 2012-03-17 [dkless@...] Cut metadata parts in the beginning and the ends of the content block; copyright entries for pictures removed; Author fixed, not sure if old entries still valid (I left them); Weird problems with some pages addressed (see last section for removing hidden section) -# 2011-12-09 [carlo@...] Removed "related articles" block -# 2011-08-23 [carlo@...] changed single page link to use print version: page works better, less ambiguity. Related cleanups and simplifications. -# 2011-08-20 [carlo@...] 
added author, fixed date - - -single_page_link: //a[@title='Druckversion'] -tidy: no - -title: //title -date: substring-before( //li[@class="date"], " " ) -author: //li[@class="author"]/a/text() | //li[@class="author first"]/a/text() -author: substring-after(//li[@class='source first '], 'Quelle: ') - -strip_id_or_class: articleheader -strip: //div[@id="comments"] | //div[@class="pagination block"] | //p[@class="ressortbacklink"] | //div[@id="relatedArticles"] | // div[@class="inline portrait"] - -#Removes author and date from the start -strip: //ul[@class="tools"] -#Removes copyright statement - often disturb as first line of the news -strip: //p[@class="copyright"] -strip: //div[@class="copyright"] -#Removes pagination links at the end -strip: //div[@class="pagination"] - -# Fix picture captions -wrap_in(small): //p[@class="caption"]/text() - -# Fix sub-headlines -wrap_in(h2): //p/strong -dissolve: //h2/strong - -#Sometimes things are embedded in the print version that are not displayed on the web, but will be displayed in the mobilized versions and lead even to problems. These sections are removed here. -strip_id_or_class:"informatives" -strip_id_or_class:"bottom" -strip_id_or_class:"teasermosaic" -strip_id_or_class:"comments" -strip_id_or_class:"articlefooter af" -strip_id_or_class:"relateds" -strip_id_or_class:"pagination" - -footnotes: no -test_url: http://www.zeit.de/kultur/film/2012-12/Kurzfilmtag \ No newline at end of file +# 2013.10.30 [rezor92] fixed single_page_link +# 2012-12-23 [carlo@...] fixed half-assed headlines in articles, removed inline author profiles, adjusted picture captions +# 2012-03-17 [dkless@...] Cut metadata parts in the beginning and the ends of the content block; copyright entries for pictures removed; Author fixed, not sure if old entries still valid (I left them); Weird problems with some pages addressed (see last section for removing hidden section) +# 2011-12-09 [carlo@...] 
Removed "related articles" block +# 2011-08-23 [carlo@...] changed single page link to use print version: page works better, less ambiguity. Related cleanups and simplifications. +# 2011-08-20 [carlo@...] added author, fixed date + + +single_page_link: //a[@title='Auf einer Seite'] +tidy: no + +title: //title +date: substring-before( //li[@class="date"], " " ) +author: //li[@class="author"]/a/text() | //li[@class="author first"]/a/text() +author: substring-after(//li[@class='source first '], 'Quelle: ') + +strip_id_or_class: articleheader +strip: //div[@id="comments"] | //div[@class="pagination block"] | //p[@class="ressortbacklink"] | //div[@id="relatedArticles"] | // div[@class="inline portrait"] + +#Removes author and date from the start +strip: //ul[@class="tools"] +#Removes copyright statement - often disturb as first line of the news +strip: //p[@class="copyright"] +strip: //div[@class="copyright"] +#Removes pagination links at the end +strip: //div[@class="pagination"] + +# Fix picture captions +wrap_in(small): //p[@class="caption"]/text() + +# Fix sub-headlines +wrap_in(h2): //p/strong +dissolve: //h2/strong + +#Sometimes things are embedded in the print version that are not displayed on the web, but will be displayed in the mobilized versions and lead even to problems. These sections are removed here. 
+strip_id_or_class:"informatives" +strip_id_or_class:"bottom" +strip_id_or_class:"teasermosaic" +strip_id_or_class:"comments" +strip_id_or_class:"articlefooter af" +strip_id_or_class:"relateds" +strip_id_or_class:"pagination" + +footnotes: no +test_url: http://www.zeit.de/kultur/film/2012-12/Kurzfilmtag diff --git a/inc/3rdparty/site_config/standard/zerohedge.com.txt b/inc/3rdparty/site_config/standard/zerohedge.com.txt new file mode 100755 index 00000000..7e76aee5 --- /dev/null +++ b/inc/3rdparty/site_config/standard/zerohedge.com.txt @@ -0,0 +1,10 @@ +author: //span[@class='submitted']/a +strip: //div[@class='clear-block clr'] +strip: //div[@class='picture'] +strip: //span[@class='submitted'] +strip: //div[@class='breadcrumb'] +strip: //div[@class='fivestar-static-form-item'] +strip: //div[@class='js-links'] +strip: //div[@class='links clear-block clear'] +strip: //div[@class='block block-block'] +test_url: http://www.zerohedge.com/news/bernankes-columbus-voyage-end-monetary-policy-world \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/zerokspot.com.txt b/inc/3rdparty/site_config/standard/zerokspot.com.txt old mode 100644 new mode 100755 index ea9132aa..afa964db --- a/inc/3rdparty/site_config/standard/zerokspot.com.txt +++ b/inc/3rdparty/site_config/standard/zerokspot.com.txt @@ -1,3 +1,3 @@ -title: //h1 +title: //h1 body: //div[@id="primarycontent"] test_url: http://zerokspot.com/weblog/2011/06/26/europython2011/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/zhihu.com.txt b/inc/3rdparty/site_config/standard/zhihu.com.txt new file mode 100755 index 00000000..3c9d8c1a --- /dev/null +++ b/inc/3rdparty/site_config/standard/zhihu.com.txt @@ -0,0 +1,19 @@ +# This filter is tested on: +# http://www.zhihu.com/question/19587406 +# http://www.zhihu.com/question/20649035 +# http://www.zhihu.com/question/20637942 + +author: //h3[@class='zm-item-answer-author-wrap'] +title://h2[@class='zm-item-title'] 
+date://a[@class='answer-date-link meta-item'] +convert_double_br_tags: yes + +wrap_in(blockquote)://div[@class='zm-editable-content'] +wrap_in(blockquote)://sup/text() +dissolve://sup + +strip://div[@class='zh-answers-title'] +strip://div[@class='zm-item-vote-info '] +strip://div[@class='zm-item-answer-author-info'] +strip://div[@class='zu-blue-info-board zg-r3px'] +test_url: http://www.zhihu.com/question/20637942 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/zingtrain.com.txt b/inc/3rdparty/site_config/standard/zingtrain.com.txt old mode 100644 new mode 100755 index 2a2f58a8..188d4dd6 --- a/inc/3rdparty/site_config/standard/zingtrain.com.txt +++ b/inc/3rdparty/site_config/standard/zingtrain.com.txt @@ -1,3 +1,3 @@ -title: substring-after(id, 'post')/h2 +title: substring-after(id, 'post')/h2 body://div[@class = 'entry'] test_url: http://www.zingtrain.com/category/ontrack/january-2007/ \ No newline at end of file -- cgit v1.2.3 From 0ce85e0a7fa873c69f1ec159bc188c6a58a2ad21 Mon Sep 17 00:00:00 2001 From: Maryana Rozhankivska <mariroz@mr.lviv.ua> Date: Wed, 23 Jul 2014 14:27:57 +0300 Subject: config for habrahabr.ru to grep articles with comments --- inc/3rdparty/site_config/standard/habrahabr.ru.txt | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100755 inc/3rdparty/site_config/standard/habrahabr.ru.txt (limited to 'inc/3rdparty/site_config') diff --git a/inc/3rdparty/site_config/standard/habrahabr.ru.txt b/inc/3rdparty/site_config/standard/habrahabr.ru.txt new file mode 100755 index 00000000..67538359 --- /dev/null +++ b/inc/3rdparty/site_config/standard/habrahabr.ru.txt @@ -0,0 +1,21 @@ +title: //span[@class="post_title"] +author: //div[@class="author"] +date: //div[@class="published"] + +body: //div[@class='content html_format'] | //div[@id='comments'] + +strip: //a[@class="link_to_comment"] +strip: //div[@class="show_tree"] +strip: //a[@class="to_parent"] + + +replace_string(class="reply_comments"): 
style="padding-left: 20px" +replace_string(class="voting "): style="float: right" +replace_string(src="//habrastorage.org/getpro/habr/avatars/): style="width:24px; height:24px;" class="123" src="//habrastorage.org/getpro/habr/avatars/ +replace_string(class="info "): style="padding-top:5px;font-size:0.85em;line-height:24px;" + + +prune: no +tidy: no + +test_url: http://habrahabr.ru/post/229883/ \ No newline at end of file -- cgit v1.2.3