--- /dev/null
+# Author: cirnod@gmail.com
+
+tidy: no
+prune: no
+
+title: //h1
+date: /html/body/div[3]/div[1]/div[6]/div/div[1]/div[2]/div[1]/div/p
+body: //div[@class='published clearfix'] | //div[@class='story_titles']/h3 | //div[@class='story_text']
+
+# General Cleanup
+strip_id_or_class: info_panel
+strip_id_or_class: info_poll
+strip_id_or_class: teaser
+strip_id_or_class: panelbox
+strip_id_or_class: polls
+strip_id_or_class: warning
+strip_id_or_class: vplaceholder
+
+# Only the visible "Print" link text is removed here; stripping the whole element does not work
+replace_string(Print</a>): </a>
+
+# Try yourself
+test_url: http://www.20min.ch/wissen/news/story/31588952
+test_url: http://www.20min.ch/digital/dossier/apple/story/So-einfach-laesst-sich-das-iPhone-6-Plus-verbiegen-24651169
--- /dev/null
+title: //div[@id='DivTitle']
+body: //div[@id='divImages' or @id='Divkhabarcontent']
+author: //div[@id='DivAuthor']
+
+prune: no
+
+test_url: http://24.ae/article.aspx?ArticleId=123304
+test_url: http://24.ae/rss.aspx?pageId=30
--- /dev/null
+# Generated by FiveFilters.org's web-based selection tool
+# Place this file inside your site_config/custom/ folder
+# Source: http://siteconfig.fivefilters.org/grab.php?url=http%3A%2F%2F9gag.com%2Fgag%2FaDwQnO7
+
+body: //div[contains(concat(' ',normalize-space(@class),' '),' badge-post-container ')]
+test_url: http://9gag.com/gag/aDwQnO7
--- /dev/null
+#bypass cookie check
+single_page_link: //a[contains(@href, '/acceptCookieCheck.do?url=')]
+
+test_url: http://www.ad.nl/ad/nl/10444/Offside/article/detail/4043834/2015/05/31/Dani-Alves-voetbalt-met-drol-op-zijn-hoofd.dhtml
+test_contains: De nieuwe coupe van Alves
+
+test_url: http://www.ad.nl/digitaal/rss.xml
\ No newline at end of file
body: //div[@id='main-column']//div[@class='content']
+strip_id_or_class: social-buttons
+
prune: no
test_url: http://www.albayan.ae/across-the-uae/education/2013-08-29-1.1949645
body: //div[@class='post_content']
date: //div[@class='date_day'] | //div[@class='date_month']
+strip_id_or_class: author-box
+author: //h2[@class='author-box-heading']/a
test_url: http://www.androidpolice.com/2014/03/30/music-boss-for-pebble-can-now-control-playback-and-volume-on-chromecast-content-from-your-smartwatch/
-
--- /dev/null
+parser: html5php
+date: //article/p[contains(@class, 'single-date')]
+author: //article/p[contains(@class, 'byline')]
+
+test_url: http://www.artofmanliness.com/2013/01/31/relationship-red-flags/
+test_contains: It seems that once we get close to a person
\ No newline at end of file
--- /dev/null
+title://div[@class="sl-layout-post"]/h1
+body: //div[@id='content_post']
+strip: //div[contains(@class, "post-sidebar")]
+strip: //div[@id='related-links']
+strip: //img[@class='size_xlarge']
+author://div[@class="byline"]/a
+date://div[@class="byline"]/span[@class="date"]
+prune: no
+tidy: no
+
+
+test_url: http://www.businessinsider.com/microsoft-just-put-one-of-its-hardcore-technical-geniuses-on-xbox-2012-1
--- /dev/null
+strip: //a[contains(text(), "RELATED:")]
+author: //div[@class="info"]//span[@class="association printer-source"]
+date: //div[@class="info"]//span[@class="stamp printer-date"]
+
strip: //div[contains(@class, 'share-tools')]
strip: //div[@id='also-related-links']
+find_string: http://ichef.bbci.co.uk/news/200/
+replace_string: http://ichef.bbci.co.uk/news/624/
+
+find_string: http://ichef.bbci.co.uk/news/304/
+replace_string: http://ichef.bbci.co.uk/news/624/
+
strip_id_or_class: share-help
strip_id_or_class: comments_module
strip_id_or_class: share-help
strip_id_or_class: comments_module
+find_string: http://ichef.bbci.co.uk/news/200/
+replace_string: http://ichef.bbci.co.uk/news/624/
+
+find_string: http://ichef.bbci.co.uk/news/304/
+replace_string: http://ichef.bbci.co.uk/news/624/
+
replace_string(<noscript>): <div>
replace_string(</noscript>): </div>
author: //a[ contains(@href, '/people') ]
-body: //article[contains(concat(' ',normalize-space(@class),' '),' post ')]
-
-strip_id_or_class: section learn-more
-strip_id_or_class: section comments
-strip_id_or_class: disqus_thread
+body: //div[ @class='post' ]
# Date is impossible to retrieve since they use those stupid "fuzzy" dates, inserted through javascript, at posterous.
-test_url: http://blog.cloudflare.com/understanding-analytics-when-is-a-page-view-n
+test_url: http://blog.cloudflare.com/understanding-analytics-when-is-a-page-view-n
\ No newline at end of file
title: //h3[@class="post-name"]
author: //span[@class="user-name"]
-date: //div[@class="post-date"]
+date: //div[@class="post-date"]/span[@class="value"]
body: //div[@class="post-content user-defined-markup"]
footnotes: no
-test_url: http://blogs.msdn.com/b/b8/archive/2011/10/04/designing-the-start-screen.aspx
\ No newline at end of file
+test_url: http://blogs.msdn.com/b/b8/archive/2011/10/04/designing-the-start-screen.aspx
-# set body
-body: //div[@id='theContent']
-# set title
-title: //div[@id='theContent']/h3
-strip: //div[@id='theContent']/h3
-test_url: http://www.brandeins.de/archiv/magazin/gegessen-wird-immer/artikel/hunger.html
\ No newline at end of file
+body: //div[@class="articleTeaser"] | //section[@class="contentSection"]
+
+strip: //section[@class="greenBox italic"]
+
+author: //div[@class="articleAuthor"]
+# no publish date on page (the articles are from a monthly periodical)
+
+test_url: http://www.brandeins.de/archiv/2015/fuehrung/ministry-group-mach-doch-mal-ne-ansage/
--- /dev/null
+author: //span[@itemprop="author"]
+date: //span[@itemprop="datePublished"]
--- /dev/null
+# 2011-10-25 - carlo@... - Initial setup.
+
+single_page_link: //li[@class='print']/a/@href
+
+title: //h1
+author: //meta[@name="byline"]/@content
+date: //meta[@name="date"]/@content
+
+strip: //span[@class="see"]
+strip: //div[@class="byline"]
+strip: //div[@id="date2"]
+strip: //h1
+strip: //div[@class='post-rail-ad']
+strip: //div[@class='post-rail-content']
+strip: //aside[@class='post-rail']
+
+test_url: http://www.time.com/time/specials/packages/article/0,28804,2094921_2094923_2094924,00.html
--- /dev/null
+
+body: //div[@id='content']//div[@id='mainBlogContentWrapper']//*[self::p or self::img or self::ul] | //div[@class='mainArticleIntro']
+
+date: //span[@class='date']
--- /dev/null
+title: //meta[@property="og:title"]/@content
+body: //div[contains(@class, 'postBody')]
+date: //div[@id='nameAndTime']/time
+author: //div[@id='nameAndTime']/span[@class='author']
+
+strip_id_or_class: image-credit
+strip_id_or_class: noAutolink
+strip_id_or_class: related
+strip_id_or_class: cite
+
+prune: no
+tidy: no
+
+# early end
+replace_string(Download today's podcast</a>): Download today's podcast</a></div></body></html>
+
+test_url: http://www.cnet.com/8301-13952_1-57367607-81/the-404-981-where-the-world-is-a-vampire-podcast/
author://div[@id="news-meta"]/a
-body://*[@id="main"]/div[1]
+body: //div[contains(@class, 'text-content')]
strip://*[@id="main"]/div[2]
strip://*[@id="main"]/div[3]
#figures are not displayed in instapaper...
strip://figure | //figcaption
-test_url: http://www.computerbase.de/news/2012-06/verbraucherzentrale-mahnt-blizzard-fuer-diablo-3-ab/
\ No newline at end of file
+test_url: http://www.computerbase.de/news/2012-06/verbraucherzentrale-mahnt-blizzard-fuer-diablo-3-ab/
title: //meta[@name='headline']/@content
-body://div[@id="drr-container"]
+date: //meta[@name='date']/@content
+author: //meta[@name='author']/@content
+body: //div[contains(@class, 'article')]
+body://div[@id="article_body"]
strip_id_or_class: banner
strip: //noscript
single_page_link: concat('http://www.computerworld.com/s/article/print/', substring-after(//link[@rel='canonical']/@href, '/s/article/'))
test_url: http://www.computerworld.com/s/article/9224348/Apple_s_new_OS_X_tightens_screws_on_some_malware
-test_url: http://www.computerworld.com/s/article/9227679/Windows_8_Release_Preview_Updated_but_still_uneasy
+test_url: http://www.computerworld.com/s/article/9227679/Windows_8_Release_Preview_Updated_but_still_uneasy
\ No newline at end of file
--- /dev/null
+# Contrepoints.org
+# As of 2015-04, it's a WordPress-powered website.
+
+title: //h1[contains(concat(' ',normalize-space(@class),' '),' page-title ')]//span[contains(concat(' ',normalize-space(@class),' '),' inner-text ')]
+date: //time[contains(concat(' ',normalize-space(@class),' '),' art-date ')]
+author: //h1[contains(concat(' ',normalize-space(@class),' '),' author-name ')]
+body: //article[contains(concat(' ',normalize-space(@class),' '),' plain-art ')]
+
+# no toolbar, meta, etc, but misses excerpt
+# body: //div[contains(concat(' ',normalize-space(@class),' '),' entry ')]
+
+# Thus, we need to strip useless elements from the "plain-art"
+strip: //div[contains(concat(' ',normalize-space(@class),' '),' plain-post-topbar ')]
+strip: //div[contains(concat(' ',normalize-space(@class),' '),' single-type-block ')]
+strip: //header[contains(concat(' ',normalize-space(@class),' '),' entry-header ')]
+
+# And no pruning is needed because we stripped unwanted elements.
+prune: no
+
+test_url: http://www.contrepoints.org/2015/04/25/205709-leconomie-selon-ray-dalio
+test_url: http://www.contrepoints.org/2015/04/25/205734-huile-et-gaz-de-schiste-revolution-durable
\ No newline at end of file
-body: //*[contains(@class,'body')]
+body: //div[contains(@class,'post-body')]
date: //abbr[@class='published']
-test_url: http://www.cooper.com/journal/2012/08/2-weeks-left-to-win-your-way-to-the-woodstock-of-ux-coopers-ux-boot-camp.html/
\ No newline at end of file
+test_url: http://www.cooper.com/journal/2015/6/creating-personas
--- /dev/null
+title: //div[@class='entry-pad']//h2
+body: //div[contains(concat(' ',normalize-space(@class),' '),' entry-pad ')]
+strip: //h1
+strip: //p
+strip: //h2
+strip: //div[@class='clear']
+
+prune: no
+tidy: no
+
+autodetect_on_failure: no
+
+test_url: https://www.cwnp.com/wotd.php
+test_url: https://www.cwnp.com/qotd.php
strip_id_or_class: digg-button
strip_id_or_class: article-icon-links-container
strip_id_or_class: clickToEnlarge
+strip_id_or_class: articleIconLinksContainer
+strip_id_or_class: related-carousel
+strip_id_or_class: reader-comments
+strip_id_or_class: most-watched
+strip_id_or_class: most-read
+
+find_string:blkBorder img-share
+replace_string: nothing
+
tidy: no
test_url: http://www.dailymail.co.uk/news/article-1375423/Royal-wedding-Texan-billionaire-Joe-Albritton-invited-Prince-Charles.html
\ No newline at end of file
--- /dev/null
+title: //h1[@class="heading"]
+author: //cite[@class='author']
+date: //li[contains(@class, 'date-and-time')]
+
+
--- /dev/null
+http_header(user-agent): PHP/5.3
\ No newline at end of file
--- /dev/null
+# Author: cirnod@gmail.com
+
+tidy: no
+prune: no
+
+body: //div[@id="article"]/h3 | //*[@id="mainContent"]
+
+# General Cleanup
+#strip_id_or_class: info_panel
+
+
+# Try yourself
+test_url: http://www.derbund.ch/bern/nachrichten/Fossilienforscher-stehen-auf-Heavy-Metal/story/20919522
--- /dev/null
+date: substring-after(//p[@class='post_date'], 'on')
+
-#title: substring(substring-after(//title, ':'), 1, string-length(substring-after(//title, ':')) - 10)
-title: //div[contains(@class, 'SB_Title')]//a
-body: //div[contains(@class, 'STR_Image')]
-body: //*[contains(@class, 'SB_Content')]
+title: //a[@class="post-title"]/text()
+title: //meta[@name="twitter:title"]/@content
+body: //img[@class="img-responsive img-comic"]
author: string('Scott Adams')
-date: //*[contains(@class, 'SB_Detail')]/text()[1]
-
+date: //meta[@property="article:publish_date"]/@content
test_url: http://dilbert.com/blog/entry/death_by_hypnosis_or_not/
test_url: http://dilbert.com/strips/comic/2013-10-22
-test_url: http://feed.dilbert.com/dilbert/daily_strip
\ No newline at end of file
+test_url: http://feed.dilbert.com/dilbert/daily_strip
strip_id_or_class: right
strip_id_or_class: footer
+strip_id_or_class: ad-head
+strip_id_or_class: atc-share-title
+
# Other news
strip: //div[@id="mirrors"]
date: substring(substring-after(//p[@class="published"], 'Publicerad '), 0, 11)
test_url: http://www.dn.se/nyheter/varlden/landade-flygplan-mitt-i-villaomrade
-test_url: http://www.dn.se/m/rss/senaste-nytt
\ No newline at end of file
+test_contains: Ett tekniskt haveri tvingade
+test_url: http://www.dn.se/rss/senaste-nytt
--- /dev/null
+body: //div[contains(@class, 'txtVisu')]
+prune: no
+
+test_url: http://www.economie.gouv.fr/dgccrf/Publications/Vie-pratique/Fiches-pratiques/Assurance
\ No newline at end of file
--- /dev/null
+title: //h1[@class="post-title"]
+body: //section[@class="article-content"]
+author: //div[@class="post-bottom-meta"]/span[@class="post-author"]
+date: //div[@class="post-date"]/time/@datetime
+
+test_url: https://entwickler.de/online/mobile-welt-offline-welt-was-der-offline-first-ansatz-fuer-app-entwickler-heisst-140602.html
+test_url: https://entwickler.de/online/development/plex-docker-joomla-165345.html
--- /dev/null
+body: //img[@id='main-comic']
+author: substring(//small[@class="author-credit-name"], 4)
+
+test_url: http://explosm.net/comics/3954/
body: //div[@id='imagestage']
body: //div[contains(@class, 'userContentWrapper')]
-
+body: //div[@id='m_story_permalink_view' or contains(@data-sigil, 'm-story-view')]
strip_id_or_class: commentable
+strip: //div[contains(@data-sigil, 'm-mentions-expand')]
prune: no
tidy: no
-# single_page_link: replace(substring-after(//noscript//meta[@http-equiv="refresh"]/@content, 'URL='), "&", "&")
+single_page_link: concat("https://m.", substring-after(//link[@rel="alternate" and @media="handheld"]/@href, "//www."))
+if_page_contains: //link[@rel="alternate" and @media="handheld"]
test_url: https://www.facebook.com/permalink.php?story_fbid=10154584776550183&id=294468630182
test_contains: holding an extraordinary session in Brussels this month
-title: //h1
-author: //h5[@class='byline']//a
-date: //h5[@class='date']
-body: //figure[@class='node-poster'] | //div[contains(@class, "node-content")]
-strip_id_or_class: article-top-wrapper
-strip_id_or_class: footer-message
-strip_id_or_class: print-logo
-strip: //cite
-strip://*[@class='timestamp']
-strip://div[@id='page_right']
-strip://section[@id='header_region']
-strip://h1[@class='node-title']
-strip://div[@class='node-submitted']
-strip_id_or_class: skipnav
+author: //div[@class='byline']//a
+date: //meta[@property='article:published_time']/@content
+body: //figure[@class='jumbotron'] | //div[@itemprop='body']
+
+prune: no
+
+#strip_id_or_class: article-top-wrapper
+#strip_id_or_class: footer-message
+#strip_id_or_class: print-logo
+#strip: //cite
+#strip://*[@class='timestamp']
+#strip://div[@id='page_right']
+#strip://section[@id='header_region']
+#strip://h1[@class='node-title']
+#strip://div[@class='node-submitted']
+#strip_id_or_class: skipnav
+
test_url: http://www.fastcompany.com/3000226/link-between-quietness-and-productivity
-test_url: http://www.fastcompany.com/3003586/6-simple-rituals-reach-your-potential-every-day
\ No newline at end of file
+test_contains: Some of you may have tried to reach me this morning
+test_url: http://www.fastcompany.com/3003586/6-simple-rituals-reach-your-potential-every-day
--- /dev/null
+# skip cookie warning
+single_page_link: concat(//form/@action, '?allowcookies=yes')
+
+test_url: http://fok.nl/687116
\ No newline at end of file
# remove some SharePoint webpart label junk
strip: //div[@id="ctl00_PlaceHolderMain_ArticleLeadField_label"]
strip: //div[@id="ctl00_PlaceHolderMain_PublishingPageContentField_label"]
-test_url: http://forsvaret.no/aktuelt/publisert/nyheter/Sider/F5-fly-til-Skedsmo.aspx
\ No newline at end of file
+test_url: https://forsvaret.no/aktuelt/historisk-medaljeutdeling
+test_contains: Samarbeidet med Marinen har vært en sann glede
--- /dev/null
+# Generated by FiveFilters.org's web-based selection tool
+# Place this file inside your site_config/custom/ folder
+# Source: http://siteconfig.fivefilters.org/grab.php?url=http%3A%2F%2Fwww.france24.com%2Fen%2F20150427-togo-gnassingbe-poised-extend-power-election%2F
+
+body: //article[contains(concat(' ',normalize-space(@class),' '),' article-long ')]//div[contains(concat(' ',normalize-space(@class),' '),' bd ')]
+title: //h1[@class="title"]
+author://p[@class="author"]
+date://p[@class="modification"]
+
+find_string: <p class="modification">Latest update :
+replace_string: <p class="modification">
+
+
+test_url: http://www.france24.com/en/20150427-togo-gnassingbe-poised-extend-power-election/
\ No newline at end of file
--- /dev/null
+title: //div[@class='leftCol']/h1
+
+prune: no
title: //meta[@property="og:title"]/@content
body: //div[@id='GBTVPlayer'] | //div[contains(@class, 'col490')]
+author: //span[contains(concat(' ',normalize-space(@class),' '),' author ')]
+date: //header[@id='gbArticleHeader']//div//time/@datetime
prune: no
strip: //div[@id='gbNewsTextContent']/following-sibling::*
test_url: http://www.gameblog.fr/news/26330-les-sims-3-showtime-s-annonce-en-video
-test_url: http://www.gameblog.fr/news/26306-mise-a-jour-du-dashboard-de-la-xbox-360-disponible
\ No newline at end of file
+test_url: http://www.gameblog.fr/news/26306-mise-a-jour-du-dashboard-de-la-xbox-360-disponible
# Remove 'content is restricted'
strip: //div[@id='agegate_IDHERE']
+http_header(user-agent): PHP/5.3
+
test_url: http://gawker.com/#!5782070/russian-bomb-squad-successfully-defuses-sex-toy
\ No newline at end of file
--- /dev/null
+http_header(user-agent): PHP/5.3
\ No newline at end of file
+body: //div[@class="highlight"]/pre
-title: //div[contains(@class,'gist-description')]
-body: //div[contains(@class,'blob-wrapper')]
-test_url: https://gist.github.com/staltz/868e7e9bc2a7b8c1f754
+prune: no
+tidy: no
+
+test_url: https://gist.github.com/1258908
\ No newline at end of file
--- /dev/null
+body: //div[@id='content_post' or @class="post-body" or contains(@class, 'illustration top')]
+author: (//cite//span[@class="plus-icon"])[1]
+date: //span[@class="date"]
+date: //time
+
+prune: no
+
+test_url: http://gizmodo.com/5880147/kuhn-rikon-improves-their-spice-grinder-with-grade-school-science
prune: no
+http_header(user-agent): PHP/5.3
+
test_url: http://gizmodo.com/5880147/kuhn-rikon-improves-their-spice-grinder-with-grade-school-science
test_url: http://gizmodo.com/what-van-goghs-paintings-would-look-like-if-they-came-874035680
test_url: http://gizmodo.com/vip.xml
\ No newline at end of file
--- /dev/null
+body: //div[contains(@class, 'content-body')]
+
+prune: no
+
+test_url: http://globalgrind.com/2015/04/26/listen-jeremih-featuring-chance-the-rapper-the-social-experiment-planes-remix-new-music/
+test_contains: The Chicago rapper has made a name for himself
\ No newline at end of file
--- /dev/null
+body: //a[@class="photo"]/img[@class="strip"]
+author: //meta[@name="author"]/@content
+date: //meta[@property="gocomics:publish_date"]/@content
+
+test_url: http://www.gocomics.com/garfield/2015/06/13
--- /dev/null
+title: //div[@class="title"]/h3
+date: substring-after(//div[@class="meta"], ": ")
--- /dev/null
+#body: //div[@class='story-body']
+body: //div[contains(@class, 'story-body')]
+title: //div[@class='story-headline']//h1
+author: //cite[contains(@class, 'author')]
+date: //span[@class='datestamp']
+
+strip_id_or_class: story-info
+strip: //div[contains(@class, 'story-promo')]
+strip: //div[contains(@class, 'story-related')]
+
+prune: no
+tidy: no
--- /dev/null
+# Generated by FiveFilters.org's web-based selection tool
+# Place this file inside your site_config/custom/ folder
+# Source: http://siteconfig.fivefilters.org/grab.php?url=http%3A%2F%2Fwww.hiiraan.com%2Fnews%2F2014%2FDec%2Fwararka_maanta20-89428.htm
+
+body: //div[contains(concat(' ',normalize-space(@class),' '),' single ')]//div[contains(concat(' ',normalize-space(@class),' '),' description ')]
+
+prune: no
+
+test_url: http://www.hiiraan.com/news/2014/Dec/wararka_maanta20-89428.htm
+test_url: http://rss.hiiraan.com/wararka_maanta_rss.xml
\ No newline at end of file
title: //meta[@property='og:title']/@content
-body: //div[contains(@class, 'articleContent')]
+body: //img[contains(@class, 'FirstImage')] | //div[contains(@class, 'articleContent')]
date: //meta[@property='article:published_time']/@content
author: //div[@id='main']//div[@class='byline']//span[@class='authorName']
strip_id_or_class: RelatedArtTag
+strip: //h5[contains(., 'READ MORE:')]
+strip: //h5[contains(., 'Read more:')]
+
tidy: no
-test_url: http://www.independent.co.uk/news/world/middle-east/syria-could-face-human-rights-probe-2274326.html
\ No newline at end of file
+test_url: http://www.independent.co.uk/news/world/middle-east/syria-could-face-human-rights-probe-2274326.html
+test_url: http://www.independent.co.uk/voices/comment/robert-fisk-on-the-cia-torture-report-once-again-language-is-distorted-in-order-to-hide-us-state-wrongdoing-9924501.html
+test_contains: Thank God for Noam Chomsky.
+
+test_url: http://www.independent.co.uk/news/uk/rss
\ No newline at end of file
--- /dev/null
+http_header(user-agent): PHP/5.3
\ No newline at end of file
--- /dev/null
+title: //div[@class="content_title"]//h2
+author: substring-after(//div[@class="byline"], "By ")
+date: //div[@class="publish_date"]
+strip: //div[@class="read_image_box"]
--- /dev/null
+title: //h1[@class='article-header']
+body: //div[@class='body-content']
+author: //span[@class='author-byline']/a[contains(@id, 'Author')]
+
+strip: //span[contains(@id, 'Article_SourceLabel')]
author: //span[@class='plus-icon']
+
+http_header(user-agent): PHP/5.3
+
test_url: http://jalopnik.com/5892124/1955-porsche-550-spyder-sells-for-record-3685-million/
\ No newline at end of file
--- /dev/null
+http_header(user-agent): PHP/5.3
\ No newline at end of file
author: //span[@class="plus-icon"]
+
+http_header(user-agent): PHP/5.3
+
test_url: http://kotaku.com/5920211/save-the-furries-on-your-wii-in-this-weeks-nintendo-download
\ No newline at end of file
author: //span[@class='sign']//a[@class='journaliste']
author: //meta[@name='author']/@content
body: //*[@id='article']/div[@class='photo'] | //*[@id='article']/h2 | //*[@id='article']/div[@class='texte']
-date: //time[@pubdate]/@datetime
+date: //li[contains(concat(' ',normalize-space(@class),' '),' fig-date-pub ')]//time
prune: no
test_url: http://www.lefigaro.fr/environnement/2011/11/10/01029-20111110ARTFIG00801-la-chine-confrontee-a-un-immense-defi-ecologique.php
-test_url: http://www.lefigaro.fr/conjoncture/2012/11/20/20002-20121120ARTFIG00609-l-usager-devrait-payer-plus-pour-financer-les-transports.php
\ No newline at end of file
+test_url: http://www.lefigaro.fr/conjoncture/2012/11/20/20002-20121120ARTFIG00609-l-usager-devrait-payer-plus-pour-financer-les-transports.php
+test_url: http://www.lefigaro.fr/social/2015/03/10/09010-20150310ARTFIG00312-encore-une-annee-noire-pour-l-emploi-salarie.php
# Remove "track" image from article body
strip: //img[@alt="track"]
+
+# Remove hidden URLs
+strip: //a[@x-inset="hidden"]
+
+http_header(user-agent): PHP/5.3
+
test_url: http://lifehacker.com/5925801/how-can-i-turn-vague-goals-into-actionable-to+dos
test_url: http://lifehacker.com/5941600/hack-an-old-computer-mouse-into-a-wireless-bluetooth-mouse
-test_url: http://lifehacker.com/what-happens-to-the-brain-when-you-meditate-and-how-it-1202533314
\ No newline at end of file
+test_url: http://lifehacker.com/what-happens-to-the-brain-when-you-meditate-and-how-it-1202533314
--- /dev/null
+body: //div[@class='content-area']
+next_page_link: //a[@title='Go to next page']
+author: //a[@title='View user profile.']
+strip_id_or_class: comments
+
+test_url: http://www.linuxjournal.com/content/be-mechanicwith-android-and-linux
date: //p[@class='date']/strong
author: substring-after(//div[@class="story-inner"]/div[@class="byline"]//span[@class='name'], 'By')
+find_string: http://ichef.bbci.co.uk/news/200/
+replace_string: http://ichef.bbci.co.uk/news/624/
+
+find_string: http://ichef.bbci.co.uk/news/304/
+replace_string: http://ichef.bbci.co.uk/news/624/
+
strip: //div[@class="story-inner"]/div[@class="byline"]
test_url: http://m.bbc.co.uk/news/science-environment-19144464
\ No newline at end of file
--- /dev/null
+body: //div[@id='m_story_permalink_view' or contains(@data-sigil, 'm-story-view')]
+
+title: //div[@id='m_story_permalink_view' or contains(@data-sigil, 'm-story-view')]//h3
+
+strip_id_or_class: commentable
+strip: //*[contains(@data-sigil, 'm-mentions-expand') or contains(@data-sigil, 'story-popup-context') or contains(@data-sigil, 'share') or contains(@data-sigil, 'translate')]
+
+prune: no
+tidy: no
+
+test_url: https://www.facebook.com/permalink.php?story_fbid=10154584776550183&id=294468630182
+test_contains: holding an extraordinary session in Brussels this month
--- /dev/null
+strip: //div[@class='wptl btm']
+body: //div[@id='article']//h2 | //div[@id='body']
+
+test_url: http://m.theregister.co.uk/2015/07/06/geeks_guide_spaceguard_center/
--- /dev/null
+strip: //h3[@class="related-posts"]
-body: //div[contains(@class, 'postContent-inner')]
+body: //div[contains(@class, 'postArticle-content')]
strip_id_or_class: supplementalPostContent
prune: no
tidy: no
-test_url: http://www.menshealth.com.sg/fitness/mh-picks-under-armour-clutchfit-nitro-mid-cleats
-test_contains: These cleats are made for one thing
-
-test_url: http://www.menshealth.com.sg/fitness/top-10-fat-burning-bodyweight-moves-you-can-do-10-minutes
-test_contains: let this workout fool you
-
-test_url: http://www.menshealth.com.sg/fitness/feed
\ No newline at end of file
+# broken feed?
+test_url: http://www.menshealth.com.sg/fitness/feed
--- /dev/null
+body: //div[@class='section']
+strip_id_or_class: mediumtxt
+strip: //strong[contains
title: //h1[contains(@class, 'headline')]
body: //article[contains(@class, 'full-art')]
+date: //meta[@name="pdate"]/@content
+author: //meta[@name="byl"]/@content
+
strip_id_or_class: image-credit
test_url: http://mobile.nytimes.com/2014/06/19/opinion/gail-collins-romney-and-the-2016-contenders-huddle.html
\ No newline at end of file
--- /dev/null
+date: //span[@class="publishdate"]//time
+author: //span[@class="byline"]
--- /dev/null
+body: //div[contains(concat(' ',normalize-space(@class),' '),' entry ') or contains(@class, 'single-post-thumb')]
+test_url: http://www.nbnnews.com.au/2015/03/24/lismore-man-will-attempt-to-run-around-australia/
+test_url: http://www.nbnnews.com.au/category/nthn-rivers-sport/feed/
--- /dev/null
+body: //div[@class='story-body']
+prune: no
+tidy: no
--- /dev/null
+strip: //span[@style="color: #cf1206;"]
--- /dev/null
+strip: //a[@class="contact"]
+strip: //div[@class="article-media video-item"]
+date: //div[@class='display-date']
--- /dev/null
+#bypass cookie check
+single_page_link: //a[contains(@href, '/acceptCookieCheck.do?url=')]
+
+test_url: http://www.parool.nl/parool/nl/4/AMSTERDAM/article/detail/4042734/2015/05/29/MRSA-bacterie-niet-verder-verspreid-in-Bijlmerbajes.dhtml
+test_contains: De twee gevangenen die
+
+test_url: http://www.parool.nl/amsterdam/rss.xml
\ No newline at end of file
--- /dev/null
+#body: //div[@class='story-body']
+body: //div[contains(@class, 'story-body')]
+title: //div[@class='story-headline']//h1
+author: //cite[contains(@class, 'author')]
+date: //span[@class='datestamp']
+
+strip_id_or_class: story-info
+strip: //div[contains(@class, 'story-promo')]
+strip: //div[contains(@class, 'story-related')]
+
+prune: no
+tidy: no
--- /dev/null
+strip_id_or_class: author-bio-box
--- /dev/null
+body: //div[contains(concat(' ',normalize-space(@class),' '),' story-text ')]
+
+strip_id_or_class: news-bodycopy
+
+parser: html5php
+tidy: no
+
+test_url: http://www.presseportal.de/pm/103258/2930232/felix-neureuther-vor-der-ski-wm-ich-denke-von-rennen-zu-rennen
+test_url: http://www.presseportal.de/pm/66749/2933779/koelner-stadt-anzeiger-bahnmitarbeiter-werden-in-nrw-immer-haeufiger-angegriffen-zahl-der/rss
+test_contains: kleineren Bahnhöfen installieren und erhofft
+test_url: http://www.presseportal.de/rss/presseportal.rss2
tidy: no
prune: no
-body: //div[contains(@class, 'main_col')]
-title: //h1
+body: //div[contains(concat(' ',normalize-space(@class),' '),' Answer ')] | //div[contains(concat(' ',normalize-space(@class),' '),' header ')] | //div[contains(concat(' ',normalize-space(@class),' '),' AnswerWikiArea ')] | //hr
+#body: //div[contains(@class, 'main_col')]
+strip_id_or_class: AnswerFooter
+strip_id_or_class: ActionBar
strip_id_or_class: hidden
strip_id_or_class: item_action_bar
strip_id_or_class: answer_voters
strip_id_or_class: include_details
strip_id_or_class: sig_edit
strip_id_or_class: profile_photo_img
+strip_id_or_class: question_text_icons
-test_url: http://www.quora.com/What-everyday-habit-do-you-wish-you-had-developed-earlier-in-life
\ No newline at end of file
+# insert hr between answers
+find_string: <div class="Answer"
+replace_string: <hr /><div class="Answer"
+
+test_url: http://www.quora.com/What-everyday-habit-do-you-wish-you-had-developed-earlier-in-life
+test_contains: Please provide a specific practical/measurable action-based everyday
+test_contains: Exercise every day
+
+test_url: http://www.quora.com/What-is-the-greatest-illusion-in-life
+test_contains: What is the greatest illusion in life?
# this doesn't work for some reason...?
date: //p[@class="tagline"]//@datetime
-#body: (//div[contains(@class, 'noncollapsed')]//div[contains(@class, 'usertext-body')])[1]
-
-body: //div[contains(concat(' ',normalize-space(@class),' '),' usertext-body ') and (contains(concat(' ',normalize-space(@class),' '),' may-blank-within ')) and (contains(concat(' ',normalize-space(@class),' '),' md-container '))]//div[contains(concat(' ',normalize-space(@class),' '),' md ')]
+body: (//div[contains(@class, 'noncollapsed')]//div[contains(@class, 'usertext-body')])[1]
strip_id_or_class: tagline
strip_id_or_class: unvotable-message
test_url: http://www.reddit.com/r/truegaming/comments/wfe7r/i_wrote_about_the_problems_i_honestly_feel_that/
test_url: http://www.reddit.com/r/worldnews/comments/1as37r/twelve_north_korean_soldiers_attempting_to_defect/
-test_url: http://www.reddit.com/r/WritingPrompts/comments/2786lw/wp_in_a_world_where_puns_are_illegal_one_man/chybk8e
+test_url: http://www.reddit.com/r/WritingPrompts/comments/2786lw/wp_in_a_world_where_puns_are_illegal_one_man/chybk8e
\ No newline at end of file
title: //h2
strip: //div[ contains(@class, 'respond') ] | //h2 | //h1
+strip_id_or_class: social
+strip_id_or_class: dd_post_share
date: substring-after(//p[@class='info'], ' on ')
author: //p[@class='info']//a
-test_url: http://www.rockpapershotgun.com/2010/07/29/rps-half-verdict-starcraft-2/
\ No newline at end of file
+test_url: http://www.rockpapershotgun.com/2010/07/29/rps-half-verdict-starcraft-2/
--- /dev/null
+body: //div[contains(@class, 'section-content-left')]
+
+strip_id_or_class: related
+strip_id_or_class: nocontent
+strip_id_or_class: comment
+strip_id_or_class: widget
+strip_id_or_class: respond
+strip: //h3[.='Comments']
+strip: //p[.='comments']
+
+test_url: http://saadaalnews.net/?p=42624
--- /dev/null
+body: //div[@id='content']
+title: //h1[@class='cN-headingPage']
+author: //h3[@class='authorName']
+date: //dd[@class='updated dtstamp']
+
+strip: //ul[@class='social sponsored cfix']
+strip: //div[contains(@class, 'hiddenVisually')]
+strip: //dd[@class='updated dtstamp']
+strip: //h3[@class='authorName']
+strip: //ul[@class='social cfix']
+strip: //div[contains(@id, 'adspot')]
+
+strip: //div[contains(@class, 'overlayPlayCountdown')]
+strip: //div[@class='fdVideoWof']//span[@class='gone']
--- /dev/null
+body: //div[@id='content']
+title: //h1[@class='cN-headingPage']
+author: //h3[@class='authorName']
+date: //dd[@class='updated dtstamp']
+
+strip: //ul[@class='social sponsored cfix']
+strip: //div[contains(@class, 'hiddenVisually')]
+strip: //dd[@class='updated dtstamp']
+strip: //h3[@class='authorName']
+strip: //ul[@class='social cfix']
+strip: //div[contains(@id, 'adspot')]
+
+test_url: http://smh.drive.com.au/roads-and-traffic/driver-distraction-responsible-for-more-car-crashes-than-alcohol-20130503-2iyg0.html
# full content
single_page_link://td/li[@class = 'article-singlepage']/a
+# continue link
+single_page_link: //a[@id='continue-btn']
+
# caption clean up
wrap_in(i)://span[@class='articleImageCaptionwide']
move_into (//span[@class='articleImageCaptionwide'])://div[@id = 'articleImage']/p
strip://ul[contains(@class, 'cat-breadcrumb')]
strip://div [@class= 'viewMorePhotos']
-test_url: http://www.smithsonianmag.com/history-archaeology/The-Goddess-Goes-Home.html
\ No newline at end of file
+test_url: http://www.smithsonianmag.com/history-archaeology/The-Goddess-Goes-Home.html
--- /dev/null
+single_page_link: //meta[@property="og:url"]/@content
+
+test_url: http://snip.ly/qa1R
\ No newline at end of file
--- /dev/null
+strip_id_or_class: sharing
+
+test_url: http://soundcity.tv/feed/
# - Fixed single_page_link
# - Included intro text in single page view
# - Added body in default view
+# stesie@
+# - removed copyright box
+# - removed "print more" box
# set body
tidy: no
body: //div[@id="spArticleContent"]
# body in default view
body: //div[@id="spArticleSection"]
+body: //div[contains(@class, 'article-section')] | //div[@id='js-article-top-wide-asset'] | //p[contains(@class, 'article-intro')] | //div[contains(@class, 'js-module-box-image')]
# body in "Fotostrecke"
body: //div[@id="spBigaContent"]
# set date in "Fotostrecke"
date: //div[@id="spBigaDatum"]
+# title in default view
+title: //h2[contains(@class, 'article-title')]
#set title in single page view
title: //div[@id='spArticleContent']/h2
# strip title
strip: //div[@class='spMInline']
# remove photogalleries and extras
-strip: //div[@class='spPhotoGallery']
+strip: //div[contains(@class, 'spPhotoGallery')]
strip: //div[@class='spPhotoGallery']/following-sibling::br
strip: //div[@class='spAssetAlignleft']
strip: //div[contains(@class,'spAsset')]
strip: //div[contains(@class, 'spBigaNavi')]
strip: //div[@class='spDottedLine']
+strip: //div[@class='asset-box article-print-more']
+strip: //div[@class='article-copyright']
+strip: //span[@class='image-buttons']
+
# Use link to print article for single page view
single_page_link: //a[contains(@href, '-druck')]
+if_page_contains: //div[contains(@class, 'multi-pager-control')]
+
+# Clean up title in print view
+find_string: <title>Druckversion -
+replace_string: <title>
# use next link in "Fotostrecke"
next_page_link: //a[@class='spBigaControlForw']
-test_url: http://www.spiegel.de/politik/deutschland/0,1518,787602,00.html
\ No newline at end of file
+test_url: http://www.spiegel.de/politik/deutschland/0,1518,787602,00.html
+
+# regular article
+test_url: http://www.spiegel.de/wirtschaft/soziales/griechenland-was-den-griechischen-buergern-nun-droht-a-1042682.html
+
+# multipage article
+test_url: http://www.spiegel.de/spiegel/a-710880.html
\ No newline at end of file
--- /dev/null
+# Author: cirnod@gmail.com
+
+tidy: no
+prune: no
+
+# Body: the <p> children of #article-content, or the div whose class is
+# exactly "main-article-content clearfix".
+body: //div[@id="article-content"]/p | //div[@class="main-article-content clearfix"]
+
+# General Cleanup
+strip_id_or_class: offscreen
+strip_id_or_class: video-description
+strip_id_or_class: v2 big-video
+strip_id_or_class: module smb freetext
+strip_id_or_class: asset span3
+strip_id_or_class: module smb related-links
+
+# fix image-galleries
+strip_id_or_class: module lightbox-gallery image hide
+# Rewrite the fixed 624x468 dimensions so the width becomes 100%.
+# NOTE(review): height="%" is not a valid length. Presumably this is a
+# deliberate way to neutralise the fixed height - confirm, or consider
+# height="auto" to make the intent explicit.
+replace_string(width="624"): width="100%"
+replace_string(height="468"): height="%"
+
+# Try it yourself
+test_url: http://www.srf.ch/news/wirtschaft/weltbank-korrigiert-konjunktur-erwartungen-nach-unten
+test_url: http://www.srf.ch/news/wirtschaft/ural-statt-alpen-russische-touristen-bleiben-zuhause
+test_url: http://www.srf.ch/news/international/zwei-schweizer-bei-blutigem-attentat-in-mali-verletzt
\ No newline at end of file
# 2012-12-04: complete rewrite after Süddeutsche.de relaunch - carlo@...
-single_page_link: //a[ contains( @href, "/2.220/" ) ]
+single_page_link: //li[@id="article-sidebar-action-print"]/@data-clickurl
body: //article[@id="sitecontent"]/section[@class="body"]
author: //address[@class="author"]
--- /dev/null
+# Body is built from every <p> that descends from an <article> inside a <section>.
+body: //section//article//p
+
+# Remove asides, the 'margin-top-15' divs and the tags paragraph.
+strip: //aside
+strip: //div[@class='margin-top-15']
+strip: //p[@class='tags']
+
+# Author and date are the first and second <li> under the byline's 'piped' list.
+author: //span[@class='byline']//ul[@class='piped']//li[1]
+date: //span[@class='byline']//ul[@class='piped']//li[2]
+
+# Parse the page with html5lib.
+parser: html5lib
# 2012-12-04: complete rewrite after Süddeutsche.de relaunch - carlo@...
-single_page_link: //a[ contains( @href, "/2.220/" ) ]
+single_page_link: //li[@id="article-sidebar-action-print"]/@data-clickurl
body: //article[@id="sitecontent"]/section[@class="body"]
author: //address[@class="author"]
--- /dev/null
+# Author: cirnod@gmail.com
+
+tidy: no
+prune: no
+
+# Body: the <h3> directly under #article, plus whichever element has id 'mainContent'.
+body: //div[@id="article"]/h3 | //*[@id="mainContent"]
+
+# General Cleanup
+# (no active rules; the one below is commented out)
+#strip_id_or_class: info_panel
+
+
+# Try it yourself
+test_url: http://www.tagesanzeiger.ch/zuerich/stadt/Nach-spektakulaerer-Abseilaktion-verhaftet/story/18039895
+test_url: http://www.tagesanzeiger.ch/ausland/naher-osten-und-afrika/IS-zerstoert-auch-das-antike-Hatra/story/19865699
-title://h1[1]
+body: //div[contains(@class, 'sectionArticle')]//div[contains(concat(' ',normalize-space(@class),' '),' box ')]
-author: substring-after(//em, 'Von ')
-author:string('tagesschau.de')
+strip_id_or_class: infokasten
+strip_id_or_class: teaserImTeaser
+strip_id_or_class: Comments
+strip_id_or_class: mediaInfo
+strip: //div[contains(@class, 'mediaCon')]//iframe
-date:substring-after(//div[@class='standDatum'], 'Stand: ')
+prune: no
-body://div[contains(@class, 'article')] | //div[contains(@class, 'centerCol')]
+test_url: http://www.tagesschau.de/ausland/snowden-dateien-entschluesselung-101.html
+test_contains: Snowden hatte zunächst für
-strip://h1[1]
-strip: //div[contains(@class, 'directLinks')]
-strip: //div[contains(@class, 'zitatBox')]
-strip: //div[contains(@class, 'teaserBox metaBlock')]
-strip: //*[contains(@class, 'inv')]
-strip: //span[@class='imgSubline']
-strip: //*[contains(@class, 'topline')][1]
-strip: //div[@id='rightCol'][1]
-strip: //div[@id="footer"][1]
-strip: //div[@class="fPlayer"]
-strip: //div[@id='seitenanfang']
-strip: //div[@class='standDatum']
-strip: //em
-test_url: http://www.tagesschau.de/ausland/wahlkampffrankreich102.html
\ No newline at end of file
+test_url: http://www.tagesschau.de/xml/rss2
date: //div[@class='secthead']
-body: //div[@class='sectbody']
+body: (//div[@class='sectbody'])[1]
title: concat(//div[@class='sectbody']/h4,': ',//div[@class='sectbody']/h1)
author: //span[@class='author']
strip: //p[@class='caption']
+strip_id_or_class: ad_bin
strip_id_or_class: rack
test_url: http://www.taz.de/Protestbewegung-Occupy/!80188/
\ No newline at end of file
title: //div[contains(@class, 'articleHead')]//h1
+body: //div[@itemprop='articleBody']
body: //div[@class='articleText']
body: //div[@class='articleContent']
body: //div[@id='article']
strip: //p[contains(., 'This article available online at:')]/following::*
strip: //div[@class='earthbox']
-single_page_link: //article//a[contains(@class, 'print')]
+single_page_link: //div[contains(@class, 'article-tools')]//a[contains(@class, 'print')]
native_ad_clue: //meta[@property="og:url" and contains(@content, '/sponsored/')]
+#multi-page article
+test_url: http://www.theatlantic.com/magazine/archive/2014/12/the-real-roots-of-midlife-crisis/382235/
+test_contains: The curve tends to evince itself
+
test_url: http://www.theatlantic.com/technology/archive/2011/04/want-to-see-how-crazy-a-bot-run-market-can-be/237773/
test_url: http://www.theatlantic.com/magazine/archive/2007/11/the-autumn-of-the-multitaskers/6342/
-test_url: http://www.theatlantic.com/entertainment/archive/2012/04/30-rock-live-a-funny-reminder-of-why-sitcoms-arent-shot-live-anymore/256447/
\ No newline at end of file
+test_url: http://www.theatlantic.com/entertainment/archive/2012/04/30-rock-live-a-funny-reminder-of-why-sitcoms-arent-shot-live-anymore/256447/
--- /dev/null
+# Selectors for the article text, byline and timestamp.
+body: //div[contains(@class, 'story-body')]
+author: //cite[contains(@class, 'author')]
+date: //span[@class='datestamp']
+
+# Remove the story-info div.
+strip: //div[@class='story-info']
+
title: //meta[@name='og:title']/@content
date: //meta[@name='created']/@content
-body: //div[@class="StoryBody" or @class="storyTeaser"]
+body: //div[contains(@class, "article-body")]
replace_string(<p></p>): <br /><br />
-test_url: http://www.thebostonchannel.com/slideshow/news/28210648/detail.html
\ No newline at end of file
+test_url: http://www.wcvb.com/news/2-teens-arrested-in-fatal-dorchester-shooting-of-16yearold-boy/33564886
title: //div[@id='main-article-info']//h1
-body: //div[@id='article-wrapper']
+body: //figure[contains(@itemprop, "associatedMedia")] | //div[contains(@itemprop, "articleBody")]
date: //li[@class='publication']//time[@pubdate] | //li[@class='publication']//data[@pubdate]
strip: //div[contains(@class, 'email-subscription')]
strip: //div[contains(@class, 'kindleWidget')]
prune: no
tidy: no
+strip_id_or_class: -expand-
+
test_url: http://www.theguardian.com/world/2013/oct/04/nsa-gchq-attack-tor-network-encryption
test_contains: The National Security Agency has made repeated attempts to develop
test_contains: The agency did not directly address those questions, instead providing a statement.
-# Updated 25-Jan-2014
-single_page_link: //a[contains(@href, '/Print/')]
+single_page_link: //link[contains(@href, 'm.theregister')]
+if_page_contains: //div[@id='nextpage']
+strip: //div[@class='wptl btm']
+body: //div[contains(@class,'article_head')]//h2 | //div[@id='body']
-title: //div[@id="article"]/h2
-author: //p[@class="byline"]/a
-date: //p[@class="dateline"]/a[last()]
-
-test_url: http://www.theregister.co.uk/2014/01/24/thirty_years_of_the_apple_macintosh_part_2/
+#multipage
+test_url: http://www.theregister.co.uk/2015/07/06/geeks_guide_spaceguard_center/
+#singlepage
+test_url: http://www.theregister.co.uk/2015/07/06/us_japan_massive_robots_in_the_ring/
strip_id_or_class: social-sharing
strip_id_or_class: m-video-entry__excerpt
strip_id_or_class: hidden
+strip_id_or_class: m-article__follow-bar
+strip_id_or_class: m-article__share-buttons
replace_string(<noscript>): <div>
replace_string(</noscript>): </div>
--- /dev/null
+# Body: the div whose class contains 'tt_news-bodytext'.
+body: //div[contains(@class, 'tt_news-bodytext')]
+
+# Cut the HTML short: the TYPO3SEARCH_end marker comment is replaced with
+# closing div/body/html tags.
+# presumably this makes everything after the marker fall outside the document - confirm
+find_string: <!--TYPO3SEARCH_end-->
+replace_string: </div></body></html>
+
+test_url: http://www.titanic-magazin.de/ich.war.bei.der.waffen.rss
+test_url: http://www.titanic-magazin.de/news/wenig-bekannte-fakten-ueber-2014-6986/
\ No newline at end of file
--- /dev/null
+# Site config for truongtx.me (see test_url below).
+# Booleans use the yes/no spelling used by the other configs in this file.
+prune: no
+tidy: no
+
+body: //div[@class='col-md-9']
+# A <meta> element has no text content; its value lives in the content
+# attribute, so select that attribute explicitly.
+author: //meta[@name='author']/@content
+# The date is the <span> that shares a parent with the calendar icon.
+date: //i[@class='fa fa-calendar']/../span
+title: //div[@class='page-header']/h1
+
+test_url: https://truongtx.me/2014/04/20/emacs-javascript-completion-and-refactoring/
-title: //h1
-author: //*[@class='byline']
-date: substring-after(//*[@class='pubdatetime'], 'Published: ')
-body: //*[@class='body-block']
-test_url: http://utdailybeacon.com/news/2012/oct/8/energy-forum-continues/
\ No newline at end of file
+body: //div[@id='blox-story-text']
+test_url: http://www.utdailybeacon.com/news/article_ccf6d024-0f15-11e5-ae29-9f63598deb81.html
--- /dev/null
+# Bypass the cookie check: treat the cookie-wall "accept" link as the single-page link.
+single_page_link: //a[contains(@href, '/cookiewall/accept.do?')]
+
+title: //h1[@itemprop="headline"]
+# Body: the top-image figure plus the articleBody container.
+body: //figure[contains(@class, 'article__top-image')] | //div[@itemprop="articleBody"]
+
+# Remove media containers that also carry the pull-right class.
+strip: //div[contains(@class, 'media-container') and contains(@class, 'pull-right')]
+
+tidy: no
+prune: no
+
+test_url: http://www.volkskrant.nl/sport/dossier-wereldvoetbalbond-fifa-wankelt~a4042695/
+test_contains: De belangrijkste spil in het
+
+# Feed URL
+test_url: http://www.volkskrant.nl/nieuws/rss.xml
\ No newline at end of file
-title: //h3
-body: //div[@class="content_wysiwyg"]
-test_url: http://www.warnerbros.fr/game-of-thrones-un-junket-vu-de-l-interieur-268.html
\ No newline at end of file
+body: //div[@class="article-body"]
+test_url: https://www.warnerbros.fr/articles/magic-mike-xxl-adam-rodriguez-portrait
body: //div[@id='print_facet']//div[@id='body']
author://meta[@name='DC.creator']/@content
+author://span[@class="pb-byline"]
+author://h3[@property='dc.creator']//a[@rel='author']
title://meta[@name='title']/@content
date://div[contains(@class,'byline')]//span[contains(@class,'published')]/@title
date://meta[@name="DC.date.issued"]/@content
+date://span[contains(@class,"pb-timestamp")]
+date://meta[@name="eomportal-lastUpdate"]/@content
+
strip://div[@class="relative primary-slot padding-top img-border gallery-container photo-wrapper"]
strip://div[@id="wp-column six end"]
strip://div[contains(@class,'hidden')]
# Change gJQAwdJG4U_story.html to gJQAwdJG4U_print.html
single_page_link: concat(substring-before(//link[@rel="canonical"]/@href, "_story.html"), "_print.html")
+if_page_contains: //link[@rel="canonical" and contains(@href, '_story.html')]
# [OLD] Change gJQAwdJG4U_story.html to gJQAwdJG4U_story_print.html
#single_page_link: concat(substring-before(//link[@rel="canonical"]/@href, "_story.html"), "_story_print.html")
--- /dev/null
+# Only author/date selectors and clean-up rules are defined; there is no
+# body or title rule in this config.
+author: //h3[@class="authorName"]
+date: //dd[@class='updated dtstamp']//time
+
+# Remove divs whose class contains 'adspot', all <noscript> elements, and
+# <small> elements nested inside paragraphs.
+strip: //div[contains(@class, "adspot")]
+strip: //noscript
+strip: //p//small
+
--- /dev/null
+# Body: divs whose class is exactly 'main-col' or 'article-image-wide'.
+body: //div[@class='main-col' or @class='article-image-wide']
+title: //h1[@class='article-title']
+# Author is the text before the "|" separator in the author span.
+# substring-before() yields an empty string when the span has no "|", so a
+# second rule falls back to the whole span in that case.
+author: substring-before(//span[@class='author'], "|")
+author: //span[@class='author']
+date: //span[@class='date']
--- /dev/null
+title: //header[contains(@class, "news-article-title")]//h1
+date: //div[@class="news-article-byline"]//time
+author: //h2[@class="news-article-author"]//cite
+
+# The slide show images on westernadvocate did not extract cleanly, so the
+# selector that included them is kept below for reference only and the
+# body is limited to the article text.
+# body: //ul[@class="slides"]//img | //div[contains(@class, "news-article-body")]
+body: //div[contains(@class, "news-article-body")]
+
+# Remove the slide show container (class contains "flexslider").
+strip: //div[contains(@class, "flexslider")]
+
+test_url: http://www.westernadvocate.com.au/story/1532050/roos-accept-ziebell-ban-commentators-do-not/
--- /dev/null
+# Site config for www.wsj.com (see test_url entries below).
+# Several body rules are listed, one per page layout.
+title: //meta[@property="og:title"]/@content
+body: //div[@id='wsj-article-wrap']
+# is this still used?
+body: //div[@id='article_story_body']
+
+author: //h3[@class='byline']/a
+# for slide show content: images of the first slide plus the first
+# paragraph found under the txt_body divs
+body: //ul[@id='imageSlide']//li[@class='firstSlide']//img | (//div[@class='txt_body']//p)[1]
+date: //li[@class='dateStamp']/small
+
+strip_id_or_class: insetFullBracket
+strip_id_or_class: insettipBox
+#strip_id_or_class: legacyInset
+strip_id_or_class: recipeACShopAndBuyText
+
+# Remove <cite> elements inside inset content, anything hidden through an
+# inline 'visibility: hidden;' style, inset content whose class does not
+# contain 'image', and carousels.
+strip: //div[contains(@class, 'insetContent')]//cite
+strip: //*[contains(@style, 'visibility: hidden;')]
+strip: //div[contains(@class, 'insetContent') and not(contains(@class, 'image'))]
+strip: //div[contains(@class, 'carousel')]
+
+prune: no
+tidy: no
+
+test_url: http://www.wsj.com/articles/airasia-flight-8501-tail-recovered-1420878809
+test_contains: Saturday evening that the black boxes
+test_url: http://www.wsj.com/news/articles/SB10001424052702304626304579509100018004342
+test_url: http://www.wsj.com/article/SB10001424052970203363504577185322849515102.html
+# slide show
+test_url: http://www.wsj.com/article/SB10001424052970204791104577110550376458164.html
-body: //div[@class="nxFullTextData"]
-test_url: http://yourerie.com/fulltext?nxd_id=306552
+body: //div[@itemprop="articleBody"]
+test_url: http://www.yourerie.com/news/news-article/d/story/cd-release-party-at-pi-downs/22898/G_gFL3mSQkWH_DW2wLuMOA
+# 2015.07.08 [Marvin Dickhaus] fixed single_page_link
# 2013.10.30 [rezor92] fixed single_page_link
# 2012-12-23 [carlo@...] fixed half-assed headlines in articles, removed inline author profiles, adjusted picture captions
# 2012-03-17 [dkless@...] Cut metadata parts in the beginning and the ends of the content block; copyright entries for pictures removed; Author fixed, not sure if old entries still valid (I left them); Weird problems with some pages addressed (see last section for removing hidden section)
# 2011-08-23 [carlo@...] changed single page link to use print version: page works better, less ambiguity. Related cleanups and simplifications.
# 2011-08-20 [carlo@...] added author, fixed date
-
-single_page_link: //a[@title='Auf einer Seite']
+single_page_link: //a[contains(@href, 'komplettansicht')]
tidy: no
title: //title
strip: //div[@class="copyright"]
#Removes pagination links at the end
strip: //div[@class="pagination"]
+#Removes link to main page at the bottom of some articles (Zur Startseite)
+strip: //a[@href='http://www.zeit.de']
# Fix picture captions
wrap_in(small): //p[@class="caption"]/text()
footnotes: no
test_url: http://www.zeit.de/kultur/film/2012-12/Kurzfilmtag
+test_url: http://www.zeit.de/kultur/2015-07/kapitalismuskritik-selbstberuhigung-armin-nassehi